From 7cd468a3d7dee7d6c92f69a0bb7061ae208ec727 Mon Sep 17 00:00:00 2001
From: Damjan Marion
Date: Mon, 19 Dec 2016 23:05:39 +0100
Subject: Reorganize source tree to use single autotools instance

Change-Id: I7b51f88292e057c6443b12224486f2d0c9f8ae23
Signed-off-by: Damjan Marion
---
 src/vpp/api/api.c                  | 4922 ++++++++++++++++++++++++++++++++++++
 src/vpp/api/api_format.c           |    1 +
 src/vpp/api/api_main.c             |  192 ++
 src/vpp/api/custom_dump.c          | 3139 +++++++++++++++++++++++
 src/vpp/api/gmon.c                 |  319 +++
 src/vpp/api/json_format.c          |  304 +++
 src/vpp/api/json_format.h          |  254 ++
 src/vpp/api/summary_stats_client.c |  302 +++
 src/vpp/api/test_client.c          | 1531 +++++++++++
 src/vpp/api/test_ha.c              |  249 ++
 src/vpp/api/vat.h                  |    1 +
 src/vpp/api/vpe.api                | 2782 ++++++++++++++++++++
 src/vpp/api/vpe_all_api_h.h        |   37 +
 src/vpp/api/vpe_msg_enum.h         |   37 +
 src/vpp/api/vpp_get_metrics.c      |  253 ++
 15 files changed, 14323 insertions(+)
 create mode 100644 src/vpp/api/api.c
 create mode 120000 src/vpp/api/api_format.c
 create mode 100644 src/vpp/api/api_main.c
 create mode 100644 src/vpp/api/custom_dump.c
 create mode 100644 src/vpp/api/gmon.c
 create mode 100644 src/vpp/api/json_format.c
 create mode 100644 src/vpp/api/json_format.h
 create mode 100644 src/vpp/api/summary_stats_client.c
 create mode 100644 src/vpp/api/test_client.c
 create mode 100644 src/vpp/api/test_ha.c
 create mode 120000 src/vpp/api/vat.h
 create mode 100644 src/vpp/api/vpe.api
 create mode 100644 src/vpp/api/vpe_all_api_h.h
 create mode 100644 src/vpp/api/vpe_msg_enum.h
 create mode 100644 src/vpp/api/vpp_get_metrics.c

diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c
new file mode 100644
index 00000000..6289249c
--- /dev/null
+++ b/src/vpp/api/api.c
@@ -0,0 +1,4922 @@
+/*
+ *------------------------------------------------------------------
+ * api.c - message handler registration
+ *
+ * Copyright (c) 2010-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if IPV6SR > 0 +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#undef BIHASH_TYPE +#undef __included_bihash_template_h__ +#include + +#if DPDK > 0 +#include +#endif + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include +#undef vl_printfun +#include +#define foreach_vpe_api_msg \ +_(WANT_OAM_EVENTS, want_oam_events) \ +_(OAM_ADD_DEL, oam_add_del) \ +_(MPLS_ROUTE_ADD_DEL, mpls_route_add_del) \ +_(MPLS_IP_BIND_UNBIND, mpls_ip_bind_unbind) \ +_(IS_ADDRESS_REACHABLE, is_address_reachable) \ +_(SW_INTERFACE_SET_MPLS_ENABLE, sw_interface_set_mpls_enable) \ +_(SW_INTERFACE_SET_VPATH, sw_interface_set_vpath) \ +_(SW_INTERFACE_SET_VXLAN_BYPASS, sw_interface_set_vxlan_bypass) \ +_(SW_INTERFACE_SET_L2_XCONNECT, sw_interface_set_l2_xconnect) \ +_(SW_INTERFACE_SET_L2_BRIDGE, sw_interface_set_l2_bridge) \ +_(SW_INTERFACE_SET_DPDK_HQOS_PIPE, sw_interface_set_dpdk_hqos_pipe) \ +_(SW_INTERFACE_SET_DPDK_HQOS_SUBPORT, sw_interface_set_dpdk_hqos_subport) \ +_(SW_INTERFACE_SET_DPDK_HQOS_TCTBL, sw_interface_set_dpdk_hqos_tctbl) \ +_(BRIDGE_DOMAIN_ADD_DEL, bridge_domain_add_del) \ +_(BRIDGE_DOMAIN_DUMP, bridge_domain_dump) \ +_(BRIDGE_DOMAIN_DETAILS, bridge_domain_details) \ +_(BRIDGE_DOMAIN_SW_IF_DETAILS, bridge_domain_sw_if_details) \ +_(L2FIB_ADD_DEL, l2fib_add_del) \ +_(L2_FLAGS, l2_flags) \ +_(BRIDGE_FLAGS, bridge_flags) \ +_(CREATE_VLAN_SUBIF, create_vlan_subif) \ +_(CREATE_SUBIF, create_subif) \ +_(MPLS_TUNNEL_ADD_DEL, mpls_tunnel_add_del) \ +_(PROXY_ARP_ADD_DEL, proxy_arp_add_del) \ +_(PROXY_ARP_INTFC_ENABLE_DISABLE, proxy_arp_intfc_enable_disable) \ +_(VNET_GET_SUMMARY_STATS, vnet_get_summary_stats) \ +_(RESET_FIB, reset_fib) \ +_(DHCP_PROXY_CONFIG,dhcp_proxy_config) \ +_(DHCP_PROXY_CONFIG_2,dhcp_proxy_config_2) \ +_(DHCP_PROXY_SET_VSS,dhcp_proxy_set_vss) \ +_(DHCP_CLIENT_CONFIG, dhcp_client_config) \ +_(CREATE_LOOPBACK, create_loopback) \ +_(CONTROL_PING, control_ping) \ +_(CLI_REQUEST, cli_request) \ +_(CLI_INBAND, cli_inband) \ +_(SET_ARP_NEIGHBOR_LIMIT, set_arp_neighbor_limit) \ +_(L2_PATCH_ADD_DEL, l2_patch_add_del) \ +_(CLASSIFY_ADD_DEL_TABLE, classify_add_del_table) \ +_(CLASSIFY_ADD_DEL_SESSION, classify_add_del_session) \ +_(CLASSIFY_SET_INTERFACE_IP_TABLE, classify_set_interface_ip_table) \ +_(CLASSIFY_SET_INTERFACE_L2_TABLES, classify_set_interface_l2_tables) \ +_(GET_NODE_INDEX, get_node_index) \ +_(ADD_NODE_NEXT, add_node_next) \ +_(VXLAN_ADD_DEL_TUNNEL, vxlan_add_del_tunnel) \ +_(VXLAN_TUNNEL_DUMP, vxlan_tunnel_dump) \ 
+_(L2_FIB_CLEAR_TABLE, l2_fib_clear_table) \ +_(L2_INTERFACE_EFP_FILTER, l2_interface_efp_filter) \ +_(L2_INTERFACE_VLAN_TAG_REWRITE, l2_interface_vlan_tag_rewrite) \ +_(SHOW_VERSION, show_version) \ +_(L2_FIB_TABLE_DUMP, l2_fib_table_dump) \ +_(L2_FIB_TABLE_ENTRY, l2_fib_table_entry) \ +_(VXLAN_GPE_ADD_DEL_TUNNEL, vxlan_gpe_add_del_tunnel) \ +_(VXLAN_GPE_TUNNEL_DUMP, vxlan_gpe_tunnel_dump) \ +_(INTERFACE_NAME_RENUMBER, interface_name_renumber) \ +_(WANT_IP4_ARP_EVENTS, want_ip4_arp_events) \ +_(WANT_IP6_ND_EVENTS, want_ip6_nd_events) \ +_(INPUT_ACL_SET_INTERFACE, input_acl_set_interface) \ +_(DELETE_LOOPBACK, delete_loopback) \ +_(BD_IP_MAC_ADD_DEL, bd_ip_mac_add_del) \ +_(COP_INTERFACE_ENABLE_DISABLE, cop_interface_enable_disable) \ +_(COP_WHITELIST_ENABLE_DISABLE, cop_whitelist_enable_disable) \ +_(GET_NODE_GRAPH, get_node_graph) \ +_(IOAM_ENABLE, ioam_enable) \ +_(IOAM_DISABLE, ioam_disable) \ +_(SR_MULTICAST_MAP_ADD_DEL, sr_multicast_map_add_del) \ +_(POLICER_ADD_DEL, policer_add_del) \ +_(POLICER_DUMP, policer_dump) \ +_(POLICER_CLASSIFY_SET_INTERFACE, policer_classify_set_interface) \ +_(POLICER_CLASSIFY_DUMP, policer_classify_dump) \ +_(MPLS_TUNNEL_DUMP, mpls_tunnel_dump) \ +_(MPLS_TUNNEL_DETAILS, mpls_tunnel_details) \ +_(MPLS_FIB_DUMP, mpls_fib_dump) \ +_(MPLS_FIB_DETAILS, mpls_fib_details) \ +_(CLASSIFY_TABLE_IDS,classify_table_ids) \ +_(CLASSIFY_TABLE_BY_INTERFACE, classify_table_by_interface) \ +_(CLASSIFY_TABLE_INFO,classify_table_info) \ +_(CLASSIFY_SESSION_DUMP,classify_session_dump) \ +_(CLASSIFY_SESSION_DETAILS,classify_session_details) \ +_(SET_IPFIX_EXPORTER, set_ipfix_exporter) \ +_(IPFIX_EXPORTER_DUMP, ipfix_exporter_dump) \ +_(SET_IPFIX_CLASSIFY_STREAM, set_ipfix_classify_stream) \ +_(IPFIX_CLASSIFY_STREAM_DUMP, ipfix_classify_stream_dump) \ +_(IPFIX_CLASSIFY_TABLE_ADD_DEL, ipfix_classify_table_add_del) \ +_(IPFIX_CLASSIFY_TABLE_DUMP, ipfix_classify_table_dump) \ +_(GET_NEXT_INDEX, get_next_index) \ +_(PG_CREATE_INTERFACE, pg_create_interface) \ +_(PG_CAPTURE, pg_capture) \ +_(PG_ENABLE_DISABLE, pg_enable_disable) \ +_(IP_SOURCE_AND_PORT_RANGE_CHECK_ADD_DEL, \ + ip_source_and_port_range_check_add_del) \ +_(IP_SOURCE_AND_PORT_RANGE_CHECK_INTERFACE_ADD_DEL, \ + ip_source_and_port_range_check_interface_add_del) \ +_(DELETE_SUBIF, delete_subif) \ +_(L2_INTERFACE_PBB_TAG_REWRITE, l2_interface_pbb_tag_rewrite) \ +_(PUNT, punt) \ +_(FLOW_CLASSIFY_SET_INTERFACE, flow_classify_set_interface) \ +_(FLOW_CLASSIFY_DUMP, flow_classify_dump) \ +_(FEATURE_ENABLE_DISABLE, feature_enable_disable) + +#define QUOTE_(x) #x +#define QUOTE(x) QUOTE_(x) +typedef enum +{ + RESOLVE_IP4_ADD_DEL_ROUTE = 1, + RESOLVE_IP6_ADD_DEL_ROUTE, +} resolve_t; + +static vlib_node_registration_t vpe_resolver_process_node; +vpe_api_main_t vpe_api_main; + +static int arp_change_delete_callback (u32 pool_index, u8 * notused); +static int nd_change_delete_callback (u32 pool_index, u8 * notused); + +/* Clean up all registrations belonging to the indicated client */ +int +vl_api_memclnt_delete_callback (u32 client_index) +{ + vpe_api_main_t *vam = &vpe_api_main; + vpe_client_registration_t *rp; + uword *p; + int stats_memclnt_delete_callback (u32 client_index); + + stats_memclnt_delete_callback (client_index); + +#define _(a) \ + p = hash_get (vam->a##_registration_hash, client_index); \ + if (p) { \ + rp = pool_elt_at_index (vam->a##_registrations, p[0]); \ + pool_put (vam->a##_registrations, rp); \ + hash_unset (vam->a##_registration_hash, client_index); \ + } + foreach_registration_hash; +#undef _ + return 0; 
+} + +pub_sub_handler (oam_events, OAM_EVENTS); + +#define RESOLUTION_EVENT 1 +#define RESOLUTION_PENDING_EVENT 2 +#define IP4_ARP_EVENT 3 +#define IP6_ND_EVENT 4 + +int ip4_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp); + +int ip6_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp); + +void +handle_ip4_arp_event (u32 pool_index) +{ + vpe_api_main_t *vam = &vpe_api_main; + vnet_main_t *vnm = vam->vnet_main; + vlib_main_t *vm = vam->vlib_main; + vl_api_ip4_arp_event_t *event; + vl_api_ip4_arp_event_t *mp; + unix_shared_memory_queue_t *q; + + /* Client can cancel, die, etc. */ + if (pool_is_free_index (vam->arp_events, pool_index)) + return; + + event = pool_elt_at_index (vam->arp_events, pool_index); + + q = vl_api_client_index_to_input_queue (event->client_index); + if (!q) + { + (void) vnet_add_del_ip4_arp_change_event + (vnm, arp_change_delete_callback, + event->pid, &event->address, + vpe_resolver_process_node.index, IP4_ARP_EVENT, + ~0 /* pool index, notused */ , 0 /* is_add */ ); + return; + } + + if (q->cursize < q->maxsize) + { + mp = vl_msg_api_alloc (sizeof (*mp)); + clib_memcpy (mp, event, sizeof (*mp)); + vl_msg_api_send_shmem (q, (u8 *) & mp); + } + else + { + static f64 last_time; + /* + * Throttle syslog msgs. + * It's pretty tempting to just revoke the registration... + */ + if (vlib_time_now (vm) > last_time + 10.0) + { + clib_warning ("arp event for %U to pid %d: queue stuffed!", + format_ip4_address, &event->address, event->pid); + last_time = vlib_time_now (vm); + } + } +} + +void +handle_ip6_nd_event (u32 pool_index) +{ + vpe_api_main_t *vam = &vpe_api_main; + vnet_main_t *vnm = vam->vnet_main; + vlib_main_t *vm = vam->vlib_main; + vl_api_ip6_nd_event_t *event; + vl_api_ip6_nd_event_t *mp; + unix_shared_memory_queue_t *q; + + /* Client can cancel, die, etc. */ + if (pool_is_free_index (vam->nd_events, pool_index)) + return; + + event = pool_elt_at_index (vam->nd_events, pool_index); + + q = vl_api_client_index_to_input_queue (event->client_index); + if (!q) + { + (void) vnet_add_del_ip6_nd_change_event + (vnm, nd_change_delete_callback, + event->pid, &event->address, + vpe_resolver_process_node.index, IP6_ND_EVENT, + ~0 /* pool index, notused */ , 0 /* is_add */ ); + return; + } + + if (q->cursize < q->maxsize) + { + mp = vl_msg_api_alloc (sizeof (*mp)); + clib_memcpy (mp, event, sizeof (*mp)); + vl_msg_api_send_shmem (q, (u8 *) & mp); + } + else + { + static f64 last_time; + /* + * Throttle syslog msgs. + * It's pretty tempting to just revoke the registration... 
+ */ + if (vlib_time_now (vm) > last_time + 10.0) + { + clib_warning ("ip6 nd event for %U to pid %d: queue stuffed!", + format_ip6_address, &event->address, event->pid); + last_time = vlib_time_now (vm); + } + } +} + +static uword +resolver_process (vlib_main_t * vm, + vlib_node_runtime_t * rt, vlib_frame_t * f) +{ + uword event_type; + uword *event_data = 0; + f64 timeout = 100.0; + int i; + + while (1) + { + vlib_process_wait_for_event_or_clock (vm, timeout); + + event_type = vlib_process_get_events (vm, &event_data); + + switch (event_type) + { + case RESOLUTION_PENDING_EVENT: + timeout = 1.0; + break; + + case RESOLUTION_EVENT: + clib_warning ("resolver: BOGUS TYPE"); + break; + + case IP4_ARP_EVENT: + for (i = 0; i < vec_len (event_data); i++) + handle_ip4_arp_event (event_data[i]); + break; + + case IP6_ND_EVENT: + for (i = 0; i < vec_len (event_data); i++) + handle_ip6_nd_event (event_data[i]); + break; + + case ~0: /* timeout */ + break; + } + + vec_reset_length (event_data); + } + return 0; /* or not */ +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (vpe_resolver_process_node,static) = { + .function = resolver_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "vpe-route-resolver-process", +}; +/* *INDENT-ON* */ + +static int +mpls_route_add_del_t_handler (vnet_main_t * vnm, + vl_api_mpls_route_add_del_t * mp) +{ + u32 fib_index, next_hop_fib_index; + mpls_label_t *label_stack = NULL; + int rv, ii, n_labels;; + + fib_prefix_t pfx = { + .fp_len = 21, + .fp_proto = FIB_PROTOCOL_MPLS, + .fp_eos = mp->mr_eos, + .fp_label = ntohl (mp->mr_label), + }; + if (pfx.fp_eos) + { + if (mp->mr_next_hop_proto_is_ip4) + { + pfx.fp_payload_proto = DPO_PROTO_IP4; + } + else + { + pfx.fp_payload_proto = DPO_PROTO_IP6; + } + } + else + { + pfx.fp_payload_proto = DPO_PROTO_MPLS; + } + + rv = add_del_route_check (FIB_PROTOCOL_MPLS, + mp->mr_table_id, + mp->mr_next_hop_sw_if_index, + dpo_proto_to_fib (pfx.fp_payload_proto), + mp->mr_next_hop_table_id, + mp->mr_create_table_if_needed, + &fib_index, &next_hop_fib_index); + + if (0 != rv) + return (rv); + + ip46_address_t nh; + memset (&nh, 0, sizeof (nh)); + + if (mp->mr_next_hop_proto_is_ip4) + memcpy (&nh.ip4, mp->mr_next_hop, sizeof (nh.ip4)); + else + memcpy (&nh.ip6, mp->mr_next_hop, sizeof (nh.ip6)); + + n_labels = mp->mr_next_hop_n_out_labels; + if (n_labels == 0) + ; + else if (1 == n_labels) + vec_add1 (label_stack, ntohl (mp->mr_next_hop_out_label_stack[0])); + else + { + vec_validate (label_stack, n_labels - 1); + for (ii = 0; ii < n_labels; ii++) + label_stack[ii] = ntohl (mp->mr_next_hop_out_label_stack[ii]); + } + + return (add_del_route_t_handler (mp->mr_is_multipath, mp->mr_is_add, 0, // mp->is_drop, + 0, // mp->is_unreach, + 0, // mp->is_prohibit, + 0, // mp->is_local, + mp->mr_is_classify, + mp->mr_classify_table_index, + mp->mr_is_resolve_host, + mp->mr_is_resolve_attached, + fib_index, &pfx, + mp->mr_next_hop_proto_is_ip4, + &nh, ntohl (mp->mr_next_hop_sw_if_index), + next_hop_fib_index, + mp->mr_next_hop_weight, + ntohl (mp->mr_next_hop_via_label), + label_stack)); +} + +void +vl_api_mpls_route_add_del_t_handler (vl_api_mpls_route_add_del_t * mp) +{ + vl_api_mpls_route_add_del_reply_t *rmp; + vnet_main_t *vnm; + int rv; + + vnm = vnet_get_main (); + vnm->api_errno = 0; + + rv = mpls_route_add_del_t_handler (vnm, mp); + + rv = (rv == 0) ? 
vnm->api_errno : rv; + + REPLY_MACRO (VL_API_MPLS_ROUTE_ADD_DEL_REPLY); +} + +static int +mpls_ip_bind_unbind_handler (vnet_main_t * vnm, + vl_api_mpls_ip_bind_unbind_t * mp) +{ + u32 mpls_fib_index, ip_fib_index; + + mpls_fib_index = + fib_table_find (FIB_PROTOCOL_MPLS, ntohl (mp->mb_mpls_table_id)); + + if (~0 == mpls_fib_index) + { + if (mp->mb_create_table_if_needed) + { + mpls_fib_index = + fib_table_find_or_create_and_lock (FIB_PROTOCOL_MPLS, + ntohl (mp->mb_mpls_table_id)); + } + else + return VNET_API_ERROR_NO_SUCH_FIB; + } + + ip_fib_index = fib_table_find ((mp->mb_is_ip4 ? + FIB_PROTOCOL_IP4 : + FIB_PROTOCOL_IP6), + ntohl (mp->mb_ip_table_id)); + if (~0 == ip_fib_index) + return VNET_API_ERROR_NO_SUCH_FIB; + + fib_prefix_t pfx = { + .fp_len = mp->mb_address_length, + }; + + if (mp->mb_is_ip4) + { + pfx.fp_proto = FIB_PROTOCOL_IP4; + clib_memcpy (&pfx.fp_addr.ip4, mp->mb_address, + sizeof (pfx.fp_addr.ip4)); + } + else + { + pfx.fp_proto = FIB_PROTOCOL_IP6; + clib_memcpy (&pfx.fp_addr.ip6, mp->mb_address, + sizeof (pfx.fp_addr.ip6)); + } + + if (mp->mb_is_bind) + fib_table_entry_local_label_add (ip_fib_index, &pfx, + ntohl (mp->mb_label)); + else + fib_table_entry_local_label_remove (ip_fib_index, &pfx, + ntohl (mp->mb_label)); + + return (0); +} + +void +vl_api_mpls_ip_bind_unbind_t_handler (vl_api_mpls_ip_bind_unbind_t * mp) +{ + vl_api_mpls_route_add_del_reply_t *rmp; + vnet_main_t *vnm; + int rv; + + vnm = vnet_get_main (); + vnm->api_errno = 0; + + rv = mpls_ip_bind_unbind_handler (vnm, mp); + + rv = (rv == 0) ? vnm->api_errno : rv; + + REPLY_MACRO (VL_API_MPLS_ROUTE_ADD_DEL_REPLY); +} + +static void +vl_api_sw_interface_set_vpath_t_handler (vl_api_sw_interface_set_vpath_t * mp) +{ + vl_api_sw_interface_set_vpath_reply_t *rmp; + int rv = 0; + u32 sw_if_index = ntohl (mp->sw_if_index); + + VALIDATE_SW_IF_INDEX (mp); + + l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_VPATH, mp->enable); + vnet_feature_enable_disable ("ip4-unicast", "vpath-input-ip4", + sw_if_index, mp->enable, 0, 0); + vnet_feature_enable_disable ("ip4-multicast", "vpath-input-ip4", + sw_if_index, mp->enable, 0, 0); + vnet_feature_enable_disable ("ip6-unicast", "vpath-input-ip6", + sw_if_index, mp->enable, 0, 0); + vnet_feature_enable_disable ("ip6-multicast", "vpath-input-ip6", + sw_if_index, mp->enable, 0, 0); + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_SW_INTERFACE_SET_VPATH_REPLY); +} + +static void + vl_api_sw_interface_set_vxlan_bypass_t_handler + (vl_api_sw_interface_set_vxlan_bypass_t * mp) +{ + vl_api_sw_interface_set_vxlan_bypass_reply_t *rmp; + int rv = 0; + u32 sw_if_index = ntohl (mp->sw_if_index); + + VALIDATE_SW_IF_INDEX (mp); + + if (mp->is_ipv6) + { + /* not yet implemented */ + } + else + vnet_feature_enable_disable ("ip4-unicast", "ip4-vxlan-bypass", + sw_if_index, mp->enable, 0, 0); + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_SW_INTERFACE_SET_VXLAN_BYPASS_REPLY); +} + +static void + vl_api_sw_interface_set_l2_xconnect_t_handler + (vl_api_sw_interface_set_l2_xconnect_t * mp) +{ + vl_api_sw_interface_set_l2_xconnect_reply_t *rmp; + int rv = 0; + u32 rx_sw_if_index = ntohl (mp->rx_sw_if_index); + u32 tx_sw_if_index = ntohl (mp->tx_sw_if_index); + vlib_main_t *vm = vlib_get_main (); + vnet_main_t *vnm = vnet_get_main (); + + VALIDATE_RX_SW_IF_INDEX (mp); + + if (mp->enable) + { + VALIDATE_TX_SW_IF_INDEX (mp); + rv = set_int_l2_mode (vm, vnm, MODE_L2_XC, + rx_sw_if_index, 0, 0, 0, tx_sw_if_index); + } + else + { + rv = set_int_l2_mode (vm, vnm, MODE_L3, rx_sw_if_index, 0, 0, 
0, 0); + } + + BAD_RX_SW_IF_INDEX_LABEL; + BAD_TX_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_SW_INTERFACE_SET_L2_XCONNECT_REPLY); +} + +static void + vl_api_sw_interface_set_l2_bridge_t_handler + (vl_api_sw_interface_set_l2_bridge_t * mp) +{ + bd_main_t *bdm = &bd_main; + vl_api_sw_interface_set_l2_bridge_reply_t *rmp; + int rv = 0; + u32 rx_sw_if_index = ntohl (mp->rx_sw_if_index); + u32 bd_id = ntohl (mp->bd_id); + u32 bd_index; + u32 bvi = mp->bvi; + u8 shg = mp->shg; + vlib_main_t *vm = vlib_get_main (); + vnet_main_t *vnm = vnet_get_main (); + + VALIDATE_RX_SW_IF_INDEX (mp); + + bd_index = bd_find_or_add_bd_index (bdm, bd_id); + + if (mp->enable) + { + //VALIDATE_TX_SW_IF_INDEX(mp); + rv = set_int_l2_mode (vm, vnm, MODE_L2_BRIDGE, + rx_sw_if_index, bd_index, bvi, shg, 0); + } + else + { + rv = set_int_l2_mode (vm, vnm, MODE_L3, rx_sw_if_index, 0, 0, 0, 0); + } + + BAD_RX_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_SW_INTERFACE_SET_L2_BRIDGE_REPLY); +} + +static void + vl_api_sw_interface_set_dpdk_hqos_pipe_t_handler + (vl_api_sw_interface_set_dpdk_hqos_pipe_t * mp) +{ + vl_api_sw_interface_set_dpdk_hqos_pipe_reply_t *rmp; + int rv = 0; + +#if DPDK > 0 + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd; + + u32 sw_if_index = ntohl (mp->sw_if_index); + u32 subport = ntohl (mp->subport); + u32 pipe = ntohl (mp->pipe); + u32 profile = ntohl (mp->profile); + vnet_hw_interface_t *hw; + + VALIDATE_SW_IF_INDEX (mp); + + /* hw_if & dpdk device */ + hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); + + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + rv = rte_sched_pipe_config (xd->hqos_ht->hqos, subport, pipe, profile); + + BAD_SW_IF_INDEX_LABEL; +#else + clib_warning ("setting HQoS pipe parameters without DPDK not implemented"); + rv = VNET_API_ERROR_UNIMPLEMENTED; +#endif /* DPDK */ + + REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_PIPE_REPLY); +} + +static void + vl_api_sw_interface_set_dpdk_hqos_subport_t_handler + (vl_api_sw_interface_set_dpdk_hqos_subport_t * mp) +{ + vl_api_sw_interface_set_dpdk_hqos_subport_reply_t *rmp; + int rv = 0; + +#if DPDK > 0 + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd; + struct rte_sched_subport_params p; + + u32 sw_if_index = ntohl (mp->sw_if_index); + u32 subport = ntohl (mp->subport); + p.tb_rate = ntohl (mp->tb_rate); + p.tb_size = ntohl (mp->tb_size); + p.tc_rate[0] = ntohl (mp->tc_rate[0]); + p.tc_rate[1] = ntohl (mp->tc_rate[1]); + p.tc_rate[2] = ntohl (mp->tc_rate[2]); + p.tc_rate[3] = ntohl (mp->tc_rate[3]); + p.tc_period = ntohl (mp->tc_period); + + vnet_hw_interface_t *hw; + + VALIDATE_SW_IF_INDEX (mp); + + /* hw_if & dpdk device */ + hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); + + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + rv = rte_sched_subport_config (xd->hqos_ht->hqos, subport, &p); + + BAD_SW_IF_INDEX_LABEL; +#else + clib_warning + ("setting HQoS subport parameters without DPDK not implemented"); + rv = VNET_API_ERROR_UNIMPLEMENTED; +#endif /* DPDK */ + + REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_SUBPORT_REPLY); +} + +static void + vl_api_sw_interface_set_dpdk_hqos_tctbl_t_handler + (vl_api_sw_interface_set_dpdk_hqos_tctbl_t * mp) +{ + vl_api_sw_interface_set_dpdk_hqos_tctbl_reply_t *rmp; + int rv = 0; + +#if DPDK > 0 + dpdk_main_t *dm = &dpdk_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + dpdk_device_t *xd; + + u32 sw_if_index = ntohl (mp->sw_if_index); + u32 entry = ntohl (mp->entry); + u32 tc = ntohl (mp->tc); + u32 queue = ntohl (mp->queue); + u32 val, i; + + 
vnet_hw_interface_t *hw; + + VALIDATE_SW_IF_INDEX (mp); + + /* hw_if & dpdk device */ + hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); + + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + if (tc >= RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE) + { + clib_warning ("invalid traffic class !!"); + rv = VNET_API_ERROR_INVALID_VALUE; + goto done; + } + if (queue >= RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS) + { + clib_warning ("invalid queue !!"); + rv = VNET_API_ERROR_INVALID_VALUE; + goto done; + } + + /* Detect the set of worker threads */ + uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + + if (p == 0) + { + clib_warning ("worker thread registration AWOL !!"); + rv = VNET_API_ERROR_INVALID_VALUE_2; + goto done; + } + + vlib_thread_registration_t *tr = (vlib_thread_registration_t *) p[0]; + int worker_thread_first = tr->first_index; + int worker_thread_count = tr->count; + + val = tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + queue; + for (i = 0; i < worker_thread_count; i++) + xd->hqos_wt[worker_thread_first + i].hqos_tc_table[entry] = val; + + BAD_SW_IF_INDEX_LABEL; +done: +#else + clib_warning ("setting HQoS DSCP table entry without DPDK not implemented"); + rv = VNET_API_ERROR_UNIMPLEMENTED; +#endif /* DPDK */ + + REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_TCTBL_REPLY); +} + +static void +vl_api_bridge_domain_add_del_t_handler (vl_api_bridge_domain_add_del_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + bd_main_t *bdm = &bd_main; + vl_api_bridge_domain_add_del_reply_t *rmp; + int rv = 0; + u32 enable_flags = 0, disable_flags = 0; + u32 bd_id = ntohl (mp->bd_id); + u32 bd_index; + + if (mp->is_add) + { + bd_index = bd_find_or_add_bd_index (bdm, bd_id); + + if (mp->flood) + enable_flags |= L2_FLOOD; + else + disable_flags |= L2_FLOOD; + + if (mp->uu_flood) + enable_flags |= L2_UU_FLOOD; + else + disable_flags |= L2_UU_FLOOD; + + if (mp->forward) + enable_flags |= L2_FWD; + else + disable_flags |= L2_FWD; + + if (mp->arp_term) + enable_flags |= L2_ARP_TERM; + else + disable_flags |= L2_ARP_TERM; + + if (mp->learn) + enable_flags |= L2_LEARN; + else + disable_flags |= L2_LEARN; + + if (enable_flags) + bd_set_flags (vm, bd_index, enable_flags, 1 /* enable */ ); + + if (disable_flags) + bd_set_flags (vm, bd_index, disable_flags, 0 /* disable */ ); + + bd_set_mac_age (vm, bd_index, mp->mac_age); + } + else + rv = bd_delete_bd_index (bdm, bd_id); + + REPLY_MACRO (VL_API_BRIDGE_DOMAIN_ADD_DEL_REPLY); +} + +static void +vl_api_bridge_domain_details_t_handler (vl_api_bridge_domain_details_t * mp) +{ + clib_warning ("BUG"); +} + +static void + vl_api_bridge_domain_sw_if_details_t_handler + (vl_api_bridge_domain_sw_if_details_t * mp) +{ + clib_warning ("BUG"); +} + +static void +send_bridge_domain_details (unix_shared_memory_queue_t * q, + l2_bridge_domain_t * bd_config, + u32 n_sw_ifs, u32 context) +{ + vl_api_bridge_domain_details_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_BRIDGE_DOMAIN_DETAILS); + mp->bd_id = ntohl (bd_config->bd_id); + mp->flood = bd_feature_flood (bd_config); + mp->uu_flood = bd_feature_uu_flood (bd_config); + mp->forward = bd_feature_forward (bd_config); + mp->learn = bd_feature_learn (bd_config); + mp->arp_term = bd_feature_arp_term (bd_config); + mp->bvi_sw_if_index = ntohl (bd_config->bvi_sw_if_index); + mp->mac_age = bd_config->mac_age; + mp->n_sw_ifs = ntohl (n_sw_ifs); + mp->context = context; + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +static void 
+send_bd_sw_if_details (l2input_main_t * l2im, + unix_shared_memory_queue_t * q, + l2_flood_member_t * member, u32 bd_id, u32 context) +{ + vl_api_bridge_domain_sw_if_details_t *mp; + l2_input_config_t *input_cfg; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_BRIDGE_DOMAIN_SW_IF_DETAILS); + mp->bd_id = ntohl (bd_id); + mp->sw_if_index = ntohl (member->sw_if_index); + input_cfg = vec_elt_at_index (l2im->configs, member->sw_if_index); + mp->shg = input_cfg->shg; + mp->context = context; + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +static void +vl_api_bridge_domain_dump_t_handler (vl_api_bridge_domain_dump_t * mp) +{ + bd_main_t *bdm = &bd_main; + l2input_main_t *l2im = &l2input_main; + unix_shared_memory_queue_t *q; + l2_bridge_domain_t *bd_config; + u32 bd_id, bd_index; + u32 end; + + q = vl_api_client_index_to_input_queue (mp->client_index); + + if (q == 0) + return; + + bd_id = ntohl (mp->bd_id); + + bd_index = (bd_id == ~0) ? 0 : bd_find_or_add_bd_index (bdm, bd_id); + end = (bd_id == ~0) ? vec_len (l2im->bd_configs) : bd_index + 1; + for (; bd_index < end; bd_index++) + { + bd_config = l2input_bd_config_from_index (l2im, bd_index); + /* skip dummy bd_id 0 */ + if (bd_config && (bd_config->bd_id > 0)) + { + u32 n_sw_ifs; + l2_flood_member_t *m; + + n_sw_ifs = vec_len (bd_config->members); + send_bridge_domain_details (q, bd_config, n_sw_ifs, mp->context); + + vec_foreach (m, bd_config->members) + { + send_bd_sw_if_details (l2im, q, m, bd_config->bd_id, mp->context); + } + } + } +} + +static void +vl_api_l2fib_add_del_t_handler (vl_api_l2fib_add_del_t * mp) +{ + bd_main_t *bdm = &bd_main; + l2input_main_t *l2im = &l2input_main; + vl_api_l2fib_add_del_reply_t *rmp; + int rv = 0; + u64 mac = 0; + u32 sw_if_index = ntohl (mp->sw_if_index); + u32 bd_id = ntohl (mp->bd_id); + u32 bd_index; + u32 static_mac; + u32 filter_mac; + u32 bvi_mac; + uword *p; + + mac = mp->mac; + + p = hash_get (bdm->bd_index_by_bd_id, bd_id); + if (!p) + { + rv = VNET_API_ERROR_NO_SUCH_ENTRY; + goto bad_sw_if_index; + } + bd_index = p[0]; + + if (mp->is_add) + { + filter_mac = mp->filter_mac ? 1 : 0; + if (filter_mac == 0) + { + VALIDATE_SW_IF_INDEX (mp); + if (vec_len (l2im->configs) <= sw_if_index) + { + rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; + goto bad_sw_if_index; + } + else + { + l2_input_config_t *config; + config = vec_elt_at_index (l2im->configs, sw_if_index); + if (config->bridge == 0) + { + rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; + goto bad_sw_if_index; + } + } + } + static_mac = mp->static_mac ? 1 : 0; + bvi_mac = mp->bvi_mac ? 
1 : 0; + l2fib_add_entry (mac, bd_index, sw_if_index, static_mac, filter_mac, + bvi_mac); + } + else + { + l2fib_del_entry (mac, bd_index); + } + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_L2FIB_ADD_DEL_REPLY); +} + +static void +vl_api_l2_flags_t_handler (vl_api_l2_flags_t * mp) +{ + vl_api_l2_flags_reply_t *rmp; + int rv = 0; + u32 sw_if_index = ntohl (mp->sw_if_index); + u32 flags = ntohl (mp->feature_bitmap); + u32 rbm = 0; + + VALIDATE_SW_IF_INDEX (mp); + +#define _(a,b) \ + if (flags & L2INPUT_FEAT_ ## a) \ + rbm = l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_ ## a, mp->is_set); + foreach_l2input_feat; +#undef _ + + BAD_SW_IF_INDEX_LABEL; + + /* *INDENT-OFF* */ + REPLY_MACRO2(VL_API_L2_FLAGS_REPLY, + ({ + rmp->resulting_feature_bitmap = ntohl(rbm); + })); + /* *INDENT-ON* */ +} + +static void +vl_api_bridge_flags_t_handler (vl_api_bridge_flags_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + bd_main_t *bdm = &bd_main; + vl_api_bridge_flags_reply_t *rmp; + int rv = 0; + u32 bd_id = ntohl (mp->bd_id); + u32 bd_index; + u32 flags = ntohl (mp->feature_bitmap); + uword *p; + + p = hash_get (bdm->bd_index_by_bd_id, bd_id); + if (p == 0) + { + rv = VNET_API_ERROR_NO_SUCH_ENTRY; + goto out; + } + + bd_index = p[0]; + + bd_set_flags (vm, bd_index, flags, mp->is_set); + +out: + /* *INDENT-OFF* */ + REPLY_MACRO2(VL_API_BRIDGE_FLAGS_REPLY, + ({ + rmp->resulting_feature_bitmap = ntohl(flags); + })); + /* *INDENT-ON* */ +} + +static void +vl_api_bd_ip_mac_add_del_t_handler (vl_api_bd_ip_mac_add_del_t * mp) +{ + bd_main_t *bdm = &bd_main; + vl_api_bd_ip_mac_add_del_reply_t *rmp; + int rv = 0; + u32 bd_id = ntohl (mp->bd_id); + u32 bd_index; + uword *p; + + p = hash_get (bdm->bd_index_by_bd_id, bd_id); + if (p == 0) + { + rv = VNET_API_ERROR_NO_SUCH_ENTRY; + goto out; + } + + bd_index = p[0]; + if (bd_add_del_ip_mac (bd_index, mp->ip_address, + mp->mac_address, mp->is_ipv6, mp->is_add)) + rv = VNET_API_ERROR_UNSPECIFIED; + +out: + REPLY_MACRO (VL_API_BD_IP_MAC_ADD_DEL_REPLY); +} + +static void +vl_api_create_vlan_subif_t_handler (vl_api_create_vlan_subif_t * mp) +{ + vl_api_create_vlan_subif_reply_t *rmp; + vnet_main_t *vnm = vnet_get_main (); + u32 hw_if_index, sw_if_index = (u32) ~ 0; + vnet_hw_interface_t *hi; + int rv = 0; + u32 id; + vnet_sw_interface_t template; + uword *p; + vnet_interface_main_t *im = &vnm->interface_main; + u64 sup_and_sub_key; + u64 *kp; + unix_shared_memory_queue_t *q; + clib_error_t *error; + + VALIDATE_SW_IF_INDEX (mp); + + hw_if_index = ntohl (mp->sw_if_index); + hi = vnet_get_hw_interface (vnm, hw_if_index); + + id = ntohl (mp->vlan_id); + if (id == 0 || id > 4095) + { + rv = VNET_API_ERROR_INVALID_VLAN; + goto out; + } + + sup_and_sub_key = ((u64) (hi->sw_if_index) << 32) | (u64) id; + + p = hash_get_mem (im->sw_if_index_by_sup_and_sub, &sup_and_sub_key); + if (p) + { + rv = VNET_API_ERROR_VLAN_ALREADY_EXISTS; + goto out; + } + + kp = clib_mem_alloc (sizeof (*kp)); + *kp = sup_and_sub_key; + + memset (&template, 0, sizeof (template)); + template.type = VNET_SW_INTERFACE_TYPE_SUB; + template.sup_sw_if_index = hi->sw_if_index; + template.sub.id = id; + template.sub.eth.raw_flags = 0; + template.sub.eth.flags.one_tag = 1; + template.sub.eth.outer_vlan_id = id; + template.sub.eth.flags.exact_match = 1; + + error = vnet_create_sw_interface (vnm, &template, &sw_if_index); + if (error) + { + clib_error_report (error); + rv = VNET_API_ERROR_INVALID_REGISTRATION; + goto out; + } + hash_set (hi->sub_interface_sw_if_index_by_id, id, sw_if_index); + hash_set_mem 
(im->sw_if_index_by_sup_and_sub, kp, sw_if_index); + + BAD_SW_IF_INDEX_LABEL; + +out: + q = vl_api_client_index_to_input_queue (mp->client_index); + if (!q) + return; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_CREATE_VLAN_SUBIF_REPLY); + rmp->context = mp->context; + rmp->retval = ntohl (rv); + rmp->sw_if_index = ntohl (sw_if_index); + vl_msg_api_send_shmem (q, (u8 *) & rmp); +} + +static void +vl_api_create_subif_t_handler (vl_api_create_subif_t * mp) +{ + vl_api_create_subif_reply_t *rmp; + vnet_main_t *vnm = vnet_get_main (); + u32 sw_if_index = ~0; + int rv = 0; + u32 sub_id; + vnet_sw_interface_t *si; + vnet_hw_interface_t *hi; + vnet_sw_interface_t template; + uword *p; + vnet_interface_main_t *im = &vnm->interface_main; + u64 sup_and_sub_key; + u64 *kp; + clib_error_t *error; + + VALIDATE_SW_IF_INDEX (mp); + + si = vnet_get_sup_sw_interface (vnm, ntohl (mp->sw_if_index)); + hi = vnet_get_sup_hw_interface (vnm, ntohl (mp->sw_if_index)); + + if (hi->bond_info == VNET_HW_INTERFACE_BOND_INFO_SLAVE) + { + rv = VNET_API_ERROR_BOND_SLAVE_NOT_ALLOWED; + goto out; + } + + sw_if_index = si->sw_if_index; + sub_id = ntohl (mp->sub_id); + + sup_and_sub_key = ((u64) (sw_if_index) << 32) | (u64) sub_id; + + p = hash_get_mem (im->sw_if_index_by_sup_and_sub, &sup_and_sub_key); + if (p) + { + if (CLIB_DEBUG > 0) + clib_warning ("sup sw_if_index %d, sub id %d already exists\n", + sw_if_index, sub_id); + rv = VNET_API_ERROR_SUBIF_ALREADY_EXISTS; + goto out; + } + + kp = clib_mem_alloc (sizeof (*kp)); + *kp = sup_and_sub_key; + + memset (&template, 0, sizeof (template)); + template.type = VNET_SW_INTERFACE_TYPE_SUB; + template.sup_sw_if_index = sw_if_index; + template.sub.id = sub_id; + template.sub.eth.flags.no_tags = mp->no_tags; + template.sub.eth.flags.one_tag = mp->one_tag; + template.sub.eth.flags.two_tags = mp->two_tags; + template.sub.eth.flags.dot1ad = mp->dot1ad; + template.sub.eth.flags.exact_match = mp->exact_match; + template.sub.eth.flags.default_sub = mp->default_sub; + template.sub.eth.flags.outer_vlan_id_any = mp->outer_vlan_id_any; + template.sub.eth.flags.inner_vlan_id_any = mp->inner_vlan_id_any; + template.sub.eth.outer_vlan_id = ntohs (mp->outer_vlan_id); + template.sub.eth.inner_vlan_id = ntohs (mp->inner_vlan_id); + + error = vnet_create_sw_interface (vnm, &template, &sw_if_index); + if (error) + { + clib_error_report (error); + rv = VNET_API_ERROR_SUBIF_CREATE_FAILED; + goto out; + } + + hash_set (hi->sub_interface_sw_if_index_by_id, sub_id, sw_if_index); + hash_set_mem (im->sw_if_index_by_sup_and_sub, kp, sw_if_index); + + BAD_SW_IF_INDEX_LABEL; + +out: + + /* *INDENT-OFF* */ + REPLY_MACRO2(VL_API_CREATE_SUBIF_REPLY, + ({ + rmp->sw_if_index = ntohl(sw_if_index); + })); + /* *INDENT-ON* */ +} + +static void +vl_api_mpls_tunnel_add_del_t_handler (vl_api_mpls_tunnel_add_del_t * mp) +{ + vl_api_mpls_tunnel_add_del_reply_t *rmp; + int rv = 0; + stats_main_t *sm = &stats_main; + u32 tunnel_sw_if_index; + int ii; + + dslock (sm, 1 /* release hint */ , 5 /* tag */ ); + + if (mp->mt_is_add) + { + fib_route_path_t rpath, *rpaths = NULL; + mpls_label_t *label_stack = NULL; + + memset (&rpath, 0, sizeof (rpath)); + + if (mp->mt_next_hop_proto_is_ip4) + { + rpath.frp_proto = FIB_PROTOCOL_IP4; + clib_memcpy (&rpath.frp_addr.ip4, + mp->mt_next_hop, sizeof (rpath.frp_addr.ip4)); + } + else + { + rpath.frp_proto = FIB_PROTOCOL_IP6; + clib_memcpy (&rpath.frp_addr.ip6, + mp->mt_next_hop, sizeof (rpath.frp_addr.ip6)); + } + rpath.frp_sw_if_index = ntohl 
(mp->mt_next_hop_sw_if_index); + + for (ii = 0; ii < mp->mt_next_hop_n_out_labels; ii++) + vec_add1 (label_stack, ntohl (mp->mt_next_hop_out_label_stack[ii])); + + vec_add1 (rpaths, rpath); + + vnet_mpls_tunnel_add (rpaths, label_stack, + mp->mt_l2_only, &tunnel_sw_if_index); + vec_free (rpaths); + vec_free (label_stack); + } + else + { + tunnel_sw_if_index = ntohl (mp->mt_sw_if_index); + vnet_mpls_tunnel_del (tunnel_sw_if_index); + } + + dsunlock (sm); + + /* *INDENT-OFF* */ + REPLY_MACRO2(VL_API_MPLS_TUNNEL_ADD_DEL_REPLY, + ({ + rmp->sw_if_index = ntohl(tunnel_sw_if_index); + })); + /* *INDENT-ON* */ +} + +static void +vl_api_proxy_arp_add_del_t_handler (vl_api_proxy_arp_add_del_t * mp) +{ + vl_api_proxy_arp_add_del_reply_t *rmp; + u32 fib_index; + int rv; + ip4_main_t *im = &ip4_main; + stats_main_t *sm = &stats_main; + int vnet_proxy_arp_add_del (ip4_address_t * lo_addr, + ip4_address_t * hi_addr, + u32 fib_index, int is_del); + uword *p; + + dslock (sm, 1 /* release hint */ , 6 /* tag */ ); + + p = hash_get (im->fib_index_by_table_id, ntohl (mp->vrf_id)); + + if (!p) + { + rv = VNET_API_ERROR_NO_SUCH_FIB; + goto out; + } + + fib_index = p[0]; + + rv = vnet_proxy_arp_add_del ((ip4_address_t *) mp->low_address, + (ip4_address_t *) mp->hi_address, + fib_index, mp->is_add == 0); + +out: + dsunlock (sm); + REPLY_MACRO (VL_API_PROXY_ARP_ADD_DEL_REPLY); +} + +static void + vl_api_proxy_arp_intfc_enable_disable_t_handler + (vl_api_proxy_arp_intfc_enable_disable_t * mp) +{ + int rv = 0; + vnet_main_t *vnm = vnet_get_main (); + vl_api_proxy_arp_intfc_enable_disable_reply_t *rmp; + vnet_sw_interface_t *si; + u32 sw_if_index; + + VALIDATE_SW_IF_INDEX (mp); + + sw_if_index = ntohl (mp->sw_if_index); + + if (pool_is_free_index (vnm->interface_main.sw_interfaces, sw_if_index)) + { + rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; + goto out; + } + + si = vnet_get_sw_interface (vnm, sw_if_index); + + ASSERT (si); + + if (mp->enable_disable) + si->flags |= VNET_SW_INTERFACE_FLAG_PROXY_ARP; + else + si->flags &= ~VNET_SW_INTERFACE_FLAG_PROXY_ARP; + + BAD_SW_IF_INDEX_LABEL; + +out: + REPLY_MACRO (VL_API_PROXY_ARP_INTFC_ENABLE_DISABLE_REPLY); +} + +static void +vl_api_is_address_reachable_t_handler (vl_api_is_address_reachable_t * mp) +{ +#if 0 + vpe_main_t *rm = &vpe_main; + ip4_main_t *im4 = &ip4_main; + ip6_main_t *im6 = &ip6_main; + ip_lookup_main_t *lm; + union + { + ip4_address_t ip4; + ip6_address_t ip6; + } addr; + u32 adj_index, sw_if_index; + vl_api_is_address_reachable_t *rmp; + ip_adjacency_t *adj; + unix_shared_memory_queue_t *q; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (!q) + { + increment_missing_api_client_counter (rm->vlib_main); + return; + } + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + clib_memcpy (rmp, mp, sizeof (*rmp)); + + sw_if_index = mp->next_hop_sw_if_index; + clib_memcpy (&addr, mp->address, sizeof (addr)); + if (mp->is_ipv6) + { + lm = &im6->lookup_main; + adj_index = ip6_fib_lookup (im6, sw_if_index, &addr.ip6); + } + else + { + lm = &im4->lookup_main; + // FIXME NOT an ADJ + adj_index = ip4_fib_lookup (im4, sw_if_index, &addr.ip4); + } + if (adj_index == ~0) + { + rmp->is_error = 1; + goto send; + } + adj = ip_get_adjacency (lm, adj_index); + + if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE + && adj->rewrite_header.sw_if_index == sw_if_index) + { + rmp->is_known = 1; + } + else + { + if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP + && adj->rewrite_header.sw_if_index == sw_if_index) + { + if (mp->is_ipv6) + ip6_probe_neighbor (rm->vlib_main, 
&addr.ip6, sw_if_index); + else + ip4_probe_neighbor (rm->vlib_main, &addr.ip4, sw_if_index); + } + else if (adj->lookup_next_index == IP_LOOKUP_NEXT_DROP) + { + rmp->is_known = 1; + goto send; + } + rmp->is_known = 0; + } + +send: + vl_msg_api_send_shmem (q, (u8 *) & rmp); +#endif +} + +static void + vl_api_sw_interface_set_mpls_enable_t_handler + (vl_api_sw_interface_set_mpls_enable_t * mp) +{ + vl_api_sw_interface_set_mpls_enable_reply_t *rmp; + int rv = 0; + + VALIDATE_SW_IF_INDEX (mp); + + mpls_sw_interface_enable_disable (&mpls_main, + ntohl (mp->sw_if_index), mp->enable); + + BAD_SW_IF_INDEX_LABEL; + REPLY_MACRO (VL_API_SW_INTERFACE_SET_MPLS_ENABLE_REPLY); +} + +void +send_oam_event (oam_target_t * t) +{ + vpe_api_main_t *vam = &vpe_api_main; + unix_shared_memory_queue_t *q; + vpe_client_registration_t *reg; + vl_api_oam_event_t *mp; + + /* *INDENT-OFF* */ + pool_foreach(reg, vam->oam_events_registrations, + ({ + q = vl_api_client_index_to_input_queue (reg->client_index); + if (q) + { + mp = vl_msg_api_alloc (sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_OAM_EVENT); + clib_memcpy (mp->dst_address, &t->dst_address, + sizeof (mp->dst_address)); + mp->state = t->state; + vl_msg_api_send_shmem (q, (u8 *)&mp); + } + })); + /* *INDENT-ON* */ +} + +static void +vl_api_oam_add_del_t_handler (vl_api_oam_add_del_t * mp) +{ + vl_api_oam_add_del_reply_t *rmp; + int rv; + + rv = vpe_oam_add_del_target ((ip4_address_t *) mp->src_address, + (ip4_address_t *) mp->dst_address, + ntohl (mp->vrf_id), (int) (mp->is_add)); + + REPLY_MACRO (VL_API_OAM_ADD_DEL_REPLY); +} + +static void +vl_api_vnet_get_summary_stats_t_handler (vl_api_vnet_get_summary_stats_t * mp) +{ + stats_main_t *sm = &stats_main; + vnet_interface_main_t *im = sm->interface_main; + vl_api_vnet_summary_stats_reply_t *rmp; + vlib_combined_counter_main_t *cm; + vlib_counter_t v; + int i, which; + u64 total_pkts[VLIB_N_RX_TX]; + u64 total_bytes[VLIB_N_RX_TX]; + + unix_shared_memory_queue_t *q = + vl_api_client_index_to_input_queue (mp->client_index); + + if (!q) + return; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_VNET_SUMMARY_STATS_REPLY); + rmp->context = mp->context; + rmp->retval = 0; + + memset (total_pkts, 0, sizeof (total_pkts)); + memset (total_bytes, 0, sizeof (total_bytes)); + + vnet_interface_counter_lock (im); + + vec_foreach (cm, im->combined_sw_if_counters) + { + which = cm - im->combined_sw_if_counters; + + for (i = 0; i < vec_len (cm->maxi); i++) + { + vlib_get_combined_counter (cm, i, &v); + total_pkts[which] += v.packets; + total_bytes[which] += v.bytes; + } + } + vnet_interface_counter_unlock (im); + + rmp->total_pkts[VLIB_RX] = clib_host_to_net_u64 (total_pkts[VLIB_RX]); + rmp->total_bytes[VLIB_RX] = clib_host_to_net_u64 (total_bytes[VLIB_RX]); + rmp->total_pkts[VLIB_TX] = clib_host_to_net_u64 (total_pkts[VLIB_TX]); + rmp->total_bytes[VLIB_TX] = clib_host_to_net_u64 (total_bytes[VLIB_TX]); + rmp->vector_rate = + clib_host_to_net_u64 (vlib_last_vector_length_per_node (sm->vlib_main)); + + vl_msg_api_send_shmem (q, (u8 *) & rmp); +} + +/* *INDENT-OFF* */ +typedef CLIB_PACKED (struct { + ip4_address_t address; + u32 address_length: 6; + u32 index:26; +}) ip4_route_t; +/* *INDENT-ON* */ + +static int +ip4_reset_fib_t_handler (vl_api_reset_fib_t * mp) +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_interface_main_t *im = &vnm->interface_main; + ip4_main_t *im4 = &ip4_main; + static u32 *sw_if_indices_to_shut; + stats_main_t *sm = &stats_main; + fib_table_t *fib_table; + ip4_fib_t *fib; + 
u32 sw_if_index; + int i; + int rv = VNET_API_ERROR_NO_SUCH_FIB; + u32 target_fib_id = ntohl (mp->vrf_id); + + dslock (sm, 1 /* release hint */ , 8 /* tag */ ); + + /* *INDENT-OFF* */ + pool_foreach (fib_table, im4->fibs, + ({ + fib = &fib_table->v4; + vnet_sw_interface_t * si; + + if (fib->table_id != target_fib_id) + continue; + + /* remove any mpls encap/decap labels */ + mpls_fib_reset_labels (fib->table_id); + + /* remove any proxy arps in this fib */ + vnet_proxy_arp_fib_reset (fib->table_id); + + /* Set the flow hash for this fib to the default */ + vnet_set_ip4_flow_hash (fib->table_id, IP_FLOW_HASH_DEFAULT); + + vec_reset_length (sw_if_indices_to_shut); + + /* Shut down interfaces in this FIB / clean out intfc routes */ + pool_foreach (si, im->sw_interfaces, + ({ + u32 sw_if_index = si->sw_if_index; + + if (sw_if_index < vec_len (im4->fib_index_by_sw_if_index) + && (im4->fib_index_by_sw_if_index[si->sw_if_index] == + fib->index)) + vec_add1 (sw_if_indices_to_shut, si->sw_if_index); + })); + + for (i = 0; i < vec_len (sw_if_indices_to_shut); i++) { + sw_if_index = sw_if_indices_to_shut[i]; + // vec_foreach (sw_if_index, sw_if_indices_to_shut) { + + u32 flags = vnet_sw_interface_get_flags (vnm, sw_if_index); + flags &= ~(VNET_SW_INTERFACE_FLAG_ADMIN_UP); + vnet_sw_interface_set_flags (vnm, sw_if_index, flags); + } + + fib_table_flush(fib->index, FIB_PROTOCOL_IP4, FIB_SOURCE_API); + fib_table_flush(fib->index, FIB_PROTOCOL_IP4, FIB_SOURCE_INTERFACE); + + rv = 0; + break; + })); /* pool_foreach (fib) */ + /* *INDENT-ON* */ + + dsunlock (sm); + return rv; +} + +static int +ip6_reset_fib_t_handler (vl_api_reset_fib_t * mp) +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_interface_main_t *im = &vnm->interface_main; + ip6_main_t *im6 = &ip6_main; + stats_main_t *sm = &stats_main; + static u32 *sw_if_indices_to_shut; + fib_table_t *fib_table; + ip6_fib_t *fib; + u32 sw_if_index; + int i; + int rv = VNET_API_ERROR_NO_SUCH_FIB; + u32 target_fib_id = ntohl (mp->vrf_id); + + dslock (sm, 1 /* release hint */ , 9 /* tag */ ); + + /* *INDENT-OFF* */ + pool_foreach (fib_table, im6->fibs, + ({ + vnet_sw_interface_t * si; + fib = &(fib_table->v6); + + if (fib->table_id != target_fib_id) + continue; + + vec_reset_length (sw_if_indices_to_shut); + + /* Shut down interfaces in this FIB / clean out intfc routes */ + pool_foreach (si, im->sw_interfaces, + ({ + if (im6->fib_index_by_sw_if_index[si->sw_if_index] == + fib->index) + vec_add1 (sw_if_indices_to_shut, si->sw_if_index); + })); + + for (i = 0; i < vec_len (sw_if_indices_to_shut); i++) { + sw_if_index = sw_if_indices_to_shut[i]; + // vec_foreach (sw_if_index, sw_if_indices_to_shut) { + + u32 flags = vnet_sw_interface_get_flags (vnm, sw_if_index); + flags &= ~(VNET_SW_INTERFACE_FLAG_ADMIN_UP); + vnet_sw_interface_set_flags (vnm, sw_if_index, flags); + } + + fib_table_flush(fib->index, FIB_PROTOCOL_IP6, FIB_SOURCE_API); + fib_table_flush(fib->index, FIB_PROTOCOL_IP6, FIB_SOURCE_INTERFACE); + + rv = 0; + break; + })); /* pool_foreach (fib) */ + /* *INDENT-ON* */ + + dsunlock (sm); + return rv; +} + +static void +vl_api_reset_fib_t_handler (vl_api_reset_fib_t * mp) +{ + int rv; + vl_api_reset_fib_reply_t *rmp; + + if (mp->is_ipv6) + rv = ip6_reset_fib_t_handler (mp); + else + rv = ip4_reset_fib_t_handler (mp); + + REPLY_MACRO (VL_API_RESET_FIB_REPLY); +} + + +static void +dhcpv4_proxy_config (vl_api_dhcp_proxy_config_t * mp) +{ + vl_api_dhcp_proxy_config_reply_t *rmp; + int rv; + + rv = dhcp_proxy_set_server ((ip4_address_t *) (&mp->dhcp_server), + 
(ip4_address_t *) (&mp->dhcp_src_address), + (u32) ntohl (mp->vrf_id), + (int) mp->insert_circuit_id, + (int) (mp->is_add == 0)); + + REPLY_MACRO (VL_API_DHCP_PROXY_CONFIG_REPLY); +} + + +static void +dhcpv6_proxy_config (vl_api_dhcp_proxy_config_t * mp) +{ + vl_api_dhcp_proxy_config_reply_t *rmp; + int rv = -1; + + rv = dhcpv6_proxy_set_server ((ip6_address_t *) (&mp->dhcp_server), + (ip6_address_t *) (&mp->dhcp_src_address), + (u32) ntohl (mp->vrf_id), + (int) mp->insert_circuit_id, + (int) (mp->is_add == 0)); + + REPLY_MACRO (VL_API_DHCP_PROXY_CONFIG_REPLY); +} + +static void +dhcpv4_proxy_config_2 (vl_api_dhcp_proxy_config_2_t * mp) +{ + vl_api_dhcp_proxy_config_reply_t *rmp; + int rv; + + rv = dhcp_proxy_set_server_2 ((ip4_address_t *) (&mp->dhcp_server), + (ip4_address_t *) (&mp->dhcp_src_address), + (u32) ntohl (mp->rx_vrf_id), + (u32) ntohl (mp->server_vrf_id), + (int) mp->insert_circuit_id, + (int) (mp->is_add == 0)); + + REPLY_MACRO (VL_API_DHCP_PROXY_CONFIG_2_REPLY); +} + + +static void +dhcpv6_proxy_config_2 (vl_api_dhcp_proxy_config_2_t * mp) +{ + vl_api_dhcp_proxy_config_reply_t *rmp; + int rv = -1; + + rv = dhcpv6_proxy_set_server_2 ((ip6_address_t *) (&mp->dhcp_server), + (ip6_address_t *) (&mp->dhcp_src_address), + (u32) ntohl (mp->rx_vrf_id), + (u32) ntohl (mp->server_vrf_id), + (int) mp->insert_circuit_id, + (int) (mp->is_add == 0)); + + REPLY_MACRO (VL_API_DHCP_PROXY_CONFIG_2_REPLY); +} + + +static void +vl_api_dhcp_proxy_set_vss_t_handler (vl_api_dhcp_proxy_set_vss_t * mp) +{ + vl_api_dhcp_proxy_set_vss_reply_t *rmp; + int rv; + if (!mp->is_ipv6) + rv = dhcp_proxy_set_option82_vss (ntohl (mp->tbl_id), + ntohl (mp->oui), + ntohl (mp->fib_id), + (int) mp->is_add == 0); + else + rv = dhcpv6_proxy_set_vss (ntohl (mp->tbl_id), + ntohl (mp->oui), + ntohl (mp->fib_id), (int) mp->is_add == 0); + + REPLY_MACRO (VL_API_DHCP_PROXY_SET_VSS_REPLY); +} + + +static void vl_api_dhcp_proxy_config_t_handler + (vl_api_dhcp_proxy_config_t * mp) +{ + if (mp->is_ipv6 == 0) + dhcpv4_proxy_config (mp); + else + dhcpv6_proxy_config (mp); +} + +static void vl_api_dhcp_proxy_config_2_t_handler + (vl_api_dhcp_proxy_config_2_t * mp) +{ + if (mp->is_ipv6 == 0) + dhcpv4_proxy_config_2 (mp); + else + dhcpv6_proxy_config_2 (mp); +} + +void +dhcp_compl_event_callback (u32 client_index, u32 pid, u8 * hostname, + u8 is_ipv6, u8 * host_address, u8 * router_address, + u8 * host_mac) +{ + unix_shared_memory_queue_t *q; + vl_api_dhcp_compl_event_t *mp; + + q = vl_api_client_index_to_input_queue (client_index); + if (!q) + return; + + mp = vl_msg_api_alloc (sizeof (*mp)); + mp->client_index = client_index; + mp->pid = pid; + mp->is_ipv6 = is_ipv6; + clib_memcpy (&mp->hostname, hostname, vec_len (hostname)); + mp->hostname[vec_len (hostname) + 1] = '\n'; + clib_memcpy (&mp->host_address[0], host_address, 16); + clib_memcpy (&mp->router_address[0], router_address, 16); + + if (NULL != host_mac) + clib_memcpy (&mp->host_mac[0], host_mac, 6); + + mp->_vl_msg_id = ntohs (VL_API_DHCP_COMPL_EVENT); + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +static void vl_api_dhcp_client_config_t_handler + (vl_api_dhcp_client_config_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_dhcp_client_config_reply_t *rmp; + int rv = 0; + + VALIDATE_SW_IF_INDEX (mp); + + rv = dhcp_client_config (vm, ntohl (mp->sw_if_index), + mp->hostname, mp->is_add, mp->client_index, + mp->want_dhcp_event ? 
dhcp_compl_event_callback : + NULL, mp->pid); + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_DHCP_CLIENT_CONFIG_REPLY); +} + +static void +vl_api_create_loopback_t_handler (vl_api_create_loopback_t * mp) +{ + vl_api_create_loopback_reply_t *rmp; + u32 sw_if_index; + int rv; + + rv = vnet_create_loopback_interface (&sw_if_index, mp->mac_address); + + /* *INDENT-OFF* */ + REPLY_MACRO2(VL_API_CREATE_LOOPBACK_REPLY, + ({ + rmp->sw_if_index = ntohl (sw_if_index); + })); + /* *INDENT-ON* */ +} + +static void +vl_api_delete_loopback_t_handler (vl_api_delete_loopback_t * mp) +{ + vl_api_delete_loopback_reply_t *rmp; + u32 sw_if_index; + int rv; + + sw_if_index = ntohl (mp->sw_if_index); + rv = vnet_delete_loopback_interface (sw_if_index); + + REPLY_MACRO (VL_API_DELETE_LOOPBACK_REPLY); +} + +static void +vl_api_control_ping_t_handler (vl_api_control_ping_t * mp) +{ + vl_api_control_ping_reply_t *rmp; + int rv = 0; + + /* *INDENT-OFF* */ + REPLY_MACRO2(VL_API_CONTROL_PING_REPLY, + ({ + rmp->vpe_pid = ntohl (getpid()); + })); + /* *INDENT-ON* */ +} + +static void +shmem_cli_output (uword arg, u8 * buffer, uword buffer_bytes) +{ + u8 **shmem_vecp = (u8 **) arg; + u8 *shmem_vec; + void *oldheap; + api_main_t *am = &api_main; + u32 offset; + + shmem_vec = *shmem_vecp; + + offset = vec_len (shmem_vec); + + pthread_mutex_lock (&am->vlib_rp->mutex); + oldheap = svm_push_data_heap (am->vlib_rp); + + vec_validate (shmem_vec, offset + buffer_bytes - 1); + + clib_memcpy (shmem_vec + offset, buffer, buffer_bytes); + + svm_pop_heap (oldheap); + pthread_mutex_unlock (&am->vlib_rp->mutex); + + *shmem_vecp = shmem_vec; +} + + +static void +vl_api_cli_request_t_handler (vl_api_cli_request_t * mp) +{ + vl_api_cli_reply_t *rp; + unix_shared_memory_queue_t *q; + vlib_main_t *vm = vlib_get_main (); + api_main_t *am = &api_main; + unformat_input_t input; + u8 *shmem_vec = 0; + void *oldheap; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (!q) + return; + + rp = vl_msg_api_alloc (sizeof (*rp)); + rp->_vl_msg_id = ntohs (VL_API_CLI_REPLY); + rp->context = mp->context; + + unformat_init_vector (&input, (u8 *) (uword) mp->cmd_in_shmem); + + vlib_cli_input (vm, &input, shmem_cli_output, (uword) & shmem_vec); + + pthread_mutex_lock (&am->vlib_rp->mutex); + oldheap = svm_push_data_heap (am->vlib_rp); + + vec_add1 (shmem_vec, 0); + + svm_pop_heap (oldheap); + pthread_mutex_unlock (&am->vlib_rp->mutex); + + rp->reply_in_shmem = (uword) shmem_vec; + + vl_msg_api_send_shmem (q, (u8 *) & rp); +} + +static void +inband_cli_output (uword arg, u8 * buffer, uword buffer_bytes) +{ + u8 **mem_vecp = (u8 **) arg; + u8 *mem_vec = *mem_vecp; + u32 offset = vec_len (mem_vec); + + vec_validate (mem_vec, offset + buffer_bytes - 1); + clib_memcpy (mem_vec + offset, buffer, buffer_bytes); + *mem_vecp = mem_vec; +} + +static void +vl_api_cli_inband_t_handler (vl_api_cli_inband_t * mp) +{ + vl_api_cli_inband_reply_t *rmp; + int rv = 0; + unix_shared_memory_queue_t *q; + vlib_main_t *vm = vlib_get_main (); + unformat_input_t input; + u8 *out_vec = 0; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (!q) + return; + + unformat_init_string (&input, (char *) mp->cmd, ntohl (mp->length)); + vlib_cli_input (vm, &input, inband_cli_output, (uword) & out_vec); + + u32 len = vec_len (out_vec); + /* *INDENT-OFF* */ + REPLY_MACRO3(VL_API_CLI_INBAND_REPLY, len, + ({ + rmp->length = htonl (len); + clib_memcpy (rmp->reply, out_vec, len); + })); + /* *INDENT-ON* */ + vec_free (out_vec); +} + +static void 
+vl_api_set_arp_neighbor_limit_t_handler (vl_api_set_arp_neighbor_limit_t * mp) +{ + int rv; + vl_api_set_arp_neighbor_limit_reply_t *rmp; + vnet_main_t *vnm = vnet_get_main (); + clib_error_t *error; + + vnm->api_errno = 0; + + if (mp->is_ipv6) + error = ip6_set_neighbor_limit (ntohl (mp->arp_neighbor_limit)); + else + error = ip4_set_arp_limit (ntohl (mp->arp_neighbor_limit)); + + if (error) + { + clib_error_report (error); + rv = VNET_API_ERROR_UNSPECIFIED; + } + else + { + rv = vnm->api_errno; + } + + REPLY_MACRO (VL_API_SET_ARP_NEIGHBOR_LIMIT_REPLY); +} + +static void vl_api_sr_tunnel_add_del_t_handler + (vl_api_sr_tunnel_add_del_t * mp) +{ +#if IP6SR == 0 + clib_warning ("unimplemented"); +#else + ip6_sr_add_del_tunnel_args_t _a, *a = &_a; + int rv = 0; + vl_api_sr_tunnel_add_del_reply_t *rmp; + ip6_address_t *segments = 0, *seg; + ip6_address_t *tags = 0, *tag; + ip6_address_t *this_address; + int i; + + if (mp->n_segments == 0) + { + rv = -11; + goto out; + } + + memset (a, 0, sizeof (*a)); + a->src_address = (ip6_address_t *) & mp->src_address; + a->dst_address = (ip6_address_t *) & mp->dst_address; + a->dst_mask_width = mp->dst_mask_width; + a->flags_net_byte_order = mp->flags_net_byte_order; + a->is_del = (mp->is_add == 0); + a->rx_table_id = ntohl (mp->outer_vrf_id); + a->tx_table_id = ntohl (mp->inner_vrf_id); + + a->name = format (0, "%s", mp->name); + if (!(vec_len (a->name))) + a->name = 0; + + a->policy_name = format (0, "%s", mp->policy_name); + if (!(vec_len (a->policy_name))) + a->policy_name = 0; + + /* Yank segments and tags out of the API message */ + this_address = (ip6_address_t *) mp->segs_and_tags; + for (i = 0; i < mp->n_segments; i++) + { + vec_add2 (segments, seg, 1); + clib_memcpy (seg->as_u8, this_address->as_u8, sizeof (*this_address)); + this_address++; + } + for (i = 0; i < mp->n_tags; i++) + { + vec_add2 (tags, tag, 1); + clib_memcpy (tag->as_u8, this_address->as_u8, sizeof (*this_address)); + this_address++; + } + + a->segments = segments; + a->tags = tags; + + rv = ip6_sr_add_del_tunnel (a); + +out: + + REPLY_MACRO (VL_API_SR_TUNNEL_ADD_DEL_REPLY); +#endif +} + +static void vl_api_sr_policy_add_del_t_handler + (vl_api_sr_policy_add_del_t * mp) +{ +#if IP6SR == 0 + clib_warning ("unimplemented"); +#else + ip6_sr_add_del_policy_args_t _a, *a = &_a; + int rv = 0; + vl_api_sr_policy_add_del_reply_t *rmp; + int i; + + memset (a, 0, sizeof (*a)); + a->is_del = (mp->is_add == 0); + + a->name = format (0, "%s", mp->name); + if (!(vec_len (a->name))) + { + rv = VNET_API_ERROR_NO_SUCH_NODE2; + goto out; + } + + if (!(mp->tunnel_names[0])) + { + rv = VNET_API_ERROR_NO_SUCH_NODE2; + goto out; + } + + // start deserializing tunnel_names + int num_tunnels = mp->tunnel_names[0]; //number of tunnels + u8 *deser_tun_names = mp->tunnel_names; + deser_tun_names += 1; //moving along + + u8 *tun_name = 0; + int tun_name_len = 0; + + for (i = 0; i < num_tunnels; i++) + { + tun_name_len = *deser_tun_names; + deser_tun_names += 1; + vec_resize (tun_name, tun_name_len); + memcpy (tun_name, deser_tun_names, tun_name_len); + vec_add1 (a->tunnel_names, tun_name); + deser_tun_names += tun_name_len; + tun_name = 0; + } + + rv = ip6_sr_add_del_policy (a); + +out: + + REPLY_MACRO (VL_API_SR_POLICY_ADD_DEL_REPLY); +#endif +} + +static void vl_api_sr_multicast_map_add_del_t_handler + (vl_api_sr_multicast_map_add_del_t * mp) +{ +#if IP6SR == 0 + clib_warning ("unimplemented"); +#else + ip6_sr_add_del_multicastmap_args_t _a, *a = &_a; + int rv = 0; + 
vl_api_sr_multicast_map_add_del_reply_t *rmp; + + memset (a, 0, sizeof (*a)); + a->is_del = (mp->is_add == 0); + + a->multicast_address = (ip6_address_t *) & mp->multicast_address; + a->policy_name = format (0, "%s", mp->policy_name); + + if (a->multicast_address == 0) + { + rv = -1; + goto out; + } + + if (!(a->policy_name)) + { + rv = -2; + goto out; + } + +#if DPDK > 0 /* Cannot call replicate without DPDK */ + rv = ip6_sr_add_del_multicastmap (a); +#else + clib_warning ("multicast replication without DPDK not implemented"); + rv = VNET_API_ERROR_UNIMPLEMENTED; +#endif /* DPDK */ + +out: + + REPLY_MACRO (VL_API_SR_MULTICAST_MAP_ADD_DEL_REPLY); +#endif +} + +#define foreach_classify_add_del_table_field \ +_(table_index) \ +_(nbuckets) \ +_(memory_size) \ +_(skip_n_vectors) \ +_(match_n_vectors) \ +_(next_table_index) \ +_(miss_next_index) \ +_(current_data_flag) \ +_(current_data_offset) + +static void vl_api_classify_add_del_table_t_handler + (vl_api_classify_add_del_table_t * mp) +{ + vl_api_classify_add_del_table_reply_t *rmp; + vnet_classify_main_t *cm = &vnet_classify_main; + vnet_classify_table_t *t; + int rv; + +#define _(a) u32 a; + foreach_classify_add_del_table_field; +#undef _ + +#define _(a) a = ntohl(mp->a); + foreach_classify_add_del_table_field; +#undef _ + + /* The underlying API fails silently, on purpose, so check here */ + if (mp->is_add == 0) /* delete */ + { + if (pool_is_free_index (cm->tables, table_index)) + { + rv = VNET_API_ERROR_NO_SUCH_TABLE; + goto out; + } + } + else /* add or update */ + { + if (table_index != ~0 && pool_is_free_index (cm->tables, table_index)) + table_index = ~0; + } + + rv = vnet_classify_add_del_table + (cm, mp->mask, nbuckets, memory_size, + skip_n_vectors, match_n_vectors, + next_table_index, miss_next_index, &table_index, + current_data_flag, current_data_offset, mp->is_add, mp->del_chain); + +out: + /* *INDENT-OFF* */ + REPLY_MACRO2(VL_API_CLASSIFY_ADD_DEL_TABLE_REPLY, + ({ + if (rv == 0 && mp->is_add) + { + t = pool_elt_at_index (cm->tables, table_index); + rmp->skip_n_vectors = ntohl(t->skip_n_vectors); + rmp->match_n_vectors = ntohl(t->match_n_vectors); + rmp->new_table_index = ntohl(table_index); + } + else + { + rmp->skip_n_vectors = ~0; + rmp->match_n_vectors = ~0; + rmp->new_table_index = ~0; + } + })); + /* *INDENT-ON* */ +} + +static void vl_api_classify_add_del_session_t_handler + (vl_api_classify_add_del_session_t * mp) +{ + vnet_classify_main_t *cm = &vnet_classify_main; + vl_api_classify_add_del_session_reply_t *rmp; + int rv; + u32 table_index, hit_next_index, opaque_index, metadata; + i32 advance; + u8 action; + + table_index = ntohl (mp->table_index); + hit_next_index = ntohl (mp->hit_next_index); + opaque_index = ntohl (mp->opaque_index); + advance = ntohl (mp->advance); + action = mp->action; + metadata = ntohl (mp->metadata); + + rv = vnet_classify_add_del_session + (cm, table_index, mp->match, hit_next_index, opaque_index, + advance, action, metadata, mp->is_add); + + REPLY_MACRO (VL_API_CLASSIFY_ADD_DEL_SESSION_REPLY); +} + +static void vl_api_classify_set_interface_ip_table_t_handler + (vl_api_classify_set_interface_ip_table_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_classify_set_interface_ip_table_reply_t *rmp; + int rv; + u32 table_index, sw_if_index; + + table_index = ntohl (mp->table_index); + sw_if_index = ntohl (mp->sw_if_index); + + VALIDATE_SW_IF_INDEX (mp); + + if (mp->is_ipv6) + rv = vnet_set_ip6_classify_intfc (vm, sw_if_index, table_index); + else + rv = vnet_set_ip4_classify_intfc 
(vm, sw_if_index, table_index); + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_CLASSIFY_SET_INTERFACE_IP_TABLE_REPLY); +} + +static void vl_api_classify_set_interface_l2_tables_t_handler + (vl_api_classify_set_interface_l2_tables_t * mp) +{ + vl_api_classify_set_interface_l2_tables_reply_t *rmp; + int rv; + u32 sw_if_index, ip4_table_index, ip6_table_index, other_table_index; + int enable; + + ip4_table_index = ntohl (mp->ip4_table_index); + ip6_table_index = ntohl (mp->ip6_table_index); + other_table_index = ntohl (mp->other_table_index); + sw_if_index = ntohl (mp->sw_if_index); + + VALIDATE_SW_IF_INDEX (mp); + + if (mp->is_input) + rv = vnet_l2_input_classify_set_tables (sw_if_index, ip4_table_index, + ip6_table_index, + other_table_index); + else + rv = vnet_l2_output_classify_set_tables (sw_if_index, ip4_table_index, + ip6_table_index, + other_table_index); + + if (rv == 0) + { + if (ip4_table_index != ~0 || ip6_table_index != ~0 + || other_table_index != ~0) + enable = 1; + else + enable = 0; + + if (mp->is_input) + vnet_l2_input_classify_enable_disable (sw_if_index, enable); + else + vnet_l2_output_classify_enable_disable (sw_if_index, enable); + } + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_CLASSIFY_SET_INTERFACE_L2_TABLES_REPLY); +} + +static void +vl_api_l2_fib_clear_table_t_handler (vl_api_l2_fib_clear_table_t * mp) +{ + int rv = 0; + vl_api_l2_fib_clear_table_reply_t *rmp; + + /* DAW-FIXME: This API should only clear non-static l2fib entries, but + * that is not currently implemented. When that TODO is fixed + * this call should be changed to pass 1 instead of 0. + */ + l2fib_clear_table (0); + + REPLY_MACRO (VL_API_L2_FIB_CLEAR_TABLE_REPLY); +} + +extern void l2_efp_filter_configure (vnet_main_t * vnet_main, + u32 sw_if_index, u32 enable); + +static void +vl_api_l2_interface_efp_filter_t_handler (vl_api_l2_interface_efp_filter_t * + mp) +{ + int rv; + vl_api_l2_interface_efp_filter_reply_t *rmp; + vnet_main_t *vnm = vnet_get_main (); + + // enable/disable the feature + l2_efp_filter_configure (vnm, mp->sw_if_index, mp->enable_disable); + rv = vnm->api_errno; + + REPLY_MACRO (VL_API_L2_INTERFACE_EFP_FILTER_REPLY); +} + +static void + vl_api_l2_interface_vlan_tag_rewrite_t_handler + (vl_api_l2_interface_vlan_tag_rewrite_t * mp) +{ + int rv = 0; + vl_api_l2_interface_vlan_tag_rewrite_reply_t *rmp; + vnet_main_t *vnm = vnet_get_main (); + vlib_main_t *vm = vlib_get_main (); + u32 vtr_op; + + VALIDATE_SW_IF_INDEX (mp); + + vtr_op = ntohl (mp->vtr_op); + + /* The L2 code is unsuspicious */ + switch (vtr_op) + { + case L2_VTR_DISABLED: + case L2_VTR_PUSH_1: + case L2_VTR_PUSH_2: + case L2_VTR_POP_1: + case L2_VTR_POP_2: + case L2_VTR_TRANSLATE_1_1: + case L2_VTR_TRANSLATE_1_2: + case L2_VTR_TRANSLATE_2_1: + case L2_VTR_TRANSLATE_2_2: + break; + + default: + rv = VNET_API_ERROR_INVALID_VALUE; + goto bad_sw_if_index; + } + + rv = l2vtr_configure (vm, vnm, ntohl (mp->sw_if_index), vtr_op, + ntohl (mp->push_dot1q), ntohl (mp->tag1), + ntohl (mp->tag2)); + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_L2_INTERFACE_VLAN_TAG_REWRITE_REPLY); +} + +static void +vl_api_l2_fib_table_entry_t_handler (vl_api_l2_fib_table_entry_t * mp) +{ + clib_warning ("BUG"); +} + +static void +send_l2fib_table_entry (vpe_api_main_t * am, + unix_shared_memory_queue_t * q, + l2fib_entry_key_t * l2fe_key, + l2fib_entry_result_t * l2fe_res, u32 context) +{ + vl_api_l2_fib_table_entry_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs 
(VL_API_L2_FIB_TABLE_ENTRY); + + mp->bd_id = + ntohl (l2input_main.bd_configs[l2fe_key->fields.bd_index].bd_id); + + mp->mac = l2fib_make_key (l2fe_key->fields.mac, 0); + mp->sw_if_index = ntohl (l2fe_res->fields.sw_if_index); + mp->static_mac = l2fe_res->fields.static_mac; + mp->filter_mac = l2fe_res->fields.filter; + mp->bvi_mac = l2fe_res->fields.bvi; + mp->context = context; + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +static void +vl_api_l2_fib_table_dump_t_handler (vl_api_l2_fib_table_dump_t * mp) +{ + vpe_api_main_t *am = &vpe_api_main; + bd_main_t *bdm = &bd_main; + l2fib_entry_key_t *l2fe_key = NULL; + l2fib_entry_result_t *l2fe_res = NULL; + u32 ni, bd_id = ntohl (mp->bd_id); + u32 bd_index; + unix_shared_memory_queue_t *q; + uword *p; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + /* see l2fib_table_dump: ~0 means "any" */ + if (bd_id == ~0) + bd_index = ~0; + else + { + p = hash_get (bdm->bd_index_by_bd_id, bd_id); + if (p == 0) + return; + + bd_index = p[0]; + } + + l2fib_table_dump (bd_index, &l2fe_key, &l2fe_res); + + vec_foreach_index (ni, l2fe_key) + { + send_l2fib_table_entry (am, q, vec_elt_at_index (l2fe_key, ni), + vec_elt_at_index (l2fe_res, ni), mp->context); + } + vec_free (l2fe_key); + vec_free (l2fe_res); +} + +static void +vl_api_show_version_t_handler (vl_api_show_version_t * mp) +{ + vl_api_show_version_reply_t *rmp; + int rv = 0; + char *vpe_api_get_build_directory (void); + char *vpe_api_get_version (void); + char *vpe_api_get_build_date (void); + + unix_shared_memory_queue_t *q = + vl_api_client_index_to_input_queue (mp->client_index); + + if (!q) + return; + + /* *INDENT-OFF* */ + REPLY_MACRO2(VL_API_SHOW_VERSION_REPLY, + ({ + strncpy ((char *) rmp->program, "vpe", ARRAY_LEN(rmp->program)-1); + strncpy ((char *) rmp->build_directory, vpe_api_get_build_directory(), + ARRAY_LEN(rmp->build_directory)-1); + strncpy ((char *) rmp->version, vpe_api_get_version(), + ARRAY_LEN(rmp->version)-1); + strncpy ((char *) rmp->build_date, vpe_api_get_build_date(), + ARRAY_LEN(rmp->build_date)-1); + })); + /* *INDENT-ON* */ +} + +static void +vl_api_get_node_index_t_handler (vl_api_get_node_index_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_get_node_index_reply_t *rmp; + vlib_node_t *n; + int rv = 0; + u32 node_index = ~0; + + n = vlib_get_node_by_name (vm, mp->node_name); + + if (n == 0) + rv = VNET_API_ERROR_NO_SUCH_NODE; + else + node_index = n->index; + + /* *INDENT-OFF* */ + REPLY_MACRO2(VL_API_GET_NODE_INDEX_REPLY, + ({ + rmp->node_index = ntohl(node_index); + })); + /* *INDENT-ON* */ +} + +static void +vl_api_get_next_index_t_handler (vl_api_get_next_index_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_get_next_index_reply_t *rmp; + vlib_node_t *node, *next_node; + int rv = 0; + u32 next_node_index = ~0, next_index = ~0; + uword *p; + + node = vlib_get_node_by_name (vm, mp->node_name); + + if (node == 0) + { + rv = VNET_API_ERROR_NO_SUCH_NODE; + goto out; + } + + next_node = vlib_get_node_by_name (vm, mp->next_name); + + if (next_node == 0) + { + rv = VNET_API_ERROR_NO_SUCH_NODE2; + goto out; + } + else + next_node_index = next_node->index; + + p = hash_get (node->next_slot_by_node, next_node_index); + + if (p == 0) + { + rv = VNET_API_ERROR_NO_SUCH_ENTRY; + goto out; + } + else + next_index = p[0]; + +out: + /* *INDENT-OFF* */ + REPLY_MACRO2(VL_API_GET_NEXT_INDEX_REPLY, + ({ + rmp->next_index = ntohl(next_index); + })); + /* *INDENT-ON* */ +} + +static void +vl_api_add_node_next_t_handler 
(vl_api_add_node_next_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_add_node_next_reply_t *rmp; + vlib_node_t *n, *next; + int rv = 0; + u32 next_index = ~0; + + n = vlib_get_node_by_name (vm, mp->node_name); + + if (n == 0) + { + rv = VNET_API_ERROR_NO_SUCH_NODE; + goto out; + } + + next = vlib_get_node_by_name (vm, mp->next_name); + + if (next == 0) + rv = VNET_API_ERROR_NO_SUCH_NODE2; + else + next_index = vlib_node_add_next (vm, n->index, next->index); + +out: + /* *INDENT-OFF* */ + REPLY_MACRO2(VL_API_GET_NODE_INDEX_REPLY, + ({ + rmp->next_index = ntohl(next_index); + })); + /* *INDENT-ON* */ +} + +static void vl_api_vxlan_add_del_tunnel_t_handler + (vl_api_vxlan_add_del_tunnel_t * mp) +{ + vl_api_vxlan_add_del_tunnel_reply_t *rmp; + int rv = 0; + vnet_vxlan_add_del_tunnel_args_t _a, *a = &_a; + u32 encap_fib_index; + uword *p; + ip4_main_t *im = &ip4_main; + vnet_main_t *vnm = vnet_get_main (); + u32 sw_if_index = ~0; + + p = hash_get (im->fib_index_by_table_id, ntohl (mp->encap_vrf_id)); + if (!p) + { + rv = VNET_API_ERROR_NO_SUCH_FIB; + goto out; + } + encap_fib_index = p[0]; + memset (a, 0, sizeof (*a)); + + a->is_add = mp->is_add; + a->is_ip6 = mp->is_ipv6; + + /* ip addresses sent in network byte order */ + ip46_from_addr_buf (mp->is_ipv6, mp->dst_address, &a->dst); + ip46_from_addr_buf (mp->is_ipv6, mp->src_address, &a->src); + + /* Check src & dst are different */ + if (ip46_address_cmp (&a->dst, &a->src) == 0) + { + rv = VNET_API_ERROR_SAME_SRC_DST; + goto out; + } + a->mcast_sw_if_index = ntohl (mp->mcast_sw_if_index); + if (ip46_address_is_multicast (&a->dst) && + pool_is_free_index (vnm->interface_main.sw_interfaces, + a->mcast_sw_if_index)) + { + rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; + goto out; + } + a->encap_fib_index = encap_fib_index; + a->decap_next_index = ntohl (mp->decap_next_index); + a->vni = ntohl (mp->vni); + rv = vnet_vxlan_add_del_tunnel (a, &sw_if_index); + +out: + /* *INDENT-OFF* */ + REPLY_MACRO2(VL_API_VXLAN_ADD_DEL_TUNNEL_REPLY, + ({ + rmp->sw_if_index = ntohl (sw_if_index); + })); + /* *INDENT-ON* */ +} + +static void send_vxlan_tunnel_details + (vxlan_tunnel_t * t, unix_shared_memory_queue_t * q, u32 context) +{ + vl_api_vxlan_tunnel_details_t *rmp; + ip4_main_t *im4 = &ip4_main; + ip6_main_t *im6 = &ip6_main; + u8 is_ipv6 = !ip46_address_is_ip4 (&t->dst); + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_VXLAN_TUNNEL_DETAILS); + if (is_ipv6) + { + memcpy (rmp->src_address, t->src.ip6.as_u8, 16); + memcpy (rmp->dst_address, t->dst.ip6.as_u8, 16); + rmp->encap_vrf_id = htonl (im6->fibs[t->encap_fib_index].ft_table_id); + } + else + { + memcpy (rmp->src_address, t->src.ip4.as_u8, 4); + memcpy (rmp->dst_address, t->dst.ip4.as_u8, 4); + rmp->encap_vrf_id = htonl (im4->fibs[t->encap_fib_index].ft_table_id); + } + rmp->mcast_sw_if_index = htonl (t->mcast_sw_if_index); + rmp->vni = htonl (t->vni); + rmp->decap_next_index = htonl (t->decap_next_index); + rmp->sw_if_index = htonl (t->sw_if_index); + rmp->is_ipv6 = is_ipv6; + rmp->context = context; + + vl_msg_api_send_shmem (q, (u8 *) & rmp); +} + +static void vl_api_vxlan_tunnel_dump_t_handler + (vl_api_vxlan_tunnel_dump_t * mp) +{ + unix_shared_memory_queue_t *q; + vxlan_main_t *vxm = &vxlan_main; + vxlan_tunnel_t *t; + u32 sw_if_index; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + { + return; + } + + sw_if_index = ntohl (mp->sw_if_index); + + if (~0 == sw_if_index) + { + /* *INDENT-OFF* */ + 
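+      /* sw_if_index of ~0 means "all tunnels": walk the whole pool and
+       * send one details message per tunnel. */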
pool_foreach (t, vxm->tunnels, + ({ + send_vxlan_tunnel_details(t, q, mp->context); + })); + /* *INDENT-ON* */ + } + else + { + if ((sw_if_index >= vec_len (vxm->tunnel_index_by_sw_if_index)) || + (~0 == vxm->tunnel_index_by_sw_if_index[sw_if_index])) + { + return; + } + t = &vxm->tunnels[vxm->tunnel_index_by_sw_if_index[sw_if_index]]; + send_vxlan_tunnel_details (t, q, mp->context); + } +} + +static void +vl_api_l2_patch_add_del_t_handler (vl_api_l2_patch_add_del_t * mp) +{ + extern int vnet_l2_patch_add_del (u32 rx_sw_if_index, u32 tx_sw_if_index, + int is_add); + vl_api_l2_patch_add_del_reply_t *rmp; + int vnet_l2_patch_add_del (u32 rx_sw_if_index, u32 tx_sw_if_index, + int is_add); + int rv = 0; + + VALIDATE_RX_SW_IF_INDEX (mp); + VALIDATE_TX_SW_IF_INDEX (mp); + + rv = vnet_l2_patch_add_del (ntohl (mp->rx_sw_if_index), + ntohl (mp->tx_sw_if_index), + (int) (mp->is_add != 0)); + + BAD_RX_SW_IF_INDEX_LABEL; + BAD_TX_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_L2_PATCH_ADD_DEL_REPLY); +} + +static void + vl_api_vxlan_gpe_add_del_tunnel_t_handler + (vl_api_vxlan_gpe_add_del_tunnel_t * mp) +{ + vl_api_vxlan_gpe_add_del_tunnel_reply_t *rmp; + int rv = 0; + vnet_vxlan_gpe_add_del_tunnel_args_t _a, *a = &_a; + u32 encap_fib_index, decap_fib_index; + u8 protocol; + uword *p; + ip4_main_t *im = &ip4_main; + u32 sw_if_index = ~0; + + + p = hash_get (im->fib_index_by_table_id, ntohl (mp->encap_vrf_id)); + if (!p) + { + rv = VNET_API_ERROR_NO_SUCH_FIB; + goto out; + } + encap_fib_index = p[0]; + + protocol = mp->protocol; + + /* Interpret decap_vrf_id as an opaque if sending to other-than-ip4-input */ + if (protocol == VXLAN_GPE_INPUT_NEXT_IP4_INPUT) + { + p = hash_get (im->fib_index_by_table_id, ntohl (mp->decap_vrf_id)); + if (!p) + { + rv = VNET_API_ERROR_NO_SUCH_INNER_FIB; + goto out; + } + decap_fib_index = p[0]; + } + else + { + decap_fib_index = ntohl (mp->decap_vrf_id); + } + + /* Check src & dst are different */ + if ((mp->is_ipv6 && memcmp (mp->local, mp->remote, 16) == 0) || + (!mp->is_ipv6 && memcmp (mp->local, mp->remote, 4) == 0)) + { + rv = VNET_API_ERROR_SAME_SRC_DST; + goto out; + } + memset (a, 0, sizeof (*a)); + + a->is_add = mp->is_add; + a->is_ip6 = mp->is_ipv6; + /* ip addresses sent in network byte order */ + if (a->is_ip6) + { + clib_memcpy (&(a->local.ip6), mp->local, 16); + clib_memcpy (&(a->remote.ip6), mp->remote, 16); + } + else + { + clib_memcpy (&(a->local.ip4), mp->local, 4); + clib_memcpy (&(a->remote.ip4), mp->remote, 4); + } + a->encap_fib_index = encap_fib_index; + a->decap_fib_index = decap_fib_index; + a->protocol = protocol; + a->vni = ntohl (mp->vni); + rv = vnet_vxlan_gpe_add_del_tunnel (a, &sw_if_index); + +out: + /* *INDENT-OFF* */ + REPLY_MACRO2(VL_API_VXLAN_GPE_ADD_DEL_TUNNEL_REPLY, + ({ + rmp->sw_if_index = ntohl (sw_if_index); + })); + /* *INDENT-ON* */ +} + +static void send_vxlan_gpe_tunnel_details + (vxlan_gpe_tunnel_t * t, unix_shared_memory_queue_t * q, u32 context) +{ + vl_api_vxlan_gpe_tunnel_details_t *rmp; + ip4_main_t *im4 = &ip4_main; + ip6_main_t *im6 = &ip6_main; + u8 is_ipv6 = !(t->flags & VXLAN_GPE_TUNNEL_IS_IPV4); + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_VXLAN_GPE_TUNNEL_DETAILS); + if (is_ipv6) + { + memcpy (rmp->local, &(t->local.ip6), 16); + memcpy (rmp->remote, &(t->remote.ip6), 16); + rmp->encap_vrf_id = htonl (im6->fibs[t->encap_fib_index].ft_table_id); + rmp->decap_vrf_id = htonl (im6->fibs[t->decap_fib_index].ft_table_id); + } + else + { + memcpy (rmp->local, 
&(t->local.ip4), 4); + memcpy (rmp->remote, &(t->remote.ip4), 4); + rmp->encap_vrf_id = htonl (im4->fibs[t->encap_fib_index].ft_table_id); + rmp->decap_vrf_id = htonl (im4->fibs[t->decap_fib_index].ft_table_id); + } + rmp->vni = htonl (t->vni); + rmp->protocol = t->protocol; + rmp->sw_if_index = htonl (t->sw_if_index); + rmp->is_ipv6 = is_ipv6; + rmp->context = context; + + vl_msg_api_send_shmem (q, (u8 *) & rmp); +} + +static void vl_api_vxlan_gpe_tunnel_dump_t_handler + (vl_api_vxlan_gpe_tunnel_dump_t * mp) +{ + unix_shared_memory_queue_t *q; + vxlan_gpe_main_t *vgm = &vxlan_gpe_main; + vxlan_gpe_tunnel_t *t; + u32 sw_if_index; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + { + return; + } + + sw_if_index = ntohl (mp->sw_if_index); + + if (~0 == sw_if_index) + { + /* *INDENT-OFF* */ + pool_foreach (t, vgm->tunnels, + ({ + send_vxlan_gpe_tunnel_details(t, q, mp->context); + })); + /* *INDENT-ON* */ + } + else + { + if ((sw_if_index >= vec_len (vgm->tunnel_index_by_sw_if_index)) || + (~0 == vgm->tunnel_index_by_sw_if_index[sw_if_index])) + { + return; + } + t = &vgm->tunnels[vgm->tunnel_index_by_sw_if_index[sw_if_index]]; + send_vxlan_gpe_tunnel_details (t, q, mp->context); + } +} + +static void +vl_api_interface_name_renumber_t_handler (vl_api_interface_name_renumber_t * + mp) +{ + vl_api_interface_name_renumber_reply_t *rmp; + int rv = 0; + + VALIDATE_SW_IF_INDEX (mp); + + rv = vnet_interface_name_renumber + (ntohl (mp->sw_if_index), ntohl (mp->new_show_dev_instance)); + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_INTERFACE_NAME_RENUMBER_REPLY); +} + +static int +arp_change_data_callback (u32 pool_index, u8 * new_mac, + u32 sw_if_index, u32 address) +{ + vpe_api_main_t *am = &vpe_api_main; + vlib_main_t *vm = am->vlib_main; + vl_api_ip4_arp_event_t *event; + static f64 arp_event_last_time; + f64 now = vlib_time_now (vm); + + if (pool_is_free_index (am->arp_events, pool_index)) + return 1; + + event = pool_elt_at_index (am->arp_events, pool_index); + /* *INDENT-OFF* */ + if (memcmp (&event->new_mac, new_mac, sizeof (event->new_mac))) + { + clib_memcpy (event->new_mac, new_mac, sizeof (event->new_mac)); + } + else + { /* same mac */ + if (sw_if_index == event->sw_if_index && + (!event->mac_ip || + /* for BD case, also check IP address with 10 sec timeout */ + (address == event->address && + (now - arp_event_last_time) < 10.0))) + return 1; + } + /* *INDENT-ON* */ + + arp_event_last_time = now; + event->sw_if_index = sw_if_index; + if (event->mac_ip) + event->address = address; + return 0; +} + +static int +nd_change_data_callback (u32 pool_index, u8 * new_mac, + u32 sw_if_index, ip6_address_t * address) +{ + vpe_api_main_t *am = &vpe_api_main; + vlib_main_t *vm = am->vlib_main; + vl_api_ip6_nd_event_t *event; + static f64 nd_event_last_time; + f64 now = vlib_time_now (vm); + + if (pool_is_free_index (am->nd_events, pool_index)) + return 1; + + event = pool_elt_at_index (am->nd_events, pool_index); + + /* *INDENT-OFF* */ + if (memcmp (&event->new_mac, new_mac, sizeof (event->new_mac))) + { + clib_memcpy (event->new_mac, new_mac, sizeof (event->new_mac)); + } + else + { /* same mac */ + if (sw_if_index == event->sw_if_index && + (!event->mac_ip || + /* for BD case, also check IP address with 10 sec timeout */ + (ip6_address_is_equal (address, + (ip6_address_t *) event->address) && + (now - nd_event_last_time) < 10.0))) + return 1; + } + /* *INDENT-ON* */ + + nd_event_last_time = now; + event->sw_if_index = sw_if_index; + if (event->mac_ip) + clib_memcpy 
(event->address, address, sizeof (event->address)); + return 0; +} + +static int +arp_change_delete_callback (u32 pool_index, u8 * notused) +{ + vpe_api_main_t *am = &vpe_api_main; + + if (pool_is_free_index (am->arp_events, pool_index)) + return 1; + + pool_put_index (am->arp_events, pool_index); + return 0; +} + +static int +nd_change_delete_callback (u32 pool_index, u8 * notused) +{ + vpe_api_main_t *am = &vpe_api_main; + + if (pool_is_free_index (am->nd_events, pool_index)) + return 1; + + pool_put_index (am->nd_events, pool_index); + return 0; +} + +static void +vl_api_want_ip4_arp_events_t_handler (vl_api_want_ip4_arp_events_t * mp) +{ + vpe_api_main_t *am = &vpe_api_main; + vnet_main_t *vnm = vnet_get_main (); + vl_api_want_ip4_arp_events_reply_t *rmp; + vl_api_ip4_arp_event_t *event; + int rv; + + if (mp->enable_disable) + { + pool_get (am->arp_events, event); + memset (event, 0, sizeof (*event)); + + event->_vl_msg_id = ntohs (VL_API_IP4_ARP_EVENT); + event->client_index = mp->client_index; + event->context = mp->context; + event->address = mp->address; + event->pid = mp->pid; + if (mp->address == 0) + event->mac_ip = 1; + + rv = vnet_add_del_ip4_arp_change_event + (vnm, arp_change_data_callback, + mp->pid, &mp->address /* addr, in net byte order */ , + vpe_resolver_process_node.index, + IP4_ARP_EVENT, event - am->arp_events, 1 /* is_add */ ); + } + else + { + rv = vnet_add_del_ip4_arp_change_event + (vnm, arp_change_delete_callback, + mp->pid, &mp->address /* addr, in net byte order */ , + vpe_resolver_process_node.index, + IP4_ARP_EVENT, ~0 /* pool index */ , 0 /* is_add */ ); + } + REPLY_MACRO (VL_API_WANT_IP4_ARP_EVENTS_REPLY); +} + +static void +vl_api_want_ip6_nd_events_t_handler (vl_api_want_ip6_nd_events_t * mp) +{ + vpe_api_main_t *am = &vpe_api_main; + vnet_main_t *vnm = vnet_get_main (); + vl_api_want_ip6_nd_events_reply_t *rmp; + vl_api_ip6_nd_event_t *event; + int rv; + + if (mp->enable_disable) + { + pool_get (am->nd_events, event); + memset (event, 0, sizeof (*event)); + + event->_vl_msg_id = ntohs (VL_API_IP6_ND_EVENT); + event->client_index = mp->client_index; + event->context = mp->context; + clib_memcpy (event->address, mp->address, 16); + event->pid = mp->pid; + if (ip6_address_is_zero ((ip6_address_t *) mp->address)) + event->mac_ip = 1; + + rv = vnet_add_del_ip6_nd_change_event + (vnm, nd_change_data_callback, + mp->pid, mp->address /* addr, in net byte order */ , + vpe_resolver_process_node.index, + IP6_ND_EVENT, event - am->nd_events, 1 /* is_add */ ); + } + else + { + rv = vnet_add_del_ip6_nd_change_event + (vnm, nd_change_delete_callback, + mp->pid, mp->address /* addr, in net byte order */ , + vpe_resolver_process_node.index, + IP6_ND_EVENT, ~0 /* pool index */ , 0 /* is_add */ ); + } + REPLY_MACRO (VL_API_WANT_IP6_ND_EVENTS_REPLY); +} + +static void vl_api_input_acl_set_interface_t_handler + (vl_api_input_acl_set_interface_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_input_acl_set_interface_reply_t *rmp; + int rv; + u32 sw_if_index, ip4_table_index, ip6_table_index, l2_table_index; + + ip4_table_index = ntohl (mp->ip4_table_index); + ip6_table_index = ntohl (mp->ip6_table_index); + l2_table_index = ntohl (mp->l2_table_index); + sw_if_index = ntohl (mp->sw_if_index); + + VALIDATE_SW_IF_INDEX (mp); + + rv = vnet_set_input_acl_intfc (vm, sw_if_index, ip4_table_index, + ip6_table_index, l2_table_index, mp->is_add); + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_INPUT_ACL_SET_INTERFACE_REPLY); +} + +static void 
vl_api_cop_interface_enable_disable_t_handler + (vl_api_cop_interface_enable_disable_t * mp) +{ + vl_api_cop_interface_enable_disable_reply_t *rmp; + int rv; + u32 sw_if_index = ntohl (mp->sw_if_index); + int enable_disable; + + VALIDATE_SW_IF_INDEX (mp); + + enable_disable = (int) mp->enable_disable; + + rv = cop_interface_enable_disable (sw_if_index, enable_disable); + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_COP_INTERFACE_ENABLE_DISABLE_REPLY); +} + +static void vl_api_cop_whitelist_enable_disable_t_handler + (vl_api_cop_whitelist_enable_disable_t * mp) +{ + vl_api_cop_whitelist_enable_disable_reply_t *rmp; + cop_whitelist_enable_disable_args_t _a, *a = &_a; + u32 sw_if_index = ntohl (mp->sw_if_index); + int rv; + + VALIDATE_SW_IF_INDEX (mp); + + a->sw_if_index = sw_if_index; + a->ip4 = mp->ip4; + a->ip6 = mp->ip6; + a->default_cop = mp->default_cop; + a->fib_id = ntohl (mp->fib_id); + + rv = cop_whitelist_enable_disable (a); + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_COP_WHITELIST_ENABLE_DISABLE_REPLY); +} + +static void +vl_api_get_node_graph_t_handler (vl_api_get_node_graph_t * mp) +{ + int rv = 0; + u8 *vector = 0; + api_main_t *am = &api_main; + vlib_main_t *vm = vlib_get_main (); + void *oldheap; + vl_api_get_node_graph_reply_t *rmp; + + pthread_mutex_lock (&am->vlib_rp->mutex); + oldheap = svm_push_data_heap (am->vlib_rp); + + /* + * Keep the number of memcpy ops to a minimum (e.g. 1). + */ + vec_validate (vector, 16384); + vec_reset_length (vector); + + /* $$$$ FIXME */ + vector = vlib_node_serialize (&vm->node_main, vector, + (u32) ~ 0 /* all threads */ , + 1 /* include nexts */ , + 1 /* include stats */ ); + + svm_pop_heap (oldheap); + pthread_mutex_unlock (&am->vlib_rp->mutex); + + /* *INDENT-OFF* */ + REPLY_MACRO2(VL_API_GET_NODE_GRAPH_REPLY, + ({ + rmp->reply_in_shmem = (uword) vector; + })); + /* *INDENT-ON* */ +} + +static void +vl_api_ioam_enable_t_handler (vl_api_ioam_enable_t * mp) +{ + int rv = 0; + vl_api_ioam_enable_reply_t *rmp; + clib_error_t *error; + + /* Ignoring the profile id as currently a single profile + * is supported */ + error = ip6_ioam_enable (mp->trace_enable, mp->pot_enable, + mp->seqno, mp->analyse); + if (error) + { + clib_error_report (error); + rv = clib_error_get_code (error); + } + + REPLY_MACRO (VL_API_IOAM_ENABLE_REPLY); +} + +static void +vl_api_ioam_disable_t_handler (vl_api_ioam_disable_t * mp) +{ + int rv = 0; + vl_api_ioam_disable_reply_t *rmp; + clib_error_t *error; + + error = clear_ioam_rewrite_fn (); + if (error) + { + clib_error_report (error); + rv = clib_error_get_code (error); + } + + REPLY_MACRO (VL_API_IOAM_DISABLE_REPLY); +} + +static void +vl_api_policer_add_del_t_handler (vl_api_policer_add_del_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_policer_add_del_reply_t *rmp; + int rv = 0; + u8 *name = NULL; + sse2_qos_pol_cfg_params_st cfg; + clib_error_t *error; + u32 policer_index; + + name = format (0, "%s", mp->name); + + memset (&cfg, 0, sizeof (cfg)); + cfg.rfc = mp->type; + cfg.rnd_type = mp->round_type; + cfg.rate_type = mp->rate_type; + cfg.rb.kbps.cir_kbps = mp->cir; + cfg.rb.kbps.eir_kbps = mp->eir; + cfg.rb.kbps.cb_bytes = mp->cb; + cfg.rb.kbps.eb_bytes = mp->eb; + cfg.conform_action.action_type = mp->conform_action_type; + cfg.conform_action.dscp = mp->conform_dscp; + cfg.exceed_action.action_type = mp->exceed_action_type; + cfg.exceed_action.dscp = mp->exceed_dscp; + cfg.violate_action.action_type = mp->violate_action_type; + cfg.violate_action.dscp = mp->violate_dscp; + cfg.color_aware = 
mp->color_aware; + + error = policer_add_del (vm, name, &cfg, &policer_index, mp->is_add); + + if (error) + rv = VNET_API_ERROR_UNSPECIFIED; + + /* *INDENT-OFF* */ + REPLY_MACRO2(VL_API_POLICER_ADD_DEL_REPLY, + ({ + if (rv == 0 && mp->is_add) + rmp->policer_index = ntohl(policer_index); + else + rmp->policer_index = ~0; + })); + /* *INDENT-ON* */ +} + +static void +send_policer_details (u8 * name, + sse2_qos_pol_cfg_params_st * config, + policer_read_response_type_st * templ, + unix_shared_memory_queue_t * q, u32 context) +{ + vl_api_policer_details_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_POLICER_DETAILS); + mp->context = context; + mp->cir = htonl (config->rb.kbps.cir_kbps); + mp->eir = htonl (config->rb.kbps.eir_kbps); + mp->cb = htonl (config->rb.kbps.cb_bytes); + mp->eb = htonl (config->rb.kbps.eb_bytes); + mp->rate_type = config->rate_type; + mp->round_type = config->rnd_type; + mp->type = config->rfc; + mp->conform_action_type = config->conform_action.action_type; + mp->conform_dscp = config->conform_action.dscp; + mp->exceed_action_type = config->exceed_action.action_type; + mp->exceed_dscp = config->exceed_action.dscp; + mp->violate_action_type = config->violate_action.action_type; + mp->violate_dscp = config->violate_action.dscp; + mp->single_rate = templ->single_rate ? 1 : 0; + mp->color_aware = templ->color_aware ? 1 : 0; + mp->scale = htonl (templ->scale); + mp->cir_tokens_per_period = htonl (templ->cir_tokens_per_period); + mp->pir_tokens_per_period = htonl (templ->pir_tokens_per_period); + mp->current_limit = htonl (templ->current_limit); + mp->current_bucket = htonl (templ->current_bucket); + mp->extended_limit = htonl (templ->extended_limit); + mp->extended_bucket = htonl (templ->extended_bucket); + mp->last_update_time = clib_host_to_net_u64 (templ->last_update_time); + + strncpy ((char *) mp->name, (char *) name, ARRAY_LEN (mp->name) - 1); + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +static void +vl_api_policer_dump_t_handler (vl_api_policer_dump_t * mp) +{ + unix_shared_memory_queue_t *q; + vnet_policer_main_t *pm = &vnet_policer_main; + hash_pair_t *hp; + uword *p; + u32 pool_index; + u8 *match_name = 0; + u8 *name; + sse2_qos_pol_cfg_params_st *config; + policer_read_response_type_st *templ; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + if (mp->match_name_valid) + { + match_name = format (0, "%s%c", mp->match_name, 0); + } + + if (mp->match_name_valid) + { + p = hash_get_mem (pm->policer_config_by_name, match_name); + if (p) + { + pool_index = p[0]; + config = pool_elt_at_index (pm->configs, pool_index); + templ = pool_elt_at_index (pm->policer_templates, pool_index); + send_policer_details (match_name, config, templ, q, mp->context); + } + } + else + { + /* *INDENT-OFF* */ + hash_foreach_pair (hp, pm->policer_config_by_name, + ({ + name = (u8 *) hp->key; + pool_index = hp->value[0]; + config = pool_elt_at_index (pm->configs, pool_index); + templ = pool_elt_at_index (pm->policer_templates, pool_index); + send_policer_details(name, config, templ, q, mp->context); + })); + /* *INDENT-ON* */ + } +} + +static void + vl_api_policer_classify_set_interface_t_handler + (vl_api_policer_classify_set_interface_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_policer_classify_set_interface_reply_t *rmp; + int rv; + u32 sw_if_index, ip4_table_index, ip6_table_index, l2_table_index; + + ip4_table_index = ntohl (mp->ip4_table_index); + ip6_table_index = 
ntohl (mp->ip6_table_index); + l2_table_index = ntohl (mp->l2_table_index); + sw_if_index = ntohl (mp->sw_if_index); + + VALIDATE_SW_IF_INDEX (mp); + + rv = vnet_set_policer_classify_intfc (vm, sw_if_index, ip4_table_index, + ip6_table_index, l2_table_index, + mp->is_add); + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_POLICER_CLASSIFY_SET_INTERFACE_REPLY); +} + +static void +send_policer_classify_details (u32 sw_if_index, + u32 table_index, + unix_shared_memory_queue_t * q, u32 context) +{ + vl_api_policer_classify_details_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_POLICER_CLASSIFY_DETAILS); + mp->context = context; + mp->sw_if_index = htonl (sw_if_index); + mp->table_index = htonl (table_index); + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +static void +vl_api_policer_classify_dump_t_handler (vl_api_policer_classify_dump_t * mp) +{ + unix_shared_memory_queue_t *q; + policer_classify_main_t *pcm = &policer_classify_main; + u32 *vec_tbl; + int i; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + vec_tbl = pcm->classify_table_index_by_sw_if_index[mp->type]; + + if (vec_len (vec_tbl)) + { + for (i = 0; i < vec_len (vec_tbl); i++) + { + if (vec_elt (vec_tbl, i) == ~0) + continue; + + send_policer_classify_details (i, vec_elt (vec_tbl, i), q, + mp->context); + } + } +} + +static void +vl_api_mpls_tunnel_details_t_handler (vl_api_mpls_fib_details_t * mp) +{ + clib_warning ("BUG"); +} + +typedef struct mpls_tunnel_send_walk_ctx_t_ +{ + unix_shared_memory_queue_t *q; + u32 index; + u32 context; +} mpls_tunnel_send_walk_ctx_t; + +static void +send_mpls_tunnel_entry (u32 mti, void *arg) +{ + mpls_tunnel_send_walk_ctx_t *ctx; + vl_api_mpls_tunnel_details_t *mp; + const mpls_tunnel_t *mt; + u32 nlabels; + + ctx = arg; + + if (~0 != ctx->index && mti != ctx->index) + return; + + mt = mpls_tunnel_get (mti); + nlabels = vec_len (mt->mt_label_stack); + + mp = vl_msg_api_alloc (sizeof (*mp) + nlabels * sizeof (u32)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_MPLS_TUNNEL_DETAILS); + mp->context = ctx->context; + + mp->tunnel_index = ntohl (mti); + memcpy (mp->mt_next_hop_out_labels, + mt->mt_label_stack, nlabels * sizeof (u32)); + + // FIXME + + vl_msg_api_send_shmem (ctx->q, (u8 *) & mp); +} + +static void +vl_api_mpls_tunnel_dump_t_handler (vl_api_mpls_tunnel_dump_t * mp) +{ + unix_shared_memory_queue_t *q; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + mpls_tunnel_send_walk_ctx_t ctx = { + .q = q, + .index = ntohl (mp->tunnel_index), + .context = mp->context, + }; + mpls_tunnel_walk (send_mpls_tunnel_entry, &ctx); +} + +static void +vl_api_mpls_fib_details_t_handler (vl_api_mpls_fib_details_t * mp) +{ + clib_warning ("BUG"); +} + +static void +vl_api_mpls_fib_details_t_endian (vl_api_mpls_fib_details_t * mp) +{ + clib_warning ("BUG"); +} + +static void +vl_api_mpls_fib_details_t_print (vl_api_mpls_fib_details_t * mp) +{ + clib_warning ("BUG"); +} + +static void +send_mpls_fib_details (vpe_api_main_t * am, + unix_shared_memory_queue_t * q, + u32 table_id, u32 label, u32 eos, + fib_route_path_encode_t * api_rpaths, u32 context) +{ + vl_api_mpls_fib_details_t *mp; + fib_route_path_encode_t *api_rpath; + vl_api_fib_path2_t *fp; + int path_count; + + path_count = vec_len (api_rpaths); + mp = vl_msg_api_alloc (sizeof (*mp) + path_count * sizeof (*fp)); + if (!mp) + return; + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = 
ntohs (VL_API_MPLS_FIB_DETAILS); + mp->context = context; + + mp->table_id = htonl (table_id); + mp->eos_bit = eos; + mp->label = htonl (label); + + mp->count = htonl (path_count); + fp = mp->path; + vec_foreach (api_rpath, api_rpaths) + { + memset (fp, 0, sizeof (*fp)); + fp->weight = htonl (api_rpath->rpath.frp_weight); + fp->sw_if_index = htonl (api_rpath->rpath.frp_sw_if_index); + copy_fib_next_hop (api_rpath, fp); + fp++; + } + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +static void +vl_api_mpls_fib_dump_t_handler (vl_api_mpls_fib_dump_t * mp) +{ + vpe_api_main_t *am = &vpe_api_main; + unix_shared_memory_queue_t *q; + mpls_main_t *mm = &mpls_main; + fib_table_t *fib_table; + fib_node_index_t lfei, *lfeip, *lfeis = NULL; + mpls_label_t key; + fib_prefix_t pfx; + u32 fib_index; + fib_route_path_encode_t *api_rpaths; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + /* *INDENT-OFF* */ + pool_foreach (fib_table, mm->fibs, + ({ + hash_foreach(key, lfei, fib_table->mpls.mf_entries, + ({ + vec_add1(lfeis, lfei); + })); + })); + vec_sort_with_function(lfeis, fib_entry_cmp_for_sort); + + vec_foreach(lfeip, lfeis) + { + fib_entry_get_prefix(*lfeip, &pfx); + fib_index = fib_entry_get_fib_index(*lfeip); + fib_table = fib_table_get(fib_index, pfx.fp_proto); + api_rpaths = NULL; + fib_entry_encode(*lfeip, &api_rpaths); + send_mpls_fib_details (am, q, + fib_table->ft_table_id, + pfx.fp_label, + pfx.fp_eos, + api_rpaths, + mp->context); + vec_free(api_rpaths); + } + + vec_free (lfeis); +} + +static void +vl_api_classify_table_ids_t_handler (vl_api_classify_table_ids_t * mp) +{ + unix_shared_memory_queue_t *q; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + vnet_classify_main_t *cm = &vnet_classify_main; + vnet_classify_table_t *t; + u32 *table_ids = 0; + u32 count; + + /* *INDENT-OFF* */ + pool_foreach (t, cm->tables, + ({ + vec_add1 (table_ids, ntohl(t - cm->tables)); + })); + /* *INDENT-ON* */ + count = vec_len (table_ids); + + vl_api_classify_table_ids_reply_t *rmp; + rmp = vl_msg_api_alloc_as_if_client (sizeof (*rmp) + count * sizeof (u32)); + rmp->_vl_msg_id = ntohs (VL_API_CLASSIFY_TABLE_IDS_REPLY); + rmp->context = mp->context; + rmp->count = ntohl (count); + clib_memcpy (rmp->ids, table_ids, count * sizeof (u32)); + rmp->retval = 0; + + vl_msg_api_send_shmem (q, (u8 *) & rmp); + + vec_free (table_ids); +} + +static void + vl_api_classify_table_by_interface_t_handler + (vl_api_classify_table_by_interface_t * mp) +{ + vl_api_classify_table_by_interface_reply_t *rmp; + int rv = 0; + + u32 sw_if_index = ntohl (mp->sw_if_index); + u32 *acl = 0; + + vec_validate (acl, INPUT_ACL_N_TABLES - 1); + vec_set (acl, ~0); + + VALIDATE_SW_IF_INDEX (mp); + + input_acl_main_t *am = &input_acl_main; + + int if_idx; + u32 type; + + for (type = 0; type < INPUT_ACL_N_TABLES; type++) + { + u32 *vec_tbl = am->classify_table_index_by_sw_if_index[type]; + if (vec_len (vec_tbl)) + { + for (if_idx = 0; if_idx < vec_len (vec_tbl); if_idx++) + { + if (vec_elt (vec_tbl, if_idx) == ~0 || sw_if_index != if_idx) + { + continue; + } + acl[type] = vec_elt (vec_tbl, if_idx); + } + } + } + + BAD_SW_IF_INDEX_LABEL; + + /* *INDENT-OFF* */ + REPLY_MACRO2(VL_API_CLASSIFY_TABLE_BY_INTERFACE_REPLY, + ({ + rmp->sw_if_index = ntohl(sw_if_index); + rmp->l2_table_id = ntohl(acl[INPUT_ACL_TABLE_L2]); + rmp->ip4_table_id = ntohl(acl[INPUT_ACL_TABLE_IP4]); + rmp->ip6_table_id = ntohl(acl[INPUT_ACL_TABLE_IP6]); + })); + /* *INDENT-ON* */ + vec_free (acl); 
+} + +static void +vl_api_classify_table_info_t_handler (vl_api_classify_table_info_t * mp) +{ + unix_shared_memory_queue_t *q; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + vl_api_classify_table_info_reply_t *rmp = 0; + + vnet_classify_main_t *cm = &vnet_classify_main; + u32 table_id = ntohl (mp->table_id); + vnet_classify_table_t *t; + + /* *INDENT-OFF* */ + pool_foreach (t, cm->tables, + ({ + if (table_id == t - cm->tables) + { + rmp = vl_msg_api_alloc_as_if_client + (sizeof (*rmp) + t->match_n_vectors * sizeof (u32x4)); + rmp->_vl_msg_id = ntohs (VL_API_CLASSIFY_TABLE_INFO_REPLY); + rmp->context = mp->context; + rmp->table_id = ntohl(table_id); + rmp->nbuckets = ntohl(t->nbuckets); + rmp->match_n_vectors = ntohl(t->match_n_vectors); + rmp->skip_n_vectors = ntohl(t->skip_n_vectors); + rmp->active_sessions = ntohl(t->active_elements); + rmp->next_table_index = ntohl(t->next_table_index); + rmp->miss_next_index = ntohl(t->miss_next_index); + rmp->mask_length = ntohl(t->match_n_vectors * sizeof (u32x4)); + clib_memcpy(rmp->mask, t->mask, t->match_n_vectors * sizeof(u32x4)); + rmp->retval = 0; + break; + } + })); + /* *INDENT-ON* */ + + if (rmp == 0) + { + rmp = vl_msg_api_alloc (sizeof (*rmp)); + rmp->_vl_msg_id = ntohs ((VL_API_CLASSIFY_TABLE_INFO_REPLY)); + rmp->context = mp->context; + rmp->retval = ntohl (VNET_API_ERROR_CLASSIFY_TABLE_NOT_FOUND); + } + + vl_msg_api_send_shmem (q, (u8 *) & rmp); +} + +static void +vl_api_classify_session_details_t_handler (vl_api_classify_session_details_t * + mp) +{ + clib_warning ("BUG"); +} + +static void +send_classify_session_details (unix_shared_memory_queue_t * q, + u32 table_id, + u32 match_length, + vnet_classify_entry_t * e, u32 context) +{ + vl_api_classify_session_details_t *rmp; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_CLASSIFY_SESSION_DETAILS); + rmp->context = context; + rmp->table_id = ntohl (table_id); + rmp->hit_next_index = ntohl (e->next_index); + rmp->advance = ntohl (e->advance); + rmp->opaque_index = ntohl (e->opaque_index); + rmp->match_length = ntohl (match_length); + clib_memcpy (rmp->match, e->key, match_length); + + vl_msg_api_send_shmem (q, (u8 *) & rmp); +} + +static void +vl_api_classify_session_dump_t_handler (vl_api_classify_session_dump_t * mp) +{ + vnet_classify_main_t *cm = &vnet_classify_main; + unix_shared_memory_queue_t *q; + + u32 table_id = ntohl (mp->table_id); + vnet_classify_table_t *t; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (!q) + return; + + /* *INDENT-OFF* */ + pool_foreach (t, cm->tables, + ({ + if (table_id == t - cm->tables) + { + vnet_classify_bucket_t * b; + vnet_classify_entry_t * v, * save_v; + int i, j, k; + + for (i = 0; i < t->nbuckets; i++) + { + b = &t->buckets [i]; + if (b->offset == 0) + continue; + + save_v = vnet_classify_get_entry (t, b->offset); + for (j = 0; j < (1<log2_pages); j++) + { + for (k = 0; k < t->entries_per_page; k++) + { + v = vnet_classify_entry_at_index + (t, save_v, j*t->entries_per_page + k); + if (vnet_classify_entry_is_free (v)) + continue; + + send_classify_session_details + (q, table_id, t->match_n_vectors * sizeof (u32x4), + v, mp->context); + } + } + } + break; + } + })); + /* *INDENT-ON* */ +} + +static void +vl_api_set_ipfix_exporter_t_handler (vl_api_set_ipfix_exporter_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + flow_report_main_t *frm = &flow_report_main; + vl_api_set_ipfix_exporter_reply_t *rmp; + ip4_address_t 
collector, src; + u16 collector_port = UDP_DST_PORT_ipfix; + u32 path_mtu; + u32 template_interval; + u8 udp_checksum; + u32 fib_id; + u32 fib_index = ~0; + int rv = 0; + + memcpy (collector.data, mp->collector_address, sizeof (collector.data)); + collector_port = ntohs (mp->collector_port); + if (collector_port == (u16) ~ 0) + collector_port = UDP_DST_PORT_ipfix; + memcpy (src.data, mp->src_address, sizeof (src.data)); + fib_id = ntohl (mp->vrf_id); + + ip4_main_t *im = &ip4_main; + if (fib_id == ~0) + { + fib_index = ~0; + } + else + { + uword *p = hash_get (im->fib_index_by_table_id, fib_id); + if (!p) + { + rv = VNET_API_ERROR_NO_SUCH_FIB; + goto out; + } + fib_index = p[0]; + } + + path_mtu = ntohl (mp->path_mtu); + if (path_mtu == ~0) + path_mtu = 512; // RFC 7011 section 10.3.3. + template_interval = ntohl (mp->template_interval); + if (template_interval == ~0) + template_interval = 20; + udp_checksum = mp->udp_checksum; + + if (collector.as_u32 == 0) + { + rv = VNET_API_ERROR_INVALID_VALUE; + goto out; + } + + if (src.as_u32 == 0) + { + rv = VNET_API_ERROR_INVALID_VALUE; + goto out; + } + + if (path_mtu > 1450 /* vpp does not support fragmentation */ ) + { + rv = VNET_API_ERROR_INVALID_VALUE; + goto out; + } + + if (path_mtu < 68) + { + rv = VNET_API_ERROR_INVALID_VALUE; + goto out; + } + + /* Reset report streams if we are reconfiguring IP addresses */ + if (frm->ipfix_collector.as_u32 != collector.as_u32 || + frm->src_address.as_u32 != src.as_u32 || + frm->collector_port != collector_port) + vnet_flow_reports_reset (frm); + + frm->ipfix_collector.as_u32 = collector.as_u32; + frm->collector_port = collector_port; + frm->src_address.as_u32 = src.as_u32; + frm->fib_index = fib_index; + frm->path_mtu = path_mtu; + frm->template_interval = template_interval; + frm->udp_checksum = udp_checksum; + + /* Turn on the flow reporting process */ + vlib_process_signal_event (vm, flow_report_process_node.index, 1, 0); + +out: + REPLY_MACRO (VL_API_SET_IPFIX_EXPORTER_REPLY); +} + +static void +vl_api_ipfix_exporter_dump_t_handler (vl_api_ipfix_exporter_dump_t * mp) +{ + flow_report_main_t *frm = &flow_report_main; + unix_shared_memory_queue_t *q; + vl_api_ipfix_exporter_details_t *rmp; + ip4_main_t *im = &ip4_main; + u32 vrf_id; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (!q) + return; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_IPFIX_EXPORTER_DETAILS); + rmp->context = mp->context; + memcpy (rmp->collector_address, frm->ipfix_collector.data, + sizeof (frm->ipfix_collector.data)); + rmp->collector_port = htons (frm->collector_port); + memcpy (rmp->src_address, frm->src_address.data, + sizeof (frm->src_address.data)); + if (frm->fib_index == ~0) + vrf_id = ~0; + else + vrf_id = im->fibs[frm->fib_index].ft_table_id; + rmp->vrf_id = htonl (vrf_id); + rmp->path_mtu = htonl (frm->path_mtu); + rmp->template_interval = htonl (frm->template_interval); + rmp->udp_checksum = (frm->udp_checksum != 0); + + vl_msg_api_send_shmem (q, (u8 *) & rmp); +} + +static void + vl_api_set_ipfix_classify_stream_t_handler + (vl_api_set_ipfix_classify_stream_t * mp) +{ + vl_api_set_ipfix_classify_stream_reply_t *rmp; + flow_report_classify_main_t *fcm = &flow_report_classify_main; + flow_report_main_t *frm = &flow_report_main; + u32 domain_id = 0; + u32 src_port = UDP_DST_PORT_ipfix; + int rv = 0; + + domain_id = ntohl (mp->domain_id); + src_port = ntohs (mp->src_port); + + if (fcm->src_port != 0 && + (fcm->domain_id != domain_id 
|| fcm->src_port != (u16) src_port)) + { + int rv = vnet_stream_change (frm, fcm->domain_id, fcm->src_port, + domain_id, (u16) src_port); + ASSERT (rv == 0); + } + + fcm->domain_id = domain_id; + fcm->src_port = (u16) src_port; + + REPLY_MACRO (VL_API_SET_IPFIX_CLASSIFY_STREAM_REPLY); +} + +static void + vl_api_ipfix_classify_stream_dump_t_handler + (vl_api_ipfix_classify_stream_dump_t * mp) +{ + flow_report_classify_main_t *fcm = &flow_report_classify_main; + unix_shared_memory_queue_t *q; + vl_api_ipfix_classify_stream_details_t *rmp; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (!q) + return; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_IPFIX_CLASSIFY_STREAM_DETAILS); + rmp->context = mp->context; + rmp->domain_id = htonl (fcm->domain_id); + rmp->src_port = htons (fcm->src_port); + + vl_msg_api_send_shmem (q, (u8 *) & rmp); +} + +static void + vl_api_ipfix_classify_table_add_del_t_handler + (vl_api_ipfix_classify_table_add_del_t * mp) +{ + vl_api_ipfix_classify_table_add_del_reply_t *rmp; + flow_report_classify_main_t *fcm = &flow_report_classify_main; + flow_report_main_t *frm = &flow_report_main; + vnet_flow_report_add_del_args_t args; + ipfix_classify_table_t *table; + int is_add; + u32 classify_table_index; + u8 ip_version; + u8 transport_protocol; + int rv = 0; + + classify_table_index = ntohl (mp->table_id); + ip_version = mp->ip_version; + transport_protocol = mp->transport_protocol; + is_add = mp->is_add; + + if (fcm->src_port == 0) + { + /* call set_ipfix_classify_stream first */ + rv = VNET_API_ERROR_UNSPECIFIED; + goto out; + } + + memset (&args, 0, sizeof (args)); + + table = 0; + int i; + for (i = 0; i < vec_len (fcm->tables); i++) + if (ipfix_classify_table_index_valid (i)) + if (fcm->tables[i].classify_table_index == classify_table_index) + { + table = &fcm->tables[i]; + break; + } + + if (is_add) + { + if (table) + { + rv = VNET_API_ERROR_VALUE_EXIST; + goto out; + } + table = ipfix_classify_add_table (); + table->classify_table_index = classify_table_index; + } + else + { + if (!table) + { + rv = VNET_API_ERROR_NO_SUCH_ENTRY; + goto out; + } + } + + table->ip_version = ip_version; + table->transport_protocol = transport_protocol; + + args.opaque.as_uword = table - fcm->tables; + args.rewrite_callback = ipfix_classify_template_rewrite; + args.flow_data_callback = ipfix_classify_send_flows; + args.is_add = is_add; + args.domain_id = fcm->domain_id; + args.src_port = fcm->src_port; + + rv = vnet_flow_report_add_del (frm, &args); + + /* If deleting, or add failed */ + if (is_add == 0 || (rv && is_add)) + ipfix_classify_delete_table (table - fcm->tables); + +out: + REPLY_MACRO (VL_API_SET_IPFIX_CLASSIFY_STREAM_REPLY); +} + +static void +send_ipfix_classify_table_details (u32 table_index, + unix_shared_memory_queue_t * q, + u32 context) +{ + flow_report_classify_main_t *fcm = &flow_report_classify_main; + vl_api_ipfix_classify_table_details_t *mp; + + ipfix_classify_table_t *table = &fcm->tables[table_index]; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_IPFIX_CLASSIFY_TABLE_DETAILS); + mp->context = context; + mp->table_id = htonl (table->classify_table_index); + mp->ip_version = table->ip_version; + mp->transport_protocol = table->transport_protocol; + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +static void + vl_api_ipfix_classify_table_dump_t_handler + (vl_api_ipfix_classify_table_dump_t * mp) +{ + 
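+  /* Walk every valid ipfix classify table and send one details
+   * message per table back to the requesting client. */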
flow_report_classify_main_t *fcm = &flow_report_classify_main; + unix_shared_memory_queue_t *q; + u32 i; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (!q) + return; + + for (i = 0; i < vec_len (fcm->tables); i++) + if (ipfix_classify_table_index_valid (i)) + send_ipfix_classify_table_details (i, q, mp->context); +} + +static void +vl_api_pg_create_interface_t_handler (vl_api_pg_create_interface_t * mp) +{ + vl_api_pg_create_interface_reply_t *rmp; + int rv = 0; + + pg_main_t *pg = &pg_main; + u32 pg_if_id = pg_interface_add_or_get (pg, ntohl (mp->interface_id)); + pg_interface_t *pi = pool_elt_at_index (pg->interfaces, pg_if_id); + + /* *INDENT-OFF* */ + REPLY_MACRO2(VL_API_PG_CREATE_INTERFACE_REPLY, + ({ + rmp->sw_if_index = ntohl(pi->sw_if_index); + })); + /* *INDENT-ON* */ +} + +static void +vl_api_pg_capture_t_handler (vl_api_pg_capture_t * mp) +{ + vl_api_pg_capture_reply_t *rmp; + int rv = 0; + + vnet_main_t *vnm = vnet_get_main (); + vnet_interface_main_t *im = &vnm->interface_main; + vnet_hw_interface_t *hi = 0; + + u8 *intf_name = format (0, "pg%d", ntohl (mp->interface_id), 0); + u32 hw_if_index = ~0; + uword *p = hash_get_mem (im->hw_interface_by_name, intf_name); + if (p) + hw_if_index = *p; + vec_free (intf_name); + + if (hw_if_index != ~0) + { + pg_capture_args_t _a, *a = &_a; + + u32 len = ntohl (mp->pcap_name_length); + u8 *pcap_file_name = vec_new (u8, len); + clib_memcpy (pcap_file_name, mp->pcap_file_name, len); + + hi = vnet_get_sup_hw_interface (vnm, hw_if_index); + a->hw_if_index = hw_if_index; + a->dev_instance = hi->dev_instance; + a->is_enabled = mp->is_enabled; + a->pcap_file_name = pcap_file_name; + a->count = ntohl (mp->count); + + clib_error_t *e = pg_capture (a); + if (e) + { + clib_error_report (e); + rv = VNET_API_ERROR_CANNOT_CREATE_PCAP_FILE; + } + + vec_free (pcap_file_name); + } + REPLY_MACRO (VL_API_PG_CAPTURE_REPLY); +} + +static void +vl_api_pg_enable_disable_t_handler (vl_api_pg_enable_disable_t * mp) +{ + vl_api_pg_enable_disable_reply_t *rmp; + int rv = 0; + + pg_main_t *pg = &pg_main; + u32 stream_index = ~0; + + int is_enable = mp->is_enabled != 0; + u32 len = ntohl (mp->stream_name_length) - 1; + + if (len > 0) + { + u8 *stream_name = vec_new (u8, len); + clib_memcpy (stream_name, mp->stream_name, len); + uword *p = hash_get_mem (pg->stream_index_by_name, stream_name); + if (p) + stream_index = *p; + vec_free (stream_name); + } + + pg_enable_disable (stream_index, is_enable); + + REPLY_MACRO (VL_API_PG_ENABLE_DISABLE_REPLY); +} + +static void + vl_api_ip_source_and_port_range_check_add_del_t_handler + (vl_api_ip_source_and_port_range_check_add_del_t * mp) +{ + vl_api_ip_source_and_port_range_check_add_del_reply_t *rmp; + int rv = 0; + + u8 is_ipv6 = mp->is_ipv6; + u8 is_add = mp->is_add; + u8 mask_length = mp->mask_length; + ip4_address_t ip4_addr; + ip6_address_t ip6_addr; + u16 *low_ports = 0; + u16 *high_ports = 0; + u32 vrf_id; + u16 tmp_low, tmp_high; + u8 num_ranges; + int i; + + // Validate port range + num_ranges = mp->number_of_ranges; + if (num_ranges > 32) + { // This is size of array in VPE.API + rv = VNET_API_ERROR_EXCEEDED_NUMBER_OF_RANGES_CAPACITY; + goto reply; + } + + vec_reset_length (low_ports); + vec_reset_length (high_ports); + + for (i = 0; i < num_ranges; i++) + { + tmp_low = mp->low_ports[i]; + tmp_high = mp->high_ports[i]; + // If tmp_low <= tmp_high then only need to check tmp_low = 0 + // If tmp_low <= tmp_high then only need to check tmp_high > 65535 + if (tmp_low > tmp_high || tmp_low == 0 || 
tmp_high > 65535) + { + rv = VNET_API_ERROR_INVALID_VALUE; + goto reply; + } + vec_add1 (low_ports, tmp_low); + vec_add1 (high_ports, tmp_high + 1); + } + + // Validate mask_length + if ((is_ipv6 && mask_length > 128) || (!is_ipv6 && mask_length > 32)) + { + rv = VNET_API_ERROR_ADDRESS_LENGTH_MISMATCH; + goto reply; + } + + vrf_id = ntohl (mp->vrf_id); + + if (vrf_id < 1) + { + rv = VNET_API_ERROR_INVALID_VALUE; + goto reply; + } + + + if (is_ipv6) + { + clib_memcpy (ip6_addr.as_u8, mp->address, sizeof (ip6_addr.as_u8)); + rv = ip6_source_and_port_range_check_add_del (&ip6_addr, + mask_length, + vrf_id, + low_ports, + high_ports, is_add); + } + else + { + clib_memcpy (ip4_addr.data, mp->address, sizeof (ip4_addr)); + rv = ip4_source_and_port_range_check_add_del (&ip4_addr, + mask_length, + vrf_id, + low_ports, + high_ports, is_add); + } + +reply: + vec_free (low_ports); + vec_free (high_ports); + REPLY_MACRO (VL_API_IP_SOURCE_AND_PORT_RANGE_CHECK_ADD_DEL_REPLY); +} + +static void + vl_api_ip_source_and_port_range_check_interface_add_del_t_handler + (vl_api_ip_source_and_port_range_check_interface_add_del_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_ip_source_and_port_range_check_interface_add_del_reply_t *rmp; + ip4_main_t *im = &ip4_main; + int rv; + u32 sw_if_index; + u32 fib_index[IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS]; + u32 vrf_id[IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS]; + uword *p = 0; + int i; + + vrf_id[IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_OUT] = + ntohl (mp->tcp_out_vrf_id); + vrf_id[IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_OUT] = + ntohl (mp->udp_out_vrf_id); + vrf_id[IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_IN] = + ntohl (mp->tcp_in_vrf_id); + vrf_id[IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_IN] = + ntohl (mp->udp_in_vrf_id); + + + for (i = 0; i < IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS; i++) + { + if (vrf_id[i] != 0 && vrf_id[i] != ~0) + { + p = hash_get (im->fib_index_by_table_id, vrf_id[i]); + + if (p == 0) + { + rv = VNET_API_ERROR_INVALID_VALUE; + goto reply; + } + + fib_index[i] = p[0]; + } + else + fib_index[i] = ~0; + } + sw_if_index = ntohl (mp->sw_if_index); + + VALIDATE_SW_IF_INDEX (mp); + + rv = + set_ip_source_and_port_range_check (vm, fib_index, sw_if_index, + mp->is_add); + + BAD_SW_IF_INDEX_LABEL; +reply: + + REPLY_MACRO (VL_API_IP_SOURCE_AND_PORT_RANGE_CHECK_INTERFACE_ADD_DEL_REPLY); +} + +static void +vl_api_delete_subif_t_handler (vl_api_delete_subif_t * mp) +{ + vl_api_delete_subif_reply_t *rmp; + int rv; + + rv = vnet_delete_sub_interface (ntohl (mp->sw_if_index)); + + REPLY_MACRO (VL_API_DELETE_SUBIF_REPLY); +} + +static void + vl_api_l2_interface_pbb_tag_rewrite_t_handler + (vl_api_l2_interface_pbb_tag_rewrite_t * mp) +{ + vl_api_l2_interface_pbb_tag_rewrite_reply_t *rmp; + vnet_main_t *vnm = vnet_get_main (); + vlib_main_t *vm = vlib_get_main (); + u32 vtr_op; + int rv = 0; + + VALIDATE_SW_IF_INDEX (mp); + + vtr_op = ntohl (mp->vtr_op); + + switch (vtr_op) + { + case L2_VTR_DISABLED: + case L2_VTR_PUSH_2: + case L2_VTR_POP_2: + case L2_VTR_TRANSLATE_2_1: + break; + + default: + rv = VNET_API_ERROR_INVALID_VALUE; + goto bad_sw_if_index; + } + + rv = l2pbb_configure (vm, vnm, ntohl (mp->sw_if_index), vtr_op, + mp->b_dmac, mp->b_smac, ntohs (mp->b_vlanid), + ntohl (mp->i_sid), ntohs (mp->outer_tag)); + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_L2_INTERFACE_PBB_TAG_REWRITE_REPLY); + +} + +static void +vl_api_punt_t_handler (vl_api_punt_t * mp) +{ + vl_api_punt_reply_t *rmp; + vlib_main_t *vm = vlib_get_main (); + 
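+  /* Enable or disable punting for the given IP version, L4 protocol and
+   * port; an error from vnet_punt_add_del is reported and mapped to -1. */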
int rv = 0; + clib_error_t *error; + + error = vnet_punt_add_del (vm, mp->ipv, mp->l4_protocol, + ntohs (mp->l4_port), mp->is_add); + if (error) + { + rv = -1; + clib_error_report (error); + } + + REPLY_MACRO (VL_API_PUNT_REPLY); +} + +static void + vl_api_flow_classify_set_interface_t_handler + (vl_api_flow_classify_set_interface_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_flow_classify_set_interface_reply_t *rmp; + int rv; + u32 sw_if_index, ip4_table_index, ip6_table_index; + + ip4_table_index = ntohl (mp->ip4_table_index); + ip6_table_index = ntohl (mp->ip6_table_index); + sw_if_index = ntohl (mp->sw_if_index); + + VALIDATE_SW_IF_INDEX (mp); + + rv = vnet_set_flow_classify_intfc (vm, sw_if_index, ip4_table_index, + ip6_table_index, mp->is_add); + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_FLOW_CLASSIFY_SET_INTERFACE_REPLY); +} + +static void +send_flow_classify_details (u32 sw_if_index, + u32 table_index, + unix_shared_memory_queue_t * q, u32 context) +{ + vl_api_flow_classify_details_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_FLOW_CLASSIFY_DETAILS); + mp->context = context; + mp->sw_if_index = htonl (sw_if_index); + mp->table_index = htonl (table_index); + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +static void +vl_api_flow_classify_dump_t_handler (vl_api_flow_classify_dump_t * mp) +{ + unix_shared_memory_queue_t *q; + flow_classify_main_t *pcm = &flow_classify_main; + u32 *vec_tbl; + int i; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + vec_tbl = pcm->classify_table_index_by_sw_if_index[mp->type]; + + if (vec_len (vec_tbl)) + { + for (i = 0; i < vec_len (vec_tbl); i++) + { + if (vec_elt (vec_tbl, i) == ~0) + continue; + + send_flow_classify_details (i, vec_elt (vec_tbl, i), q, + mp->context); + } + } +} + +static void +vl_api_feature_enable_disable_t_handler (vl_api_feature_enable_disable_t * mp) +{ + vl_api_feature_enable_disable_reply_t *rmp; + int rv = 0; + u8 *arc_name, *feature_name; + + VALIDATE_SW_IF_INDEX (mp); + + arc_name = format (0, "%s%c", mp->arc_name, 0); + feature_name = format (0, "%s%c", mp->feature_name, 0); + + vnet_feature_registration_t *reg; + reg = + vnet_get_feature_reg ((const char *) arc_name, + (const char *) feature_name); + if (reg == 0) + rv = VNET_API_ERROR_INVALID_VALUE; + else + { + u32 sw_if_index; + clib_error_t *error = 0; + + sw_if_index = ntohl (mp->sw_if_index); + if (reg->enable_disable_cb) + error = reg->enable_disable_cb (sw_if_index, mp->enable); + if (!error) + vnet_feature_enable_disable ((const char *) arc_name, + (const char *) feature_name, + sw_if_index, mp->enable, 0, 0); + else + { + clib_error_report (error); + rv = VNET_API_ERROR_CANNOT_ENABLE_DISABLE_FEATURE; + } + } + + vec_free (feature_name); + vec_free (arc_name); + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_FEATURE_ENABLE_DISABLE_REPLY); +} + +#define BOUNCE_HANDLER(nn) \ +static void vl_api_##nn##_t_handler ( \ + vl_api_##nn##_t *mp) \ +{ \ + vpe_client_registration_t *reg; \ + vpe_api_main_t * vam = &vpe_api_main; \ + unix_shared_memory_queue_t * q; \ + \ + /* One registration only... */ \ + pool_foreach(reg, vam->nn##_registrations, \ + ({ \ + q = vl_api_client_index_to_input_queue (reg->client_index); \ + if (q) { \ + /* \ + * If the queue is stuffed, turf the msg and complain \ + * It's unlikely that the intended recipient is \ + * alive; avoid deadlock at all costs. 
\ + */ \ + if (q->cursize == q->maxsize) { \ + clib_warning ("ERROR: receiver queue full, drop msg"); \ + vl_msg_api_free (mp); \ + return; \ + } \ + vl_msg_api_send_shmem (q, (u8 *)&mp); \ + return; \ + } \ + })); \ + vl_msg_api_free (mp); \ +} + +static void setup_message_id_table (api_main_t * am); + +/* + * vpe_api_hookup + * Add vpe's API message handlers to the table. + * vlib has alread mapped shared memory and + * added the client registration handlers. + * See .../open-repo/vlib/memclnt_vlib.c:memclnt_process() + */ +static clib_error_t * +vpe_api_hookup (vlib_main_t * vm) +{ + api_main_t *am = &api_main; + +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_vpe_api_msg; +#undef _ + + /* + * Manually register the sr tunnel add del msg, so we trace + * enough bytes to capture a typical segment list + */ + vl_msg_api_set_handlers (VL_API_SR_TUNNEL_ADD_DEL, + "sr_tunnel_add_del", + vl_api_sr_tunnel_add_del_t_handler, + vl_noop_handler, + vl_api_sr_tunnel_add_del_t_endian, + vl_api_sr_tunnel_add_del_t_print, 256, 1); + + + /* + * Manually register the sr policy add del msg, so we trace + * enough bytes to capture a typical tunnel name list + */ + vl_msg_api_set_handlers (VL_API_SR_POLICY_ADD_DEL, + "sr_policy_add_del", + vl_api_sr_policy_add_del_t_handler, + vl_noop_handler, + vl_api_sr_policy_add_del_t_endian, + vl_api_sr_policy_add_del_t_print, 256, 1); + + /* + * Trace space for 8 MPLS encap labels, classifier mask+match + */ + am->api_trace_cfg[VL_API_MPLS_TUNNEL_ADD_DEL].size += 8 * sizeof (u32); + am->api_trace_cfg[VL_API_CLASSIFY_ADD_DEL_TABLE].size += 5 * sizeof (u32x4); + am->api_trace_cfg[VL_API_CLASSIFY_ADD_DEL_SESSION].size + += 5 * sizeof (u32x4); + am->api_trace_cfg[VL_API_VXLAN_ADD_DEL_TUNNEL].size += 16 * sizeof (u32); + + /* + * Thread-safe API messages + */ + am->is_mp_safe[VL_API_IP_ADD_DEL_ROUTE] = 1; + am->is_mp_safe[VL_API_GET_NODE_GRAPH] = 1; + + /* + * Set up the (msg_name, crc, message-id) table + */ + setup_message_id_table (am); + + return 0; +} + +VLIB_API_INIT_FUNCTION (vpe_api_hookup); + +static clib_error_t * +vpe_api_init (vlib_main_t * vm) +{ + vpe_api_main_t *am = &vpe_api_main; + + am->vlib_main = vm; + am->vnet_main = vnet_get_main (); + am->interface_events_registration_hash = hash_create (0, sizeof (uword)); + am->to_netconf_server_registration_hash = hash_create (0, sizeof (uword)); + am->from_netconf_server_registration_hash = hash_create (0, sizeof (uword)); + am->to_netconf_client_registration_hash = hash_create (0, sizeof (uword)); + am->from_netconf_client_registration_hash = hash_create (0, sizeof (uword)); + am->oam_events_registration_hash = hash_create (0, sizeof (uword)); + am->bfd_events_registration_hash = hash_create (0, sizeof (uword)); + + vl_api_init (vm); + vl_set_memory_region_name ("/vpe-api"); + vl_enable_disable_memory_api (vm, 1 /* enable it */ ); + + return 0; +} + +VLIB_INIT_FUNCTION (vpe_api_init); + + +static clib_error_t * +api_segment_config (vlib_main_t * vm, unformat_input_t * input) +{ + u8 *chroot_path; + u64 baseva, size, pvt_heap_size; + int uid, gid, rv; + const int max_buf_size = 4096; + char *s, *buf; + struct passwd _pw, *pw; + struct group _grp, *grp; + clib_error_t *e; + buf = vec_new (char, 128); + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "prefix %s", &chroot_path)) + { + vec_add1 (chroot_path, 0); + 
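+	  /* NUL-terminate the unformat-allocated vector before handing
+	   * the path on as a C string */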
vl_set_memory_root_path ((char *) chroot_path); + } + else if (unformat (input, "uid %d", &uid)) + vl_set_memory_uid (uid); + else if (unformat (input, "gid %d", &gid)) + vl_set_memory_gid (gid); + else if (unformat (input, "baseva %llx", &baseva)) + vl_set_global_memory_baseva (baseva); + else if (unformat (input, "global-size %lldM", &size)) + vl_set_global_memory_size (size * (1ULL << 20)); + else if (unformat (input, "global-size %lldG", &size)) + vl_set_global_memory_size (size * (1ULL << 30)); + else if (unformat (input, "global-size %lld", &size)) + vl_set_global_memory_size (size); + else if (unformat (input, "global-pvt-heap-size %lldM", &pvt_heap_size)) + vl_set_global_pvt_heap_size (pvt_heap_size * (1ULL << 20)); + else if (unformat (input, "global-pvt-heap-size size %lld", + &pvt_heap_size)) + vl_set_global_pvt_heap_size (pvt_heap_size); + else if (unformat (input, "api-pvt-heap-size %lldM", &pvt_heap_size)) + vl_set_api_pvt_heap_size (pvt_heap_size * (1ULL << 20)); + else if (unformat (input, "api-pvt-heap-size size %lld", + &pvt_heap_size)) + vl_set_api_pvt_heap_size (pvt_heap_size); + else if (unformat (input, "api-size %lldM", &size)) + vl_set_api_memory_size (size * (1ULL << 20)); + else if (unformat (input, "api-size %lldG", &size)) + vl_set_api_memory_size (size * (1ULL << 30)); + else if (unformat (input, "api-size %lld", &size)) + vl_set_api_memory_size (size); + else if (unformat (input, "uid %s", &s)) + { + /* lookup the username */ + pw = NULL; + while (((rv = + getpwnam_r (s, &_pw, buf, vec_len (buf), &pw)) == ERANGE) + && (vec_len (buf) <= max_buf_size)) + { + vec_resize (buf, vec_len (buf) * 2); + } + if (rv < 0) + { + e = clib_error_return_code (0, rv, + CLIB_ERROR_ERRNO_VALID | + CLIB_ERROR_FATAL, + "cannot fetch username %s", s); + vec_free (s); + vec_free (buf); + return e; + } + if (pw == NULL) + { + e = + clib_error_return_fatal (0, "username %s does not exist", s); + vec_free (s); + vec_free (buf); + return e; + } + vec_free (s); + vl_set_memory_uid (pw->pw_uid); + } + else if (unformat (input, "gid %s", &s)) + { + /* lookup the group name */ + grp = NULL; + while (((rv = + getgrnam_r (s, &_grp, buf, vec_len (buf), &grp)) == ERANGE) + && (vec_len (buf) <= max_buf_size)) + { + vec_resize (buf, vec_len (buf) * 2); + } + if (rv != 0) + { + e = clib_error_return_code (0, rv, + CLIB_ERROR_ERRNO_VALID | + CLIB_ERROR_FATAL, + "cannot fetch group %s", s); + vec_free (s); + vec_free (buf); + return e; + } + if (grp == NULL) + { + e = clib_error_return_fatal (0, "group %s does not exist", s); + vec_free (s); + vec_free (buf); + return e; + } + vec_free (s); + vec_free (buf); + vl_set_memory_gid (grp->gr_gid); + } + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + return 0; +} + +VLIB_EARLY_CONFIG_FUNCTION (api_segment_config, "api-segment"); + +void * +get_unformat_vnet_sw_interface (void) +{ + return (void *) &unformat_vnet_sw_interface; +} + +static u8 * +format_arp_event (u8 * s, va_list * args) +{ + vl_api_ip4_arp_event_t *event = va_arg (*args, vl_api_ip4_arp_event_t *); + + s = format (s, "pid %d: ", event->pid); + if (event->mac_ip) + s = format (s, "bd mac/ip4 binding events"); + else + s = format (s, "resolution for %U", format_ip4_address, &event->address); + return s; +} + +static u8 * +format_nd_event (u8 * s, va_list * args) +{ + vl_api_ip6_nd_event_t *event = va_arg (*args, vl_api_ip6_nd_event_t *); + + s = format (s, "pid %d: ", event->pid); + if (event->mac_ip) + s = format (s, "bd mac/ip6 binding 
events"); + else + s = format (s, "resolution for %U", format_ip6_address, event->address); + return s; +} + +static clib_error_t * +show_ip_arp_nd_events_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + vpe_api_main_t *am = &vpe_api_main; + vl_api_ip4_arp_event_t *arp_event; + vl_api_ip6_nd_event_t *nd_event; + + if ((pool_elts (am->arp_events) == 0) && (pool_elts (am->nd_events) == 0)) + { + vlib_cli_output (vm, "No active arp or nd event registrations"); + return 0; + } + + /* *INDENT-OFF* */ + pool_foreach (arp_event, am->arp_events, + ({ + vlib_cli_output (vm, "%U", format_arp_event, arp_event); + })); + + pool_foreach (nd_event, am->nd_events, + ({ + vlib_cli_output (vm, "%U", format_nd_event, nd_event); + })); + /* *INDENT-ON* */ + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_ip_arp_nd_events, static) = { + .path = "show arp-nd-event registrations", + .function = show_ip_arp_nd_events_fn, + .short_help = "Show ip4 arp and ip6 nd event registrations", +}; +/* *INDENT-ON* */ + +#define vl_msg_name_crc_list +#include +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (api_main_t * am) +{ +#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id); + foreach_vl_msg_name_crc_memclnt; + foreach_vl_msg_name_crc_vpe; +#undef _ +} + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vpp/api/api_format.c b/src/vpp/api/api_format.c new file mode 120000 index 00000000..cec72cc0 --- /dev/null +++ b/src/vpp/api/api_format.c @@ -0,0 +1 @@ +../../vat/api_format.c \ No newline at end of file diff --git a/src/vpp/api/api_main.c b/src/vpp/api/api_main.c new file mode 100644 index 00000000..db532061 --- /dev/null +++ b/src/vpp/api/api_main.c @@ -0,0 +1,192 @@ +#include "vat.h" + +vat_main_t vat_main; + +void +vat_suspend (vlib_main_t * vm, f64 interval) +{ + vlib_process_suspend (vm, interval); +} + +static u8 * +format_api_error (u8 * s, va_list * args) +{ + vat_main_t *vam = va_arg (*args, vat_main_t *); + i32 error = va_arg (*args, u32); + uword *p; + + p = hash_get (vam->error_string_by_error_number, -error); + + if (p) + s = format (s, "%s", p[0]); + else + s = format (s, "%d", error); + return s; +} + + +static void +init_error_string_table (vat_main_t * vam) +{ + + vam->error_string_by_error_number = hash_create (0, sizeof (uword)); + +#define _(n,v,s) hash_set (vam->error_string_by_error_number, -v, s); + foreach_vnet_api_error; +#undef _ + + hash_set (vam->error_string_by_error_number, 99, "Misc"); +} + +static clib_error_t * +api_main_init (vlib_main_t * vm) +{ + vat_main_t *vam = &vat_main; + + vam->vlib_main = vm; + vam->my_client_index = (u32) ~ 0; + init_error_string_table (vam); + vat_api_hookup (vam); + return 0; +} + +VLIB_INIT_FUNCTION (api_main_init); + +static clib_error_t * +api_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + vat_main_t *vam = &vat_main; + unformat_input_t _input; + uword c; + u8 *cmdp, *argsp, *this_cmd; + uword *p; + u32 arg_len; + int rv; + int (*fp) (vat_main_t *); + api_main_t *am = &api_main; + + vam->vl_input_queue = am->shmem_hdr->vl_input_queue; + + vec_reset_length (vam->inbuf); + vam->input = &_input; + + while (((c = unformat_get_input (input)) != '\n') && + (c != UNFORMAT_END_OF_INPUT)) + vec_add1 (vam->inbuf, c); + + /* Add 1 octet's worth of extra space in case there are no args... 
*/ + vec_add1 (vam->inbuf, 0); + + /*$$$$ reinstall macro evaluator */ + + /* Split input into cmd + args */ + this_cmd = cmdp = vam->inbuf; + + while (cmdp < (this_cmd + vec_len (this_cmd))) + { + if (*cmdp == ' ' || *cmdp == '\t' || *cmdp == '\n') + { + cmdp++; + } + else + break; + } + + argsp = cmdp; + while (argsp < (this_cmd + vec_len (this_cmd))) + { + if (*argsp != ' ' && *argsp != '\t' && *argsp != '\n') + { + argsp++; + } + else + break; + } + *argsp++ = 0; + + while (argsp < (this_cmd + vec_len (this_cmd))) + { + if (*argsp == ' ' || *argsp == '\t' || *argsp == '\n') + { + argsp++; + } + else + break; + } + + /* Blank input line? */ + if (*cmdp == 0) + return 0; + + p = hash_get_mem (vam->function_by_name, cmdp); + if (p == 0) + { + return clib_error_return (0, "'%s': function not found\n", cmdp); + } + + arg_len = strlen ((char *) argsp); + + unformat_init_string (vam->input, (char *) argsp, arg_len); + fp = (void *) p[0]; + + rv = (*fp) (vam); + + if (rv < 0) + { + unformat_free (vam->input); + return clib_error_return (0, + "%s error: %U\n", cmdp, + format_api_error, vam, rv); + + if (vam->regenerate_interface_table) + { + vam->regenerate_interface_table = 0; + api_sw_interface_dump (vam); + } + } + unformat_free (vam->input); + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (api_command, static) = +{ + .path = "binary-api", + .short_help = "binary-api []", + .function = api_command_fn, +}; +/* *INDENT-ON* */ + +void +api_cli_output (void *notused, const char *fmt, ...) +{ + va_list va; + vat_main_t *vam = &vat_main; + vlib_main_t *vm = vam->vlib_main; + vlib_process_t *cp = vlib_get_current_process (vm); + u8 *s; + + va_start (va, fmt); + s = va_format (0, fmt, &va); + va_end (va); + + /* Terminate with \n if not present. */ + if (vec_len (s) > 0 && s[vec_len (s) - 1] != '\n') + vec_add1 (s, '\n'); + + if ((!cp) || (!cp->output_function)) + fformat (stdout, "%v", s); + else + cp->output_function (cp->output_function_arg, s, vec_len (s)); + + vec_free (s); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c new file mode 100644 index 00000000..1964533e --- /dev/null +++ b/src/vpp/api/custom_dump.c @@ -0,0 +1,3139 @@ +/* + *------------------------------------------------------------------ + * custom_dump.c - pretty-print API messages for replay + * + * Copyright (c) 2014-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) + +#define FINISH \ + vec_add1 (s, 0); \ + vl_print (handle, (char *)s); \ + vec_free (s); \ + return handle; + + +static void *vl_api_create_loopback_t_print + (vl_api_create_loopback_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: create_loopback "); + s = format (s, "mac %U ", format_ethernet_address, &mp->mac_address); + + FINISH; +} + +static void *vl_api_delete_loopback_t_print + (vl_api_delete_loopback_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: delete_loopback "); + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + + FINISH; +} + +static void *vl_api_sw_interface_set_flags_t_print + (vl_api_sw_interface_set_flags_t * mp, void *handle) +{ + u8 *s; + s = format (0, "SCRIPT: sw_interface_set_flags "); + + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + + if (mp->admin_up_down) + s = format (s, "admin-up "); + else + s = format (s, "admin-down "); + + if (mp->link_up_down) + s = format (s, "link-up"); + else + s = format (s, "link-down"); + + FINISH; +} + +static void *vl_api_sw_interface_add_del_address_t_print + (vl_api_sw_interface_add_del_address_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: sw_interface_add_del_address "); + + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + + if (mp->is_ipv6) + s = format (s, "%U/%d ", format_ip6_address, + (ip6_address_t *) mp->address, mp->address_length); + else + s = format (s, "%U/%d ", format_ip4_address, + (ip4_address_t *) mp->address, mp->address_length); + + if (mp->is_add == 0) + s = format (s, "del "); + if (mp->del_all) + s = format (s, "del-all "); + + FINISH; +} + +static void *vl_api_sw_interface_set_table_t_print + (vl_api_sw_interface_set_table_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: sw_interface_set_table "); + + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + + if (mp->vrf_id) + s = format (s, "vrf %d ", ntohl (mp->vrf_id)); + + if (mp->is_ipv6) + s = format (s, "ipv6 "); + + FINISH; +} + +static void *vl_api_sw_interface_set_mpls_enable_t_print + (vl_api_sw_interface_set_mpls_enable_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: sw_interface_set_mpls_enable "); + + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + + if (mp->enable == 0) + s = format (s, "disable"); + + FINISH; +} + +static void *vl_api_sw_interface_set_vpath_t_print + (vl_api_sw_interface_set_vpath_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: sw_interface_set_vpath "); + + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + + if (mp->enable) + s = format (s, "enable "); + else + s = format (s, "disable "); + + FINISH; +} + +static void *vl_api_sw_interface_set_vxlan_bypass_t_print + (vl_api_sw_interface_set_vxlan_bypass_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: sw_interface_set_vxlan_bypass "); + + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + + if (mp->is_ipv6) + s = format (s, "ip6"); + + if 
(mp->enable) + s = format (s, "enable "); + else + s = format (s, "disable "); + + FINISH; +} + +static void *vl_api_sw_interface_set_l2_xconnect_t_print + (vl_api_sw_interface_set_l2_xconnect_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: sw_interface_set_l2_xconnect "); + + s = format (s, "sw_if_index %d ", ntohl (mp->rx_sw_if_index)); + + if (mp->enable) + { + s = format (s, "tx_sw_if_index %d ", ntohl (mp->tx_sw_if_index)); + } + else + s = format (s, "delete "); + + FINISH; +} + +static void *vl_api_sw_interface_set_l2_bridge_t_print + (vl_api_sw_interface_set_l2_bridge_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: sw_interface_set_l2_bridge "); + + s = format (s, "sw_if_index %d ", ntohl (mp->rx_sw_if_index)); + + if (mp->enable) + { + s = format (s, "bd_id %d shg %d %senable ", ntohl (mp->bd_id), + mp->shg, ((mp->bvi) ? "bvi " : " ")); + } + else + s = format (s, "disable "); + + FINISH; +} + +static void *vl_api_sw_interface_set_dpdk_hqos_pipe_t_print + (vl_api_sw_interface_set_dpdk_hqos_pipe_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: sw_interface_set_dpdk_hqos_pipe "); + + s = format (s, "sw_if_index %u ", ntohl (mp->sw_if_index)); + + s = format (s, "subport %u pipe %u profile %u ", + ntohl (mp->subport), ntohl (mp->pipe), ntohl (mp->profile)); + + FINISH; +} + +static void *vl_api_sw_interface_set_dpdk_hqos_subport_t_print + (vl_api_sw_interface_set_dpdk_hqos_subport_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: sw_interface_set_dpdk_hqos_subport "); + + s = format (s, "sw_if_index %u ", ntohl (mp->sw_if_index)); + + s = + format (s, + "subport %u rate %u bkt_size %u tc0 %u tc1 %u tc2 %u tc3 %u period %u", + ntohl (mp->subport), ntohl (mp->tb_rate), ntohl (mp->tb_size), + ntohl (mp->tc_rate[0]), ntohl (mp->tc_rate[1]), + ntohl (mp->tc_rate[2]), ntohl (mp->tc_rate[3]), + ntohl (mp->tc_period)); + + FINISH; +} + +static void *vl_api_sw_interface_set_dpdk_hqos_tctbl_t_print + (vl_api_sw_interface_set_dpdk_hqos_tctbl_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: sw_interface_set_dpdk_hqos_tctbl "); + + s = format (s, "sw_if_index %u ", ntohl (mp->sw_if_index)); + + s = format (s, "entry %u tc %u queue %u", + ntohl (mp->entry), ntohl (mp->tc), ntohl (mp->queue)); + + FINISH; +} + +static void *vl_api_bridge_domain_add_del_t_print + (vl_api_bridge_domain_add_del_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: bridge_domain_add_del "); + + s = format (s, "bd_id %d ", ntohl (mp->bd_id)); + + if (mp->is_add) + { + s = format (s, "flood %d uu-flood %d forward %d learn %d arp-term %d", + mp->flood, mp->uu_flood, mp->forward, mp->learn, + mp->arp_term); + } + else + s = format (s, "del "); + + FINISH; +} + +static void *vl_api_bridge_domain_dump_t_print + (vl_api_bridge_domain_dump_t * mp, void *handle) +{ + u8 *s; + u32 bd_id = ntohl (mp->bd_id); + + s = format (0, "SCRIPT: bridge_domain_dump "); + + if (bd_id != ~0) + s = format (s, "bd_id %d ", bd_id); + + FINISH; +} + +static void *vl_api_l2fib_add_del_t_print + (vl_api_l2fib_add_del_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: l2fib_add_del "); + + s = format (s, "mac %U ", format_ethernet_address, &mp->mac); + + s = format (s, "bd_id %d ", ntohl (mp->bd_id)); + + + if (mp->is_add) + { + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + if (mp->static_mac) + s = format (s, "%s", "static "); + if (mp->filter_mac) + s = format (s, "%s", "filter "); + if (mp->bvi_mac) + s = format (s, "%s", "bvi "); + } + else + { + 
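+      /* on delete the entry is identified by mac + bd_id alone, so only "del" is echoed */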
s = format (s, "del "); + } + + FINISH; +} + +static void * +vl_api_l2_flags_t_print (vl_api_l2_flags_t * mp, void *handle) +{ + u8 *s; + u32 flags = ntohl (mp->feature_bitmap); + + s = format (0, "SCRIPT: l2_flags "); + + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + +#define _(a,b) \ + if (flags & L2INPUT_FEAT_ ## a) s = format (s, #a " "); + foreach_l2input_feat; +#undef _ + + FINISH; +} + +static void *vl_api_bridge_flags_t_print + (vl_api_bridge_flags_t * mp, void *handle) +{ + u8 *s; + u32 flags = ntohl (mp->feature_bitmap); + + s = format (0, "SCRIPT: bridge_flags "); + + s = format (s, "bd_id %d ", ntohl (mp->bd_id)); + + if (flags & L2_LEARN) + s = format (s, "learn "); + if (flags & L2_FWD) + s = format (s, "forward "); + if (flags & L2_FLOOD) + s = format (s, "flood "); + if (flags & L2_UU_FLOOD) + s = format (s, "uu-flood "); + if (flags & L2_ARP_TERM) + s = format (s, "arp-term "); + + if (mp->is_set == 0) + s = format (s, "clear "); + + FINISH; +} + +static void *vl_api_bd_ip_mac_add_del_t_print + (vl_api_bd_ip_mac_add_del_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: bd_ip_mac_add_del "); + s = format (s, "bd_id %d ", ntohl (mp->bd_id)); + + if (mp->is_ipv6) + s = format (s, "%U ", format_ip6_address, + (ip6_address_t *) mp->ip_address); + else + s = format (s, "%U ", format_ip4_address, + (ip4_address_t *) mp->ip_address); + + s = format (s, "%U ", format_ethernet_address, mp->mac_address); + if (mp->is_add == 0) + s = format (s, "del "); + + FINISH; +} + +static void *vl_api_tap_connect_t_print + (vl_api_tap_connect_t * mp, void *handle) +{ + u8 *s; + u8 null_mac[6]; + + memset (null_mac, 0, sizeof (null_mac)); + + s = format (0, "SCRIPT: tap_connect "); + s = format (s, "tapname %s ", mp->tap_name); + if (mp->use_random_mac) + s = format (s, "random-mac "); + if (mp->tag[0]) + s = format (s, "tag %s ", mp->tag); + if (memcmp (mp->mac_address, null_mac, 6)) + s = format (s, "mac %U ", format_ethernet_address, mp->mac_address); + + FINISH; +} + +static void *vl_api_tap_modify_t_print + (vl_api_tap_modify_t * mp, void *handle) +{ + u8 *s; + u8 null_mac[6]; + + memset (null_mac, 0, sizeof (null_mac)); + + s = format (0, "SCRIPT: tap_modify "); + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + s = format (s, "tapname %s ", mp->tap_name); + if (mp->use_random_mac) + s = format (s, "random-mac "); + + if (memcmp (mp->mac_address, null_mac, 6)) + s = format (s, "mac %U ", format_ethernet_address, mp->mac_address); + + FINISH; +} + +static void *vl_api_tap_delete_t_print + (vl_api_tap_delete_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: tap_delete "); + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + + FINISH; +} + +static void *vl_api_sw_interface_tap_dump_t_print + (vl_api_sw_interface_tap_dump_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: sw_interface_tap_dump "); + + FINISH; +} + + +static void *vl_api_ip_add_del_route_t_print + (vl_api_ip_add_del_route_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: ip_add_del_route "); + if (mp->is_add == 0) + s = format (s, "del "); + + if (mp->next_hop_sw_if_index) + s = format (s, "sw_if_index %d ", ntohl (mp->next_hop_sw_if_index)); + + if (mp->is_ipv6) + s = format (s, "%U/%d ", format_ip6_address, mp->dst_address, + mp->dst_address_length); + else + s = format (s, "%U/%d ", format_ip4_address, mp->dst_address, + mp->dst_address_length); + if (mp->is_local) + s = format (s, "local "); + else if (mp->is_drop) + s = format (s, "drop 
"); + else if (mp->is_classify) + s = format (s, "classify %d", ntohl (mp->classify_table_index)); + else + { + if (mp->is_ipv6) + s = format (s, "via %U ", format_ip6_address, mp->next_hop_address); + else + s = format (s, "via %U ", format_ip4_address, mp->next_hop_address); + } + + if (mp->table_id != 0) + s = format (s, "vrf %d ", ntohl (mp->table_id)); + + if (mp->create_vrf_if_needed) + s = format (s, "create-vrf "); + + if (mp->next_hop_weight != 1) + s = format (s, "weight %d ", mp->next_hop_weight); + + if (mp->not_last) + s = format (s, "not-last "); + + if (mp->is_multipath) + s = format (s, "multipath "); + + if (mp->is_multipath) + s = format (s, "multipath "); + + if (mp->next_hop_table_id) + s = format (s, "lookup-in-vrf %d ", ntohl (mp->next_hop_table_id)); + + FINISH; +} + +static void *vl_api_proxy_arp_add_del_t_print + (vl_api_proxy_arp_add_del_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: proxy_arp_add_del "); + + s = format (s, "%U - %U ", format_ip4_address, mp->low_address, + format_ip4_address, mp->hi_address); + + if (mp->vrf_id) + s = format (s, "vrf %d ", ntohl (mp->vrf_id)); + + if (mp->is_add == 0) + s = format (s, "del "); + + FINISH; +} + +static void *vl_api_proxy_arp_intfc_enable_disable_t_print + (vl_api_proxy_arp_intfc_enable_disable_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: proxy_arp_intfc_enable_disable "); + + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + + s = format (s, "enable %d ", mp->enable_disable); + + FINISH; +} + +static void *vl_api_mpls_tunnel_add_del_t_print + (vl_api_mpls_tunnel_add_del_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: mpls_tunnel_add_del "); + + if (mp->mt_next_hop_sw_if_index) + s = format (s, "sw_if_index %d ", ntohl (mp->mt_next_hop_sw_if_index)); + + if (mp->mt_next_hop_proto_is_ip4) + s = format (s, "%U ", format_ip4_address, mp->mt_next_hop); + else + s = format (s, "%U ", format_ip6_address, mp->mt_next_hop); + + if (mp->mt_l2_only) + s = format (s, "l2-only "); + + if (mp->mt_is_add == 0) + s = format (s, "del "); + + FINISH; +} + +static void *vl_api_sw_interface_set_unnumbered_t_print + (vl_api_sw_interface_set_unnumbered_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: sw_interface_set_unnumbered "); + + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + + s = format (s, "unnum_if_index %d ", ntohl (mp->unnumbered_sw_if_index)); + + if (mp->is_add == 0) + s = format (s, "del "); + + FINISH; +} + +static void *vl_api_ip_neighbor_add_del_t_print + (vl_api_ip_neighbor_add_del_t * mp, void *handle) +{ + u8 *s; + u8 null_mac[6]; + + memset (null_mac, 0, sizeof (null_mac)); + + s = format (0, "SCRIPT: ip_neighbor_add_del "); + + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + + if (mp->is_static) + s = format (s, "is_static "); + + s = format (s, "vrf_id %d ", ntohl (mp->vrf_id)); + + if (memcmp (mp->mac_address, null_mac, 6)) + s = format (s, "mac %U ", format_ethernet_address, mp->mac_address); + + if (mp->is_ipv6) + s = + format (s, "dst %U ", format_ip6_address, + (ip6_address_t *) mp->dst_address); + else + s = + format (s, "dst %U ", format_ip4_address, + (ip4_address_t *) mp->dst_address); + + if (mp->is_add == 0) + s = format (s, "del "); + + FINISH; +} + +static void * +vl_api_reset_vrf_t_print (vl_api_reset_vrf_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: reset_vrf "); + + if (mp->vrf_id) + s = format (s, "vrf %d ", ntohl (mp->vrf_id)); + + if (mp->is_ipv6 != 0) + s = format (s, "ipv6 "); 
+ + FINISH; +} + +static void *vl_api_create_vlan_subif_t_print + (vl_api_create_vlan_subif_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: create_vlan_subif "); + + if (mp->sw_if_index) + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + + if (mp->vlan_id) + s = format (s, "vlan_id %d ", ntohl (mp->vlan_id)); + + FINISH; +} + +#define foreach_create_subif_bit \ +_(no_tags) \ +_(one_tag) \ +_(two_tags) \ +_(dot1ad) \ +_(exact_match) \ +_(default_sub) \ +_(outer_vlan_id_any) \ +_(inner_vlan_id_any) + +static void *vl_api_create_subif_t_print + (vl_api_create_subif_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: create_subif "); + + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + + s = format (s, "sub_id %d ", ntohl (mp->sub_id)); + + if (mp->outer_vlan_id) + s = format (s, "outer_vlan_id %d ", ntohs (mp->outer_vlan_id)); + + if (mp->inner_vlan_id) + s = format (s, "inner_vlan_id %d ", ntohs (mp->inner_vlan_id)); + +#define _(a) if (mp->a) s = format (s, "%s ", #a); + foreach_create_subif_bit; +#undef _ + + FINISH; +} + +static void *vl_api_delete_subif_t_print + (vl_api_delete_subif_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: delete_subif "); + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + + FINISH; +} + +static void *vl_api_oam_add_del_t_print + (vl_api_oam_add_del_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: oam_add_del "); + + if (mp->vrf_id) + s = format (s, "vrf %d ", ntohl (mp->vrf_id)); + + s = format (s, "src %U ", format_ip4_address, mp->src_address); + + s = format (s, "dst %U ", format_ip4_address, mp->dst_address); + + if (mp->is_add == 0) + s = format (s, "del "); + + FINISH; +} + +static void * +vl_api_reset_fib_t_print (vl_api_reset_fib_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: reset_fib "); + + if (mp->vrf_id) + s = format (s, "vrf %d ", ntohl (mp->vrf_id)); + + if (mp->is_ipv6 != 0) + s = format (s, "ipv6 "); + + FINISH; +} + +static void *vl_api_dhcp_proxy_config_t_print + (vl_api_dhcp_proxy_config_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: dhcp_proxy_config "); + + s = format (s, "vrf_id %d ", ntohl (mp->vrf_id)); + + if (mp->is_ipv6) + { + s = format (s, "svr %U ", format_ip6_address, + (ip6_address_t *) mp->dhcp_server); + s = format (s, "src %U ", format_ip6_address, + (ip6_address_t *) mp->dhcp_src_address); + } + else + { + s = format (s, "svr %U ", format_ip4_address, + (ip4_address_t *) mp->dhcp_server); + s = format (s, "src %U ", format_ip4_address, + (ip4_address_t *) mp->dhcp_src_address); + } + if (mp->is_add == 0) + s = format (s, "del "); + + s = format (s, "insert-cid %d ", mp->insert_circuit_id); + + FINISH; +} + +static void *vl_api_dhcp_proxy_config_2_t_print + (vl_api_dhcp_proxy_config_2_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: dhcp_proxy_config_2 "); + + s = format (s, "rx_vrf_id %d ", ntohl (mp->rx_vrf_id)); + s = format (s, "server_vrf_id %d ", ntohl (mp->server_vrf_id)); + + if (mp->is_ipv6) + { + s = format (s, "svr %U ", format_ip6_address, + (ip6_address_t *) mp->dhcp_server); + s = format (s, "src %U ", format_ip6_address, + (ip6_address_t *) mp->dhcp_src_address); + } + else + { + s = format (s, "svr %U ", format_ip4_address, + (ip4_address_t *) mp->dhcp_server); + s = format (s, "src %U ", format_ip4_address, + (ip4_address_t *) mp->dhcp_src_address); + } + if (mp->is_add == 0) + s = format (s, "del "); + + s = format (s, "insert-cid %d ", mp->insert_circuit_id); + + FINISH; +} + 
+static void *vl_api_dhcp_proxy_set_vss_t_print + (vl_api_dhcp_proxy_set_vss_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: dhcp_proxy_set_vss "); + + s = format (s, "tbl_id %d ", ntohl (mp->tbl_id)); + + s = format (s, "fib_id %d ", ntohl (mp->fib_id)); + + s = format (s, "oui %d ", ntohl (mp->oui)); + + if (mp->is_ipv6 != 0) + s = format (s, "ipv6 "); + + if (mp->is_add == 0) + s = format (s, "del "); + + FINISH; +} + +static void *vl_api_dhcp_client_config_t_print + (vl_api_dhcp_client_config_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: dhcp_client_config "); + + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + + s = format (s, "hostname %s ", mp->hostname); + + s = format (s, "want_dhcp_event %d ", mp->want_dhcp_event); + + s = format (s, "pid %d ", mp->pid); + + if (mp->is_add == 0) + s = format (s, "del "); + + FINISH; +} + + +static void *vl_api_set_ip_flow_hash_t_print + (vl_api_set_ip_flow_hash_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: set_ip_flow_hash "); + + s = format (s, "vrf_id %d ", ntohl (mp->vrf_id)); + + if (mp->src) + s = format (s, "src "); + + if (mp->dst) + s = format (s, "dst "); + + if (mp->sport) + s = format (s, "sport "); + + if (mp->dport) + s = format (s, "dport "); + + if (mp->proto) + s = format (s, "proto "); + + if (mp->reverse) + s = format (s, "reverse "); + + if (mp->is_ipv6 != 0) + s = format (s, "ipv6 "); + + FINISH; +} + +static void *vl_api_sw_interface_ip6_set_link_local_address_t_print + (vl_api_sw_interface_ip6_set_link_local_address_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: sw_interface_ip6_set_link_local_address "); + + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + + s = format (s, "%U/%d ", format_ip6_address, mp->address, + mp->address_length); + + FINISH; +} + +static void *vl_api_sw_interface_ip6nd_ra_prefix_t_print + (vl_api_sw_interface_ip6nd_ra_prefix_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: sw_interface_ip6nd_ra_prefix "); + + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + + s = format (s, "%U/%d ", format_ip6_address, mp->address, + mp->address_length); + + s = format (s, "val_life %d ", ntohl (mp->val_lifetime)); + + s = format (s, "pref_life %d ", ntohl (mp->pref_lifetime)); + + if (mp->use_default) + s = format (s, "def "); + + if (mp->no_advertise) + s = format (s, "noadv "); + + if (mp->off_link) + s = format (s, "offl "); + + if (mp->no_autoconfig) + s = format (s, "noauto "); + + if (mp->no_onlink) + s = format (s, "nolink "); + + if (mp->is_no) + s = format (s, "isno "); + + FINISH; +} + +static void *vl_api_sw_interface_ip6nd_ra_config_t_print + (vl_api_sw_interface_ip6nd_ra_config_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: sw_interface_ip6nd_ra_config "); + + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + + s = format (s, "maxint %d ", ntohl (mp->max_interval)); + + s = format (s, "minint %d ", ntohl (mp->min_interval)); + + s = format (s, "life %d ", ntohl (mp->lifetime)); + + s = format (s, "count %d ", ntohl (mp->initial_count)); + + s = format (s, "interval %d ", ntohl (mp->initial_interval)); + + if (mp->suppress) + s = format (s, "suppress "); + + if (mp->managed) + s = format (s, "managed "); + + if (mp->other) + s = format (s, "other "); + + if (mp->ll_option) + s = format (s, "ll "); + + if (mp->send_unicast) + s = format (s, "send "); + + if (mp->cease) + s = format (s, "cease "); + + if (mp->is_no) + s = format (s, "isno "); + + if 
(mp->default_router) + s = format (s, "def "); + + FINISH; +} + +static void *vl_api_set_arp_neighbor_limit_t_print + (vl_api_set_arp_neighbor_limit_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: set_arp_neighbor_limit "); + + s = format (s, "arp_nbr_limit %d ", ntohl (mp->arp_neighbor_limit)); + + if (mp->is_ipv6 != 0) + s = format (s, "ipv6 "); + + FINISH; +} + +static void *vl_api_l2_patch_add_del_t_print + (vl_api_l2_patch_add_del_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: l2_patch_add_del "); + + s = format (s, "rx_sw_if_index %d ", ntohl (mp->rx_sw_if_index)); + + s = format (s, "tx_sw_if_index %d ", ntohl (mp->tx_sw_if_index)); + + if (mp->is_add == 0) + s = format (s, "del "); + + FINISH; +} + +static void *vl_api_sr_tunnel_add_del_t_print + (vl_api_sr_tunnel_add_del_t * mp, void *handle) +{ + u8 *s; + ip6_address_t *this_address; + int i; + u16 flags_host_byte_order; + u8 pl_flag; + + s = format (0, "SCRIPT: sr_tunnel_add_del "); + + if (mp->name[0]) + s = format (s, "name %s ", mp->name); + + s = format (s, "src %U dst %U/%d ", format_ip6_address, + (ip6_address_t *) mp->src_address, + format_ip6_address, + (ip6_address_t *) mp->dst_address, mp->dst_mask_width); + + this_address = (ip6_address_t *) mp->segs_and_tags; + for (i = 0; i < mp->n_segments; i++) + { + s = format (s, "next %U ", format_ip6_address, this_address); + this_address++; + } + for (i = 0; i < mp->n_tags; i++) + { + s = format (s, "tag %U ", format_ip6_address, this_address); + this_address++; + } + + flags_host_byte_order = clib_net_to_host_u16 (mp->flags_net_byte_order); + + if (flags_host_byte_order & IP6_SR_HEADER_FLAG_CLEANUP) + s = format (s, " clean "); + + if (flags_host_byte_order & IP6_SR_HEADER_FLAG_PROTECTED) + s = format (s, "protected "); + + for (i = 1; i <= 4; i++) + { + pl_flag = ip6_sr_policy_list_flags (flags_host_byte_order, i); + + switch (pl_flag) + { + case IP6_SR_HEADER_FLAG_PL_ELT_NOT_PRESENT: + continue; + + case IP6_SR_HEADER_FLAG_PL_ELT_INGRESS_PE: + s = format (s, "InPE %d ", i); + break; + + case IP6_SR_HEADER_FLAG_PL_ELT_EGRESS_PE: + s = format (s, "EgPE %d ", i); + break; + + case IP6_SR_HEADER_FLAG_PL_ELT_ORIG_SRC_ADDR: + s = format (s, "OrgSrc %d ", i); + break; + + default: + clib_warning ("BUG: pl elt %d value %d", i, pl_flag); + break; + } + } + + if (mp->policy_name[0]) + s = format (s, "policy_name %s ", mp->policy_name); + + if (mp->is_add == 0) + s = format (s, "del "); + + FINISH; +} + +static void *vl_api_sr_policy_add_del_t_print + (vl_api_sr_policy_add_del_t * mp, void *handle) +{ + u8 *s; + int i; + + s = format (0, "SCRIPT: sr_policy_add_del "); + + if (mp->name[0]) + s = format (s, "name %s ", mp->name); + + + if (mp->tunnel_names[0]) + { + // start deserializing tunnel_names + int num_tunnels = mp->tunnel_names[0]; //number of tunnels + u8 *deser_tun_names = mp->tunnel_names; + deser_tun_names += 1; //moving along + + u8 *tun_name = 0; + int tun_name_len = 0; + + for (i = 0; i < num_tunnels; i++) + { + tun_name_len = *deser_tun_names; + deser_tun_names += 1; + vec_resize (tun_name, tun_name_len); + memcpy (tun_name, deser_tun_names, tun_name_len); + s = format (s, "tunnel %s ", tun_name); + deser_tun_names += tun_name_len; + tun_name = 0; + } + } + + if (mp->is_add == 0) + s = format (s, "del "); + + FINISH; +} + +static void *vl_api_sr_multicast_map_add_del_t_print + (vl_api_sr_multicast_map_add_del_t * mp, void *handle) +{ + + u8 *s = 0; + /* int i; */ + + s = format (0, "SCRIPT: sr_multicast_map_add_del "); + + if 
(mp->multicast_address[0]) + s = format (s, "address %U ", format_ip6_address, &mp->multicast_address); + + if (mp->policy_name[0]) + s = format (s, "sr-policy %s ", &mp->policy_name); + + + if (mp->is_add == 0) + s = format (s, "del "); + + FINISH; +} + + +static void *vl_api_classify_add_del_table_t_print + (vl_api_classify_add_del_table_t * mp, void *handle) +{ + u8 *s; + int i; + + s = format (0, "SCRIPT: classify_add_del_table "); + + if (mp->is_add == 0) + { + s = format (s, "table %d ", ntohl (mp->table_index)); + s = format (s, "%s ", mp->del_chain ? "del-chain" : "del"); + } + else + { + s = format (s, "nbuckets %d ", ntohl (mp->nbuckets)); + s = format (s, "memory_size %d ", ntohl (mp->memory_size)); + s = format (s, "skip %d ", ntohl (mp->skip_n_vectors)); + s = format (s, "match %d ", ntohl (mp->match_n_vectors)); + s = format (s, "next-table %d ", ntohl (mp->next_table_index)); + s = format (s, "miss-next %d ", ntohl (mp->miss_next_index)); + s = format (s, "current-data-flag %d ", ntohl (mp->current_data_flag)); + if (mp->current_data_flag) + s = format (s, "current-data-offset %d ", + ntohl (mp->current_data_offset)); + s = format (s, "mask hex "); + for (i = 0; i < ntohl (mp->match_n_vectors) * sizeof (u32x4); i++) + s = format (s, "%02x", mp->mask[i]); + vec_add1 (s, ' '); + } + + FINISH; +} + +static void *vl_api_classify_add_del_session_t_print + (vl_api_classify_add_del_session_t * mp, void *handle) +{ + u8 *s; + int i, limit = 0; + + s = format (0, "SCRIPT: classify_add_del_session "); + + s = format (s, "table_index %d ", ntohl (mp->table_index)); + s = format (s, "hit_next_index %d ", ntohl (mp->hit_next_index)); + s = format (s, "opaque_index %d ", ntohl (mp->opaque_index)); + s = format (s, "advance %d ", ntohl (mp->advance)); + s = format (s, "action %d ", mp->action); + if (mp->action) + s = format (s, "metadata %d ", ntohl (mp->metadata)); + if (mp->is_add == 0) + s = format (s, "del "); + + s = format (s, "match hex "); + for (i = 5 * sizeof (u32x4) - 1; i > 0; i--) + { + if (mp->match[i] != 0) + { + limit = i + 1; + break; + } + } + + for (i = 0; i < limit; i++) + s = format (s, "%02x", mp->match[i]); + + FINISH; +} + +static void *vl_api_classify_set_interface_ip_table_t_print + (vl_api_classify_set_interface_ip_table_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: classify_set_interface_ip_table "); + + if (mp->is_ipv6) + s = format (s, "ipv6 "); + + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + s = format (s, "table %d ", ntohl (mp->table_index)); + + FINISH; +} + +static void *vl_api_classify_set_interface_l2_tables_t_print + (vl_api_classify_set_interface_l2_tables_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: classify_set_interface_l2_tables "); + + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + s = format (s, "ip4-table %d ", ntohl (mp->ip4_table_index)); + s = format (s, "ip6-table %d ", ntohl (mp->ip6_table_index)); + s = format (s, "other-table %d ", ntohl (mp->other_table_index)); + s = format (s, "is-input %d ", mp->is_input); + + FINISH; +} + +static void *vl_api_add_node_next_t_print + (vl_api_add_node_next_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: add_node_next "); + + s = format (0, "node %s next %s ", mp->node_name, mp->next_name); + + FINISH; +} + +static void *vl_api_l2tpv3_create_tunnel_t_print + (vl_api_l2tpv3_create_tunnel_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: l2tpv3_create_tunnel "); + + s = format (s, "client_address %U our_address 
%U ", + format_ip6_address, (ip6_address_t *) (mp->client_address), + format_ip6_address, (ip6_address_t *) (mp->our_address)); + s = format (s, "local_session_id %d ", ntohl (mp->local_session_id)); + s = format (s, "remote_session_id %d ", ntohl (mp->remote_session_id)); + s = format (s, "local_cookie %lld ", + clib_net_to_host_u64 (mp->local_cookie)); + s = format (s, "remote_cookie %lld ", + clib_net_to_host_u64 (mp->remote_cookie)); + if (mp->l2_sublayer_present) + s = format (s, "l2-sublayer-present "); + + FINISH; +} + +static void *vl_api_l2tpv3_set_tunnel_cookies_t_print + (vl_api_l2tpv3_set_tunnel_cookies_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: l2tpv3_set_tunnel_cookies "); + + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + + s = format (s, "new_local_cookie %llu ", + clib_net_to_host_u64 (mp->new_local_cookie)); + + s = format (s, "new_remote_cookie %llu ", + clib_net_to_host_u64 (mp->new_remote_cookie)); + + FINISH; +} + +static void *vl_api_l2tpv3_interface_enable_disable_t_print + (vl_api_l2tpv3_interface_enable_disable_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: l2tpv3_interface_enable_disable "); + + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + + if (mp->enable_disable == 0) + s = format (s, "del "); + + FINISH; +} + +static void *vl_api_l2tpv3_set_lookup_key_t_print + (vl_api_l2tpv3_set_lookup_key_t * mp, void *handle) +{ + u8 *s; + char *str = "unknown"; + + s = format (0, "SCRIPT: l2tpv3_set_lookup_key "); + + switch (mp->key) + { + case L2T_LOOKUP_SRC_ADDRESS: + str = "lookup_v6_src"; + break; + case L2T_LOOKUP_DST_ADDRESS: + str = "lookup_v6_dst"; + break; + case L2T_LOOKUP_SESSION_ID: + str = "lookup_session_id"; + break; + default: + break; + } + + s = format (s, "%s ", str); + + FINISH; +} + +static void *vl_api_sw_if_l2tpv3_tunnel_dump_t_print + (vl_api_sw_if_l2tpv3_tunnel_dump_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: sw_if_l2tpv3_tunnel_dump "); + + FINISH; +} + +static void *vl_api_vxlan_add_del_tunnel_t_print + (vl_api_vxlan_add_del_tunnel_t * mp, void *handle) +{ + u8 *s; + s = format (0, "SCRIPT: vxlan_add_del_tunnel "); + + ip46_address_t src, dst; + + ip46_from_addr_buf (mp->is_ipv6, mp->dst_address, &dst); + ip46_from_addr_buf (mp->is_ipv6, mp->src_address, &src); + + u8 is_grp = ip46_address_is_multicast (&dst); + char *dst_name = is_grp ? "group" : "dst"; + + s = format (s, "src %U ", format_ip46_address, &src, IP46_TYPE_ANY); + s = format (s, "%s %U ", dst_name, format_ip46_address, + &dst, IP46_TYPE_ANY); + + if (is_grp) + s = format (s, "mcast_sw_if_index %d ", ntohl (mp->mcast_sw_if_index)); + + if (mp->encap_vrf_id) + s = format (s, "encap-vrf-id %d ", ntohl (mp->encap_vrf_id)); + + s = format (s, "decap-next %d ", ntohl (mp->decap_next_index)); + + s = format (s, "vni %d ", ntohl (mp->vni)); + + if (mp->is_add == 0) + s = format (s, "del "); + + if (mp->is_add == 0) + s = format (s, "del "); + + FINISH; +} + +static void *vl_api_vxlan_tunnel_dump_t_print + (vl_api_vxlan_tunnel_dump_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: vxlan_tunnel_dump "); + + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + + FINISH; +} + +static void *vl_api_gre_add_del_tunnel_t_print + (vl_api_gre_add_del_tunnel_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: gre_add_del_tunnel "); + + s = format (s, "dst %U ", format_ip46_address, + (ip46_address_t *) & (mp->dst_address), + mp->is_ipv6 ? 
IP46_TYPE_IP6 : IP46_TYPE_IP4); + + s = format (s, "src %U ", format_ip46_address, + (ip46_address_t *) & (mp->src_address), + mp->is_ipv6 ? IP46_TYPE_IP6 : IP46_TYPE_IP4); + + if (mp->teb) + s = format (s, "teb "); + + if (mp->outer_fib_id) + s = format (s, "outer-fib-id %d ", ntohl (mp->outer_fib_id)); + + if (mp->is_add == 0) + s = format (s, "del "); + + FINISH; +} + +static void *vl_api_gre_tunnel_dump_t_print + (vl_api_gre_tunnel_dump_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: gre_tunnel_dump "); + + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + + FINISH; +} + +static void *vl_api_l2_fib_clear_table_t_print + (vl_api_l2_fib_clear_table_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: l2_fib_clear_table "); + + FINISH; +} + +static void *vl_api_l2_interface_efp_filter_t_print + (vl_api_l2_interface_efp_filter_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: l2_interface_efp_filter "); + + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + if (mp->enable_disable) + s = format (s, "enable "); + else + s = format (s, "disable "); + + FINISH; +} + +static void *vl_api_l2_interface_vlan_tag_rewrite_t_print + (vl_api_l2_interface_vlan_tag_rewrite_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: l2_interface_vlan_tag_rewrite "); + + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + s = format (s, "vtr_op %d ", ntohl (mp->vtr_op)); + s = format (s, "push_dot1q %d ", ntohl (mp->push_dot1q)); + s = format (s, "tag1 %d ", ntohl (mp->tag1)); + s = format (s, "tag2 %d ", ntohl (mp->tag2)); + + FINISH; +} + +static void *vl_api_create_vhost_user_if_t_print + (vl_api_create_vhost_user_if_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: create_vhost_user_if "); + + s = format (s, "socket %s ", mp->sock_filename); + if (mp->is_server) + s = format (s, "server "); + if (mp->renumber) + s = format (s, "renumber %d ", ntohl (mp->custom_dev_instance)); + if (mp->tag[0]) + s = format (s, "tag %s", mp->tag); + + FINISH; +} + +static void *vl_api_modify_vhost_user_if_t_print + (vl_api_modify_vhost_user_if_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: modify_vhost_user_if "); + + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + s = format (s, "socket %s ", mp->sock_filename); + if (mp->is_server) + s = format (s, "server "); + if (mp->renumber) + s = format (s, "renumber %d ", ntohl (mp->custom_dev_instance)); + + FINISH; +} + +static void *vl_api_delete_vhost_user_if_t_print + (vl_api_delete_vhost_user_if_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: delete_vhost_user_if "); + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + + FINISH; +} + +static void *vl_api_sw_interface_vhost_user_dump_t_print + (vl_api_sw_interface_vhost_user_dump_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: sw_interface_vhost_user_dump "); + + FINISH; +} + +static void *vl_api_sw_interface_dump_t_print + (vl_api_sw_interface_dump_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: sw_interface_dump "); + + if (mp->name_filter_valid) + s = format (s, "name_filter %s ", mp->name_filter); + else + s = format (s, "all "); + + FINISH; +} + +static void *vl_api_l2_fib_table_dump_t_print + (vl_api_l2_fib_table_dump_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: l2_fib_table_dump "); + + s = format (s, "bd_id %d ", ntohl (mp->bd_id)); + + FINISH; +} + +static void *vl_api_control_ping_t_print + (vl_api_control_ping_t * mp, void *handle) 
+{ + u8 *s; + + s = format (0, "SCRIPT: control_ping "); + + FINISH; +} + +static void *vl_api_want_interface_events_t_print + (vl_api_want_interface_events_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: want_interface_events pid %d enable %d ", + ntohl (mp->pid), ntohl (mp->enable_disable)); + + FINISH; +} + +static void *vl_api_cli_request_t_print + (vl_api_cli_request_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: cli_request "); + + FINISH; +} + +static void *vl_api_cli_inband_t_print + (vl_api_cli_inband_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: cli_inband "); + + FINISH; +} + +static void *vl_api_memclnt_create_t_print + (vl_api_memclnt_create_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: memclnt_create name %s ", mp->name); + + FINISH; +} + +static void *vl_api_show_version_t_print + (vl_api_show_version_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: show_version "); + + FINISH; +} + +static void *vl_api_vxlan_gpe_add_del_tunnel_t_print + (vl_api_vxlan_gpe_add_del_tunnel_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: vxlan_gpe_add_del_tunnel "); + + s = format (s, "local %U ", format_ip46_address, &mp->local, mp->is_ipv6); + + s = format (s, "remote %U ", format_ip46_address, &mp->remote, mp->is_ipv6); + + s = format (s, "protocol %d ", ntohl (mp->protocol)); + + s = format (s, "vni %d ", ntohl (mp->vni)); + + if (mp->is_add == 0) + s = format (s, "del "); + + if (mp->encap_vrf_id) + s = format (s, "encap-vrf-id %d ", ntohl (mp->encap_vrf_id)); + + if (mp->decap_vrf_id) + s = format (s, "decap-vrf-id %d ", ntohl (mp->decap_vrf_id)); + + FINISH; +} + +static void *vl_api_vxlan_gpe_tunnel_dump_t_print + (vl_api_vxlan_gpe_tunnel_dump_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: vxlan_gpe_tunnel_dump "); + + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + + FINISH; +} + +static void *vl_api_interface_name_renumber_t_print + (vl_api_interface_name_renumber_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: interface_renumber "); + + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + + s = format (s, "new_show_dev_instance %d ", + ntohl (mp->new_show_dev_instance)); + + FINISH; +} + +static void *vl_api_want_ip4_arp_events_t_print + (vl_api_want_ip4_arp_events_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: want_ip4_arp_events "); + s = format (s, "pid %d address %U ", mp->pid, + format_ip4_address, &mp->address); + if (mp->enable_disable == 0) + s = format (s, "del "); + + FINISH; +} + +static void *vl_api_want_ip6_nd_events_t_print + (vl_api_want_ip6_nd_events_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: want_ip6_nd_events "); + s = format (s, "pid %d address %U ", mp->pid, + format_ip6_address, mp->address); + if (mp->enable_disable == 0) + s = format (s, "del "); + + FINISH; +} + +static void *vl_api_input_acl_set_interface_t_print + (vl_api_input_acl_set_interface_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: input_acl_set_interface "); + + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + s = format (s, "ip4-table %d ", ntohl (mp->ip4_table_index)); + s = format (s, "ip6-table %d ", ntohl (mp->ip6_table_index)); + s = format (s, "l2-table %d ", ntohl (mp->l2_table_index)); + + if (mp->is_add == 0) + s = format (s, "del "); + + FINISH; +} + +static void *vl_api_ip_address_dump_t_print + (vl_api_ip_address_dump_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: 
ip6_address_dump "); + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + s = format (s, "is_ipv6 %d ", mp->is_ipv6 != 0); + + FINISH; +} + +static void * +vl_api_ip_dump_t_print (vl_api_ip_dump_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: ip_dump "); + s = format (s, "is_ipv6 %d ", mp->is_ipv6 != 0); + + FINISH; +} + +static void *vl_api_cop_interface_enable_disable_t_print + (vl_api_cop_interface_enable_disable_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: cop_interface_enable_disable "); + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + if (mp->enable_disable) + s = format (s, "enable "); + else + s = format (s, "disable "); + + FINISH; +} + +static void *vl_api_cop_whitelist_enable_disable_t_print + (vl_api_cop_whitelist_enable_disable_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: cop_whitelist_enable_disable "); + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + s = format (s, "fib-id %d ", ntohl (mp->fib_id)); + if (mp->ip4) + s = format (s, "ip4 "); + if (mp->ip6) + s = format (s, "ip6 "); + if (mp->default_cop) + s = format (s, "default "); + + FINISH; +} + +static void *vl_api_af_packet_create_t_print + (vl_api_af_packet_create_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: af_packet_create "); + s = format (s, "host_if_name %s ", mp->host_if_name); + if (mp->use_random_hw_addr) + s = format (s, "hw_addr random "); + else + s = format (s, "hw_addr %U ", format_ethernet_address, mp->hw_addr); + + FINISH; +} + +static void *vl_api_af_packet_delete_t_print + (vl_api_af_packet_delete_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: af_packet_delete "); + s = format (s, "host_if_name %s ", mp->host_if_name); + + FINISH; +} + +static u8 * +format_policer_action (u8 * s, va_list * va) +{ + u32 action = va_arg (*va, u32); + u32 dscp = va_arg (*va, u32); + char *t = 0; + + if (action == SSE2_QOS_ACTION_DROP) + s = format (s, "drop"); + else if (action == SSE2_QOS_ACTION_TRANSMIT) + s = format (s, "transmit"); + else if (action == SSE2_QOS_ACTION_MARK_AND_TRANSMIT) + { + s = format (s, "mark-and-transmit "); + switch (dscp) + { +#define _(v,f,str) case VNET_DSCP_##f: t = str; break; + foreach_vnet_dscp +#undef _ + default: + break; + } + s = format (s, "%s", t); + } + + return s; +} + +static void *vl_api_policer_add_del_t_print + (vl_api_policer_add_del_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: policer_add_del "); + s = format (s, "name %s ", mp->name); + s = format (s, "cir %d ", mp->cir); + s = format (s, "eir %d ", mp->eir); + s = format (s, "cb %d ", mp->cb); + s = format (s, "eb %d ", mp->eb); + + switch (mp->rate_type) + { + case SSE2_QOS_RATE_KBPS: + s = format (s, "rate_type kbps "); + break; + case SSE2_QOS_RATE_PPS: + s = format (s, "rate_type pps "); + break; + default: + break; + } + + switch (mp->round_type) + { + case SSE2_QOS_ROUND_TO_CLOSEST: + s = format (s, "round_type closest "); + break; + case SSE2_QOS_ROUND_TO_UP: + s = format (s, "round_type up "); + break; + case SSE2_QOS_ROUND_TO_DOWN: + s = format (s, "round_type down "); + break; + default: + break; + } + + switch (mp->type) + { + case SSE2_QOS_POLICER_TYPE_1R2C: + s = format (s, "type 1r2c "); + break; + case SSE2_QOS_POLICER_TYPE_1R3C_RFC_2697: + s = format (s, "type 1r3c "); + break; + case SSE2_QOS_POLICER_TYPE_2R3C_RFC_2698: + s = format (s, "type 2r3c-2698 "); + break; + case SSE2_QOS_POLICER_TYPE_2R3C_RFC_4115: + s = format (s, "type 2r3c-4115 "); + break; + case 
SSE2_QOS_POLICER_TYPE_2R3C_RFC_MEF5CF1: + s = format (s, "type 2r3c-mef5cf1 "); + break; + default: + break; + } + + s = format (s, "conform_action %U ", format_policer_action, + mp->conform_action_type, mp->conform_dscp); + s = format (s, "exceed_action %U ", format_policer_action, + mp->exceed_action_type, mp->exceed_dscp); + s = format (s, "violate_action %U ", format_policer_action, + mp->violate_action_type, mp->violate_dscp); + + if (mp->color_aware) + s = format (s, "color-aware "); + if (mp->is_add == 0) + s = format (s, "del "); + + FINISH; +} + +static void *vl_api_policer_dump_t_print + (vl_api_policer_dump_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: policer_dump "); + if (mp->match_name_valid) + s = format (s, "name %s ", mp->match_name); + + FINISH; +} + +static void *vl_api_policer_classify_set_interface_t_print + (vl_api_policer_classify_set_interface_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: policer_classify_set_interface "); + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + if (mp->ip4_table_index != ~0) + s = format (s, "ip4-table %d ", ntohl (mp->ip4_table_index)); + if (mp->ip6_table_index != ~0) + s = format (s, "ip6-table %d ", ntohl (mp->ip6_table_index)); + if (mp->l2_table_index != ~0) + s = format (s, "l2-table %d ", ntohl (mp->l2_table_index)); + if (mp->is_add == 0) + s = format (s, "del "); + + FINISH; +} + +static void *vl_api_policer_classify_dump_t_print + (vl_api_policer_classify_dump_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: policer_classify_dump "); + switch (mp->type) + { + case POLICER_CLASSIFY_TABLE_IP4: + s = format (s, "type ip4 "); + break; + case POLICER_CLASSIFY_TABLE_IP6: + s = format (s, "type ip6 "); + break; + case POLICER_CLASSIFY_TABLE_L2: + s = format (s, "type l2 "); + break; + default: + break; + } + + FINISH; +} + +static void *vl_api_sw_interface_clear_stats_t_print + (vl_api_sw_interface_clear_stats_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: sw_interface_clear_stats "); + if (mp->sw_if_index != ~0) + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + + FINISH; +} + +static void *vl_api_mpls_tunnel_dump_t_print + (vl_api_mpls_tunnel_dump_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: mpls_tunnel_dump "); + + s = format (s, "tunnel_index %d ", ntohl (mp->tunnel_index)); + + FINISH; +} + +static void *vl_api_mpls_fib_dump_t_print + (vl_api_mpls_fib_dump_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: mpls_fib_decap_dump "); + + FINISH; +} + +static void *vl_api_ip_fib_dump_t_print + (vl_api_ip_fib_dump_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: ip_fib_dump "); + + FINISH; +} + +static void *vl_api_ip6_fib_dump_t_print + (vl_api_ip6_fib_dump_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: ip6_fib_dump "); + + FINISH; +} + +static void *vl_api_classify_table_ids_t_print + (vl_api_classify_table_ids_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: classify_table_ids "); + + FINISH; +} + +static void *vl_api_classify_table_by_interface_t_print + (vl_api_classify_table_by_interface_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: classify_table_by_interface "); + if (mp->sw_if_index != ~0) + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + + FINISH; +} + +static void *vl_api_classify_table_info_t_print + (vl_api_classify_table_info_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: classify_table_info "); + if (mp->table_id != ~0) + 
s = format (s, "table_id %d ", ntohl (mp->table_id)); + + FINISH; +} + +static void *vl_api_classify_session_dump_t_print + (vl_api_classify_session_dump_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: classify_session_dump "); + if (mp->table_id != ~0) + s = format (s, "table_id %d ", ntohl (mp->table_id)); + + FINISH; +} + +static void *vl_api_set_ipfix_exporter_t_print + (vl_api_set_ipfix_exporter_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: set_ipfix_exporter "); + + s = format (s, "collector-address %U ", format_ip4_address, + (ip4_address_t *) mp->collector_address); + s = format (s, "collector-port %d ", ntohs (mp->collector_port)); + s = format (s, "src-address %U ", format_ip4_address, + (ip4_address_t *) mp->src_address); + s = format (s, "vrf-id %d ", ntohl (mp->vrf_id)); + s = format (s, "path-mtu %d ", ntohl (mp->path_mtu)); + s = format (s, "template-interval %d ", ntohl (mp->template_interval)); + s = format (s, "udp-checksum %d ", mp->udp_checksum); + + FINISH; +} + +static void *vl_api_ipfix_exporter_dump_t_print + (vl_api_ipfix_exporter_dump_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: ipfix_exporter_dump "); + + FINISH; +} + +static void *vl_api_set_ipfix_classify_stream_t_print + (vl_api_set_ipfix_classify_stream_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: set_ipfix_classify_stream "); + + s = format (s, "domain-id %d ", ntohl (mp->domain_id)); + s = format (s, "src-port %d ", ntohs (mp->src_port)); + + FINISH; +} + +static void *vl_api_ipfix_classify_stream_dump_t_print + (vl_api_ipfix_classify_stream_dump_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: ipfix_classify_stream_dump "); + + FINISH; +} + +static void *vl_api_ipfix_classify_table_add_del_t_print + (vl_api_ipfix_classify_table_add_del_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: ipfix_classify_table_add_del "); + + s = format (s, "table-id %d ", ntohl (mp->table_id)); + s = format (s, "ip-version %d ", mp->ip_version); + s = format (s, "transport-protocol %d ", mp->transport_protocol); + + FINISH; +} + +static void *vl_api_ipfix_classify_table_dump_t_print + (vl_api_ipfix_classify_table_dump_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: ipfix_classify_table_dump "); + + FINISH; +} + +static void *vl_api_sw_interface_span_enable_disable_t_print + (vl_api_sw_interface_span_enable_disable_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: sw_interface_span_enable_disable "); + s = format (s, "src_sw_if_index %u ", ntohl (mp->sw_if_index_from)); + s = format (s, "dst_sw_if_index %u ", ntohl (mp->sw_if_index_to)); + + switch (mp->state) + { + case 0: + s = format (s, "disable "); + break; + case 1: + s = format (s, "rx "); + break; + case 2: + s = format (s, "tx "); + break; + case 3: + default: + s = format (s, "both "); + break; + } + + FINISH; +} + +static void * +vl_api_sw_interface_span_dump_t_print (vl_api_sw_interface_span_dump_t * mp, + void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: sw_interface_span_dump "); + + FINISH; +} + +static void *vl_api_get_next_index_t_print + (vl_api_get_next_index_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: get_next_index "); + s = format (s, "node-name %s ", mp->node_name); + s = format (s, "next-node-name %s ", mp->next_name); + + FINISH; +} + +static void *vl_api_pg_create_interface_t_print + (vl_api_pg_create_interface_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: pg_create_interface "); + s = format (0, 
"if_id %d", ntohl (mp->interface_id)); + + FINISH; +} + +static void *vl_api_pg_capture_t_print + (vl_api_pg_capture_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: pg_capture "); + s = format (0, "if_id %d ", ntohl (mp->interface_id)); + s = format (0, "pcap %s", mp->pcap_file_name); + if (mp->count != ~0) + s = format (s, "count %d ", ntohl (mp->count)); + if (!mp->is_enabled) + s = format (s, "disable"); + + FINISH; +} + +static void *vl_api_pg_enable_disable_t_print + (vl_api_pg_enable_disable_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: pg_enable_disable "); + if (ntohl (mp->stream_name_length) > 0) + s = format (s, "stream %s", mp->stream_name); + if (!mp->is_enabled) + s = format (s, "disable"); + + FINISH; +} + +static void *vl_api_ip_source_and_port_range_check_add_del_t_print + (vl_api_ip_source_and_port_range_check_add_del_t * mp, void *handle) +{ + u8 *s; + int i; + + s = format (0, "SCRIPT: ip_source_and_port_range_check_add_del "); + if (mp->is_ipv6) + s = format (s, "%U/%d ", format_ip6_address, mp->address, + mp->mask_length); + else + s = format (s, "%U/%d ", format_ip4_address, mp->address, + mp->mask_length); + + for (i = 0; i < mp->number_of_ranges; i++) + { + s = format (s, "range %d - %d ", mp->low_ports[i], mp->high_ports[i]); + } + + s = format (s, "vrf %d ", ntohl (mp->vrf_id)); + + if (mp->is_add == 0) + s = format (s, "del "); + + FINISH; +} + +static void *vl_api_ip_source_and_port_range_check_interface_add_del_t_print + (vl_api_ip_source_and_port_range_check_interface_add_del_t * mp, + void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: ip_source_and_port_range_check_interface_add_del "); + + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + + if (mp->tcp_out_vrf_id != ~0) + s = format (s, "tcp-out-vrf %d ", ntohl (mp->tcp_out_vrf_id)); + + if (mp->udp_out_vrf_id != ~0) + s = format (s, "udp-out-vrf %d ", ntohl (mp->udp_out_vrf_id)); + + if (mp->tcp_in_vrf_id != ~0) + s = format (s, "tcp-in-vrf %d ", ntohl (mp->tcp_in_vrf_id)); + + if (mp->udp_in_vrf_id != ~0) + s = format (s, "udp-in-vrf %d ", ntohl (mp->udp_in_vrf_id)); + + if (mp->is_add == 0) + s = format (s, "del "); + + FINISH; +} + +static void *vl_api_lisp_enable_disable_t_print + (vl_api_lisp_enable_disable_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: lisp_enable_disable %s", + mp->is_en ? "enable" : "disable"); + + FINISH; +} + +static void *vl_api_lisp_gpe_add_del_iface_t_print + (vl_api_lisp_gpe_add_del_iface_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: lisp_gpe_add_del_iface "); + + s = format (s, "%s ", mp->is_add ? "up" : "down"); + s = format (s, "vni %d ", mp->vni); + s = format (s, "%s %d ", mp->is_l2 ? 
"bd_id" : "table_id", mp->dp_table); + + FINISH; +} + +static void *vl_api_lisp_pitr_set_locator_set_t_print + (vl_api_lisp_pitr_set_locator_set_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: lisp_pitr_set_locator_set "); + + if (mp->is_add) + s = format (s, "locator-set %s ", mp->ls_name); + else + s = format (s, "del"); + + FINISH; +} + +static u8 * +format_lisp_flat_eid (u8 * s, va_list * args) +{ + u32 type = va_arg (*args, u32); + u8 *eid = va_arg (*args, u8 *); + u32 eid_len = va_arg (*args, u32); + + switch (type) + { + case 0: + return format (s, "%U/%d", format_ip4_address, eid, eid_len); + case 1: + return format (s, "%U/%d", format_ip6_address, eid, eid_len); + case 3: + return format (s, "%U", format_ethernet_address, eid); + } + return 0; +} + +/** Used for transferring locators via VPP API */ +typedef CLIB_PACKED (struct + { + u8 is_ip4; + /**< is locator an IPv4 address */ + u8 priority; + /**< locator priority */ + u8 weight; + /**< locator weight */ + u8 addr[16]; + /**< IPv4/IPv6 address */ + }) rloc_t; + +static u8 * +format_rloc (u8 * s, va_list * args) +{ + rloc_t *rloc = va_arg (*args, rloc_t *); + + if (rloc->is_ip4) + s = format (s, "%U ", format_ip4_address, rloc->addr); + else + s = format (s, "%U ", format_ip6_address, rloc->addr); + + s = format (s, "p %d w %d", rloc->priority, rloc->weight); + + return s; +} + +static void *vl_api_lisp_add_del_remote_mapping_t_print + (vl_api_lisp_add_del_remote_mapping_t * mp, void *handle) +{ + u8 *s; + u32 i, rloc_num = 0; + + s = format (0, "SCRIPT: lisp_add_del_remote_mapping "); + + if (mp->del_all) + s = format (s, "del-all "); + + s = format (s, "%s ", mp->is_add ? "add" : "del"); + s = format (s, "vni %d ", clib_net_to_host_u32 (mp->vni)); + + s = format (s, "eid %U ", format_lisp_flat_eid, + mp->eid_type, mp->eid, mp->eid_len); + + if (mp->is_src_dst) + { + s = format (s, "seid %U ", format_lisp_flat_eid, + mp->eid_type, mp->seid, mp->seid_len); + } + + rloc_num = clib_net_to_host_u32 (mp->rloc_num); + + if (0 == rloc_num) + s = format (s, "action %d", mp->action); + else + { + rloc_t *rloc = (rloc_t *) mp->rlocs; + for (i = 0; i < rloc_num; i++) + s = format (s, "%U ", format_rloc, &rloc[i]); + } + + FINISH; +} + +static void *vl_api_lisp_add_del_adjacency_t_print + (vl_api_lisp_add_del_adjacency_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: lisp_add_del_adjacency "); + + s = format (s, "%s ", mp->is_add ? "add" : "del"); + s = format (s, "vni %d ", clib_net_to_host_u32 (mp->vni)); + s = format (s, "reid %U leid %U ", + format_lisp_flat_eid, mp->eid_type, mp->reid, mp->reid_len, + format_lisp_flat_eid, mp->eid_type, mp->leid, mp->leid_len); + + FINISH; +} + +static void *vl_api_lisp_add_del_map_request_itr_rlocs_t_print + (vl_api_lisp_add_del_map_request_itr_rlocs_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: lisp_add_del_map_request_itr_rlocs "); + + if (mp->is_add) + s = format (s, "%s", mp->locator_set_name); + else + s = format (s, "del"); + + FINISH; +} + +static void *vl_api_lisp_eid_table_add_del_map_t_print + (vl_api_lisp_eid_table_add_del_map_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: lisp_eid_table_add_del_map "); + + if (!mp->is_add) + s = format (s, "del "); + + s = format (s, "vni %d ", clib_net_to_host_u32 (mp->vni)); + s = format (s, "%s %d ", + mp->is_l2 ? 
"bd_index" : "vrf", + clib_net_to_host_u32 (mp->dp_table)); + FINISH; +} + +static void *vl_api_lisp_add_del_local_eid_t_print + (vl_api_lisp_add_del_local_eid_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: lisp_add_del_local_eid "); + + if (!mp->is_add) + s = format (s, "del "); + + s = format (s, "vni %d ", clib_net_to_host_u32 (mp->vni)); + s = format (s, "eid %U ", format_lisp_flat_eid, mp->eid_type, mp->eid, + mp->prefix_len); + s = format (s, "locator-set %s ", mp->locator_set_name); + if (*mp->key) + { + u32 key_id = mp->key_id; + s = format (s, "key-id %U", format_hmac_key_id, key_id); + s = format (s, "secret-key %s", mp->key); + } + FINISH; +} + +static void *vl_api_lisp_gpe_add_del_fwd_entry_t_print + (vl_api_lisp_gpe_add_del_fwd_entry_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: lisp_gpe_add_del_fwd_entry TODO"); + + FINISH; +} + +static void *vl_api_lisp_add_del_map_resolver_t_print + (vl_api_lisp_add_del_map_resolver_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: lisp_add_del_map_resolver "); + + if (!mp->is_add) + s = format (s, "del "); + + if (mp->is_ipv6) + s = format (s, "%U ", format_ip6_address, mp->ip_address); + else + s = format (s, "%U ", format_ip4_address, mp->ip_address); + + FINISH; +} + +static void *vl_api_lisp_gpe_enable_disable_t_print + (vl_api_lisp_gpe_enable_disable_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: lisp_gpe_enable_disable "); + + s = format (s, "%s ", mp->is_en ? "enable" : "disable"); + + FINISH; +} + +typedef CLIB_PACKED (struct + { + u32 sw_if_index; + /**< locator sw_if_index */ + u8 priority; + /**< locator priority */ + u8 weight; + /**< locator weight */ + }) ls_locator_t; + +static u8 * +format_locator (u8 * s, va_list * args) +{ + ls_locator_t *l = va_arg (*args, ls_locator_t *); + + return format (s, "sw_if_index %d p %d w %d", + l->sw_if_index, l->priority, l->weight); +} + +static void *vl_api_lisp_add_del_locator_set_t_print + (vl_api_lisp_add_del_locator_set_t * mp, void *handle) +{ + u8 *s; + u32 loc_num = 0, i; + ls_locator_t *locs; + + s = format (0, "SCRIPT: lisp_add_del_locator_set "); + + if (!mp->is_add) + s = format (s, "del "); + + s = format (s, "locator-set %s ", mp->locator_set_name); + + loc_num = clib_net_to_host_u32 (mp->locator_num); + locs = (ls_locator_t *) mp->locators; + + for (i = 0; i < loc_num; i++) + s = format (s, "%U ", format_locator, &locs[i]); + + FINISH; +} + +static void *vl_api_lisp_add_del_locator_t_print + (vl_api_lisp_add_del_locator_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: lisp_add_del_locator "); + + if (!mp->is_add) + s = format (s, "del "); + + s = format (s, "locator-set %s ", mp->locator_set_name); + s = format (s, "sw_if_index %d ", mp->sw_if_index); + s = format (s, "p %d w %d ", mp->priority, mp->weight); + + FINISH; +} + +static void *vl_api_lisp_locator_set_dump_t_print + (vl_api_lisp_locator_set_dump_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: lisp_locator_set_dump "); + if (mp->filter == 1) + s = format (s, "local"); + else if (mp->filter == 2) + s = format (s, "remote"); + + FINISH; +} + +static void *vl_api_lisp_locator_dump_t_print + (vl_api_lisp_locator_dump_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: lisp_locator_dump "); + if (mp->is_index_set) + s = format (s, "ls_index %d", clib_net_to_host_u32 (mp->ls_index)); + else + s = format (s, "ls_name %s", mp->ls_name); + + FINISH; +} + +static void *vl_api_lisp_map_request_mode_t_print + 
(vl_api_lisp_map_request_mode_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: lisp_map_request_mode "); + + switch (mp->mode) + { + case 0: + s = format (s, "dst-only"); + break; + case 1: + s = format (s, "src-dst"); + default: + break; + } + + FINISH; +} + +static void *vl_api_lisp_eid_table_dump_t_print + (vl_api_lisp_eid_table_dump_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: lisp_eid_table_dump "); + + if (mp->eid_set) + { + s = format (s, "vni %d ", clib_net_to_host_u32 (mp->vni)); + s = format (s, "eid %U ", format_lisp_flat_eid, mp->eid_type, + mp->eid, mp->prefix_length); + switch (mp->filter) + { + case 1: + s = format (s, "local "); + break; + case 2: + s = format (s, "remote "); + break; + } + } + + FINISH; +} + +static void *vl_api_lisp_rloc_probe_enable_disable_t_print + (vl_api_lisp_rloc_probe_enable_disable_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: lisp_rloc_probe_enable_disable "); + if (mp->is_enabled) + s = format (s, "enable"); + else + s = format (s, "disable"); + + FINISH; +} + +static void *vl_api_lisp_map_register_enable_disable_t_print + (vl_api_lisp_map_register_enable_disable_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: lisp_map_register_enable_disable "); + if (mp->is_enabled) + s = format (s, "enable"); + else + s = format (s, "disable"); + + FINISH; +} + +static void *vl_api_lisp_adjacencies_get_t_print + (vl_api_lisp_adjacencies_get_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: lisp_adjacencies_get "); + s = format (s, "vni %d", clib_net_to_host_u32 (mp->vni)); + + FINISH; +} + +static void *vl_api_lisp_eid_table_map_dump_t_print + (vl_api_lisp_eid_table_map_dump_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: lisp_eid_table_map_dump "); + + if (mp->is_l2) + s = format (s, "l2"); + else + s = format (s, "l3"); + + FINISH; +} + +static void *vl_api_ipsec_gre_add_del_tunnel_t_print + (vl_api_ipsec_gre_add_del_tunnel_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: ipsec_gre_add_del_tunnel "); + + s = format (s, "dst %U ", format_ip4_address, + (ip4_address_t *) & (mp->dst_address)); + + s = format (s, "src %U ", format_ip4_address, + (ip4_address_t *) & (mp->src_address)); + + s = format (s, "local_sa %d ", ntohl (mp->local_sa_id)); + + s = format (s, "remote_sa %d ", ntohl (mp->remote_sa_id)); + + if (mp->is_add == 0) + s = format (s, "del "); + + FINISH; +} + +static void *vl_api_ipsec_gre_tunnel_dump_t_print + (vl_api_ipsec_gre_tunnel_dump_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: ipsec_gre_tunnel_dump "); + + if (mp->sw_if_index != ~0) + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + + FINISH; +} + +static void *vl_api_l2_interface_pbb_tag_rewrite_t_print + (vl_api_l2_interface_pbb_tag_rewrite_t * mp, void *handle) +{ + u8 *s; + u32 vtr_op = ntohl (mp->vtr_op); + + s = format (0, "SCRIPT: l2_interface_pbb_tag_rewrite "); + + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + s = format (s, "vtr_op %d ", vtr_op); + if (vtr_op != L2_VTR_DISABLED && vtr_op != L2_VTR_POP_2) + { + if (vtr_op == L2_VTR_TRANSLATE_2_2) + s = format (s, "%d ", ntohs (mp->outer_tag)); + s = format (s, "dmac %U ", format_ethernet_address, &mp->b_dmac); + s = format (s, "smac %U ", format_ethernet_address, &mp->b_smac); + s = format (s, "sid %d ", ntohl (mp->i_sid)); + s = format (s, "vlanid %d ", ntohs (mp->b_vlanid)); + } + + FINISH; +} + +static void *vl_api_flow_classify_set_interface_t_print + 
(vl_api_flow_classify_set_interface_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: flow_classify_set_interface "); + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + if (mp->ip4_table_index != ~0) + s = format (s, "ip4-table %d ", ntohl (mp->ip4_table_index)); + if (mp->ip6_table_index != ~0) + s = format (s, "ip6-table %d ", ntohl (mp->ip6_table_index)); + if (mp->is_add == 0) + s = format (s, "del "); + + FINISH; +} + +static void * +vl_api_punt_t_print (vl_api_punt_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: punt "); + + if (mp->ipv != (u8) ~ 0) + s = format (s, "ip %d ", mp->ipv); + + s = format (s, "protocol %d ", mp->l4_protocol); + + if (mp->l4_port != (u16) ~ 0) + s = format (s, "port %d ", ntohs (mp->l4_port)); + + if (!mp->is_add) + s = format (s, "del "); + + FINISH; +} + +static void *vl_api_flow_classify_dump_t_print + (vl_api_flow_classify_dump_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: flow_classify_dump "); + switch (mp->type) + { + case FLOW_CLASSIFY_TABLE_IP4: + s = format (s, "type ip4 "); + break; + case FLOW_CLASSIFY_TABLE_IP6: + s = format (s, "type ip6 "); + break; + default: + break; + } + + FINISH; +} + +static void *vl_api_get_first_msg_id_t_print + (vl_api_get_first_msg_id_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: get_first_msg_id %s ", mp->name); + + FINISH; +} + +static void *vl_api_ioam_enable_t_print + (vl_api_ioam_enable_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: ioam_enable "); + + if (mp->trace_enable) + s = format (s, "trace enabled"); + + if (mp->pot_enable) + s = format (s, "POT enabled"); + + if (mp->seqno) + s = format (s, "Seqno enabled"); + + if (mp->analyse) + s = format (s, "Analyse enabled"); + + FINISH; +} + +static void *vl_api_ioam_disable_t_print + (vl_api_ioam_disable_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: ioam_disable "); + s = format (s, "trace disabled"); + s = format (s, "POT disabled"); + s = format (s, "Seqno disabled"); + s = format (s, "Analyse disabled"); + + FINISH; +} + +static void *vl_api_feature_enable_disable_t_print + (vl_api_feature_enable_disable_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: feature_enable_disable "); + s = format (s, "arc_name %s ", mp->arc_name); + s = format (s, "feature_name %s ", mp->feature_name); + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + if (!mp->enable) + s = format (s, "disable"); + + FINISH; +} + +static void *vl_api_sw_interface_tag_add_del_t_print + (vl_api_sw_interface_tag_add_del_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: sw_interface_tag_add_del "); + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + if (mp->is_add) + s = format (s, "tag %s ", mp->tag); + else + s = format (s, "del "); + + FINISH; +} + +static void *vl_api_sw_interface_set_mtu_t_print + (vl_api_sw_interface_set_mtu_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: sw_interface_set_mtu "); + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + s = format (s, "tag %d ", ntohs (mp->mtu)); + + FINISH; +} + +#define foreach_custom_print_no_arg_function \ +_(lisp_eid_table_vni_dump) \ +_(lisp_map_resolver_dump) \ +_(lisp_map_server_dump) \ +_(show_lisp_rloc_probe_state) \ +_(show_lisp_map_register_state) \ +_(show_lisp_map_request_mode) \ +_(lisp_gpe_tunnel_dump) + +#define _(f) \ +static void * vl_api_ ## f ## _t_print \ + (vl_api_ ## f ## _t * mp, void * handle) \ +{ \ + u8 * s; \ + s = format (0, "SCRIPT: " #f ); \ 
+ FINISH; \ +} +foreach_custom_print_no_arg_function +#undef _ +#define foreach_custom_print_function \ +_(CREATE_LOOPBACK, create_loopback) \ +_(SW_INTERFACE_SET_FLAGS, sw_interface_set_flags) \ +_(SW_INTERFACE_ADD_DEL_ADDRESS, sw_interface_add_del_address) \ +_(SW_INTERFACE_SET_TABLE, sw_interface_set_table) \ +_(SW_INTERFACE_SET_MPLS_ENABLE, sw_interface_set_mpls_enable) \ +_(SW_INTERFACE_SET_VPATH, sw_interface_set_vpath) \ +_(SW_INTERFACE_SET_VXLAN_BYPASS, sw_interface_set_vxlan_bypass) \ +_(TAP_CONNECT, tap_connect) \ +_(TAP_MODIFY, tap_modify) \ +_(TAP_DELETE, tap_delete) \ +_(SW_INTERFACE_TAP_DUMP, sw_interface_tap_dump) \ +_(IP_ADD_DEL_ROUTE, ip_add_del_route) \ +_(PROXY_ARP_ADD_DEL, proxy_arp_add_del) \ +_(PROXY_ARP_INTFC_ENABLE_DISABLE, proxy_arp_intfc_enable_disable) \ +_(MPLS_TUNNEL_ADD_DEL, mpls_tunnel_add_del) \ +_(SW_INTERFACE_SET_UNNUMBERED, sw_interface_set_unnumbered) \ +_(IP_NEIGHBOR_ADD_DEL, ip_neighbor_add_del) \ +_(RESET_VRF, reset_vrf) \ +_(CREATE_VLAN_SUBIF, create_vlan_subif) \ +_(CREATE_SUBIF, create_subif) \ +_(OAM_ADD_DEL, oam_add_del) \ +_(RESET_FIB, reset_fib) \ +_(DHCP_PROXY_CONFIG, dhcp_proxy_config) \ +_(DHCP_PROXY_SET_VSS, dhcp_proxy_set_vss) \ +_(SET_IP_FLOW_HASH, set_ip_flow_hash) \ +_(SW_INTERFACE_IP6_SET_LINK_LOCAL_ADDRESS, \ + sw_interface_ip6_set_link_local_address) \ +_(SW_INTERFACE_IP6ND_RA_PREFIX, sw_interface_ip6nd_ra_prefix) \ +_(SW_INTERFACE_IP6ND_RA_CONFIG, sw_interface_ip6nd_ra_config) \ +_(SET_ARP_NEIGHBOR_LIMIT, set_arp_neighbor_limit) \ +_(L2_PATCH_ADD_DEL, l2_patch_add_del) \ +_(SR_TUNNEL_ADD_DEL, sr_tunnel_add_del) \ +_(SR_POLICY_ADD_DEL, sr_policy_add_del) \ +_(SR_MULTICAST_MAP_ADD_DEL, sr_multicast_map_add_del) \ +_(SW_INTERFACE_SET_L2_XCONNECT, sw_interface_set_l2_xconnect) \ +_(L2FIB_ADD_DEL, l2fib_add_del) \ +_(L2_FLAGS, l2_flags) \ +_(BRIDGE_FLAGS, bridge_flags) \ +_(CLASSIFY_ADD_DEL_TABLE, classify_add_del_table) \ +_(CLASSIFY_ADD_DEL_SESSION, classify_add_del_session) \ +_(SW_INTERFACE_SET_L2_BRIDGE, sw_interface_set_l2_bridge) \ +_(SW_INTERFACE_SET_DPDK_HQOS_PIPE, sw_interface_set_dpdk_hqos_pipe) \ +_(SW_INTERFACE_SET_DPDK_HQOS_SUBPORT, sw_interface_set_dpdk_hqos_subport)\ +_(SW_INTERFACE_SET_DPDK_HQOS_TCTBL, sw_interface_set_dpdk_hqos_tctbl) \ +_(BRIDGE_DOMAIN_ADD_DEL, bridge_domain_add_del) \ +_(BRIDGE_DOMAIN_DUMP, bridge_domain_dump) \ +_(CLASSIFY_SET_INTERFACE_IP_TABLE, classify_set_interface_ip_table) \ +_(CLASSIFY_SET_INTERFACE_L2_TABLES, classify_set_interface_l2_tables) \ +_(ADD_NODE_NEXT, add_node_next) \ +_(DHCP_PROXY_CONFIG_2, dhcp_proxy_config_2) \ +_(DHCP_CLIENT_CONFIG, dhcp_client_config) \ +_(L2TPV3_CREATE_TUNNEL, l2tpv3_create_tunnel) \ +_(L2TPV3_SET_TUNNEL_COOKIES, l2tpv3_set_tunnel_cookies) \ +_(L2TPV3_INTERFACE_ENABLE_DISABLE, l2tpv3_interface_enable_disable) \ +_(L2TPV3_SET_LOOKUP_KEY, l2tpv3_set_lookup_key) \ +_(SW_IF_L2TPV3_TUNNEL_DUMP, sw_if_l2tpv3_tunnel_dump) \ +_(VXLAN_ADD_DEL_TUNNEL, vxlan_add_del_tunnel) \ +_(VXLAN_TUNNEL_DUMP, vxlan_tunnel_dump) \ +_(GRE_ADD_DEL_TUNNEL, gre_add_del_tunnel) \ +_(GRE_TUNNEL_DUMP, gre_tunnel_dump) \ +_(L2_FIB_CLEAR_TABLE, l2_fib_clear_table) \ +_(L2_INTERFACE_EFP_FILTER, l2_interface_efp_filter) \ +_(L2_INTERFACE_VLAN_TAG_REWRITE, l2_interface_vlan_tag_rewrite) \ +_(CREATE_VHOST_USER_IF, create_vhost_user_if) \ +_(MODIFY_VHOST_USER_IF, modify_vhost_user_if) \ +_(DELETE_VHOST_USER_IF, delete_vhost_user_if) \ +_(SW_INTERFACE_DUMP, sw_interface_dump) \ +_(CONTROL_PING, control_ping) \ +_(WANT_INTERFACE_EVENTS, want_interface_events) \ +_(CLI_REQUEST, cli_request) \ 
+_(CLI_INBAND, cli_inband) \ +_(MEMCLNT_CREATE, memclnt_create) \ +_(SW_INTERFACE_VHOST_USER_DUMP, sw_interface_vhost_user_dump) \ +_(SHOW_VERSION, show_version) \ +_(L2_FIB_TABLE_DUMP, l2_fib_table_dump) \ +_(VXLAN_GPE_ADD_DEL_TUNNEL, vxlan_gpe_add_del_tunnel) \ +_(VXLAN_GPE_TUNNEL_DUMP, vxlan_gpe_tunnel_dump) \ +_(INTERFACE_NAME_RENUMBER, interface_name_renumber) \ +_(WANT_IP4_ARP_EVENTS, want_ip4_arp_events) \ +_(WANT_IP6_ND_EVENTS, want_ip6_nd_events) \ +_(INPUT_ACL_SET_INTERFACE, input_acl_set_interface) \ +_(IP_ADDRESS_DUMP, ip_address_dump) \ +_(IP_DUMP, ip_dump) \ +_(DELETE_LOOPBACK, delete_loopback) \ +_(BD_IP_MAC_ADD_DEL, bd_ip_mac_add_del) \ +_(COP_INTERFACE_ENABLE_DISABLE, cop_interface_enable_disable) \ +_(COP_WHITELIST_ENABLE_DISABLE, cop_whitelist_enable_disable) \ +_(AF_PACKET_CREATE, af_packet_create) \ +_(AF_PACKET_DELETE, af_packet_delete) \ +_(SW_INTERFACE_CLEAR_STATS, sw_interface_clear_stats) \ +_(MPLS_FIB_DUMP, mpls_fib_dump) \ +_(MPLS_TUNNEL_DUMP, mpls_tunnel_dump) \ +_(CLASSIFY_TABLE_IDS,classify_table_ids) \ +_(CLASSIFY_TABLE_BY_INTERFACE, classify_table_by_interface) \ +_(CLASSIFY_TABLE_INFO,classify_table_info) \ +_(CLASSIFY_SESSION_DUMP,classify_session_dump) \ +_(SET_IPFIX_EXPORTER, set_ipfix_exporter) \ +_(IPFIX_EXPORTER_DUMP, ipfix_exporter_dump) \ +_(SET_IPFIX_CLASSIFY_STREAM, set_ipfix_classify_stream) \ +_(IPFIX_CLASSIFY_STREAM_DUMP, ipfix_classify_stream_dump) \ +_(IPFIX_CLASSIFY_TABLE_ADD_DEL, ipfix_classify_table_add_del) \ +_(IPFIX_CLASSIFY_TABLE_DUMP, ipfix_classify_table_dump) \ +_(SW_INTERFACE_SPAN_ENABLE_DISABLE, sw_interface_span_enable_disable) \ +_(SW_INTERFACE_SPAN_DUMP, sw_interface_span_dump) \ +_(GET_NEXT_INDEX, get_next_index) \ +_(PG_CREATE_INTERFACE,pg_create_interface) \ +_(PG_CAPTURE, pg_capture) \ +_(PG_ENABLE_DISABLE, pg_enable_disable) \ +_(POLICER_ADD_DEL, policer_add_del) \ +_(POLICER_DUMP, policer_dump) \ +_(POLICER_CLASSIFY_SET_INTERFACE, policer_classify_set_interface) \ +_(POLICER_CLASSIFY_DUMP, policer_classify_dump) \ +_(IP_SOURCE_AND_PORT_RANGE_CHECK_ADD_DEL, \ + ip_source_and_port_range_check_add_del) \ +_(IP_SOURCE_AND_PORT_RANGE_CHECK_INTERFACE_ADD_DEL, \ + ip_source_and_port_range_check_interface_add_del) \ +_(LISP_ENABLE_DISABLE, lisp_enable_disable) \ +_(LISP_GPE_ENABLE_DISABLE, lisp_gpe_enable_disable) \ +_(LISP_GPE_ADD_DEL_IFACE, lisp_gpe_add_del_iface) \ +_(LISP_PITR_SET_LOCATOR_SET, lisp_pitr_set_locator_set) \ +_(LISP_MAP_REQUEST_MODE, lisp_map_request_mode) \ +_(SHOW_LISP_MAP_REQUEST_MODE, show_lisp_map_request_mode) \ +_(LISP_ADD_DEL_REMOTE_MAPPING, lisp_add_del_remote_mapping) \ +_(LISP_ADD_DEL_ADJACENCY, lisp_add_del_adjacency) \ +_(LISP_ADD_DEL_MAP_REQUEST_ITR_RLOCS, \ + lisp_add_del_map_request_itr_rlocs) \ +_(LISP_EID_TABLE_ADD_DEL_MAP, lisp_eid_table_add_del_map) \ +_(LISP_ADD_DEL_LOCAL_EID, lisp_add_del_local_eid) \ +_(LISP_GPE_ADD_DEL_FWD_ENTRY, lisp_gpe_add_del_fwd_entry) \ +_(LISP_ADD_DEL_LOCATOR_SET, lisp_add_del_locator_set) \ +_(LISP_ADD_DEL_MAP_RESOLVER, lisp_add_del_map_resolver) \ +_(LISP_ADD_DEL_LOCATOR, lisp_add_del_locator) \ +_(LISP_EID_TABLE_DUMP, lisp_eid_table_dump) \ +_(LISP_EID_TABLE_MAP_DUMP, lisp_eid_table_map_dump) \ +_(LISP_EID_TABLE_VNI_DUMP, lisp_eid_table_vni_dump) \ +_(LISP_GPE_TUNNEL_DUMP, lisp_gpe_tunnel_dump) \ +_(LISP_MAP_RESOLVER_DUMP, lisp_map_resolver_dump) \ +_(LISP_MAP_SERVER_DUMP, lisp_map_server_dump) \ +_(LISP_LOCATOR_SET_DUMP, lisp_locator_set_dump) \ +_(LISP_LOCATOR_DUMP, lisp_locator_dump) \ +_(LISP_ADJACENCIES_GET, lisp_adjacencies_get) \ 
+_(SHOW_LISP_RLOC_PROBE_STATE, show_lisp_rloc_probe_state) \ +_(SHOW_LISP_MAP_REGISTER_STATE, show_lisp_map_register_state) \ +_(LISP_RLOC_PROBE_ENABLE_DISABLE, lisp_rloc_probe_enable_disable) \ +_(LISP_MAP_REGISTER_ENABLE_DISABLE, lisp_map_register_enable_disable) \ +_(IPSEC_GRE_ADD_DEL_TUNNEL, ipsec_gre_add_del_tunnel) \ +_(IPSEC_GRE_TUNNEL_DUMP, ipsec_gre_tunnel_dump) \ +_(DELETE_SUBIF, delete_subif) \ +_(L2_INTERFACE_PBB_TAG_REWRITE, l2_interface_pbb_tag_rewrite) \ +_(PUNT, punt) \ +_(FLOW_CLASSIFY_SET_INTERFACE, flow_classify_set_interface) \ +_(FLOW_CLASSIFY_DUMP, flow_classify_dump) \ +_(GET_FIRST_MSG_ID, get_first_msg_id) \ +_(IOAM_ENABLE, ioam_enable) \ +_(IOAM_DISABLE, ioam_disable) \ +_(IP_FIB_DUMP, ip_fib_dump) \ +_(IP6_FIB_DUMP, ip6_fib_dump) \ +_(FEATURE_ENABLE_DISABLE, feature_enable_disable) \ +_(SW_INTERFACE_TAG_ADD_DEL, sw_interface_tag_add_del) \ +_(SW_INTERFACE_SET_MTU, sw_interface_set_mtu) + void +vl_msg_api_custom_dump_configure (api_main_t * am) +{ +#define _(n,f) am->msg_print_handlers[VL_API_##n] \ + = (void *) vl_api_##f##_t_print; + foreach_custom_print_function; +#undef _ +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vpp/api/gmon.c b/src/vpp/api/gmon.c new file mode 100644 index 00000000..20deb6a2 --- /dev/null +++ b/src/vpp/api/gmon.c @@ -0,0 +1,319 @@ +/* + * Copyright (c) 2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +typedef struct +{ + svmdb_client_t *svmdb_client; + f64 *vector_rate_ptr; + f64 *input_rate_ptr; + f64 *sig_error_rate_ptr; + pid_t *vpef_pid_ptr; + u64 last_sig_errors; + u64 current_sig_errors; + uword *sig_error_bitmap; + vlib_main_t *vlib_main; + vlib_main_t **my_vlib_mains; + +} gmon_main_t; + +#if DPDK == 0 +static inline u64 +vnet_get_aggregate_rx_packets (void) +{ + return 0; +} +#else +#include +#include +#include +#endif + +gmon_main_t gmon_main; + +static u64 +get_significant_errors (gmon_main_t * gm) +{ + vlib_main_t *this_vlib_main; + vlib_error_main_t *em; + uword code; + int vm_index; + u64 significant_errors = 0; + + /* *INDENT-OFF* */ + clib_bitmap_foreach (code, gm->sig_error_bitmap, + ({ + for (vm_index = 0; vm_index < vec_len (gm->my_vlib_mains); vm_index++) + { + this_vlib_main = gm->my_vlib_mains[vm_index]; + em = &this_vlib_main->error_main; + significant_errors += em->counters[code] - + ((vec_len(em->counters_last_clear) > code) ? 
+ em->counters_last_clear[code] : 0); + } + })); + /* *INDENT-ON* */ + + return (significant_errors); +} + +static clib_error_t * +publish_pid (vlib_main_t * vm) +{ + gmon_main_t *gm = &gmon_main; + + *gm->vpef_pid_ptr = getpid (); + + return 0; +} + +VLIB_API_INIT_FUNCTION (publish_pid); + + +static uword +gmon_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) +{ + f64 vector_rate; + u64 input_packets, last_input_packets, new_sig_errors; + f64 last_runtime, dt, now; + gmon_main_t *gm = &gmon_main; + int i; + + last_runtime = 0.0; + last_input_packets = 0; + + last_runtime = 0.0; + last_input_packets = 0; + + /* Initial wait for the world to settle down */ + vlib_process_suspend (vm, 5.0); + + if (vec_len (vlib_mains) == 0) + vec_add1 (gm->my_vlib_mains, &vlib_global_main); + else + { + for (i = 0; i < vec_len (vlib_mains); i++) + vec_add1 (gm->my_vlib_mains, vlib_mains[i]); + } + + while (1) + { + vlib_process_suspend (vm, 5.0); + vector_rate = vlib_last_vector_length_per_node (vm); + *gm->vector_rate_ptr = vector_rate; + now = vlib_time_now (vm); + dt = now - last_runtime; + input_packets = vnet_get_aggregate_rx_packets (); + *gm->input_rate_ptr = (f64) (input_packets - last_input_packets) / dt; + last_runtime = now; + last_input_packets = input_packets; + + new_sig_errors = get_significant_errors (gm); + *gm->sig_error_rate_ptr = + ((f64) (new_sig_errors - gm->last_sig_errors)) / dt; + gm->last_sig_errors = new_sig_errors; + } + + return 0; /* not so much */ +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (gmon_process_node,static) = { + .function = gmon_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "gmon-process", +}; +/* *INDENT-ON* */ + +static clib_error_t * +gmon_init (vlib_main_t * vm) +{ + gmon_main_t *gm = &gmon_main; + api_main_t *am = &api_main; + pid_t *swp = 0; + f64 *v = 0; + clib_error_t *error; + svmdb_map_args_t _ma, *ma = &_ma; + + if ((error = vlib_call_init_function (vm, vpe_api_init))) + return (error); + + if ((error = vlib_call_init_function (vm, vlibmemory_init))) + return (error); + + gm->vlib_main = vm; + + memset (ma, 0, sizeof (*ma)); + ma->root_path = am->root_path; + ma->uid = am->api_uid; + ma->gid = am->api_gid; + + gm->svmdb_client = svmdb_map (ma); + + /* Find or create, set to zero */ + vec_add1 (v, 0.0); + svmdb_local_set_vec_variable (gm->svmdb_client, + "vpp_vector_rate", (char *) v, sizeof (*v)); + vec_free (v); + vec_add1 (v, 0.0); + svmdb_local_set_vec_variable (gm->svmdb_client, + "vpp_input_rate", (char *) v, sizeof (*v)); + vec_free (v); + vec_add1 (v, 0.0); + svmdb_local_set_vec_variable (gm->svmdb_client, + "vpp_sig_error_rate", + (char *) v, sizeof (*v)); + vec_free (v); + + vec_add1 (swp, 0.0); + svmdb_local_set_vec_variable (gm->svmdb_client, + "vpp_pid", (char *) swp, sizeof (*swp)); + vec_free (swp); + + /* the value cells will never move, so acquire references to them */ + gm->vector_rate_ptr = + svmdb_local_get_variable_reference (gm->svmdb_client, + SVMDB_NAMESPACE_VEC, + "vpp_vector_rate"); + gm->input_rate_ptr = + svmdb_local_get_variable_reference (gm->svmdb_client, + SVMDB_NAMESPACE_VEC, + "vpp_input_rate"); + gm->sig_error_rate_ptr = + svmdb_local_get_variable_reference (gm->svmdb_client, + SVMDB_NAMESPACE_VEC, + "vpp_sig_error_rate"); + gm->vpef_pid_ptr = + svmdb_local_get_variable_reference (gm->svmdb_client, + SVMDB_NAMESPACE_VEC, "vpp_pid"); + return 0; +} + +VLIB_INIT_FUNCTION (gmon_init); + +static clib_error_t * +gmon_exit (vlib_main_t * vm) +{ + gmon_main_t *gm = &gmon_main; + + if 
(gm->vector_rate_ptr) + { + *gm->vector_rate_ptr = 0.0; + *gm->vpef_pid_ptr = 0; + *gm->input_rate_ptr = 0.0; + *gm->sig_error_rate_ptr = 0.0; + svm_region_unmap ((void *) gm->svmdb_client->db_rp); + vec_free (gm->svmdb_client); + } + return 0; +} + +VLIB_MAIN_LOOP_EXIT_FUNCTION (gmon_exit); + +static int +significant_error_enable_disable (gmon_main_t * gm, u32 index, int enable) +{ + vlib_main_t *vm = gm->vlib_main; + vlib_error_main_t *em = &vm->error_main; + + if (index >= vec_len (em->counters)) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + gm->sig_error_bitmap = + clib_bitmap_set (gm->sig_error_bitmap, index, enable); + return 0; +} + +static clib_error_t * +set_significant_error_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u32 index; + int enable = 1; + int rv; + gmon_main_t *gm = &gmon_main; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "%d", &index)) + ; + else if (unformat (input, "disable")) + enable = 0; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + rv = significant_error_enable_disable (gm, index, enable); + + switch (rv) + { + case 0: + break; + + default: + return clib_error_return + (0, "significant_error_enable_disable returned %d", rv); + } + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (set_significant_error_command, static) = { + .path = "set significant error", + .short_help = "set significant error [disable]", + .function = set_significant_error_command_fn, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vpp/api/json_format.c b/src/vpp/api/json_format.c new file mode 100644 index 00000000..63454b87 --- /dev/null +++ b/src/vpp/api/json_format.c @@ -0,0 +1,304 @@ +/* + *------------------------------------------------------------------ + * json_format.c + * + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + +*/ +#include +#include "json_format.h" +#include +#include + +#define VAT_TAB_WIDTH 2 + +typedef struct vat_print_ctx_s +{ + FILE *ofp; + u32 indent; +} vat_print_ctx_t; + +/* Format an IP4 address. */ +static u8 * +vat_json_format_ip4_address (u8 * s, va_list * args) +{ + u8 *a = va_arg (*args, u8 *); + return format (s, "%d.%d.%d.%d", a[0], a[1], a[2], a[3]); +} + +/* Format an IP6 address. 
 */
+static u8 *
+vat_json_format_ip6_address (u8 * s, va_list * args)
+{
+  ip6_address_t *a = va_arg (*args, ip6_address_t *);
+  u32 i, i_max_n_zero, max_n_zeros, i_first_zero, n_zeros, last_double_colon;
+
+  i_max_n_zero = ARRAY_LEN (a->as_u16);
+  max_n_zeros = 0;
+  i_first_zero = i_max_n_zero;
+  n_zeros = 0;
+  for (i = 0; i < ARRAY_LEN (a->as_u16); i++)
+    {
+      u32 is_zero = a->as_u16[i] == 0;
+      if (is_zero && i_first_zero >= ARRAY_LEN (a->as_u16))
+        {
+          i_first_zero = i;
+          n_zeros = 0;
+        }
+      n_zeros += is_zero;
+      if ((!is_zero && n_zeros > max_n_zeros)
+          || (i + 1 >= ARRAY_LEN (a->as_u16) && n_zeros > max_n_zeros))
+        {
+          i_max_n_zero = i_first_zero;
+          max_n_zeros = n_zeros;
+          i_first_zero = ARRAY_LEN (a->as_u16);
+          n_zeros = 0;
+        }
+    }
+
+  last_double_colon = 0;
+  for (i = 0; i < ARRAY_LEN (a->as_u16); i++)
+    {
+      if (i == i_max_n_zero && max_n_zeros > 1)
+        {
+          s = format (s, "::");
+          i += max_n_zeros - 1;
+          last_double_colon = 1;
+        }
+      else
+        {
+          s = format (s, "%s%x",
+                      (last_double_colon || i == 0) ? "" : ":",
+                      clib_net_to_host_u16 (a->as_u16[i]));
+          last_double_colon = 0;
+        }
+    }
+
+  return s;
+}
+
+static void
+vat_json_indent_print (vat_print_ctx_t * ctx)
+{
+  int i;
+  for (i = 0; i < ctx->indent * VAT_TAB_WIDTH; i++)
+    {
+      fformat (ctx->ofp, " ");
+    }
+}
+
+static void
+vat_json_indent_line (vat_print_ctx_t * ctx, char *fmt, ...)
+{
+  va_list va;
+
+  vat_json_indent_print (ctx);
+  va_start (va, fmt);
+  va_fformat (ctx->ofp, fmt, &va);
+  va_end (va);
+}
+
+static u8
+is_num_only (vat_json_node_t * p)
+{
+  vat_json_node_t *elem;
+  vec_foreach (elem, p)
+  {
+    if (VAT_JSON_INT != elem->type && VAT_JSON_UINT != elem->type)
+      {
+        return 0;
+      }
+  }
+  return 1;
+}
+
+static void
+vat_json_print_internal (vat_print_ctx_t * ctx, vat_json_node_t * node)
+{
+#define P(fmt,...) fformat(ctx->ofp, fmt, ##__VA_ARGS__)
+#define PL(fmt,...) fformat(ctx->ofp, fmt"\n", ##__VA_ARGS__)
+#define PPL(fmt,...) vat_json_indent_line(ctx, fmt"\n", ##__VA_ARGS__)
+#define PP(fmt,...) vat_json_indent_line(ctx, fmt, ##__VA_ARGS__)
+#define INCR (ctx->indent++)
+#define DECR (ctx->indent--)
+
+  vat_json_pair_t *pair;
+  u32 i, count;
+  vat_json_node_t *elem;
+  u8 num_only = 0;
+
+  if (!node)
+    {
+      return;
+    }
+
+  switch (node->type)
+    {
+    case VAT_JSON_OBJECT:
+      count = vec_len (node->pairs);
+      if (count >= 1)
+        {
+          PL ("{");
+          INCR;
+          for (i = 0; i < count; i++)
+            {
+              pair = &node->pairs[i];
+              PP ("\"%s\": ", pair->name);
+              vat_json_print_internal (ctx, &pair->value);
+              if (i < count - 1)
+                {
+                  P (",");
+                }
+              PL ();
+            }
+          DECR;
+          PP ("}");
+        }
+      else
+        {
+          P ("{}");
+        }
+      break;
+    case VAT_JSON_ARRAY:
+      num_only = is_num_only (node->array);
+      count = vec_len (node->array);
+      if (count >= 1)
+        {
+          if (num_only)
+            P ("[");
+          else
+            PL ("[ ");
+          INCR;
+          for (i = 0; i < count; i++)
+            {
+              elem = &node->array[i];
+              if (!num_only)
+                {
+                  vat_json_indent_print (ctx);
+                }
+              vat_json_print_internal (ctx, elem);
+              if (i < count - 1)
+                {
+                  if (num_only)
+                    {
+                      P (", ");
+                    }
+                  else
+                    {
+                      P (",");
+                    }
+                }
+              if (!num_only)
+                PL ();
+            }
+          DECR;
+          if (!num_only)
+            PP ("]");
+          else
+            P ("]");
+        }
+      else
+        {
+          P ("[]");
+        }
+      break;
+    case VAT_JSON_INT:
+      P ("%d", node->sint);
+      break;
+    case VAT_JSON_UINT:
+      P ("%" PRIu64, node->uint);
+      break;
+    case VAT_JSON_REAL:
+      P ("%f", node->real);
+      break;
+    case VAT_JSON_STRING:
+      P ("\"%s\"", node->string);
+      break;
+    case VAT_JSON_IPV4:
+      P ("\"%U\"", vat_json_format_ip4_address, &node->ip4);
+      break;
+    case VAT_JSON_IPV6:
+      P ("\"%U\"", vat_json_format_ip6_address, &node->ip6);
+      break;
+    default:
+      break;
+    }
+#undef PPL
+#undef PP
+#undef PL
+#undef P
+}
+
+void
+vat_json_print (FILE * ofp, vat_json_node_t * node)
+{
+  vat_print_ctx_t ctx;
+  memset (&ctx, 0, sizeof ctx);
+  ctx.indent = 0;
+  ctx.ofp = ofp;
+  fformat (ofp, "\n");
+  vat_json_print_internal (&ctx, node);
+  fformat (ofp, "\n");
+}
+
+void
+vat_json_free (vat_json_node_t * node)
+{
+  int i = 0;
+
+  if (NULL == node)
+    {
+      return;
+    }
+  switch (node->type)
+    {
+    case VAT_JSON_OBJECT:
+      for (i = 0; i < vec_len (node->pairs); i++)
+        {
+          vat_json_free (&node->pairs[i].value);
+        }
+      if (NULL != node->pairs)
+        {
+          vec_free (node->pairs);
+        }
+      break;
+    case VAT_JSON_ARRAY:
+      for (i = 0; i < vec_len (node->array); i++)
+        {
+          vat_json_free (&node->array[i]);
+        }
+      if (NULL != node->array)
+        {
+          vec_free (node->array);
+        }
+      break;
+    case VAT_JSON_STRING:
+      if (NULL != node->string)
+        {
+          vec_free (node->string);
+        }
+      break;
+    default:
+      break;
+    }
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vpp/api/json_format.h b/src/vpp/api/json_format.h
new file mode 100644
index 00000000..154fb3df
--- /dev/null
+++ b/src/vpp/api/json_format.h
@@ -0,0 +1,254 @@
+/*
+ *------------------------------------------------------------------
+ * json_format.h
+ *
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------ + */ + +#ifndef __JSON_FORMAT_H__ +#define __JSON_FORMAT_H__ + +#include +#include +#include + +/* JSON value type */ +typedef enum +{ + VAT_JSON_NONE, + VAT_JSON_OBJECT, + VAT_JSON_ARRAY, + VAT_JSON_STRING, + VAT_JSON_REAL, + VAT_JSON_UINT, + VAT_JSON_INT, + VAT_JSON_IPV4, + VAT_JSON_IPV6, + VAT_JSON_MAX +} vat_json_val_type_t; + +typedef struct vat_json_node_s vat_json_node_t; +typedef struct vat_json_pair_s vat_json_pair_t; + +/* JSON object structure */ +struct vat_json_node_s +{ + vat_json_val_type_t type; + union + { + vat_json_pair_t *pairs; + vat_json_node_t *array; + u8 *string; + struct in_addr ip4; + struct in6_addr ip6; + u64 uint; + i64 sint; + f64 real; + }; +}; + +struct vat_json_pair_s +{ + const char *name; + vat_json_node_t value; +}; + +void vat_json_print (FILE * ofp, vat_json_node_t * node); +void vat_json_free (vat_json_node_t * node); + +static_always_inline void +vat_json_init_object (vat_json_node_t * json) +{ + json->type = VAT_JSON_OBJECT; + json->pairs = NULL; +} + +static_always_inline void +vat_json_init_array (vat_json_node_t * json) +{ + json->type = VAT_JSON_ARRAY; + json->array = NULL; +} + +static_always_inline void +vat_json_set_string (vat_json_node_t * json, u8 * str) +{ + json->type = VAT_JSON_STRING; + json->string = str; +} + +static_always_inline void +vat_json_set_string_copy (vat_json_node_t * json, const u8 * str) +{ + u8 *ns = NULL; + vec_validate (ns, strlen ((const char *) str)); + strcpy ((char *) ns, (const char *) str); + vec_add1 (ns, '\0'); + vat_json_set_string (json, ns); +} + +static_always_inline void +vat_json_set_int (vat_json_node_t * json, i64 num) +{ + json->type = VAT_JSON_INT; + json->sint = num; +} + +static_always_inline void +vat_json_set_uint (vat_json_node_t * json, u64 num) +{ + json->type = VAT_JSON_UINT; + json->uint = num; +} + +static_always_inline void +vat_json_set_real (vat_json_node_t * json, f64 real) +{ + json->type = VAT_JSON_REAL; + json->real = real; +} + +static_always_inline void +vat_json_set_ip4 (vat_json_node_t * json, struct in_addr ip4) +{ + json->type = VAT_JSON_IPV4; + json->ip4 = ip4; +} + +static_always_inline void +vat_json_set_ip6 (vat_json_node_t * json, struct in6_addr ip6) +{ + json->type = VAT_JSON_IPV6; + json->ip6 = ip6; +} + +static_always_inline vat_json_node_t * +vat_json_object_add (vat_json_node_t * json, const char *name) +{ + ASSERT (VAT_JSON_OBJECT == json->type); + uword pos = vec_len (json->pairs); + vec_validate (json->pairs, pos); + json->pairs[pos].name = name; + return &json->pairs[pos].value; +} + +static_always_inline vat_json_node_t * +vat_json_array_add (vat_json_node_t * json) +{ + ASSERT (VAT_JSON_ARRAY == json->type); + uword pos = vec_len (json->array); + vec_validate (json->array, pos); + return &json->array[pos]; +} + +static_always_inline vat_json_node_t * +vat_json_object_add_list (vat_json_node_t * json, const char *name) +{ + vat_json_node_t *array_node = vat_json_object_add (json, name); + vat_json_init_array (array_node); + return array_node; +} + +static_always_inline void +vat_json_object_add_string_copy (vat_json_node_t * json, + const char *name, u8 * str) +{ + vat_json_set_string_copy (vat_json_object_add (json, name), str); +} + +static_always_inline void +vat_json_object_add_uint (vat_json_node_t * json, + const char *name, u64 number) +{ + vat_json_set_uint (vat_json_object_add (json, name), number); +} + +static_always_inline void +vat_json_object_add_int (vat_json_node_t * json, 
const char *name, i64 number) +{ + vat_json_set_int (vat_json_object_add (json, name), number); +} + +static_always_inline void +vat_json_object_add_real (vat_json_node_t * json, const char *name, f64 real) +{ + vat_json_set_real (vat_json_object_add (json, name), real); +} + +static_always_inline void +vat_json_object_add_ip4 (vat_json_node_t * json, + const char *name, struct in_addr ip4) +{ + vat_json_set_ip4 (vat_json_object_add (json, name), ip4); +} + +static_always_inline void +vat_json_object_add_ip6 (vat_json_node_t * json, + const char *name, struct in6_addr ip6) +{ + vat_json_set_ip6 (vat_json_object_add (json, name), ip6); +} + +static_always_inline void +vat_json_array_add_int (vat_json_node_t * json, i64 number) +{ + vat_json_set_int (vat_json_array_add (json), number); +} + +static_always_inline void +vat_json_array_add_uint (vat_json_node_t * json, u64 number) +{ + vat_json_set_uint (vat_json_array_add (json), number); +} + +static_always_inline void +vat_json_object_add_bytes (vat_json_node_t * json, + const char *name, u8 * array, uword size) +{ + ASSERT (VAT_JSON_OBJECT == json->type); + vat_json_node_t *json_array = vat_json_object_add (json, name); + vat_json_init_array (json_array); + int i; + for (i = 0; i < size; i++) + { + vat_json_array_add_uint (json_array, array[i]); + } +} + +static_always_inline vat_json_node_t * +vat_json_object_get_element (vat_json_node_t * json, const char *name) +{ + int i = 0; + + ASSERT (VAT_JSON_OBJECT == json->type); + for (i = 0; i < vec_len (json->pairs); i++) + { + if (0 == strcmp (json->pairs[i].name, name)) + { + return &json->pairs[i].value; + } + } + return NULL; +} + +#endif /* __JSON_FORMAT_H__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vpp/api/summary_stats_client.c b/src/vpp/api/summary_stats_client.c new file mode 100644 index 00000000..03999567 --- /dev/null +++ b/src/vpp/api/summary_stats_client.c @@ -0,0 +1,302 @@ +/* + *------------------------------------------------------------------ + * summary_stats_client - + * + * Copyright (c) 2010 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include + +#define f64_endian(a) +#define f64_print(a,b) + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) 
+#define vl_printfun
+#include
+#undef vl_printfun
+
+vl_shmem_hdr_t *shmem_hdr;
+
+typedef struct
+{
+  volatile int sigterm_received;
+
+  struct sockaddr_in send_data_addr;
+  int send_data_socket;
+  u8 *display_name;
+
+  /* convenience */
+  unix_shared_memory_queue_t *vl_input_queue;
+  u32 my_client_index;
+} test_main_t;
+
+test_main_t test_main;
+
+/*
+ * Satisfy external references when -lvlib is not available.
+ */
+vlib_main_t vlib_global_main;
+vlib_main_t **vlib_mains;
+
+void
+vlib_cli_output (struct vlib_main_t *vm, char *fmt, ...)
+{
+  clib_warning ("vlib_cli_output called...");
+}
+
+
+static void
+vl_api_vnet_summary_stats_reply_t_handler (vl_api_vnet_summary_stats_reply_t *
+                                           mp)
+{
+  test_main_t *tm = &test_main;
+  static u8 *sb;
+  int n;
+
+  printf ("total rx pkts %llu, total rx bytes %llu\n",
+          (unsigned long long) mp->total_pkts[0],
+          (unsigned long long) mp->total_bytes[0]);
+  printf ("total tx pkts %llu, total tx bytes %llu\n",
+          (unsigned long long) mp->total_pkts[1],
+          (unsigned long long) mp->total_bytes[1]);
+  printf ("vector rate %.2f\n", mp->vector_rate);
+
+  vec_reset_length (sb);
+  sb = format (sb, "%v,%.0f,%llu,%llu,%llu,%llu\n%c",
+               tm->display_name, mp->vector_rate,
+               (unsigned long long) mp->total_pkts[0],
+               (unsigned long long) mp->total_bytes[0],
+               (unsigned long long) mp->total_pkts[1],
+               (unsigned long long) mp->total_bytes[1], 0);
+
+  n = sendto (tm->send_data_socket, sb, vec_len (sb),
+              0, (struct sockaddr *) &tm->send_data_addr,
+              sizeof (tm->send_data_addr));
+
+  if (n != vec_len (sb))
+    clib_unix_warning ("sendto");
+
+}
+
+#define foreach_api_msg                                         \
+_(VNET_SUMMARY_STATS_REPLY, vnet_summary_stats_reply)
+
+int
+connect_to_vpe (char *name)
+{
+  int rv = 0;
+
+  rv = vl_client_connect_to_vlib ("/vpe-api", name, 32);
+
+#define _(N,n)                                          \
+    vl_msg_api_set_handlers(VL_API_##N, #n,             \
+                           vl_api_##n##_t_handler,      \
+                           vl_noop_handler,             \
+                           vl_api_##n##_t_endian,       \
+                           vl_api_##n##_t_print,        \
+                           sizeof(vl_api_##n##_t), 1);
+  foreach_api_msg;
+#undef _
+
+  shmem_hdr = api_main.shmem_hdr;
+
+  return rv;
+}
+
+int
+disconnect_from_vpe (void)
+{
+  vl_client_disconnect_from_vlib ();
+  return 0;
+}
+
+static void
+sigterm_handler (int sig)
+{
+  test_main_t *tm = &test_main;
+  tm->sigterm_received = 1;
+}
+
+/* Parse an IP4 address %d.%d.%d.%d.
*/ +uword +unformat_ip4_address (unformat_input_t * input, va_list * args) +{ + u8 *result = va_arg (*args, u8 *); + unsigned a[4]; + + if (!unformat (input, "%d.%d.%d.%d", &a[0], &a[1], &a[2], &a[3])) + return 0; + + if (a[0] >= 256 || a[1] >= 256 || a[2] >= 256 || a[3] >= 256) + return 0; + + result[0] = a[0]; + result[1] = a[1]; + result[2] = a[2]; + result[3] = a[3]; + + return 1; +} + +int +main (int argc, char **argv) +{ + api_main_t *am = &api_main; + test_main_t *tm = &test_main; + vl_api_vnet_get_summary_stats_t *mp; + unformat_input_t _input, *input = &_input; + clib_error_t *error = 0; + ip4_address_t collector_ip; + u8 *display_name = 0; + u16 collector_port = 7654; + + collector_ip.as_u32 = (u32) ~ 0; + + unformat_init_command_line (input, argv); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "collector-ip %U", + unformat_ip4_address, &collector_ip)) + ; + else if (unformat (input, "display-name %v", &display_name)) + ; + else if (unformat (input, "collector-port %d", &collector_port)) + ; + else + { + error = + clib_error_return + (0, "Usage: %s collector-ip \n" + " [display-name ] [collector-port ]\n" + " port defaults to 7654", argv[0]); + break; + } + } + + if (error == 0 && collector_ip.as_u32 == (u32) ~ 0) + error = clib_error_return (0, "collector-ip not set...\n"); + + + if (error) + { + clib_error_report (error); + exit (1); + } + + if (display_name == 0) + { + display_name = format (0, "vpe-to-%d.%d.%d.%d", + collector_ip.as_u8[0], + collector_ip.as_u8[1], + collector_ip.as_u8[2], collector_ip.as_u8[3]); + } + + + connect_to_vpe ("test_client"); + + tm->vl_input_queue = shmem_hdr->vl_input_queue; + tm->my_client_index = am->my_client_index; + tm->display_name = display_name; + + signal (SIGTERM, sigterm_handler); + signal (SIGINT, sigterm_handler); + signal (SIGQUIT, sigterm_handler); + + /* data (multicast) RX socket */ + tm->send_data_socket = socket (PF_INET, SOCK_DGRAM, IPPROTO_UDP); + if (tm->send_data_socket < 0) + { + clib_unix_warning (0, "data_rx_socket"); + exit (1); + } + + memset (&tm->send_data_addr, 0, sizeof (tm->send_data_addr)); + tm->send_data_addr.sin_family = AF_INET; + tm->send_data_addr.sin_addr.s_addr = collector_ip.as_u32; + tm->send_data_addr.sin_port = htons (collector_port); + + fformat (stdout, "Send SIGINT or SIGTERM to quit...\n"); + + while (1) + { + sleep (5); + + if (tm->sigterm_received) + break; + /* Poll for stats */ + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_VNET_GET_SUMMARY_STATS); + mp->client_index = tm->my_client_index; + vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp); + } + + fformat (stdout, "Exiting...\n"); + + disconnect_from_vpe (); + exit (0); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vpp/api/test_client.c b/src/vpp/api/test_client.c new file mode 100644 index 00000000..5c568950 --- /dev/null +++ b/src/vpp/api/test_client.c @@ -0,0 +1,1531 @@ +/* + *------------------------------------------------------------------ + * api.c - message handler registration + * + * Copyright (c) 2010 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include +#include + +#define f64_endian(a) +#define f64_print(a,b) + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) +#define vl_printfun +#include +#undef vl_printfun + +vl_shmem_hdr_t *shmem_hdr; + +typedef struct +{ + int link_events_on; + int stats_on; + int oam_events_on; + + /* convenience */ + unix_shared_memory_queue_t *vl_input_queue; + u32 my_client_index; +} test_main_t; + +test_main_t test_main; + +/* + * Satisfy external references when -lvlib is not available. + */ +vlib_main_t vlib_global_main; +vlib_main_t **vlib_mains; + +void +vlib_cli_output (struct vlib_main_t *vm, char *fmt, ...) +{ + clib_warning ("vlib_cli_output callled..."); +} + +u8 * +format_ethernet_address (u8 * s, va_list * args) +{ + u8 *a = va_arg (*args, u8 *); + + return format (s, "%02x:%02x:%02x:%02x:%02x:%02x", + a[0], a[1], a[2], a[3], a[4], a[5]); +} + +static void +vl_api_sw_interface_details_t_handler (vl_api_sw_interface_details_t * mp) +{ + char *duplex, *speed; + + switch (mp->link_duplex << VNET_HW_INTERFACE_FLAG_DUPLEX_SHIFT) + { + case VNET_HW_INTERFACE_FLAG_HALF_DUPLEX: + duplex = "half"; + break; + case VNET_HW_INTERFACE_FLAG_FULL_DUPLEX: + duplex = "full"; + break; + default: + duplex = "bogus"; + break; + } + switch (mp->link_speed << VNET_HW_INTERFACE_FLAG_SPEED_SHIFT) + { + case VNET_HW_INTERFACE_FLAG_SPEED_10M: + speed = "10Mbps"; + break; + case VNET_HW_INTERFACE_FLAG_SPEED_100M: + speed = "100Mbps"; + break; + case VNET_HW_INTERFACE_FLAG_SPEED_1G: + speed = "1Gbps"; + break; + case VNET_HW_INTERFACE_FLAG_SPEED_10G: + speed = "10Gbps"; + break; + case VNET_HW_INTERFACE_FLAG_SPEED_40G: + speed = "40Gbps"; + break; + case VNET_HW_INTERFACE_FLAG_SPEED_100G: + speed = "100Gbps"; + break; + default: + speed = "bogus"; + break; + } + fformat (stdout, "details: %s sw_if_index %d sup_sw_if_index %d " + "link_duplex %s link_speed %s", + mp->interface_name, ntohl (mp->sw_if_index), + ntohl (mp->sup_sw_if_index), duplex, speed); + + if (mp->l2_address_length) + fformat (stdout, " l2 address: %U\n", + format_ethernet_address, mp->l2_address); + else + fformat (stdout, "\n"); +} + +static void +vl_api_sw_interface_set_flags_t_handler (vl_api_sw_interface_set_flags_t * mp) +{ + fformat (stdout, "set flags: sw_if_index %d, admin %s link %s\n", + ntohl (mp->sw_if_index), + mp->admin_up_down ? "up" : "down", + mp->link_up_down ? 
"up" : "down"); +} + +static void + vl_api_sw_interface_set_flags_reply_t_handler + (vl_api_sw_interface_set_flags_reply_t * mp) +{ + fformat (stdout, "set flags reply: reply %d\n", ntohl (mp->retval)); +} + +static void + vl_api_want_interface_events_reply_t_handler + (vl_api_want_interface_events_reply_t * mp) +{ +} + +static void +vl_api_want_stats_reply_t_handler (vl_api_want_stats_reply_t * mp) +{ + fformat (stdout, "want stats reply %d\n", ntohl (mp->retval)); +} + +static void +vl_api_want_oam_events_reply_t_handler (vl_api_want_oam_events_reply_t * mp) +{ + fformat (stdout, "want oam reply %d\n", ntohl (mp->retval)); +} + +static void +vl_api_ip_add_del_route_reply_t_handler (vl_api_ip_add_del_route_reply_t * mp) +{ + fformat (stdout, "add_route reply %d\n", ntohl (mp->retval)); +} + +static void + vl_api_sw_interface_add_del_address_reply_t_handler + (vl_api_sw_interface_add_del_address_reply_t * mp) +{ + fformat (stdout, "add_del_address reply %d\n", ntohl (mp->retval)); +} + +static void + vl_api_sw_interface_set_table_reply_t_handler + (vl_api_sw_interface_set_table_reply_t * mp) +{ + fformat (stdout, "set_table reply %d\n", ntohl (mp->retval)); +} + +static void +vl_api_tap_connect_reply_t_handler (vl_api_tap_connect_reply_t * mp) +{ + fformat (stdout, "tap connect reply %d, sw_if_index %d\n", + ntohl (mp->retval), ntohl (mp->sw_if_index)); +} + +static void +vl_api_create_vlan_subif_reply_t_handler (vl_api_create_vlan_subif_reply_t * + mp) +{ + fformat (stdout, "create vlan subif reply %d, sw_if_index %d\n", + ntohl (mp->retval), ntohl (mp->sw_if_index)); +} + +static void vl_api_proxy_arp_add_del_reply_t_handler + (vl_api_proxy_arp_add_del_reply_t * mp) +{ + fformat (stdout, "add del proxy arp reply %d\n", ntohl (mp->retval)); +} + +static void vl_api_proxy_arp_intfc_enable_disable_reply_t_handler + (vl_api_proxy_arp_intfc_enable_disable_reply_t * mp) +{ + fformat (stdout, "proxy arp intfc ena/dis reply %d\n", ntohl (mp->retval)); +} + +static void vl_api_ip_neighbor_add_del_reply_t_handler + (vl_api_ip_neighbor_add_del_reply_t * mp) +{ + fformat (stdout, "ip neighbor add del reply %d\n", ntohl (mp->retval)); +} + +static void +vl_api_vnet_interface_counters_t_handler (vl_api_vnet_interface_counters_t * + mp) +{ + char *counter_name; + u32 count, sw_if_index; + int i; + + count = ntohl (mp->count); + sw_if_index = ntohl (mp->first_sw_if_index); + if (mp->is_combined == 0) + { + u64 *vp, v; + vp = (u64 *) mp->data; + + switch (mp->vnet_counter_type) + { + case VNET_INTERFACE_COUNTER_DROP: + counter_name = "drop"; + break; + case VNET_INTERFACE_COUNTER_PUNT: + counter_name = "punt"; + break; + case VNET_INTERFACE_COUNTER_IP4: + counter_name = "ip4"; + break; + case VNET_INTERFACE_COUNTER_IP6: + counter_name = "ip6"; + break; + case VNET_INTERFACE_COUNTER_RX_NO_BUF: + counter_name = "rx-no-buf"; + break; + case VNET_INTERFACE_COUNTER_RX_MISS: + counter_name = "rx-miss"; + break; + case VNET_INTERFACE_COUNTER_RX_ERROR: + counter_name = "rx-error"; + break; + case VNET_INTERFACE_COUNTER_TX_ERROR: + counter_name = "tx-error (fifo-full)"; + break; + default: + counter_name = "bogus"; + break; + } + for (i = 0; i < count; i++) + { + v = clib_mem_unaligned (vp, u64); + v = clib_net_to_host_u64 (v); + vp++; + fformat (stdout, "%d.%s %lld\n", sw_if_index, counter_name, v); + sw_if_index++; + } + } + else + { + vlib_counter_t *vp; + u64 packets, bytes; + vp = (vlib_counter_t *) mp->data; + + switch (mp->vnet_counter_type) + { + case VNET_INTERFACE_COUNTER_RX: + counter_name = "rx"; + 
break; + case VNET_INTERFACE_COUNTER_TX: + counter_name = "tx"; + break; + default: + counter_name = "bogus"; + break; + } + for (i = 0; i < count; i++) + { + packets = clib_mem_unaligned (&vp->packets, u64); + packets = clib_net_to_host_u64 (packets); + bytes = clib_mem_unaligned (&vp->bytes, u64); + bytes = clib_net_to_host_u64 (bytes); + vp++; + fformat (stdout, "%d.%s.packets %lld\n", + sw_if_index, counter_name, packets); + fformat (stdout, "%d.%s.bytes %lld\n", + sw_if_index, counter_name, bytes); + sw_if_index++; + } + } +} + +/* Format an IP4 address. */ +u8 * +format_ip4_address (u8 * s, va_list * args) +{ + u8 *a = va_arg (*args, u8 *); + return format (s, "%d.%d.%d.%d", a[0], a[1], a[2], a[3]); +} + +/* Format an IP4 route destination and length. */ +u8 * +format_ip4_address_and_length (u8 * s, va_list * args) +{ + u8 *a = va_arg (*args, u8 *); + u8 l = va_arg (*args, u32); + return format (s, "%U/%d", format_ip4_address, a, l); +} + +static void +vl_api_vnet_ip4_fib_counters_t_handler (vl_api_vnet_ip4_fib_counters_t * mp) +{ + int i; + vl_api_ip4_fib_counter_t *ctrp; + u32 count; + + count = ntohl (mp->count); + + fformat (stdout, "fib id %d, count this msg %d\n", + ntohl (mp->vrf_id), count); + + ctrp = mp->c; + for (i = 0; i < count; i++) + { + fformat (stdout, "%U: %lld packets, %lld bytes\n", + format_ip4_address_and_length, &ctrp->address, + (u32) ctrp->address_length, + clib_net_to_host_u64 (ctrp->packets), + clib_net_to_host_u64 (ctrp->bytes)); + ctrp++; + } +} + +/* Format an IP6 address. */ +u8 * +format_ip6_address (u8 * s, va_list * args) +{ + ip6_address_t *a = va_arg (*args, ip6_address_t *); + u32 i, i_max_n_zero, max_n_zeros, i_first_zero, n_zeros, last_double_colon; + + i_max_n_zero = ARRAY_LEN (a->as_u16); + max_n_zeros = 0; + i_first_zero = i_max_n_zero; + n_zeros = 0; + for (i = 0; i < ARRAY_LEN (a->as_u16); i++) + { + u32 is_zero = a->as_u16[i] == 0; + if (is_zero && i_first_zero >= ARRAY_LEN (a->as_u16)) + { + i_first_zero = i; + n_zeros = 0; + } + n_zeros += is_zero; + if ((!is_zero && n_zeros > max_n_zeros) + || (i + 1 >= ARRAY_LEN (a->as_u16) && n_zeros > max_n_zeros)) + { + i_max_n_zero = i_first_zero; + max_n_zeros = n_zeros; + i_first_zero = ARRAY_LEN (a->as_u16); + n_zeros = 0; + } + } + + last_double_colon = 0; + for (i = 0; i < ARRAY_LEN (a->as_u16); i++) + { + if (i == i_max_n_zero && max_n_zeros > 1) + { + s = format (s, "::"); + i += max_n_zeros - 1; + last_double_colon = 1; + } + else + { + s = format (s, "%s%x", + (last_double_colon || i == 0) ? "" : ":", + clib_net_to_host_u16 (a->as_u16[i])); + last_double_colon = 0; + } + } + + return s; +} + +/* Format an IP6 route destination and length. 
*/ +u8 * +format_ip6_address_and_length (u8 * s, va_list * args) +{ + ip6_address_t *a = va_arg (*args, ip6_address_t *); + u8 l = va_arg (*args, u32); + return format (s, "%U/%d", format_ip6_address, a, l); +} + +static void +vl_api_vnet_ip6_fib_counters_t_handler (vl_api_vnet_ip6_fib_counters_t * mp) +{ + int i; + vl_api_ip6_fib_counter_t *ctrp; + u32 count; + + count = ntohl (mp->count); + + fformat (stdout, "fib id %d, count this msg %d\n", + ntohl (mp->vrf_id), count); + + ctrp = mp->c; + for (i = 0; i < count; i++) + { + fformat (stdout, "%U: %lld packets, %lld bytes\n", + format_ip6_address_and_length, &ctrp->address, + (u32) ctrp->address_length, + clib_net_to_host_u64 (ctrp->packets), + clib_net_to_host_u64 (ctrp->bytes)); + ctrp++; + } +} + +static void +vl_api_oam_event_t_handler (vl_api_oam_event_t * mp) +{ + fformat (stdout, "OAM: %U now %s\n", + format_ip4_address, &mp->dst_address, + mp->state == 1 ? "alive" : "dead"); +} + +static void +vl_api_oam_add_del_reply_t_handler (vl_api_oam_add_del_reply_t * mp) +{ + fformat (stdout, "oam add del reply %d\n", ntohl (mp->retval)); +} + +static void +vl_api_reset_fib_reply_t_handler (vl_api_reset_fib_reply_t * mp) +{ + fformat (stdout, "fib reset reply %d\n", ntohl (mp->retval)); +} + +static void +vl_api_dhcp_proxy_set_vss_reply_t_handler (vl_api_dhcp_proxy_set_vss_reply_t * + mp) +{ + fformat (stdout, "dhcp proxy set vss reply %d\n", ntohl (mp->retval)); +} + +static void +vl_api_dhcp_proxy_config_reply_t_handler (vl_api_dhcp_proxy_config_reply_t * + mp) +{ + fformat (stdout, "dhcp proxy config reply %d\n", ntohl (mp->retval)); +} + +static void +vl_api_set_ip_flow_hash_reply_t_handler (vl_api_set_ip_flow_hash_reply_t * mp) +{ + fformat (stdout, "set ip flow hash reply %d\n", ntohl (mp->retval)); +} + +static void + vl_api_sw_interface_ip6nd_ra_config_reply_t_handler + (vl_api_sw_interface_ip6nd_ra_config_reply_t * mp) +{ + fformat (stdout, "ip6 nd ra-config reply %d\n", ntohl (mp->retval)); +} + +static void + vl_api_sw_interface_ip6nd_ra_prefix_reply_t_handler + (vl_api_sw_interface_ip6nd_ra_prefix_reply_t * mp) +{ + fformat (stdout, "ip6 nd ra-prefix reply %d\n", ntohl (mp->retval)); +} + +static void + vl_api_sw_interface_ip6_enable_disable_reply_t_handler + (vl_api_sw_interface_ip6_enable_disable_reply_t * mp) +{ + fformat (stdout, "ip6 enable/disable reply %d\n", ntohl (mp->retval)); +} + +static void + vl_api_sw_interface_ip6_set_link_local_address_reply_t_handler + (vl_api_sw_interface_ip6_set_link_local_address_reply_t * mp) +{ + fformat (stdout, "ip6 set link-local address reply %d\n", + ntohl (mp->retval)); +} + +static void vl_api_create_loopback_reply_t_handler + (vl_api_create_loopback_reply_t * mp) +{ + fformat (stdout, "create loopback status %d, sw_if_index %d\n", + ntohl (mp->retval), ntohl (mp->sw_if_index)); +} + +static void +vl_api_sr_tunnel_add_del_reply_t_handler (vl_api_sr_tunnel_add_del_reply_t * + mp) +{ + fformat (stdout, "sr tunnel add/del reply %d\n", ntohl (mp->retval)); +} + +static void vl_api_l2_patch_add_del_reply_t_handler + (vl_api_l2_patch_add_del_reply_t * mp) +{ + fformat (stdout, "l2 patch reply %d\n", ntohl (mp->retval)); +} + +static void vl_api_sw_interface_set_l2_xconnect_reply_t_handler + (vl_api_sw_interface_set_l2_xconnect_reply_t * mp) +{ + fformat (stdout, "l2_xconnect reply %d\n", ntohl (mp->retval)); +} + +static void vl_api_sw_interface_set_l2_bridge_reply_t_handler + (vl_api_sw_interface_set_l2_bridge_reply_t * mp) +{ + fformat (stdout, "l2_bridge reply %d\n", ntohl 
(mp->retval)); +} + +static void +noop_handler (void *notused) +{ +} + +#define vl_api_vnet_ip4_fib_counters_t_endian noop_handler +#define vl_api_vnet_ip4_fib_counters_t_print noop_handler +#define vl_api_vnet_ip6_fib_counters_t_endian noop_handler +#define vl_api_vnet_ip6_fib_counters_t_print noop_handler + +#define foreach_api_msg \ +_(SW_INTERFACE_DETAILS, sw_interface_details) \ +_(SW_INTERFACE_SET_FLAGS, sw_interface_set_flags) \ +_(SW_INTERFACE_SET_FLAGS_REPLY, sw_interface_set_flags_reply) \ +_(WANT_INTERFACE_EVENTS_REPLY, want_interface_events_reply) \ +_(WANT_STATS_REPLY, want_stats_reply) \ +_(WANT_OAM_EVENTS_REPLY, want_oam_events_reply) \ +_(OAM_EVENT, oam_event) \ +_(OAM_ADD_DEL_REPLY, oam_add_del_reply) \ +_(VNET_INTERFACE_COUNTERS, vnet_interface_counters) \ +_(VNET_IP4_FIB_COUNTERS, vnet_ip4_fib_counters) \ +_(VNET_IP6_FIB_COUNTERS, vnet_ip6_fib_counters) \ +_(IP_ADD_DEL_ROUTE_REPLY, ip_add_del_route_reply) \ +_(SW_INTERFACE_ADD_DEL_ADDRESS_REPLY, sw_interface_add_del_address_reply) \ +_(SW_INTERFACE_SET_TABLE_REPLY, sw_interface_set_table_reply) \ +_(TAP_CONNECT_REPLY, tap_connect_reply) \ +_(CREATE_VLAN_SUBIF_REPLY, create_vlan_subif_reply) \ +_(PROXY_ARP_ADD_DEL_REPLY, proxy_arp_add_del_reply) \ +_(PROXY_ARP_INTFC_ENABLE_DISABLE_REPLY, proxy_arp_intfc_enable_disable_reply) \ +_(IP_NEIGHBOR_ADD_DEL_REPLY, ip_neighbor_add_del_reply) \ +_(RESET_FIB_REPLY, reset_fib_reply) \ +_(DHCP_PROXY_CONFIG_REPLY, dhcp_proxy_config_reply) \ +_(DHCP_PROXY_SET_VSS_REPLY, dhcp_proxy_set_vss_reply) \ +_(SET_IP_FLOW_HASH_REPLY, set_ip_flow_hash_reply) \ +_(SW_INTERFACE_IP6ND_RA_CONFIG_REPLY, sw_interface_ip6nd_ra_config_reply) \ +_(SW_INTERFACE_IP6ND_RA_PREFIX_REPLY, sw_interface_ip6nd_ra_prefix_reply) \ +_(SW_INTERFACE_IP6_ENABLE_DISABLE_REPLY, sw_interface_ip6_enable_disable_reply) \ +_(SW_INTERFACE_IP6_SET_LINK_LOCAL_ADDRESS_REPLY, sw_interface_ip6_set_link_local_address_reply) \ + _(CREATE_LOOPBACK_REPLY, create_loopback_reply) \ +_(L2_PATCH_ADD_DEL_REPLY, l2_patch_add_del_reply) \ +_(SR_TUNNEL_ADD_DEL_REPLY,sr_tunnel_add_del_reply) \ +_(SW_INTERFACE_SET_L2_XCONNECT_REPLY, sw_interface_set_l2_xconnect_reply) \ +_(SW_INTERFACE_SET_L2_BRIDGE_REPLY, sw_interface_set_l2_bridge_reply) + +int +connect_to_vpe (char *name) +{ + int rv = 0; + + rv = vl_client_connect_to_vlib ("/vpe-api", name, 32); + +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_api_msg; +#undef _ + + shmem_hdr = api_main.shmem_hdr; + + return rv; +} + +int +disconnect_from_vpe (void) +{ + vl_client_disconnect_from_vlib (); + return 0; +} + +void +link_up_down_enable_disable (test_main_t * tm, int enable) +{ + vl_api_want_interface_events_t *mp; + + /* Request admin / link up down messages */ + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_WANT_INTERFACE_EVENTS); + mp->client_index = tm->my_client_index; + mp->context = 0xdeadbeef; + mp->enable_disable = enable; + mp->pid = getpid (); + vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp); + tm->link_events_on = enable; +} + +void +stats_enable_disable (test_main_t * tm, int enable) +{ + vl_api_want_stats_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_WANT_STATS); + mp->client_index = tm->my_client_index; + mp->context = 0xdeadbeef; + mp->enable_disable = enable; + mp->pid = getpid (); + 
vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp); + tm->stats_on = enable; +} + +void +oam_events_enable_disable (test_main_t * tm, int enable) +{ + vl_api_want_oam_events_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_WANT_OAM_EVENTS); + mp->client_index = tm->my_client_index; + mp->context = 0xdeadbeef; + mp->enable_disable = enable; + mp->pid = getpid (); + vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp); + tm->oam_events_on = enable; +} + +void +oam_add_del (test_main_t * tm, int is_add) +{ + vl_api_oam_add_del_t *mp; + ip4_address_t tmp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_OAM_ADD_DEL); + mp->client_index = tm->my_client_index; + mp->context = 0xdeadbeef; + mp->is_add = is_add; + + tmp.as_u32 = ntohl (0xc0a80101); /* 192.168.1.1 */ + clib_memcpy (mp->src_address, tmp.as_u8, 4); + + tmp.as_u32 = ntohl (0xc0a80103); /* 192.168.1.3 */ + clib_memcpy (mp->dst_address, tmp.as_u8, 4); + + mp->vrf_id = 0; + vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp); +} + +void +dump (test_main_t * tm) +{ + vl_api_sw_interface_dump_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_DUMP); + mp->client_index = tm->my_client_index; + mp->name_filter_valid = 1; + strncpy ((char *) mp->name_filter, "eth", sizeof (mp->name_filter) - 1); + + vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp); +} + +void +add_del_ip4_route (test_main_t * tm, int enable_disable) +{ + vl_api_ip_add_del_route_t *mp; + u32 tmp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_IP_ADD_DEL_ROUTE); + mp->client_index = tm->my_client_index; + mp->context = 0xdeadbeef; + mp->table_id = ntohl (0); + mp->create_vrf_if_needed = 1; + + mp->next_hop_sw_if_index = ntohl (5); + mp->is_add = enable_disable; + mp->next_hop_weight = 1; + + /* Next hop: 6.0.0.1 */ + tmp = ntohl (0x06000001); + clib_memcpy (mp->next_hop_address, &tmp, sizeof (tmp)); + + /* Destination: 10.0.0.1/32 */ + tmp = ntohl (0x0); + clib_memcpy (mp->dst_address, &tmp, sizeof (tmp)); + mp->dst_address_length = 0; + + vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp); +} + +void +add_del_ip6_route (test_main_t * tm, int enable_disable) +{ + vl_api_ip_add_del_route_t *mp; + u64 tmp[2]; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_IP_ADD_DEL_ROUTE); + mp->client_index = tm->my_client_index; + mp->context = 0xdeadbeef; + mp->next_hop_sw_if_index = ntohl (5); + mp->is_add = enable_disable; + mp->is_ipv6 = 1; + mp->next_hop_weight = 1; + mp->dst_address_length = 64; + + /* add/del dabe::/64 via db01::11 */ + + tmp[0] = clib_host_to_net_u64 (0xdabe000000000000ULL); + tmp[1] = clib_host_to_net_u64 (0x0ULL); + clib_memcpy (mp->dst_address, &tmp[0], 8); + clib_memcpy (&mp->dst_address[8], &tmp[1], 8); + + tmp[0] = clib_host_to_net_u64 (0xdb01000000000000ULL); + tmp[1] = clib_host_to_net_u64 (0x11ULL); + clib_memcpy (mp->next_hop_address, &tmp[0], 8); + clib_memcpy (&mp->next_hop_address[8], &tmp[1], 8); + + vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp); +} + +void +add_del_interface_address (test_main_t * tm, int enable_disable) +{ + vl_api_sw_interface_add_del_address_t *mp; + u32 tmp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs 
(VL_API_SW_INTERFACE_ADD_DEL_ADDRESS); + mp->client_index = tm->my_client_index; + mp->context = 0xdeadbeef; + mp->sw_if_index = ntohl (5); + mp->is_add = enable_disable; + mp->address_length = 8; + + tmp = ntohl (0x01020304); + clib_memcpy (mp->address, &tmp, 4); + + vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp); +} + +void +add_del_v6_interface_address (test_main_t * tm, int enable_disable) +{ + vl_api_sw_interface_add_del_address_t *mp; + u64 tmp[2]; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_ADD_DEL_ADDRESS); + mp->client_index = tm->my_client_index; + mp->context = 0xdeadbeef; + mp->is_ipv6 = 1; + mp->sw_if_index = ntohl (5); + mp->is_add = enable_disable; + mp->address_length = 64; + + tmp[0] = clib_host_to_net_u64 (0xdb01000000000000ULL); + tmp[1] = clib_host_to_net_u64 (0x11ULL); + + clib_memcpy (mp->address, &tmp[0], 8); + clib_memcpy (&mp->address[8], &tmp[1], 8); + + vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp); +} + +void +del_all_interface_addresses (test_main_t * tm) +{ + vl_api_sw_interface_add_del_address_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_ADD_DEL_ADDRESS); + mp->client_index = tm->my_client_index; + mp->context = 0xdeadbeef; + mp->sw_if_index = ntohl (5); + mp->del_all = 1; + + vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp); +} + +void +set_interface_table (test_main_t * tm, int is_ipv6, u32 vrf_id) +{ + vl_api_sw_interface_set_table_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_SET_TABLE); + mp->client_index = tm->my_client_index; + mp->context = 0xdeadbeef; + mp->sw_if_index = ntohl (5); + mp->is_ipv6 = is_ipv6; + mp->vrf_id = ntohl (vrf_id); + + vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp); +} + +void +connect_unix_tap (test_main_t * tm, char *name) +{ + vl_api_tap_connect_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_TAP_CONNECT); + mp->client_index = tm->my_client_index; + mp->context = 0xdeadbeef; + strncpy ((char *) mp->tap_name, name, sizeof (mp->tap_name) - 1); + mp->use_random_mac = 1; + vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp); +} + +void +create_vlan_subif (test_main_t * tm, u32 vlan_id) +{ + vl_api_create_vlan_subif_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_CREATE_VLAN_SUBIF); + mp->client_index = tm->my_client_index; + mp->context = 0xdeadbeef; + mp->sw_if_index = ntohl (5); + mp->vlan_id = ntohl (vlan_id); + + vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp); +} + +void +add_del_proxy_arp (test_main_t * tm, int is_add) +{ + vl_api_proxy_arp_add_del_t *mp; + u32 tmp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_PROXY_ARP_ADD_DEL); + mp->client_index = tm->my_client_index; + mp->context = 0xdeadbeef; + mp->vrf_id = ntohl (11); + mp->is_add = is_add; + + /* proxy fib 11, 1.1.1.1 -> 1.1.1.10 */ + tmp = ntohl (0x01010101); + clib_memcpy (mp->low_address, &tmp, 4); + + tmp = ntohl (0x0101010a); + clib_memcpy (mp->hi_address, &tmp, 4); + + vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp); +} + +void +proxy_arp_intfc_enable_disable (test_main_t * tm, int enable_disable) +{ + vl_api_proxy_arp_intfc_enable_disable_t *mp; + + mp = vl_msg_api_alloc (sizeof 
(*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_PROXY_ARP_INTFC_ENABLE_DISABLE); + mp->client_index = tm->my_client_index; + mp->context = 0xdeadbeef; + mp->sw_if_index = ntohl (6); + mp->enable_disable = enable_disable; + + vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp); +} + +void +add_ip4_neighbor (test_main_t * tm, int add_del) +{ + vl_api_ip_neighbor_add_del_t *mp; + u32 tmp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_IP_NEIGHBOR_ADD_DEL); + mp->client_index = tm->my_client_index; + mp->context = 0xdeadbeef; + mp->vrf_id = ntohl (11); + mp->sw_if_index = ntohl (6); + mp->is_add = add_del; + + memset (mp->mac_address, 0xbe, sizeof (mp->mac_address)); + + tmp = ntohl (0x0101010a); + clib_memcpy (mp->dst_address, &tmp, 4); + + vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp); +} + +void +add_ip6_neighbor (test_main_t * tm, int add_del) +{ + vl_api_ip_neighbor_add_del_t *mp; + u64 tmp[2]; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_IP_NEIGHBOR_ADD_DEL); + mp->client_index = tm->my_client_index; + mp->context = 0xdeadbeef; + mp->vrf_id = ntohl (11); + mp->sw_if_index = ntohl (6); + mp->is_add = add_del; + mp->is_ipv6 = 1; + + memset (mp->mac_address, 0xbe, sizeof (mp->mac_address)); + + tmp[0] = clib_host_to_net_u64 (0xdb01000000000000ULL); + tmp[1] = clib_host_to_net_u64 (0x11ULL); + + clib_memcpy (mp->dst_address, &tmp[0], 8); + clib_memcpy (&mp->dst_address[8], &tmp[1], 8); + + vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp); +} + +void +reset_fib (test_main_t * tm, u8 is_ip6) +{ + vl_api_reset_fib_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_RESET_FIB); + mp->client_index = tm->my_client_index; + mp->context = 0xdeadbeef; + mp->vrf_id = ntohl (11); + mp->is_ipv6 = is_ip6; + + vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp); +} + +void +dhcpv6_set_vss (test_main_t * tm) +{ + vl_api_dhcp_proxy_set_vss_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_DHCP_PROXY_SET_VSS); + mp->client_index = tm->my_client_index; + mp->context = 0xdeadbeef; + mp->oui = ntohl (6); + mp->fib_id = ntohl (60); + mp->is_add = 1; + mp->is_ipv6 = 1; + vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp); +} + +void +dhcpv4_set_vss (test_main_t * tm) +{ + vl_api_dhcp_proxy_set_vss_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_DHCP_PROXY_SET_VSS); + mp->client_index = tm->my_client_index; + mp->context = 0xdeadbeef; + mp->oui = ntohl (4); + mp->fib_id = ntohl (40); + mp->is_add = 1; + mp->is_ipv6 = 0; + vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp); +} + +void +dhcp_set_vss (test_main_t * tm) +{ + dhcpv4_set_vss (tm); + dhcpv6_set_vss (tm); +} + +void +dhcp_set_proxy (test_main_t * tm, int ipv6) +{ + vl_api_dhcp_proxy_config_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_DHCP_PROXY_CONFIG); + mp->client_index = tm->my_client_index; + mp->context = 0xdeadbeef; + mp->vrf_id = ntohl (0); + mp->is_ipv6 = ipv6; + mp->insert_circuit_id = 1; + mp->is_add = 1; + mp->dhcp_server[0] = 0x20; + mp->dhcp_server[1] = 0x01; + mp->dhcp_server[2] = 0xab; + mp->dhcp_server[3] = 0xcd; + mp->dhcp_server[4] = 0x12; + mp->dhcp_server[5] = 0x34; + mp->dhcp_server[6] = 0xfe; + 
mp->dhcp_server[7] = 0xdc; + mp->dhcp_server[14] = 0; + mp->dhcp_server[15] = 0x2; + + mp->dhcp_src_address[0] = 0x20; + mp->dhcp_src_address[1] = 0x01; + mp->dhcp_src_address[2] = 0xab; + mp->dhcp_src_address[3] = 0xcd; + mp->dhcp_src_address[4] = 0x12; + mp->dhcp_src_address[5] = 0x34; + mp->dhcp_src_address[6] = 0x56; + mp->dhcp_src_address[7] = 0x78; + mp->dhcp_src_address[14] = 0; + mp->dhcp_src_address[15] = 0x2; + + vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp); +} + +void +set_ip_flow_hash (test_main_t * tm, u8 is_ip6) +{ + vl_api_set_ip_flow_hash_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_SET_IP_FLOW_HASH); + mp->client_index = tm->my_client_index; + mp->context = 0xdeadbeef; + mp->vrf_id = 0; + mp->is_ipv6 = is_ip6; + mp->dst = 1; + mp->reverse = 1; + + vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp); +} + +void +ip6nd_ra_config (test_main_t * tm, int is_no) +{ + vl_api_sw_interface_ip6nd_ra_config_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + + mp->client_index = tm->my_client_index; + mp->context = 0xdeadbeef; + mp->sw_if_index = ntohl (5); + mp->is_no = is_no; + + mp->suppress = 1; + + + mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_IP6ND_RA_CONFIG); + vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp); +} + +void +ip6nd_ra_prefix (test_main_t * tm, int is_no) +{ + vl_api_sw_interface_ip6nd_ra_prefix_t *mp; + u64 tmp[2]; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + + mp->client_index = tm->my_client_index; + mp->context = 0xdeadbeef; + mp->sw_if_index = ntohl (5); + mp->is_no = is_no; + + mp->use_default = 1; + + + tmp[0] = clib_host_to_net_u64 (0xdb01000000000000ULL); + tmp[1] = clib_host_to_net_u64 (0x11ULL); + + + clib_memcpy (mp->address, &tmp[0], 8); + clib_memcpy (&mp->address[8], &tmp[1], 8); + + mp->address_length = 64; + + + mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_IP6ND_RA_PREFIX); + vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp); +} + +void +ip6_enable_disable (test_main_t * tm, int enable) +{ + vl_api_sw_interface_ip6_enable_disable_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + + mp->client_index = tm->my_client_index; + mp->context = 0xdeadbeef; + mp->sw_if_index = ntohl (5); + mp->enable = (enable == 1);; + + mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_IP6_ENABLE_DISABLE); + vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp); +} + +void +loop_create (test_main_t * tm) +{ + vl_api_create_loopback_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + + mp->_vl_msg_id = ntohs (VL_API_CREATE_LOOPBACK); + mp->client_index = tm->my_client_index; + mp->context = 0xdeadbeef; + vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp); +} + +void +ip6_set_link_local_address (test_main_t * tm) +{ + vl_api_sw_interface_ip6_set_link_local_address_t *mp; + u64 tmp[2]; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + + mp->client_index = tm->my_client_index; + mp->context = 0xdeadbeef; + mp->sw_if_index = ntohl (5); + + tmp[0] = clib_host_to_net_u64 (0xfe80000000000000ULL); + tmp[1] = clib_host_to_net_u64 (0x11ULL); + + clib_memcpy (mp->address, &tmp[0], 8); + clib_memcpy (&mp->address[8], &tmp[1], 8); + + mp->address_length = 64; + + mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_IP6_SET_LINK_LOCAL_ADDRESS); + + vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp); +} + + +void +set_flags (test_main_t * tm, int 
up_down) +{ + vl_api_sw_interface_set_flags_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + + mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_SET_FLAGS); + mp->client_index = tm->my_client_index; + mp->context = 0xdeadbeef; + mp->sw_if_index = ntohl (5); + mp->admin_up_down = up_down; + vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp); + +} + +void +l2_patch_add_del (test_main_t * tm, int is_add) +{ + vl_api_l2_patch_add_del_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_L2_PATCH_ADD_DEL); + mp->client_index = tm->my_client_index; + mp->context = 0xdeadbeef; + mp->is_add = is_add; + mp->rx_sw_if_index = ntohl (1); + mp->tx_sw_if_index = ntohl (2); + + vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp); +} + +void +l2_xconnect (test_main_t * tm) +{ + vl_api_sw_interface_set_l2_xconnect_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_SET_L2_XCONNECT); + mp->client_index = tm->my_client_index; + mp->context = 0xdeadbeef; + mp->rx_sw_if_index = ntohl (5); + mp->tx_sw_if_index = ntohl (6); + mp->enable = 1; + + vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp); +} + +void +l2_bridge (test_main_t * tm) +{ + vl_api_sw_interface_set_l2_bridge_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_SET_L2_BRIDGE); + mp->client_index = tm->my_client_index; + mp->context = 0xdeadbeef; + mp->rx_sw_if_index = ntohl (5); + mp->bd_id = ntohl (6); + mp->bvi = ntohl (1); + mp->shg = ntohl (0); + mp->enable = 1; + + vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp); +} + +int +main (int argc, char **argv) +{ + api_main_t *am = &api_main; + test_main_t *tm = &test_main; + int ch; + + connect_to_vpe ("test_client"); + + tm->vl_input_queue = shmem_hdr->vl_input_queue; + tm->my_client_index = am->my_client_index; + + fformat (stdout, "Type 'h' for help, 'q' to quit...\n"); + + while (1) + { + ch = getchar (); + switch (ch) + { + case 'q': + goto done; + case 'd': + dump (tm); + break; + case 'L': + link_up_down_enable_disable (tm, 1 /* enable_disable */ ); + break; + case 'l': + link_up_down_enable_disable (tm, 0 /* enable_disable */ ); + break; + case 'S': + stats_enable_disable (tm, 1 /* enable_disable */ ); + break; + case 's': + stats_enable_disable (tm, 0 /* enable_disable */ ); + break; + case '3': + add_del_ip4_route (tm, 0 /* add */ ); + break; + case '4': + add_del_ip4_route (tm, 1 /* add */ ); + break; + case '5': + add_del_ip6_route (tm, 0 /* add */ ); + break; + case '6': + add_del_ip6_route (tm, 1 /* add */ ); + break; + case 'A': + add_del_interface_address (tm, 1 /* add */ ); + break; + case 'a': + add_del_interface_address (tm, 0 /* add */ ); + break; + case 'B': + add_del_v6_interface_address (tm, 1 /* add */ ); + break; + case 'b': + add_del_v6_interface_address (tm, 0 /* add */ ); + break; + case 'E': + l2_patch_add_del (tm, 1 /* is_add */ ); + break; + case 'e': + l2_patch_add_del (tm, 0 /* is_add */ ); + break; + case 'z': + del_all_interface_addresses (tm); + break; + case 't': + set_interface_table (tm, 0 /* is_ipv6 */ , + 11 /* my amp goes to 11 */ ); + break; + case 'T': + set_interface_table (tm, 1 /* is_ipv6 */ , + 12 /* my amp goes to 12 */ ); + break; + + case 'u': + create_vlan_subif (tm, 123); + break; + + case 'c': + connect_unix_tap (tm, "foo"); + break; + + case 'n': + add_ip4_neighbor (tm, 1 /* is_add */ ); + 
add_ip6_neighbor (tm, 1 /* is_add */ ); + break; + + case 'N': + add_ip4_neighbor (tm, 0 /* is_add */ ); + add_ip6_neighbor (tm, 0 /* is_add */ ); + break; + + case 'p': + add_del_proxy_arp (tm, 1 /* add */ ); + break; + + case 'i': + proxy_arp_intfc_enable_disable (tm, 1 /* enable */ ); + break; + + case 'O': + oam_events_enable_disable (tm, 0 /* enable */ ); + break; + + case 'o': + oam_events_enable_disable (tm, 1 /* enable */ ); + break; + + case '0': + oam_add_del (tm, 0 /* is_add */ ); + break; + + case '1': + oam_add_del (tm, 1 /* is_add */ ); + break; + + case 'r': + reset_fib (tm, 0 /* is_ip6 */ ); + break; + + case 'R': + reset_fib (tm, 1 /* is_ip6 */ ); + break; + + case 'j': + dhcp_set_vss (tm); + break; + + case 'k': + dhcp_set_proxy (tm, 0); + break; + + case 'K': + dhcp_set_proxy (tm, 1 /*ipv6 */ ); + break; + + case 'v': + set_ip_flow_hash (tm, 0 /* is_ip6 */ ); + break; + + case 'V': + ip6_set_link_local_address (tm); + break; + + case 'w': + ip6_enable_disable (tm, 1 /* enable */ ); + break; + + case 'W': + ip6_enable_disable (tm, 0 /* disable */ ); + break; + + case 'x': + ip6nd_ra_config (tm, 0 /* is_no */ ); + break; + case 'X': + ip6nd_ra_config (tm, 1 /* is_no */ ); + break; + case 'y': + ip6nd_ra_prefix (tm, 0 /* is_no */ ); + break; + case 'Y': + ip6nd_ra_prefix (tm, 1 /* is_no */ ); + break; + + case '7': + loop_create (tm); + break; + + case 'F': + set_flags (tm, 1 /* up_down */ ); + break; + + case 'f': + set_flags (tm, 0 /* up_down */ ); + break; + + case '@': + l2_xconnect (tm); + break; + + case '#': + l2_bridge (tm); + break; + + case 'h': + fformat (stdout, "q=quit,d=dump,L=link evts on,l=link evts off\n"); + fformat (stdout, "S=stats on,s=stats off\n"); + fformat (stdout, "4=add v4 route, 3=del v4 route\n"); + fformat (stdout, "6=add v6 route, 5=del v6 route\n"); + fformat (stdout, "A=add v4 intfc route, a=del v4 intfc route\n"); + fformat (stdout, "B=add v6 intfc route, b=del v6 intfc route\n"); + fformat (stdout, "z=del all intfc routes\n"); + fformat (stdout, "t=set v4 intfc table, T=set v6 intfc table\n"); + fformat (stdout, "c=connect unix tap\n"); + fformat (stdout, + "j=set dhcpv4 and v6 link-address/option-82 params\n"); + fformat (stdout, "k=set dhcpv4 relay agent params\n"); + fformat (stdout, "K=set dhcpv6 relay agent params\n"); + fformat (stdout, "E=add l2 patch, e=del l2 patch\n"); + fformat (stdout, "V=ip6 set link-local address \n"); + fformat (stdout, "w=ip6 enable \n"); + fformat (stdout, "W=ip6 disable \n"); + fformat (stdout, "x=ip6 nd config \n"); + fformat (stdout, "X=no ip6 nd config\n"); + fformat (stdout, "y=ip6 nd prefix \n"); + fformat (stdout, "Y=no ip6 nd prefix\n"); + fformat (stdout, "@=l2 xconnect\n"); + fformat (stdout, "#=l2 bridge\n"); + + default: + break; + } + + } + +done: + + if (tm->link_events_on) + link_up_down_enable_disable (tm, 0 /* enable */ ); + if (tm->stats_on) + stats_enable_disable (tm, 0 /* enable */ ); + if (tm->oam_events_on) + oam_events_enable_disable (tm, 0 /* enable */ ); + + disconnect_from_vpe (); + exit (0); +} + +#undef vl_api_version +#define vl_api_version(n,v) static u32 vpe_api_version = v; +#include +#undef vl_api_version + +void +vl_client_add_api_signatures (vl_api_memclnt_create_t * mp) +{ + /* + * Send the main API signature in slot 0. This bit of code must + * match the checks in ../vpe/api/api.c: vl_msg_api_version_check(). 
+ */ + mp->api_versions[0] = clib_host_to_net_u32 (vpe_api_version); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vpp/api/test_ha.c b/src/vpp/api/test_ha.c new file mode 100644 index 00000000..3264d5f9 --- /dev/null +++ b/src/vpp/api/test_ha.c @@ -0,0 +1,249 @@ +/* + *------------------------------------------------------------------ + * api.c - message handler registration + * + * Copyright (c) 2010 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#define f64_endian(a) +#define f64_print(a,b) + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) +#define vl_printfun +#include +#undef vl_printfun + +vl_shmem_hdr_t *shmem_hdr; + +typedef struct +{ + u32 pings_sent; + u32 pings_replied; + volatile u32 signal_received; + + /* convenience */ + unix_shared_memory_queue_t *vl_input_queue; + u32 my_client_index; + svmdb_client_t *svmdb_client; +} test_main_t; + +test_main_t test_main; + +static void vl_api_control_ping_reply_t_handler + (vl_api_control_ping_reply_t * mp) +{ + test_main_t *tm = &test_main; + + fformat (stdout, "control ping reply from pid %d\n", ntohl (mp->vpe_pid)); + tm->pings_replied++; +} + +vlib_main_t vlib_global_main; +vlib_main_t **vlib_mains; + +void +vlib_cli_output (struct vlib_main_t *vm, char *fmt, ...) 
+{ + clib_warning ("BUG: vlib_cli_output callled..."); +} + +#define foreach_api_msg \ +_(CONTROL_PING_REPLY,control_ping_reply) + +void +ping (test_main_t * tm) +{ + vl_api_control_ping_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_CONTROL_PING); + mp->client_index = tm->my_client_index; + mp->context = 0xdeadbeef; + + vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp); +} + +static void +noop_handler (void *notused) +{ +} + +int +connect_to_vpe (char *name) +{ + int rv = 0; + test_main_t *tm = &test_main; + api_main_t *am = &api_main; + + rv = vl_client_connect_to_vlib ("/vpe-api", name, 32); + if (rv < 0) + return rv; + +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_api_msg; +#undef _ + + shmem_hdr = api_main.shmem_hdr; + tm->vl_input_queue = shmem_hdr->vl_input_queue; + tm->my_client_index = am->my_client_index; + return 0; +} + +int +disconnect_from_vpe (void) +{ + vl_client_disconnect_from_vlib (); + + return 0; +} + +void +signal_handler (int signo) +{ + test_main_t *tm = &test_main; + + tm->signal_received = 1; +} + + +int +main (int argc, char **argv) +{ + test_main_t *tm = &test_main; + api_main_t *am = &api_main; + u32 swt_pid = 0; + int connected = 0; + + signal (SIGINT, signal_handler); + + while (1) + { + if (tm->signal_received) + break; + + if (am->shmem_hdr) + swt_pid = am->shmem_hdr->vl_pid; + + /* If kill returns 0, the vpe-f process is alive */ + if (kill (swt_pid, 0) == 0) + { + /* Try to connect */ + if (connected == 0) + { + fformat (stdout, "Connect to VPE-f\n"); + if (connect_to_vpe ("test_ha_client") >= 0) + { + tm->pings_sent = 0; + tm->pings_replied = 0; + connected = 1; + } + else + { + fformat (stdout, "Connect failed, sleep and retry...\n"); + sleep (1); + continue; + } + } + tm->pings_sent++; + ping (tm); + + sleep (1); + + /* havent heard back in 3 seconds, disco / reco */ + if ((tm->pings_replied + 3) <= tm->pings_sent) + { + fformat (stdout, "VPE-f pid %d not responding\n", swt_pid); + swt_pid = 0; + disconnect_from_vpe (); + connected = 0; + } + } + else + { + if (connected) + { + fformat (stdout, "VPE-f pid %d died\n", swt_pid); + swt_pid = 0; + disconnect_from_vpe (); + connected = 0; + } + sleep (1); + } + } + + fformat (stdout, "Signal received, graceful exit\n"); + disconnect_from_vpe (); + exit (0); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vpp/api/vat.h b/src/vpp/api/vat.h new file mode 120000 index 00000000..3adbdbae --- /dev/null +++ b/src/vpp/api/vat.h @@ -0,0 +1 @@ +../../vat/vat.h \ No newline at end of file diff --git a/src/vpp/api/vpe.api b/src/vpp/api/vpe.api new file mode 100644 index 00000000..abd0e8f1 --- /dev/null +++ b/src/vpp/api/vpe.api @@ -0,0 +1,2782 @@ +/* + * Copyright (c) 2015-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** \file + + This file defines vpe control-plane API messages which are generally + called through a shared memory interface. +*/ + +/* + * Note: API placement cleanup in progress + * If you're looking for interface APIs, please + * see .../vnet/vnet/{interface.api,interface_api.c} + * IP APIs: see .../vnet/vnet/ip/{ip.api, ip_api.c} + * TAP APIs: see .../vnet/vnet/unix/{tap.api, tap_api.c} + * VXLAN APIs: see .../vnet/vnet/vxlan/{vxlan.api, vxlan_api.c} + * AF-PACKET APIs: ... see /vnet/devices/af_packet/{af_packet.api, af_packet_api.c} + * NETMAP APIs: see ... /vnet/vnet/devices/netmap/{netmap.api, netmap_api.c} + * VHOST-USER APIs: see .../vnet/devices/virtio/{vhost_user.api, vhost_user_api.c} + * VXLAN GPE APIs: see .../vnet/vnet/vxlan-gpe/{vxlan_gpe.api, vxlan_gpe_api.c} + * GRE APIs: see .../vnet/vnet/gre/{gre.api, gre_api.c} + * L2TP APIs: see .../vnet/vnet/l2tp/{l2tp.api, l2tp_api.c} + * BFD APIs: see .../vnet/vnet/bfd/{bfd.api, bfd_api.c} + * IPSEC APIs: see .../vnet/vnet/ipsec/{ipsec.api, ipsec_api.c} + * IPSEC-GRE APIs: see .../vnet/vnet/ipsec-gre/{ipsec_gre.api, ipsec_gre_api.c} + * LISP APIs: see .../vnet/vnet/lisp/{lisp.api, lisp_api.c} + * LISP-GPE APIs: see .../vnet/vnet/lisp-gpe/{lisp_gpe.api, lisp_gpe_api.c} + */ + +/** \brief Create a new subinterface with the given vlan id + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - software index of the new vlan's parent interface + @param vlan_id - vlan tag of the new interface +*/ +define create_vlan_subif +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u32 vlan_id; +}; + +/** \brief Reply for the vlan subinterface create request + @param context - returned sender context, to match reply w/ request + @param retval - return code + @param sw_if_index - software index allocated for the new subinterface +*/ +define create_vlan_subif_reply +{ + u32 context; + i32 retval; + u32 sw_if_index; +}; + +/** \brief Enable or Disable MPLS on an interface + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - index of the interface + @param enable - if non-zero enable, else disable +*/ +define sw_interface_set_mpls_enable +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u8 enable; +}; + +/** \brief Reply for MPLS state on an interface + @param context - returned sender context, to match reply w/ request + @param retval - return code +*/ +define sw_interface_set_mpls_enable_reply +{ + u32 context; + i32 retval; +}; + +/** \brief MPLS Route Add / del route + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param mr_label - The MPLS label value + @param mr_eos - The End of stack bit + @param mr_table_id - The MPLS table-id the route is added in + @param mr_classify_table_index - If this is a classify route, + this is the classify table index + @param mr_create_table_if_needed - If the MPLS or IP tables do not exist, + create them + @param mr_is_add - Is this a route add or delete + @param mr_is_classify - Is this route result a classify + @param mr_is_multipath - Is this route update a multipath - i.e.
is this + a path addition to an existing route + @param mr_is_resolve_host - Recurse resolution constraint via a host prefix + @param mr_is_resolve_attached - Recurse resolution constraint via attached prefix + @param mr_next_hop_proto_is_ip4 - The next-hop is IPV4 + @param mr_next_hop_weight - The weight, for UCMP + @param mr_next_hop[16] - the next-hop address + @param mr_next_hop_sw_if_index - the next-hop SW interface + @param mr_next_hop_table_id - the next-hop table-id (if appropriate) + @param mr_next_hop_n_out_labels - the number of labels in the label stack + @param mr_next_hop_out_label_stack - the next-hop output label stack, outer most first + @param next_hop_via_label - The next-hop is resolved via a local label +*/ +define mpls_route_add_del +{ + u32 client_index; + u32 context; + u32 mr_label; + u8 mr_eos; + u32 mr_table_id; + u32 mr_classify_table_index; + u8 mr_create_table_if_needed; + u8 mr_is_add; + u8 mr_is_classify; + u8 mr_is_multipath; + u8 mr_is_resolve_host; + u8 mr_is_resolve_attached; + u8 mr_next_hop_proto_is_ip4; + u8 mr_next_hop_weight; + u8 mr_next_hop[16]; + u8 mr_next_hop_n_out_labels; + u32 mr_next_hop_sw_if_index; + u32 mr_next_hop_table_id; + u32 mr_next_hop_via_label; + u32 mr_next_hop_out_label_stack[mr_next_hop_n_out_labels]; +}; + +/** \brief Reply for MPLS route add / del request + @param context - returned sender context, to match reply w/ request + @param retval - return code +*/ +define mpls_route_add_del_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Dump MPLS fib table + @param client_index - opaque cookie to identify the sender +*/ +define mpls_fib_dump +{ + u32 client_index; + u32 context; +}; + +/** \brief FIB path + @param sw_if_index - index of the interface + @param weight - The weight, for UCMP + @param is_local - local if non-zero, else remote + @param is_drop - Drop the packet + @param is_unreach - Drop the packet and rate limit send ICMP unreachable + @param is_prohibit - Drop the packet and rate limit send ICMP prohibited + @param afi - the afi of the next hop, IP46_TYPE_IP4=1, IP46_TYPE_IP6=2 + @param next_hop[16] - the next hop address + + WARNING: this type is replicated, pending cleanup completion + +*/ +typeonly manual_print manual_endian define fib_path2 +{ + u32 sw_if_index; + u32 weight; + u8 is_local; + u8 is_drop; + u8 is_unreach; + u8 is_prohibit; + u8 afi; + u8 next_hop[16]; +}; + +/** \brief mpls FIB table response + @param table_id - MPLS fib table id + @param eos_bit - End-of-stack bit + @param label - MPLS label value + @param count - the number of fib_path in path + @param path - array of fib_path structures +*/ +manual_endian manual_print define mpls_fib_details +{ + u32 context; + u32 table_id; + u8 eos_bit; + u32 label; + u32 count; + vl_api_fib_path2_t path[count]; +}; + +/** \brief Bind/Unbind an MPLS local label to an IP prefix. i.e. create + a per-prefix label entry. + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param mb_mpls_table_id - The MPLS table-id the MPLS entry will be added in + @param mb_label - The MPLS label value to bind + @param mb_ip_table_id - The IP table-id of the IP prefix to bind to. + @param mb_create_table_if_needed - Create either/both tables if required.
+ @param mb_is_bind - Bind or unbind + @param mb_is_ip4 - The prefix to bind to is IPv4 + @param mb_address_length - Length of IP prefix + @param mb_address[16] - IP prefix/ +*/ +define mpls_ip_bind_unbind +{ + u32 client_index; + u32 context; + u32 mb_mpls_table_id; + u32 mb_label; + u32 mb_ip_table_id; + u8 mb_create_table_if_needed; + u8 mb_is_bind; + u8 mb_is_ip4; + u8 mb_address_length; + u8 mb_address[16]; +}; + +/** \brief Reply for MPLS IP bind/unbind request + @param context - returned sender context, to match reply w/ request + @param retval - return code +*/ +define mpls_ip_bind_unbind_reply +{ + u32 context; + i32 retval; +}; + +/** \brief MPLS tunnel Add / del route + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param mt_is_add - Is this a route add or delete + @param mt_sw_if_index - The SW interface index of the tunnel to delete + @param mt_next_hop_proto_is_ip4 - The next-hop is IPV4 + @param mt_next_hop_weight - The weight, for UCMP + @param mt_next_hop[16] - the nextop address + @param mt_next_hop_sw_if_index - the next-hop SW interface + @param mt_next_hop_table_id - the next-hop table-id (if appropriate) + @param mt_next_hop_n_out_labels - the number of next-hop output labels + @param mt_next_hop_out_label_stack - the next-hop output label stack, outer most first +*/ +define mpls_tunnel_add_del +{ + u32 client_index; + u32 context; + u32 mt_sw_if_index; + u8 mt_is_add; + u8 mt_l2_only; + u8 mt_next_hop_proto_is_ip4; + u8 mt_next_hop_weight; + u8 mt_next_hop[16]; + u8 mt_next_hop_n_out_labels; + u32 mt_next_hop_sw_if_index; + u32 mt_next_hop_table_id; + u32 mt_next_hop_out_label_stack[mt_next_hop_n_out_labels]; +}; + +/** \brief Reply for MPLS tunnel add / del request + @param context - returned sender context, to match reply w/ request + @param retval - return code + @param sw_if_index - SW interface index of the tunnel created +*/ +define mpls_tunnel_add_del_reply +{ + u32 context; + i32 retval; + u32 sw_if_index; +}; + +/** \brief Dump mpls eth tunnel table + @param client_index - opaque cookie to identify the sender + @param tunnel_index - eth tunnel identifier or -1 in case of all tunnels +*/ +define mpls_tunnel_dump +{ + u32 client_index; + u32 context; + i32 tunnel_index; +}; + +/** \brief mpls eth tunnel operational state response + @param tunnel_index - eth tunnel identifier + @param intfc_address - interface ipv4 addr + @param mask_width - interface ipv4 addr mask + @param hw_if_index - interface id + @param l2_only - + @param tunnel_dst_mac - + @param tx_sw_if_index - + @param encap_index - reference to mpls label table + @param nlabels - number of resolved labels + @param labels - resolved labels +*/ +define mpls_tunnel_details +{ + u32 context; + u32 tunnel_index; + u8 mt_l2_only; + u8 mt_sw_if_index; + u8 mt_next_hop_proto_is_ip4; + u8 mt_next_hop[16]; + u32 mt_next_hop_sw_if_index; + u32 mt_next_hop_table_id; + u32 mt_next_hop_n_labels; + u32 mt_next_hop_out_labels[mt_next_hop_n_labels]; +}; + +/** \brief Proxy ARP add / del request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param vrf_id - VRF / Fib table ID + @param is_add - 1 if adding the Proxy ARP range, 0 if deleting + @param low_address[4] - Low address of the Proxy ARP range + @param hi_address[4] - High address of the Proxy ARP range +*/ +define proxy_arp_add_del +{ + u32 client_index; + u32 context; + u32 vrf_id; + u8 is_add; + u8 
low_address[4]; + u8 hi_address[4]; +}; + +/** \brief Reply for proxy arp add / del request + @param context - returned sender context, to match reply w/ request + @param retval - return code +*/ +define proxy_arp_add_del_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Proxy ARP interface enable / disable request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - Which interface to enable / disable Proxy Arp on + @param enable_disable - 1 to enable Proxy ARP on interface, 0 to disable +*/ +define proxy_arp_intfc_enable_disable +{ + u32 client_index; + u32 context; + u32 sw_if_index; + /* 1 = on, 0 = off */ + u8 enable_disable; +}; + +/** \brief Reply for Proxy ARP interface enable / disable request + @param context - returned sender context, to match reply w/ request + @param retval - return code +*/ +define proxy_arp_intfc_enable_disable_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Reset VRF (remove all routes etc) request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_ipv6 - 1 for IPv6, 0 for IPv4 + @param vrf_id - ID of the FIB table / VRF to reset +*/ +define reset_vrf +{ + u32 client_index; + u32 context; + u8 is_ipv6; + u32 vrf_id; +}; + +/** \brief Reply for Reset VRF request + @param context - returned sender context, to match reply w/ request + @param retval - return code +*/ +define reset_vrf_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Is Address Reachable request - DISABLED + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param next_hop_sw_if_index - index of interface used to get to next hop + @param is_ipv6 - 1 for IPv6, 0 for IPv4 + @param is_error - address not found or does not match intf + @param address[] - Address in question +*/ +define is_address_reachable +{ + u32 client_index; /* (api_main_t *) am->my_client_index */ + u32 context; + u32 next_hop_sw_if_index; + u8 is_known; /* on reply, this is the answer */ + u8 is_ipv6; + u8 is_error; /* address not found or does not match intf */ + u8 address[16]; +}; + +/** \brief Want Stats, register for stats updates + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param enable_disable - 1 = enable stats, 0 = disable + @param pid - pid of process requesting stats updates +*/ +define want_stats +{ + u32 client_index; + u32 context; + u32 enable_disable; + u32 pid; +}; + +/** \brief Reply for Want Stats request + @param context - returned sender context, to match reply w/ request + @param retval - return code +*/ +define want_stats_reply +{ + u32 context; + i32 retval; +}; + +typeonly manual_print manual_endian define ip4_fib_counter +{ + u32 address; + u8 address_length; + u64 packets; + u64 bytes; +}; + +manual_print manual_endian define vnet_ip4_fib_counters +{ + u32 vrf_id; + u32 count; + vl_api_ip4_fib_counter_t c[count]; +}; + +typeonly manual_print manual_endian define ip6_fib_counter +{ + u64 address[2]; + u8 address_length; + u64 packets; + u64 bytes; +}; + +manual_print manual_endian define vnet_ip6_fib_counters +{ + u32 vrf_id; + u32 count; + vl_api_ip6_fib_counter_t c[count]; +}; + +/** \brief Request for a single block of summary stats + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define
vnet_get_summary_stats +{ + u32 client_index; + u32 context; +}; + +/** \brief Reply for vnet_get_summary_stats request + @param context - sender context, to match reply w/ request + @param retval - return code for request + @param total_pkts - + @param total_bytes - + @param vector_rate - +*/ +define vnet_summary_stats_reply +{ + u32 context; + i32 retval; + u64 total_pkts[2]; + u64 total_bytes[2]; + f64 vector_rate; +}; + +/** \brief OAM event structure + @param dst_address[] - + @param state +*/ +define oam_event +{ + u8 dst_address[4]; + u8 state; +}; + +/** \brief Want OAM events request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param enable_disable - enable if non-zero, else disable + @param pid - pid of the requesting process +*/ +define want_oam_events +{ + u32 client_index; + u32 context; + u32 enable_disable; + u32 pid; +}; + +/** \brief Want OAM events response + @param context - sender context, to match reply w/ request + @param retval - return code for the want oam events request +*/ +define want_oam_events_reply +{ + u32 context; + i32 retval; +}; + +/** \brief OAM add / del target request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param vrf_id - vrf_id of the target + @param src_address[] - source address to use for the updates + @param dst_address[] - destination address of the target + @param is_add - add target if non-zero, else delete +*/ +define oam_add_del +{ + u32 client_index; + u32 context; + u32 vrf_id; + u8 src_address[4]; + u8 dst_address[4]; + u8 is_add; +}; + +/** \brief OAM add / del target response + @param context - sender context, to match reply w/ request + @param retval - return code of the request +*/ +define oam_add_del_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Reset fib table request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param vrf_id - vrf/table id of the fib table to reset + @param is_ipv6 - an ipv6 fib to reset if non-zero, else ipv4 +*/ +define reset_fib +{ + u32 client_index; + u32 context; + u32 vrf_id; + u8 is_ipv6; +}; + +/** \brief Reset fib response + @param context - sender context, to match reply w/ request + @param retval - return code for the reset fib request +*/ +define reset_fib_reply +{ + u32 context; + i32 retval; +}; + +/** \brief DHCP Proxy config add / del request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param vrf_id - vrf id + @param is_ipv6 - ipv6 if non-zero, else ipv4 + @param is_add - add the config if non-zero, else delete + @param insert_circuit_id - option82 suboption 1 fib number + @param dhcp_server[] - server address + @param dhcp_src_address[] - +*/ +define dhcp_proxy_config +{ + u32 client_index; + u32 context; + u32 vrf_id; + u8 is_ipv6; + u8 is_add; + u8 insert_circuit_id; + u8 dhcp_server[16]; + u8 dhcp_src_address[16]; +}; + +/** \brief DHCP Proxy config response + @param context - sender context, to match reply w/ request + @param retval - return code for the request +*/ +define dhcp_proxy_config_reply +{ + u32 context; + i32 retval; +}; + +/** \brief DHCP Proxy set / unset vss request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param tbl_id - table id + @param oui - first part of vpn id +
@param fib_id - second part of vpn id + @param is_ipv6 - ip6 if non-zero, else ip4 + @param is_add - set vss if non-zero, else delete +*/ +define dhcp_proxy_set_vss +{ + u32 client_index; + u32 context; + u32 tbl_id; + u32 oui; + u32 fib_id; + u8 is_ipv6; + u8 is_add; +}; + +/** \brief DHCP proxy set / unset vss response + @param context - sender context, to match reply w/ request + @param retval - return code for the request +*/ +define dhcp_proxy_set_vss_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Create loopback interface request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param mac_address - mac addr to assign to the interface if non-zero +*/ +define create_loopback +{ + u32 client_index; + u32 context; + u8 mac_address[6]; +}; + +/** \brief Create loopback interface response + @param context - sender context, to match reply w/ request + @param sw_if_index - sw index of the interface that was created + @param retval - return code for the request +*/ +define create_loopback_reply +{ + u32 context; + i32 retval; + u32 sw_if_index; +}; + +/** \brief Delete loopback interface request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - sw index of the interface that was created +*/ +define delete_loopback +{ + u32 client_index; + u32 context; + u32 sw_if_index; +}; + +/** \brief Delete loopback interface response + @param context - sender context, to match reply w/ request + @param retval - return code for the request +*/ +define delete_loopback_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Control ping from client to api server request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define control_ping +{ + u32 client_index; + u32 context; +}; + +/** \brief Control ping from the client to the server response + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param vpe_pid - the pid of the vpe, returned by the server +*/ +define control_ping_reply +{ + u32 context; + i32 retval; + u32 client_index; + u32 vpe_pid; +}; + +/** \brief Process a vpe parser cli string request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param cmd_in_shmem - pointer to cli command string +*/ +define cli_request +{ + u32 client_index; + u32 context; + u64 cmd_in_shmem; +}; +define cli_inband +{ + u32 client_index; + u32 context; + u32 length; + u8 cmd[length]; +}; + +/** \brief vpe parser cli string response + @param context - sender context, to match reply w/ request + @param retval - return code for request + @param reply_in_shmem - Reply string from cli processing if any +*/ +define cli_reply +{ + u32 context; + i32 retval; + u64 reply_in_shmem; +}; +define cli_inband_reply +{ + u32 context; + i32 retval; + u32 length; + u8 reply[length]; +}; + +/** \brief Set max allowed ARP or ip6 neighbor entries request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_ipv6 - neighbor limit if non-zero, else ARP limit + @param arp_neighbor_limit - the new limit, defaults are ~ 50k +*/ +define set_arp_neighbor_limit +{ + u32 client_index; + u32 context; + u8 is_ipv6; + u32 
arp_neighbor_limit; +}; + +/** \brief Set max allowed ARP or ip6 neighbor entries response + @param context - sender context, to match reply w/ request + @param retval - return code for request +*/ +define set_arp_neighbor_limit_reply +{ + u32 context; + i32 retval; +}; + +/** \brief L2 interface patch add / del request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param rx_sw_if_index - receive side interface + @param tx_sw_if_index - transmit side interface + @param is_add - if non-zero set up the interface patch, else remove it +*/ +define l2_patch_add_del +{ + u32 client_index; + u32 context; + u32 rx_sw_if_index; + u32 tx_sw_if_index; + u8 is_add; +}; + +/** \brief L2 interface patch add / del response + @param context - sender context, to match reply w/ request + @param retval - return code for the request +*/ +define l2_patch_add_del_reply +{ + u32 context; + i32 retval; +}; + +/** \brief IPv6 segment routing tunnel add / del request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_add - add the tunnel if non-zero, else delete it + @param name[] - tunnel name (len. 64) + @param src_address[] - + @param dst_address[] - + @param dst_mask_width - + @param inner_vrf_id - + @param outer_vrf_id - + @param flags_net_byte_order - + @param n_segments - + @param n_tags - + @param segs_and_tags[] - + @param policy_name[] - name of policy to associate this tunnel to (len. 64) +*/ +define sr_tunnel_add_del +{ + u32 client_index; + u32 context; + u8 is_add; + u8 name[64]; + u8 src_address[16]; + u8 dst_address[16]; + u8 dst_mask_width; + u32 inner_vrf_id; + u32 outer_vrf_id; + u16 flags_net_byte_order; + u8 n_segments; + u8 n_tags; + u8 policy_name[64]; + u8 segs_and_tags[0]; +}; + +/** \brief IPv6 segment routing tunnel add / del response + @param context - sender context, to match reply w/ request + @param retval - return value for request +*/ +define sr_tunnel_add_del_reply +{ + u32 context; + i32 retval; +}; + +/** \brief IPv6 segment routing policy add / del request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_add - add the tunnel if non-zero, else delete it + @param name[] - policy name (len. 
64) + @param tunnel_names[] - +*/ +define sr_policy_add_del +{ + u32 client_index; + u32 context; + u8 is_add; + u8 name[64]; + u8 tunnel_names[0]; +}; + +/** \brief IPv6 segment routing policy add / del response + @param context - sender context, to match reply w/ request + @param retval - return value for request +*/ +define sr_policy_add_del_reply +{ + u32 context; + i32 retval; +}; + +/** \brief IPv6 segment routing multicast map to policy add / del request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_add - add the tunnel if non-zero, else delete it + @param multicast_address[] - IP6 multicast address + @param policy_name[] - policy name (len. 64) +*/ +define sr_multicast_map_add_del +{ + u32 client_index; + u32 context; + u8 is_add; + u8 multicast_address[16]; + u8 policy_name[64]; +}; + +/** \brief IPv6 segment routing multicast map to policy add / del response + @param context - sender context, to match reply w/ request + @param retval - return value for request +*/ +define sr_multicast_map_add_del_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Interface set vpath request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - interface used to reach neighbor + @param enable - if non-zero enable, else disable +*/ +define sw_interface_set_vpath +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u8 enable; +}; + +/** \brief Interface set vpath response + @param context - sender context, to match reply w/ request + @param retval - return code for the request +*/ +define sw_interface_set_vpath_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Set L2 XConnect between two interfaces request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param rx_sw_if_index - Receive interface index + @param tx_sw_if_index - Transmit interface index + @param enable - enable xconnect if not 0, else set to L3 mode +*/ +define sw_interface_set_l2_xconnect +{ + u32 client_index; + u32 context; + u32 rx_sw_if_index; + u32 tx_sw_if_index; + u8 enable; +}; + +/** \brief Set L2 XConnect response + @param context - sender context, to match reply w/ request + @param retval - L2 XConnect request return code +*/ +define sw_interface_set_l2_xconnect_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Interface bridge mode request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param rx_sw_if_index - the interface + @param bd_id - bridge domain id + @param bvi - Setup interface as a bvi, bridge mode only + @param shg - Split horizon group, for bridge mode only + @param enable - Enable bridge mode if not 0, else set to L3 mode +*/ +define sw_interface_set_l2_bridge +{ + u32 client_index; + u32 context; + u32 rx_sw_if_index; + u32 bd_id; + u8 shg; + u8 bvi; + u8 enable; +}; + +/** \brief Interface bridge mode response + @param context - sender context, to match reply w/ request + @param retval - Bridge mode request return code +*/ +define sw_interface_set_l2_bridge_reply +{ + u32 context; + i32 retval; +}; + +/** \brief L2 FIB add entry request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param mac - the entry's mac address + @param bd_id - the entry's bridge domain id + @param sw_if_index 
- the interface + @param is_add - If non-zero add the entry, else delete it + @param static_mac - + @param filter_mac - +*/ +define l2fib_add_del +{ + u32 client_index; + u32 context; + u64 mac; + u32 bd_id; + u32 sw_if_index; + u8 is_add; + u8 static_mac; + u8 filter_mac; + u8 bvi_mac; +}; + +/** \brief L2 FIB add entry response + @param context - sender context, to match reply w/ request + @param retval - return code for the add l2fib entry request +*/ +define l2fib_add_del_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Set L2 flags request !!! TODO - need more info, feature bits in l2_input.h + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - interface + @param is_set - if non-zero, set the bits, else clear them + @param feature_bitmap - non-zero bits to set or clear +*/ +define l2_flags +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u8 is_set; + u32 feature_bitmap; +}; + +/** \brief Set L2 bits response + @param context - sender context, to match reply w/ request + @param retval - return code for the set l2 bits request +*/ +define l2_flags_reply +{ + u32 context; + i32 retval; + u32 resulting_feature_bitmap; +}; + +/** \brief Set bridge flags (such as L2_LEARN, L2_FWD, L2_FLOOD, + L2_UU_FLOOD, or L2_ARP_TERM) request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param bd_id - the bridge domain to set the flags for + @param is_set - if non-zero, set the flags, else clear them + @param feature_bitmap - bits that are non-zero to set or clear +*/ +define bridge_flags +{ + u32 client_index; + u32 context; + u32 bd_id; + u8 is_set; + u32 feature_bitmap; +}; + +/** \brief Set bridge flags response + @param context - sender context, to match reply w/ request + @param retval - return code for the set bridge flags request + @param resulting_feature_bitmap - the feature bitmap value after the request is implemented +*/ +define bridge_flags_reply +{ + u32 context; + i32 retval; + u32 resulting_feature_bitmap; +}; + +/** \brief Set bridge domain ip to mac entry request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param bd_id - the bridge domain to set the flags for + @param is_add - if non-zero, add the entry, else clear it + @param is_ipv6 - if non-zero, ipv6 address, else ipv4 address + @param mac_address - MAC address + @param ip_address - IP address +*/ +define bd_ip_mac_add_del +{ + u32 client_index; + u32 context; + u32 bd_id; + u8 is_add; + u8 is_ipv6; + u8 ip_address[16]; + u8 mac_address[6]; +}; + +/** \brief Set bridge domain ip to mac entry response + @param context - sender context, to match reply w/ request + @param retval - return code for the set bridge flags request +*/ +define bd_ip_mac_add_del_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Add/Delete classification table request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_add - if non-zero add the table, else delete it + @param del_chain - if non-zero delete the whole chain of tables + @param table_index - if add, returns index of the created table, else specifies the table to delete + @param nbuckets - number of buckets when adding a table + @param memory_size - memory size when adding a table + @param match_n_vectors - number of match vectors + @param next_table_index - index of next 
table + @param miss_next_index - index of miss table + @param current_data_flag - option to use current node's packet payload + as the starting point from where packets are classified, + This option is only valid for L2/L3 input ACL for now. + 0: by default, classify data from the buffer's start location + 1: classify packets from VPP node’s current data pointer + @param current_data_offset - a signed value to shift the start location of + the packet to be classified + For example, if input IP ACL node is used, L2 header’s first byte + can be accessible by configuring current_data_offset to -14 + if there is no vlan tag. + This is valid only if current_data_flag is set to 1. + @param mask[] - match mask +*/ +define classify_add_del_table +{ + u32 client_index; + u32 context; + u8 is_add; + u8 del_chain; + u32 table_index; + u32 nbuckets; + u32 memory_size; + u32 skip_n_vectors; + u32 match_n_vectors; + u32 next_table_index; + u32 miss_next_index; + u32 current_data_flag; + i32 current_data_offset; + u8 mask[0]; +}; + +/** \brief Add/Delete classification table response + @param context - sender context, to match reply w/ request + @param retval - return code for the table add/del request + @param new_table_index - for add, returned index of the new table + @param skip_n_vectors - for add, returned value of skip_n_vectors in table + @param match_n_vectors - for add, returned value of match_n_vectors in table +*/ +define classify_add_del_table_reply +{ + u32 context; + i32 retval; + u32 new_table_index; + u32 skip_n_vectors; + u32 match_n_vectors; +}; + +/** \brief Classify add / del session request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_add - add session if non-zero, else delete + @param table_index - index of the table to add/del the session, required + @param hit_next_index - for add, hit_next_index of new session, required + @param opaque_index - for add, opaque_index of new session + @param advance - for add, advance value for session + @param action - + 0: no action (by default) + metadata is not used. + 1: Classified IP packets will be looked up from the + specified ipv4 fib table (configured by metadata as VRF id). + Only valid for L3 input ACL node + 2: Classified IP packets will be looked up from the + specified ipv6 fib table (configured by metadata as VRF id). + Only valid for L3 input ACL node + @param metadata - valid only if action != 0 + VRF id if action is 1 or 2. 
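+ Example (sketch with hypothetical values): to have matching packets
+ looked up in ipv4 VRF 7 from the L3 input ACL node, set action = 1 and
+ metadata = 7; with action = 0 the metadata field is ignored.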
+ @param match[] - for add, match value for session, required +*/ +define classify_add_del_session +{ + u32 client_index; + u32 context; + u8 is_add; + u32 table_index; + u32 hit_next_index; + u32 opaque_index; + i32 advance; + u8 action; + u32 metadata; + u8 match[0]; +}; + +/** \brief Classify add / del session response + @param context - sender context, to match reply w/ request + @param retval - return code for the add/del session request +*/ +define classify_add_del_session_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Set/unset the classification table for an interface request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_ipv6 - ipv6 if non-zero, else ipv4 + @param sw_if_index - interface to associate with the table + @param table_index - index of the table, if ~0 unset the table +*/ +define classify_set_interface_ip_table +{ + u32 client_index; + u32 context; + u8 is_ipv6; + u32 sw_if_index; + u32 table_index; /* ~0 => off */ +}; + +/** \brief Set/unset interface classification table response + @param context - sender context, to match reply w/ request + @param retval - return code +*/ +define classify_set_interface_ip_table_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Set/unset l2 classification tables for an interface request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - interface to set/unset tables for + @param ip4_table_index - ip4 index, use ~0 for all 3 indexes to unset + @param ip6_table_index - ip6 index + @param other_table_index - other index +*/ +define classify_set_interface_l2_tables +{ + u32 client_index; + u32 context; + u32 sw_if_index; + /* 3 x ~0 => off */ + u32 ip4_table_index; + u32 ip6_table_index; + u32 other_table_index; + u8 is_input; +}; + +/** \brief Set/unset l2 classification tables for an interface response + @param context - sender context, to match reply w/ request + @param retval - return code for the request +*/ +define classify_set_interface_l2_tables_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Get node index using name request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param node_name[] - name of the node +*/ +define get_node_index +{ + u32 client_index; + u32 context; + u8 node_name[64]; +}; + +/** \brief Get node index using name request + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param node_index - index of the desired node if found, else ~0 +*/ +define get_node_index_reply +{ + u32 context; + i32 retval; + u32 node_index; +}; + +/** \brief Set the next node for a given node request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param node_name[] - node to add the next node to + @param next_name[] - node to add as the next node +*/ +define add_node_next +{ + u32 client_index; + u32 context; + u8 node_name[64]; + u8 next_name[64]; +}; + +/** \brief IP Set the next node for a given node response + @param context - sender context, to match reply w/ request + @param retval - return code for the add next node request + @param next_index - the index of the next node if success, else ~0 +*/ +define add_node_next_reply +{ + u32 context; + i32 retval; + u32 next_index; +}; + +/** \brief DHCP Proxy config 2 add / del 
request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param rx_vrf_id - receive vrf id + @param server_vrf_id - server vrf id + @param if_ipv6 - ipv6 if non-zero, else ipv4 + @param is_add - add the config if non-zero, else delete + @param insert_circuit_id - option82 suboption 1 fib number + @param dhcp_server[] - server address + @param dhcp_src_address[] - +*/ +define dhcp_proxy_config_2 +{ + u32 client_index; + u32 context; + u32 rx_vrf_id; + u32 server_vrf_id; + u8 is_ipv6; + u8 is_add; + u8 insert_circuit_id; + u8 dhcp_server[16]; + u8 dhcp_src_address[16]; +}; + +/** \brief DHCP Proxy config 2 add / del response + @param context - sender context, to match reply w/ request + @param retval - return code for request +*/ +define dhcp_proxy_config_2_reply +{ + u32 context; + i32 retval; +}; + +/** \brief L2 fib clear table request, clear all mac entries in the l2 fib + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define l2_fib_clear_table +{ + u32 client_index; + u32 context; +}; + +/** \brief L2 fib clear table response + @param context - sender context, to match reply w/ request + @param retval - return code for the request +*/ +define l2_fib_clear_table_reply +{ + u32 context; + i32 retval; +}; + +/** \brief L2 interface ethernet flow point filtering enable/disable request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - interface to enable/disable filtering on + @param enable_disable - if non-zero enable filtering, else disable +*/ +define l2_interface_efp_filter +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u32 enable_disable; +}; + +/** \brief L2 interface ethernet flow point filtering response + @param context - sender context, to match reply w/ request + @param retval - return code for the request +*/ +define l2_interface_efp_filter_reply +{ + u32 context; + i32 retval; +}; + +/** \brief L2 interface vlan tag rewrite configure request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - interface the operation is applied to + @param vtr_op - Choose from l2_vtr_op_t enum values + @param push_dot1q - first pushed flag dot1q id set, else dot1ad + @param tag1 - Needed for any push or translate vtr op + @param tag2 - Needed for any push 2 or translate x-2 vtr ops +*/ +define l2_interface_vlan_tag_rewrite +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u32 vtr_op; + u32 push_dot1q; // ethertype of first pushed tag is dot1q/dot1ad + u32 tag1; // first pushed tag + u32 tag2; // second pushed tag +}; + +/** \brief L2 interface vlan tag rewrite response + @param context - sender context, to match reply w/ request + @param retval - return code for the request +*/ +define l2_interface_vlan_tag_rewrite_reply +{ + u32 context; + i32 retval; +}; + +define create_subif +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u32 sub_id; + + /* These fields map directly onto the subif template */ + u8 no_tags; + u8 one_tag; + u8 two_tags; + u8 dot1ad; // 0 = dot1q, 1=dot1ad + u8 exact_match; + u8 default_sub; + u8 outer_vlan_id_any; + u8 inner_vlan_id_any; + u16 outer_vlan_id; + u16 inner_vlan_id; +}; + +define create_subif_reply +{ + u32 context; + i32 retval; + u32 sw_if_index; +}; + +/** \brief show version + @param 
client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define show_version +{ + u32 client_index; + u32 context; +}; + +/** \brief show version response + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param program - name of the program (vpe) + @param version - version of the program + @param build_directory - root of the workspace where the program was built +*/ +define show_version_reply +{ + u32 context; + i32 retval; + u8 program[32]; + u8 version[32]; + u8 build_date[32]; + u8 build_directory[256]; +}; + +/** \brief l2 fib table entry structure + @param bd_id - the l2 fib / bridge domain table id + @param mac - the entry's mac address + @param sw_if_index - index of the interface + @param static_mac - the entry is statically configured. + @param filter_mac - the entry is a mac filter entry. + @param bvi_mac - the mac address is a bridge virtual interface +*/ +define l2_fib_table_entry +{ + u32 context; + u32 bd_id; + u64 mac; + u32 sw_if_index; + u8 static_mac; + u8 filter_mac; + u8 bvi_mac; +}; + +/** \brief Dump l2 fib (aka bridge domain) table + @param client_index - opaque cookie to identify the sender + @param bd_id - the l2 fib / bridge domain table identifier +*/ +define l2_fib_table_dump +{ + u32 client_index; + u32 context; + u32 bd_id; +}; + +/* Gross kludge, DGMS */ +define interface_name_renumber +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u32 new_show_dev_instance; +}; + +define interface_name_renumber_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Register for ip4 arp resolution events + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param enable_disable - 1 => register for events, 0 => cancel registration + @param pid - sender's pid + @param address - the exact ip4 address of interest +*/ +define want_ip4_arp_events +{ + u32 client_index; + u32 context; + u8 enable_disable; + u32 pid; + u32 address; +}; + +/** \brief Reply for interface events registration + @param context - returned sender context, to match reply w/ request + @param retval - return code +*/ +define want_ip4_arp_events_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Tell client about an ip4 arp resolution event + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param address - the exact ip4 address of interest + @param pid - client pid registered to receive notification + @param sw_if_index - interface which received ARP packet + @param new_mac - the new mac address + @param mac_ip - 0: resolution event, 1: mac/ip binding in bd +*/ +define ip4_arp_event +{ + u32 client_index; + u32 context; + u32 address; + u32 pid; + u32 sw_if_index; + u8 new_mac[6]; + u8 mac_ip; +}; + +/** \brief Register for ip6 nd resolution events + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param enable_disable - 1 => register for events, 0 => cancel registration + @param pid - sender's pid + @param address - the exact ip6 address of interest +*/ +define want_ip6_nd_events +{ + u32 client_index; + u32 context; + u8 enable_disable; + u32 pid; + u8 address[16]; +}; + +/** \brief Reply for ip6 nd resolution events registration + @param context - returned sender context, to match reply w/ request + @param retval - return code +*/ +define 
want_ip6_nd_events_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Tell client about an ip6 nd resolution or mac/ip event + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param pid - client pid registered to receive notification + @param sw_if_index - interface which received ARP packet + @param address - the exact ip6 address of interest + @param new_mac - the new mac address + @param mac_ip - 0: resolution event, 1: mac/ip binding in bd +*/ +define ip6_nd_event +{ + u32 client_index; + u32 context; + u32 pid; + u32 sw_if_index; + u8 address[16]; + u8 new_mac[6]; + u8 mac_ip; +}; + +/** \brief L2 bridge domain add or delete request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param bd_id - the bridge domain to create + @param flood - enable/disable bcast/mcast flooding in the bd + @param uu_flood - enable/disable unknown unicast flood in the bd + @param forward - enable/disable forwarding on all interfaces in the bd + @param learn - enable/disable learning on all interfaces in the bd + @param arp_term - enable/disable arp termination in the bd + @param mac_age - mac aging time in min, 0 for disabled + @param is_add - add or delete flag +*/ +define bridge_domain_add_del +{ + u32 client_index; + u32 context; + u32 bd_id; + u8 flood; + u8 uu_flood; + u8 forward; + u8 learn; + u8 arp_term; + u8 mac_age; + u8 is_add; +}; + +/** \brief L2 bridge domain add or delete response + @param context - sender context, to match reply w/ request + @param retval - return code for the set bridge flags request +*/ +define bridge_domain_add_del_reply +{ + u32 context; + i32 retval; +}; + +/** \brief L2 bridge domain request operational state details + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param bd_id - the bridge domain id desired or ~0 to request all bds +*/ +define bridge_domain_dump +{ + u32 client_index; + u32 context; + u32 bd_id; +}; + +/** \brief L2 bridge domain operational state response + @param bd_id - the bridge domain id + @param flood - bcast/mcast flooding state on all interfaces in the bd + @param uu_flood - unknown unicast flooding state on all interfaces in the bd + @param forward - forwarding state on all interfaces in the bd + @param learn - learning state on all interfaces in the bd + @param arp_term - arp termination state on all interfaces in the bd + @param mac_age - mac aging time in min, 0 for disabled + @param n_sw_ifs - number of sw_if_index's in the domain +*/ +define bridge_domain_details +{ + u32 context; + u32 bd_id; + u8 flood; + u8 uu_flood; + u8 forward; + u8 learn; + u8 arp_term; + u8 mac_age; + u32 bvi_sw_if_index; + u32 n_sw_ifs; +}; + +/** \brief L2 bridge domain sw interface operational state response + @param bd_id - the bridge domain id + @param sw_if_index - sw_if_index in the domain + @param shg - split horizon group for the interface +*/ +define bridge_domain_sw_if_details +{ + u32 context; + u32 bd_id; + u32 sw_if_index; + u8 shg; +}; + +/** \brief DHCP Client config add / del request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - index of the interface for DHCP client + @param hostname - hostname + @param is_add - add the config if non-zero, else delete + @param want_dhcp_event - DHCP event sent to the sender + via 
dhcp_compl_event API message if non-zero + @param pid - sender's pid +*/ +define dhcp_client_config +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u8 hostname[64]; + u8 is_add; + u8 want_dhcp_event; + u32 pid; +}; + +/** \brief DHCP Client config response + @param context - sender context, to match reply w/ request + @param retval - return code for the request +*/ +define dhcp_client_config_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Set/unset input ACL interface + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - interface to set/unset input ACL + @param ip4_table_index - ip4 classify table index (~0 for skip) + @param ip6_table_index - ip6 classify table index (~0 for skip) + @param l2_table_index - l2 classify table index (~0 for skip) + @param is_add - Set input ACL if non-zero, else unset + Note: User is recommended to use just one valid table_index per call. + (ip4_table_index, ip6_table_index, or l2_table_index) +*/ +define input_acl_set_interface +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u32 ip4_table_index; + u32 ip6_table_index; + u32 l2_table_index; + u8 is_add; +}; + +/** \brief Set/unset input ACL interface response + @param context - sender context, to match reply w/ request + @param retval - return code for the request +*/ +define input_acl_set_interface_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Tell client about a DHCP completion event + @param client_index - opaque cookie to identify the sender + @param pid - client pid registered to receive notification + @param is_ipv6 - if non-zero the address is ipv6, else ipv4 + @param host_address - Host IP address + @param router_address - Router IP address + @param host_mac - Host MAC address +*/ +define dhcp_compl_event +{ + u32 client_index; + u32 pid; + u8 hostname[64]; + u8 is_ipv6; + u8 host_address[16]; + u8 router_address[16]; + u8 host_mac[6]; +}; + +/** \brief cop: enable/disable junk filtration features on an interface + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - desired interface + @param enable_disable - 1 => enable, 0 => disable +*/ + +define cop_interface_enable_disable +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u8 enable_disable; +}; + +/** \brief cop: interface enable/disable junk filtration reply + @param context - returned sender context, to match reply w/ request + @param retval - return code +*/ + +define cop_interface_enable_disable_reply +{ + u32 context; + i32 retval; +}; + +/** \brief cop: enable/disable whitelist filtration features on an interface + Note: the supplied fib_id must match in order to remove the feature! 
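+ Example (sketch with hypothetical values): enabling an ip4 whitelist with
+ fib_id = 10 and ip4 = 1 means a later disable request must also carry
+ fib_id = 10, otherwise the feature is not removed.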
+ + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - interface handle, physical interfaces only + @param fib_id - fib identifier for the whitelist / blacklist fib + @param ip4 - 1 => enable ip4 filtration, 0=> disable ip4 filtration + @param ip6 - 1 => enable ip6 filtration, 0=> disable ip6 filtration + @param default_cop - 1 => enable non-ip4, non-ip6 filtration 0=> disable it +*/ + +define cop_whitelist_enable_disable +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u32 fib_id; + u8 ip4; + u8 ip6; + u8 default_cop; +}; + +/** \brief cop: interface enable/disable junk filtration reply + @param context - returned sender context, to match reply w/ request + @param retval - return code +*/ + +define cop_whitelist_enable_disable_reply +{ + u32 context; + i32 retval; +}; + +/** \brief get_node_graph - get a copy of the vpp node graph + including the current set of graph arcs. + + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ + +define get_node_graph +{ + u32 client_index; + u32 context; +}; + +/** \brief get_node_graph_reply + @param context - returned sender context, to match reply w/ request + @param retval - return code + @param reply_in_shmem - result from vlib_node_serialize, in shared + memory. Process with vlib_node_unserialize, remember to switch + heaps and free the result. +*/ + +define get_node_graph_reply +{ + u32 context; + i32 retval; + u64 reply_in_shmem; +}; + +/** \brief IOAM enable : Enable in-band OAM + @param id - profile id + @param seqno - To enable Seqno Processing + @param analyse - Enabling analysis of iOAM at decap node + @param pot_enable - Proof of Transit enabled or not flag + @param trace_enable - iOAM Trace enabled or not flag +*/ +define ioam_enable +{ + u32 client_index; + u32 context; + u16 id; + u8 seqno; + u8 analyse; + u8 pot_enable; + u8 trace_enable; + u32 node_id; +}; + +/** \brief iOAM enable response + @param context - sender context, to match reply w/ request + @param retval - return value for request +*/ +define ioam_enable_reply +{ + u32 context; + i32 retval; +}; + +/** \brief iOAM disable + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param id - profile id +*/ +define ioam_disable +{ + u32 client_index; + u32 context; + u16 id; +}; + +/** \brief iOAM disable response + @param context - sender context, to match reply w/ request + @param retval - return value for request +*/ +define ioam_disable_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Add/del policer + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_add - add policer if non-zero, else delete + @param name - policer name + @param cir - CIR + @param eir - EIR + @param cb - Committed Burst + @param eb - Excess or Peak Burst + @param rate_type - rate type + @param round_type - rounding type + @param type - policer algorithm + @param color_aware - 0=color-blind, 1=color-aware + @param conform_action_type - conform action type + @param conform_dscp - DSCP for conform mark-and-transmit action + @param exceed_action_type - exceed action type + @param exceed_dscp - DSCP for exceed mark-and-transmit action + @param violate_action_type - violate action type + @param violate_dscp - DSCP for violate mark-and-transmit action +*/ +define 
policer_add_del +{ + u32 client_index; + u32 context; + + u8 is_add; + u8 name[64]; + u32 cir; + u32 eir; + u64 cb; + u64 eb; + u8 rate_type; + u8 round_type; + u8 type; + u8 color_aware; + u8 conform_action_type; + u8 conform_dscp; + u8 exceed_action_type; + u8 exceed_dscp; + u8 violate_action_type; + u8 violate_dscp; +}; + +/** \brief Add/del policer response + @param context - sender context, to match reply w/ request + @param retval - return value for request + @param policer_index - for add, returned index of the new policer +*/ +define policer_add_del_reply +{ + u32 context; + i32 retval; + u32 policer_index; +}; + +/** \brief Get list of policers + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param match_name_valid - if 0 request all policers otherwise use match_name + @param match_name - policer name +*/ +define policer_dump +{ + u32 client_index; + u32 context; + + u8 match_name_valid; + u8 match_name[64]; +}; + +/** \brief Policer operational state response. + @param context - sender context, to match reply w/ request + @param name - policer name + @param cir - CIR + @param eir - EIR + @param cb - Committed Burst + @param eb - Excess or Peak Burst + @param rate_type - rate type + @param round_type - rounding type + @param type - policer algorithm + @param conform_action_type - conform action type + @param conform_dscp - DSCP for conform mark-and-transmit action + @param exceed_action_type - exceed action type + @param exceed_dscp - DSCP for exceed mark-and-transmit action + @param violate_action_type - violate action type + @param violate_dscp - DSCP for violate mark-and-transmit action + @param single_rate - 1 = single rate policer, 0 = two rate policer + @param color_aware - for hierarchical policing + @param scale - power-of-2 shift amount for lower rates + @param cir_tokens_per_period - number of tokens for each period + @param pir_tokens_per_period - number of tokens for each period for 2-rate policer + @param current_limit - current limit + @param current_bucket - current bucket + @param extended_limit - extended limit + @param extended_bucket - extended bucket + @param last_update_time - last update time +*/ +define policer_details +{ + u32 context; + + u8 name[64]; + u32 cir; + u32 eir; + u64 cb; + u64 eb; + u8 rate_type; + u8 round_type; + u8 type; + u8 conform_action_type; + u8 conform_dscp; + u8 exceed_action_type; + u8 exceed_dscp; + u8 violate_action_type; + u8 violate_dscp; + u8 single_rate; + u8 color_aware; + u32 scale; + u32 cir_tokens_per_period; + u32 pir_tokens_per_period; + u32 current_limit; + u32 current_bucket; + u32 extended_limit; + u32 extended_bucket; + u64 last_update_time; +}; + +/** \brief Set/unset policer classify interface + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - interface to set/unset policer classify + @param ip4_table_index - ip4 classify table index (~0 for skip) + @param ip6_table_index - ip6 classify table index (~0 for skip) + @param l2_table_index - l2 classify table index (~0 for skip) + @param is_add - Set if non-zero, else unset + Note: User is recommended to use just one valid table_index per call. 
+ (ip4_table_index, ip6_table_index, or l2_table_index) +*/ +define policer_classify_set_interface +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u32 ip4_table_index; + u32 ip6_table_index; + u32 l2_table_index; + u8 is_add; +}; + +/** \brief Set/unset policer classify interface response + @param context - sender context, to match reply w/ request + @param retval - return value for request +*/ +define policer_classify_set_interface_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Get list of policer classify interfaces and tables + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param type - classify table type +*/ +define policer_classify_dump +{ + u32 client_index; + u32 context; + u8 type; +}; + +/** \brief Policer classify operational state response. + @param context - sender context, to match reply w/ request + @param sw_if_index - software interface index + @param table_index - classify table index +*/ +define policer_classify_details +{ + u32 context; + u32 sw_if_index; + u32 table_index; +}; + +/** \brief Classify get table IDs request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define classify_table_ids +{ + u32 client_index; + u32 context; +}; + +/** \brief Reply for classify get table IDs request + @param context - sender context which was passed in the request + @param count - number of ids returned in response + @param ids - array of classify table ids +*/ +define classify_table_ids_reply +{ + u32 context; + i32 retval; + u32 count; + u32 ids[count]; +}; + +/** \brief Classify table ids by interface index request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - index of the interface +*/ +define classify_table_by_interface +{ + u32 client_index; + u32 context; + u32 sw_if_index; +}; + +/** \brief Reply for classify table id by interface index request + @param context - sender context which was passed in the request + @param count - number of ids returned in response + @param sw_if_index - index of the interface + @param l2_table_id - l2 classify table index + @param ip4_table_id - ip4 classify table index + @param ip6_table_id - ip6 classify table index +*/ +define classify_table_by_interface_reply +{ + u32 context; + i32 retval; + u32 sw_if_index; + u32 l2_table_id; + u32 ip4_table_id; + u32 ip6_table_id; +}; + +/** \brief Classify table info + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param table_id - classify table index +*/ +define classify_table_info +{ + u32 client_index; + u32 context; + u32 table_id; +}; + +/** \brief Reply for classify table info request + @param context - sender context which was passed in the request + @param count - number of ids returned in response + @param table_id - classify table index + @param nbuckets - number of buckets when adding a table + @param match_n_vectors - number of match vectors + @param skip_n_vectors - number of skip_n_vectors + @param active_sessions - number of sessions (active entries) + @param next_table_index - index of next table + @param miss_next_index - index of miss table + @param mask[] - match mask +*/ +define classify_table_info_reply +{ + u32 context; + i32 retval; + u32 table_id; + u32 nbuckets; + u32 match_n_vectors; + u32 skip_n_vectors; + u32 active_sessions; + 
u32 next_table_index; + u32 miss_next_index; + u32 mask_length; + u8 mask[mask_length]; +}; + +/** \brief Classify sessions dump request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param table_id - classify table index +*/ +define classify_session_dump +{ + u32 client_index; + u32 context; + u32 table_id; +}; + +/** \brief Reply for classify table session dump request + @param context - sender context which was passed in the request + @param count - number of ids returned in response + @param table_id - classify table index + @param hit_next_index - hit_next_index of session + @param opaque_index - for add, opaque_index of session + @param advance - advance value of session + @param match[] - match value for session +*/ +define classify_session_details +{ + u32 context; + i32 retval; + u32 table_id; + u32 hit_next_index; + i32 advance; + u32 opaque_index; + u32 match_length; + u8 match[match_length]; +}; + +/** \brief Configure IPFIX exporter process request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param collector_address - address of IPFIX collector + @param collector_port - port of IPFIX collector + @param src_address - address of IPFIX exporter + @param vrf_id - VRF / fib table ID + @param path_mtu - Path MTU between exporter and collector + @param template_interval - number of seconds after which to resend template + @param udp_checksum - UDP checksum calculation enable flag +*/ +define set_ipfix_exporter +{ + u32 client_index; + u32 context; + u8 collector_address[16]; + u16 collector_port; + u8 src_address[16]; + u32 vrf_id; + u32 path_mtu; + u32 template_interval; + u8 udp_checksum; +}; + +/** \brief Reply to IPFIX exporter configure request + @param context - sender context which was passed in the request +*/ +define set_ipfix_exporter_reply +{ + u32 context; + i32 retval; +}; + +/** \brief IPFIX exporter dump request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define ipfix_exporter_dump +{ + u32 client_index; + u32 context; +}; + +/** \brief Reply to IPFIX exporter dump request + @param context - sender context which was passed in the request + @param collector_address - address of IPFIX collector + @param collector_port - port of IPFIX collector + @param src_address - address of IPFIX exporter + @param fib_index - fib table index + @param path_mtu - Path MTU between exporter and collector + @param template_interval - number of seconds after which to resend template + @param udp_checksum - UDP checksum calculation enable flag +*/ +define ipfix_exporter_details +{ + u32 context; + u8 collector_address[16]; + u16 collector_port; + u8 src_address[16]; + u32 vrf_id; + u32 path_mtu; + u32 template_interval; + u8 udp_checksum; +}; + +/** \brief IPFIX classify stream configure request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param domain_id - domain ID reported in IPFIX messages for classify stream + @param src_port - source port of UDP session for classify stream +*/ +define set_ipfix_classify_stream { + u32 client_index; + u32 context; + u32 domain_id; + u16 src_port; +}; + +/** \brief IPFIX classify stream configure response + @param context - sender context, to match reply w/ request + @param retval - return value for request +*/ +define 
set_ipfix_classify_stream_reply { + u32 context; + i32 retval; +}; + +/** \brief IPFIX classify stream dump request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define ipfix_classify_stream_dump { + u32 client_index; + u32 context; +}; + +/** \brief Reply to IPFIX classify stream dump request + @param context - sender context, to match reply w/ request + @param domain_id - domain ID reported in IPFIX messages for classify stream + @param src_port - source port of UDP session for classify stream +*/ +define ipfix_classify_stream_details { + u32 context; + u32 domain_id; + u16 src_port; +}; + +/** \brief IPFIX add or delete classifier table request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param table_id - classifier table ID + @param ip_version - version of IP used in the classifier table + @param transport_protocol - transport protocol used in the classifier table or 255 for unspecified +*/ +define ipfix_classify_table_add_del { + u32 client_index; + u32 context; + u32 table_id; + u8 ip_version; + u8 transport_protocol; + u8 is_add; +}; + +/** \brief IPFIX add classifier table response + @param context - sender context which was passed in the request +*/ +define ipfix_classify_table_add_del_reply { + u32 context; + i32 retval; +}; + +/** \brief IPFIX classify tables dump request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define ipfix_classify_table_dump { + u32 client_index; + u32 context; +}; + +/** \brief Reply to IPFIX classify tables dump request + @param context - sender context, to match reply w/ request + @param table_id - classifier table ID + @param ip_version - version of IP used in the classifier table + @param transport_protocol - transport protocol used in the classifier table or 255 for unspecified +*/ +define ipfix_classify_table_details { + u32 context; + u32 table_id; + u8 ip_version; + u8 transport_protocol; +}; + +/** \brief Set/unset flow classify interface + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - interface to set/unset flow classify + @param ip4_table_index - ip4 classify table index (~0 for skip) + @param ip6_table_index - ip6 classify table index (~0 for skip) + @param l2_table_index - l2 classify table index (~0 for skip) + @param is_add - Set if non-zero, else unset + Note: User is recommended to use just one valid table_index per call. + (ip4_table_index, ip6_table_index, or l2_table_index) +*/ +define flow_classify_set_interface { + u32 client_index; + u32 context; + u32 sw_if_index; + u32 ip4_table_index; + u32 ip6_table_index; + u8 is_add; +}; + +/** \brief Set/unset flow classify interface response + @param context - sender context, to match reply w/ request + @param retval - return value for request +*/ +define flow_classify_set_interface_reply { + u32 context; + i32 retval; +}; + +/** \brief Get list of flow classify interfaces and tables + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param type - classify table type +*/ +define flow_classify_dump { + u32 client_index; + u32 context; + u8 type; +}; + +/** \brief Flow classify operational state response. 
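+ Details records such as this one are streamed in response to the
+ corresponding flow_classify_dump request; a common client pattern is to
+ follow the dump with a control_ping and treat the control_ping_reply as
+ the end-of-stream marker. Client-side sketch (assumes a connected binary
+ API client with api_main_t *am and the generated
+ vl_api_flow_classify_dump_t type; values are hypothetical):
+
+ vl_api_flow_classify_dump_t *mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_FLOW_CLASSIFY_DUMP);
+ mp->client_index = am->my_client_index;
+ mp->type = 0; // classify table type
+ vl_msg_api_send_shmem (am->shmem_hdr->vl_input_queue, (u8 *) & mp);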
+ @param context - sender context, to match reply w/ request + @param sw_if_index - software interface index + @param table_index - classify table index +*/ +define flow_classify_details { + u32 context; + u32 sw_if_index; + u32 table_index; +}; + +/** \brief Query relative index via node names + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param node_name - name of node to find relative index from + @param next_name - next node from node_name to find relative index of +*/ +define get_next_index +{ + u32 client_index; + u32 context; + u8 node_name[64]; + u8 next_name[64]; +}; + +/** \brief Reply for get next node index + @param context - sender context which was passed in the request + @param retval - return value + @param next_index - index of the next_node +*/ +define get_next_index_reply +{ + u32 context; + i32 retval; + u32 next_index; +}; + +/** \brief PacketGenerator create interface request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param interface_id - interface index +*/ +define pg_create_interface +{ + u32 client_index; + u32 context; + u32 interface_id; +}; + +/** \brief PacketGenerator create interface response + @param context - sender context, to match reply w/ request + @param retval - return value for request +*/ +define pg_create_interface_reply +{ + u32 context; + i32 retval; + u32 sw_if_index; +}; + +/** \brief PacketGenerator capture packets on given interface request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param interface_id - pg interface index + @param is_enabled - 1 if enabling streams, 0 if disabling + @param count - number of packets to be captured + @param pcap_file - pcap file name to store captured packets +*/ +define pg_capture +{ + u32 client_index; + u32 context; + u32 interface_id; + u8 is_enabled; + u32 count; + u32 pcap_name_length; + u8 pcap_file_name[pcap_name_length]; +}; + +/** \brief PacketGenerator capture packets response + @param context - sender context, to match reply w/ request + @param retval - return value for request +*/ +define pg_capture_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Enable / disable packet generator request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_enabled - 1 if enabling streams, 0 if disabling + @param stream - stream name to be enable/disabled, if not specified handle all streams +*/ +define pg_enable_disable +{ + u32 client_index; + u32 context; + u8 is_enabled; + u32 stream_name_length; + u8 stream_name[stream_name_length]; +}; + +/** \brief Reply for enable / disable packet generator + @param context - returned sender context, to match reply w/ request + @param retval - return code +*/ +define pg_enable_disable_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Configure IP source and L4 port-range check + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_ip6 - 1 if source address type is IPv6 + @param is_add - 1 if add, 0 if delete + @param mask_length - mask length for address entry + @param address - array of address bytes + @param number_of_ranges - length of low_port and high_port arrays (must match) + @param low_ports[32] - up to 32 low end of port range entries (must have 
corresponding high_ports entry) + @param high_ports[32] - up to 32 high end of port range entries (must have corresponding low_ports entry) + @param vrf_id - fib table/vrf id to associate the source and port-range check with + @note To specify a single port set low_port and high_port entry the same +*/ +define ip_source_and_port_range_check_add_del +{ + u32 client_index; + u32 context; + u8 is_ipv6; + u8 is_add; + u8 mask_length; + u8 address[16]; + u8 number_of_ranges; + u16 low_ports[32]; + u16 high_ports[32]; + u32 vrf_id; +}; + +/** \brief Configure IP source and L4 port-range check reply + @param context - returned sender context, to match reply w/ request + @param retval - return code +*/ +define ip_source_and_port_range_check_add_del_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Set interface source and L4 port-range request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param interface_id - interface index + @param tcp_vrf_id - VRF associated with source and TCP port-range check + @param udp_vrf_id - VRF associated with source and UDP port-range check +*/ +define ip_source_and_port_range_check_interface_add_del +{ + u32 client_index; + u32 context; + u8 is_add; + u32 sw_if_index; + u32 tcp_in_vrf_id; + u32 tcp_out_vrf_id; + u32 udp_in_vrf_id; + u32 udp_out_vrf_id; +}; + +/** \brief Set interface source and L4 port-range response + @param context - sender context, to match reply w/ request + @param retval - return value for request +*/ +define ip_source_and_port_range_check_interface_add_del_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Delete sub interface request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - sw index of the interface that was created by create_subif +*/ +define delete_subif { + u32 client_index; + u32 context; + u32 sw_if_index; +}; + +/** \brief Delete sub interface response + @param context - sender context, to match reply w/ request + @param retval - return code for the request +*/ +define delete_subif_reply { + u32 context; + i32 retval; +}; + +/** \brief DPDK interface HQoS pipe profile set request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - the interface + @param subport - subport ID + @param pipe - pipe ID within its subport + @param profile - pipe profile ID +*/ +define sw_interface_set_dpdk_hqos_pipe { + u32 client_index; + u32 context; + u32 sw_if_index; + u32 subport; + u32 pipe; + u32 profile; +}; + +/** \brief DPDK interface HQoS pipe profile set reply + @param context - sender context, to match reply w/ request + @param retval - request return code +*/ +define sw_interface_set_dpdk_hqos_pipe_reply { + u32 context; + i32 retval; +}; + +/** \brief DPDK interface HQoS subport parameters set request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - the interface + @param subport - subport ID + @param tb_rate - subport token bucket rate (measured in bytes/second) + @param tb_size - subport token bucket size (measured in credits) + @param tc_rate - subport traffic class 0 .. 
3 rates (measured in bytes/second) + @param tc_period - enforcement period for rates (measured in milliseconds) +*/ +define sw_interface_set_dpdk_hqos_subport { + u32 client_index; + u32 context; + u32 sw_if_index; + u32 subport; + u32 tb_rate; + u32 tb_size; + u32 tc_rate[4]; + u32 tc_period; +}; + +/** \brief DPDK interface HQoS subport parameters set reply + @param context - sender context, to match reply w/ request + @param retval - request return code +*/ +define sw_interface_set_dpdk_hqos_subport_reply { + u32 context; + i32 retval; +}; + +/** \brief DPDK interface HQoS tctbl entry set request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - the interface + @param entry - entry index ID + @param tc - traffic class (0 .. 3) + @param queue - traffic class queue (0 .. 3) +*/ +define sw_interface_set_dpdk_hqos_tctbl { + u32 client_index; + u32 context; + u32 sw_if_index; + u32 entry; + u32 tc; + u32 queue; +}; + +/** \brief DPDK interface HQoS tctbl entry set reply + @param context - sender context, to match reply w/ request + @param retval - request return code +*/ +define sw_interface_set_dpdk_hqos_tctbl_reply { + u32 context; + i32 retval; +}; + +/** \brief L2 interface pbb tag rewrite configure request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - interface the operation is applied to + @param vtr_op - Choose from l2_vtr_op_t enum values + @param inner_tag - needed for translate_qinq vtr op only + @param outer_tag - needed for translate_qinq vtr op only + @param b_dmac - B-tag remote mac address, needed for any push or translate_qinq vtr op + @param b_smac - B-tag local mac address, needed for any push or translate qinq vtr op + @param b_vlanid - B-tag vlanid, needed for any push or translate qinq vtr op + @param i_sid - I-tag service id, needed for any push or translate qinq vtr op +*/ +define l2_interface_pbb_tag_rewrite +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u32 vtr_op; + u16 outer_tag; + u8 b_dmac[6]; + u8 b_smac[6]; + u16 b_vlanid; + u32 i_sid; +}; + +/** \brief L2 interface pbb tag rewrite response + @param context - sender context, to match reply w/ request + @param retval - return code for the request +*/ +define l2_interface_pbb_tag_rewrite_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Punt traffic to the host + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_add - add punt if non-zero, else delete + @param ipv - L3 protocol 4 - IPv4, 6 - IPv6, ~0 - All + @param l4_protocol - L4 protocol to be punted, only UDP (0x11) is supported + @param l4_port - TCP/UDP port to be punted +*/ +define punt { + u32 client_index; + u32 context; + u8 is_add; + u8 ipv; + u8 l4_protocol; + u16 l4_port; +}; + +/** \brief Reply to the punt request + @param context - sender context which was passed in the request + @param retval - return code of punt request +*/ +define punt_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Feature path enable/disable request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - the interface + @param enable - 1 = on, 0 = off +*/ +define feature_enable_disable { + u32 client_index; + u32 context; + u32 sw_if_index; + u8 enable; + u8 arc_name[64]; + u8 
feature_name[64]; +}; + +/** \brief Reply to the feature path enable/disable request + @param context - sender context which was passed in the request + @param retval - return code for the request +*/ +define feature_enable_disable_reply +{ + u32 context; + i32 retval; +}; + +/* + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vpp/api/vpe_all_api_h.h b/src/vpp/api/vpe_all_api_h.h new file mode 100644 index 00000000..397cd807 --- /dev/null +++ b/src/vpp/api/vpe_all_api_h.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * Add to the bottom of the #include list, or elves will steal your + * keyboard in the middle of the night! + */ + +/* Include the (first) vlib-api API definition layer */ +#include + +/* Include the (second) vnet API definition layer */ +#define included_from_layer_3 +#include +#undef included_from_layer_3 + +/* Include the current layer (third) vpp API definition layer */ +#include + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vpp/api/vpe_msg_enum.h b/src/vpp/api/vpe_msg_enum.h new file mode 100644 index 00000000..4fcc1c8c --- /dev/null +++ b/src/vpp/api/vpe_msg_enum.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef included_vpe_msg_enum_h +#define included_vpe_msg_enum_h + +#include + +#define vl_msg_id(n,h) n, +typedef enum +{ + VL_ILLEGAL_MESSAGE_ID = 0, +#include + VL_MSG_FIRST_AVAILABLE, +} vl_msg_id_t; +#undef vl_msg_id + +#endif /* included_vpe_msg_enum_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vpp/api/vpp_get_metrics.c b/src/vpp/api/vpp_get_metrics.c new file mode 100644 index 00000000..3474133d --- /dev/null +++ b/src/vpp/api/vpp_get_metrics.c @@ -0,0 +1,253 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +svmdb_client_t *c; +volatile int signal_received; + +static void +unix_signal_handler (int signum, siginfo_t * si, ucontext_t * uc) +{ + static int once; + + if (once) + exit (1); + + once = 1; + signal_received = 1; +} + +static void +setup_signal_handlers (void) +{ + uword i; + struct sigaction sa; + + for (i = 1; i < 32; i++) + { + memset (&sa, 0, sizeof (sa)); + sa.sa_sigaction = (void *) unix_signal_handler; + sa.sa_flags = SA_SIGINFO; + + switch (i) + { + /* these signals take the default action */ + case SIGABRT: + case SIGKILL: + case SIGSTOP: + case SIGUSR1: + case SIGUSR2: + continue; + + /* ignore SIGPIPE, SIGCHLD */ + case SIGPIPE: + case SIGCHLD: + sa.sa_sigaction = (void *) SIG_IGN; + break; + + /* catch and handle all other signals */ + default: + break; + } + + if (sigaction (i, &sa, 0) < 0) + return clib_unix_warning (0, "sigaction %U", format_signal, i); + } +} + +int +main (int argc, char **argv) +{ + unformat_input_t input; + char *chroot_path = 0; + u8 *chroot_path_u8; + int interval = 0; + f64 *vector_ratep, *rx_ratep, *sig_error_ratep; + pid_t *vpp_pidp; + svmdb_map_args_t _ma, *ma = &_ma; + int uid, gid, rv; + struct passwd _pw, *pw; + struct group _grp, *grp; + char *s, buf[128]; + + unformat_init_command_line (&input, argv); + + uid = geteuid (); + gid = getegid (); + + while (unformat_check_input (&input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (&input, "chroot %s", &chroot_path_u8)) + { + chroot_path = (char *) chroot_path_u8; + } + else if (unformat (&input, "interval %d", &interval)) + ; + else if (unformat (&input, "uid %d", &uid)) + ; + else if (unformat (&input, "gid %d", &gid)) + ; + else if (unformat (&input, "uid %s", &s)) + { + /* lookup the username */ + pw = NULL; + rv = getpwnam_r (s, &_pw, buf, sizeof (buf), &pw); + if (rv < 0) + { + fformat (stderr, "cannot fetch username %s", s); + exit (1); + } + if (pw == NULL) + { + fformat (stderr, "username %s does not exist", s); + exit (1); + } + vec_free (s); + uid = pw->pw_uid; + } + else if (unformat (&input, "gid %s", &s)) + { + /* lookup the group name */ + grp = NULL; + rv = getgrnam_r (s, &_grp, buf, sizeof (buf), &grp); + if (rv != 0) + { + fformat (stderr, "cannot fetch group %s", s); + exit (1); + } + if (grp == NULL) + { + fformat (stderr, "group %s does not exist", s); + exit (1); + } + vec_free (s); + gid = grp->gr_gid; + } + else + { + fformat (stderr, + "usage: vpp_get_metrics [chroot ] [interval ]\n"); + exit (1); + } + } + + setup_signal_handlers (); + + memset (ma, 0, sizeof (*ma)); + ma->root_path = chroot_path; + ma->uid = uid; + ma->gid = gid; + + c = svmdb_map (ma); + + vpp_pidp = + svmdb_local_get_variable_reference (c, SVMDB_NAMESPACE_VEC, "vpp_pid"); + vector_ratep = + svmdb_local_get_variable_reference (c, SVMDB_NAMESPACE_VEC, + "vpp_vector_rate"); + rx_ratep = + svmdb_local_get_variable_reference (c, SVMDB_NAMESPACE_VEC, + "vpp_input_rate"); + sig_error_ratep = + svmdb_local_get_variable_reference (c, SVMDB_NAMESPACE_VEC, + "vpp_sig_error_rate"); + + /* + * Make sure vpp is actually running. 
Otherwise, there's every + * chance that the database region will be wiped out by the + * process monitor script + */ + + if (vpp_pidp == 0 || vector_ratep == 0 || rx_ratep == 0 + || sig_error_ratep == 0) + { + fformat (stdout, "vpp not running\n"); + exit (1); + } + + do + { + /* + * Once vpp exits, the svm db region will be recreated... + * Can't use kill (*vpp_pidp, 0) if running as non-root / + * accessing the shared-VM database via group perms. + */ + if (*vpp_pidp == 0) + { + fformat (stdout, "vpp not running\n"); + exit (1); + } + fformat (stdout, + "%d: vpp_vector_rate=%.2f, vpp_input_rate=%f, vpp_sig_error_rate=%f\n", + *vpp_pidp, *vector_ratep, *rx_ratep, *sig_error_ratep); + + if (interval) + sleep (interval); + if (signal_received) + break; + } + while (interval); + + svmdb_unmap (c); + exit (0); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ -- cgit 1.2.3-korg From 738f3f2a170bace45180bc8718d5a7e75939b275 Mon Sep 17 00:00:00 2001 From: Pavel Kotucek Date: Mon, 9 Jan 2017 15:11:03 +0100 Subject: API refactoring : dpdk Change-Id: If2541be803a0303401b013390e117c26fd1d9739 Signed-off-by: Pavel Kotucek --- src/vat/api_format.c | 119 ++++++++++++++++--- src/vnet.am | 8 +- src/vnet/devices/dpdk/dpdk.api | 103 ++++++++++++++++ src/vnet/devices/dpdk/dpdk_api.c | 246 +++++++++++++++++++++++++++++++++++++++ src/vnet/vnet_all_api_h.h | 3 + src/vpp/api/api.c | 153 ------------------------ src/vpp/api/custom_dump.c | 17 ++- src/vpp/api/vpe.api | 83 +------------ 8 files changed, 475 insertions(+), 257 deletions(-) create mode 100644 src/vnet/devices/dpdk/dpdk.api create mode 100644 src/vnet/devices/dpdk/dpdk_api.c (limited to 'src/vpp/api') diff --git a/src/vat/api_format.c b/src/vat/api_format.c index e6c0f244..c00104de 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -3569,9 +3569,6 @@ _(sw_interface_set_mpls_enable_reply) \ _(sw_interface_set_vpath_reply) \ _(sw_interface_set_vxlan_bypass_reply) \ _(sw_interface_set_l2_bridge_reply) \ -_(sw_interface_set_dpdk_hqos_pipe_reply) \ -_(sw_interface_set_dpdk_hqos_subport_reply) \ -_(sw_interface_set_dpdk_hqos_tctbl_reply) \ _(bridge_domain_add_del_reply) \ _(sw_interface_set_l2_xconnect_reply) \ _(l2fib_add_del_reply) \ @@ -3671,6 +3668,13 @@ _(feature_enable_disable_reply) \ _(sw_interface_tag_add_del_reply) \ _(sw_interface_set_mtu_reply) +#if DPDK > 0 +#define foreach_standard_dpdk_reply_retval_handler \ +_(sw_interface_set_dpdk_hqos_pipe_reply) \ +_(sw_interface_set_dpdk_hqos_subport_reply) \ +_(sw_interface_set_dpdk_hqos_tctbl_reply) +#endif + #define _(n) \ static void vl_api_##n##_t_handler \ (vl_api_##n##_t * mp) \ @@ -3702,6 +3706,39 @@ foreach_standard_reply_retval_handler; foreach_standard_reply_retval_handler; #undef _ +#if DPDK > 0 +#define _(n) \ + static void vl_api_##n##_t_handler \ + (vl_api_##n##_t * mp) \ + { \ + vat_main_t * vam = &vat_main; \ + i32 retval = ntohl(mp->retval); \ + if (vam->async_mode) { \ + vam->async_errors += (retval < 0); \ + } else { \ + vam->retval = retval; \ + vam->result_ready = 1; \ + } \ + } +foreach_standard_dpdk_reply_retval_handler; +#undef _ + +#define _(n) \ + static void vl_api_##n##_t_handler_json \ + (vl_api_##n##_t * mp) \ + { \ + vat_main_t * vam = &vat_main; \ + vat_json_node_t node; \ + vat_json_init_object(&node); \ + vat_json_object_add_int(&node, "retval", 
ntohl(mp->retval)); \ + vat_json_print(vam->ofp, &node); \ + vam->retval = ntohl(mp->retval); \ + vam->result_ready = 1; \ + } +foreach_standard_dpdk_reply_retval_handler; +#undef _ +#endif + /* * Table of message reply handlers, must include boilerplate handlers * we just generated @@ -3725,12 +3762,6 @@ _(SW_INTERFACE_SET_L2_XCONNECT_REPLY, \ sw_interface_set_l2_xconnect_reply) \ _(SW_INTERFACE_SET_L2_BRIDGE_REPLY, \ sw_interface_set_l2_bridge_reply) \ -_(SW_INTERFACE_SET_DPDK_HQOS_PIPE_REPLY, \ - sw_interface_set_dpdk_hqos_pipe_reply) \ -_(SW_INTERFACE_SET_DPDK_HQOS_SUBPORT_REPLY, \ - sw_interface_set_dpdk_hqos_subport_reply) \ -_(SW_INTERFACE_SET_DPDK_HQOS_TCTBL_REPLY, \ - sw_interface_set_dpdk_hqos_tctbl_reply) \ _(BRIDGE_DOMAIN_ADD_DEL_REPLY, bridge_domain_add_del_reply) \ _(BRIDGE_DOMAIN_DETAILS, bridge_domain_details) \ _(BRIDGE_DOMAIN_SW_IF_DETAILS, bridge_domain_sw_if_details) \ @@ -3924,6 +3955,16 @@ _(SW_INTERFACE_SET_MTU_REPLY, sw_interface_set_mtu_reply) \ _(IP_NEIGHBOR_DETAILS, ip_neighbor_details) \ _(SW_INTERFACE_GET_TABLE_REPLY, sw_interface_get_table_reply) +#if DPDK > 0 +#define foreach_vpe_dpdk_api_reply_msg \ +_(SW_INTERFACE_SET_DPDK_HQOS_PIPE_REPLY, \ + sw_interface_set_dpdk_hqos_pipe_reply) \ +_(SW_INTERFACE_SET_DPDK_HQOS_SUBPORT_REPLY, \ + sw_interface_set_dpdk_hqos_subport_reply) \ +_(SW_INTERFACE_SET_DPDK_HQOS_TCTBL_REPLY, \ + sw_interface_set_dpdk_hqos_tctbl_reply) +#endif + /* M: construct, but don't yet send a message */ #define M(T,t) \ @@ -4724,6 +4765,7 @@ api_sw_interface_clear_stats (vat_main_t * vam) W; } +#if DPDK >0 static int api_sw_interface_set_dpdk_hqos_pipe (vat_main_t * vam) { @@ -4944,6 +4986,7 @@ api_sw_interface_set_dpdk_hqos_tctbl (vat_main_t * vam) /* NOTREACHED */ return 0; } +#endif static int api_sw_interface_add_del_address (vat_main_t * vam) @@ -17434,14 +17477,6 @@ _(sw_interface_set_l2_bridge, \ " | sw_if_index bd_id \n" \ "[shg ] [bvi]\n" \ "enable | disable") \ -_(sw_interface_set_dpdk_hqos_pipe, \ - "rx | sw_if_index subport pipe \n" \ - "profile \n") \ -_(sw_interface_set_dpdk_hqos_subport, \ - "rx | sw_if_index subport [rate ]\n" \ - "[bktsize ] [tc0 ] [tc1 ] [tc2 ] [tc3 ] [period ]\n") \ -_(sw_interface_set_dpdk_hqos_tctbl, \ - "rx | sw_if_index entry tc queue \n") \ _(bridge_domain_add_del, \ "bd_id [flood 1|0] [uu-flood 1|0] [forward 1|0] [learn 1|0] [arp-term 1|0] [del]\n") \ _(bridge_domain_dump, "[bd_id ]\n") \ @@ -17739,6 +17774,18 @@ _(sw_interface_set_mtu, " | sw_if_index mtu ") \ _(ip_neighbor_dump, "[ip6] | sw_if_index ") \ _(sw_interface_get_table, " | sw_if_index [ipv6]") +#if DPDK > 0 +#define foreach_vpe_dpdk_api_msg \ +_(sw_interface_set_dpdk_hqos_pipe, \ + "rx | sw_if_index subport pipe \n" \ + "profile \n") \ +_(sw_interface_set_dpdk_hqos_subport, \ + "rx | sw_if_index subport [rate ]\n" \ + "[bktsize ] [tc0 ] [tc1 ] [tc2 ] [tc3 ] [period ]\n") \ +_(sw_interface_set_dpdk_hqos_tctbl, \ + "rx | sw_if_index entry tc queue \n") +#endif + /* List of command functions, CLI names map directly to functions */ #define foreach_cli_function \ _(comment, "usage: comment ") \ @@ -17776,6 +17823,22 @@ _(unset, "usage: unset ") foreach_vpe_api_reply_msg; #undef _ +#if DPDK > 0 +#define _(N,n) \ + static void vl_api_##n##_t_handler_uni \ + (vl_api_##n##_t * mp) \ + { \ + vat_main_t * vam = &vat_main; \ + if (vam->json_output) { \ + vl_api_##n##_t_handler_json(mp); \ + } else { \ + vl_api_##n##_t_handler(mp); \ + } \ + } +foreach_vpe_dpdk_api_reply_msg; +#undef _ +#endif + void vat_api_hookup (vat_main_t * vam) { @@ -17789,6 
+17852,18 @@ vat_api_hookup (vat_main_t * vam) foreach_vpe_api_reply_msg; #undef _ +#if DPDK > 0 +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler_uni, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_vpe_dpdk_api_reply_msg; +#undef _ +#endif + #if (VPP_API_TEST_BUILTIN==0) vl_msg_api_set_first_available_msg_id (VL_MSG_FIRST_AVAILABLE); #endif @@ -17803,11 +17878,21 @@ vat_api_hookup (vat_main_t * vam) #define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n); foreach_vpe_api_msg; #undef _ +#if DPDK >0 +#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n); + foreach_vpe_dpdk_api_msg; +#undef _ +#endif /* Help strings */ #define _(n,h) hash_set_mem (vam->help_by_name, #n, h); foreach_vpe_api_msg; #undef _ +#if DPDK >0 +#define _(n,h) hash_set_mem (vam->help_by_name, #n, h); + foreach_vpe_dpdk_api_msg; +#undef _ +#endif /* CLI functions */ #define _(n,h) hash_set_mem (vam->function_by_name, #n, n); diff --git a/src/vnet.am b/src/vnet.am index 16ade4d1..bc0820a3 100644 --- a/src/vnet.am +++ b/src/vnet.am @@ -759,10 +759,14 @@ libvnet_la_SOURCES += \ vnet/devices/dpdk/init.c \ vnet/devices/dpdk/node.c \ vnet/devices/dpdk/hqos.c \ - vnet/devices/dpdk/cli.c + vnet/devices/dpdk/cli.c \ + vnet/devices/dpdk/dpdk_api.c nobase_include_HEADERS += \ - vnet/devices/dpdk/dpdk.h + vnet/devices/dpdk/dpdk.h \ + vnet/devices/dpdk/dpdk.api.h + +API_FILES += vnet/devices/dpdk/dpdk.api else libvnet_la_SOURCES += \ vnet/devices/nic/ixge.c \ diff --git a/src/vnet/devices/dpdk/dpdk.api b/src/vnet/devices/dpdk/dpdk.api new file mode 100644 index 00000000..21215d45 --- /dev/null +++ b/src/vnet/devices/dpdk/dpdk.api @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2015-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** \brief DPDK interface HQoS pipe profile set request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - the interface + @param subport - subport ID + @param pipe - pipe ID within its subport + @param profile - pipe profile ID +*/ +define sw_interface_set_dpdk_hqos_pipe { + u32 client_index; + u32 context; + u32 sw_if_index; + u32 subport; + u32 pipe; + u32 profile; +}; + +/** \brief DPDK interface HQoS pipe profile set reply + @param context - sender context, to match reply w/ request + @param retval - request return code +*/ +define sw_interface_set_dpdk_hqos_pipe_reply { + u32 context; + i32 retval; +}; + +/** \brief DPDK interface HQoS subport parameters set request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - the interface + @param subport - subport ID + @param tb_rate - subport token bucket rate (measured in bytes/second) + @param tb_size - subport token bucket size (measured in credits) + @param tc_rate - subport traffic class 0 .. 
3 rates (measured in bytes/second) + @param tc_period - enforcement period for rates (measured in milliseconds) +*/ +define sw_interface_set_dpdk_hqos_subport { + u32 client_index; + u32 context; + u32 sw_if_index; + u32 subport; + u32 tb_rate; + u32 tb_size; + u32 tc_rate[4]; + u32 tc_period; +}; + +/** \brief DPDK interface HQoS subport parameters set reply + @param context - sender context, to match reply w/ request + @param retval - request return code +*/ +define sw_interface_set_dpdk_hqos_subport_reply { + u32 context; + i32 retval; +}; + +/** \brief DPDK interface HQoS tctbl entry set request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - the interface + @param entry - entry index ID + @param tc - traffic class (0 .. 3) + @param queue - traffic class queue (0 .. 3) +*/ +define sw_interface_set_dpdk_hqos_tctbl { + u32 client_index; + u32 context; + u32 sw_if_index; + u32 entry; + u32 tc; + u32 queue; +}; + +/** \brief DPDK interface HQoS tctbl entry set reply + @param context - sender context, to match reply w/ request + @param retval - request return code +*/ +define sw_interface_set_dpdk_hqos_tctbl_reply { + u32 context; + i32 retval; +}; + +/* + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ + \ No newline at end of file diff --git a/src/vnet/devices/dpdk/dpdk_api.c b/src/vnet/devices/dpdk/dpdk_api.c new file mode 100644 index 00000000..8faf5c2c --- /dev/null +++ b/src/vnet/devices/dpdk/dpdk_api.c @@ -0,0 +1,246 @@ +/* + *------------------------------------------------------------------ + * dpdk_api.c - dpdk interface api + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include +#include + +#if DPDK > 0 +#include +#endif + +#include + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) 
vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include +#undef vl_printfun + +#include + +#define foreach_vpe_api_msg \ +_(SW_INTERFACE_SET_DPDK_HQOS_PIPE, sw_interface_set_dpdk_hqos_pipe) \ +_(SW_INTERFACE_SET_DPDK_HQOS_SUBPORT, sw_interface_set_dpdk_hqos_subport) \ +_(SW_INTERFACE_SET_DPDK_HQOS_TCTBL, sw_interface_set_dpdk_hqos_tctbl) + +static void + vl_api_sw_interface_set_dpdk_hqos_pipe_t_handler + (vl_api_sw_interface_set_dpdk_hqos_pipe_t * mp) +{ + vl_api_sw_interface_set_dpdk_hqos_pipe_reply_t *rmp; + int rv = 0; + +#if DPDK > 0 + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd; + + u32 sw_if_index = ntohl (mp->sw_if_index); + u32 subport = ntohl (mp->subport); + u32 pipe = ntohl (mp->pipe); + u32 profile = ntohl (mp->profile); + vnet_hw_interface_t *hw; + + VALIDATE_SW_IF_INDEX (mp); + + /* hw_if & dpdk device */ + hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); + + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + rv = rte_sched_pipe_config (xd->hqos_ht->hqos, subport, pipe, profile); + + BAD_SW_IF_INDEX_LABEL; +#else + clib_warning ("setting HQoS pipe parameters without DPDK not implemented"); + rv = VNET_API_ERROR_UNIMPLEMENTED; +#endif /* DPDK */ + + REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_PIPE_REPLY); +} + +static void + vl_api_sw_interface_set_dpdk_hqos_subport_t_handler + (vl_api_sw_interface_set_dpdk_hqos_subport_t * mp) +{ + vl_api_sw_interface_set_dpdk_hqos_subport_reply_t *rmp; + int rv = 0; + +#if DPDK > 0 + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd; + struct rte_sched_subport_params p; + + u32 sw_if_index = ntohl (mp->sw_if_index); + u32 subport = ntohl (mp->subport); + p.tb_rate = ntohl (mp->tb_rate); + p.tb_size = ntohl (mp->tb_size); + p.tc_rate[0] = ntohl (mp->tc_rate[0]); + p.tc_rate[1] = ntohl (mp->tc_rate[1]); + p.tc_rate[2] = ntohl (mp->tc_rate[2]); + p.tc_rate[3] = ntohl (mp->tc_rate[3]); + p.tc_period = ntohl (mp->tc_period); + + vnet_hw_interface_t *hw; + + VALIDATE_SW_IF_INDEX (mp); + + /* hw_if & dpdk device */ + hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); + + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + rv = rte_sched_subport_config (xd->hqos_ht->hqos, subport, &p); + + BAD_SW_IF_INDEX_LABEL; +#else + clib_warning + ("setting HQoS subport parameters without DPDK not implemented"); + rv = VNET_API_ERROR_UNIMPLEMENTED; +#endif /* DPDK */ + + REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_SUBPORT_REPLY); +} + +static void + vl_api_sw_interface_set_dpdk_hqos_tctbl_t_handler + (vl_api_sw_interface_set_dpdk_hqos_tctbl_t * mp) +{ + vl_api_sw_interface_set_dpdk_hqos_tctbl_reply_t *rmp; + int rv = 0; + +#if DPDK > 0 + dpdk_main_t *dm = &dpdk_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + dpdk_device_t *xd; + + u32 sw_if_index = ntohl (mp->sw_if_index); + u32 entry = ntohl (mp->entry); + u32 tc = ntohl (mp->tc); + u32 queue = ntohl (mp->queue); + u32 val, i; + + vnet_hw_interface_t *hw; + + VALIDATE_SW_IF_INDEX (mp); + + /* hw_if & dpdk device */ + hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); + + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + if (tc >= RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE) + { + clib_warning ("invalid traffic class !!"); + rv = VNET_API_ERROR_INVALID_VALUE; + goto done; + } + if (queue >= RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS) + { + clib_warning ("invalid queue !!"); + rv = VNET_API_ERROR_INVALID_VALUE; + goto done; + } + + /* Detect the set of worker threads */ + uword *p = hash_get_mem (tm->thread_registrations_by_name, 
"workers"); + + if (p == 0) + { + clib_warning ("worker thread registration AWOL !!"); + rv = VNET_API_ERROR_INVALID_VALUE_2; + goto done; + } + + vlib_thread_registration_t *tr = (vlib_thread_registration_t *) p[0]; + int worker_thread_first = tr->first_index; + int worker_thread_count = tr->count; + + val = tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + queue; + for (i = 0; i < worker_thread_count; i++) + xd->hqos_wt[worker_thread_first + i].hqos_tc_table[entry] = val; + + BAD_SW_IF_INDEX_LABEL; +done: +#else + clib_warning ("setting HQoS DSCP table entry without DPDK not implemented"); + rv = VNET_API_ERROR_UNIMPLEMENTED; +#endif /* DPDK */ + + REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_TCTBL_REPLY); +} + +/* + * dpdk_api_hookup + * Add vpe's API message handlers to the table. + * vlib has alread mapped shared memory and + * added the client registration handlers. + * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process() + */ +#define vl_msg_name_crc_list +#include +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (api_main_t * am) +{ +#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id); + foreach_vl_msg_name_crc_dpdk; +#undef _ +} + +static clib_error_t * +dpdk_api_hookup (vlib_main_t * vm) +{ + api_main_t *am = &api_main; + +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_vpe_api_msg; +#undef _ + + /* + * Set up the (msg_name, crc, message-id) table + */ + setup_message_id_table (am); + + return 0; +} + +VLIB_API_INIT_FUNCTION (dpdk_api_hookup); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/vnet_all_api_h.h b/src/vnet/vnet_all_api_h.h index 1b4d6c45..d48e1540 100644 --- a/src/vnet/vnet_all_api_h.h +++ b/src/vnet/vnet_all_api_h.h @@ -30,6 +30,9 @@ #endif /* included_from_layer_3 */ #include +#if DPDK > 0 +#include +#endif #include #include #include diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index 6289249c..46e28e9d 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -86,10 +86,6 @@ #undef __included_bihash_template_h__ #include -#if DPDK > 0 -#include -#endif - #include #include @@ -131,9 +127,6 @@ _(SW_INTERFACE_SET_VPATH, sw_interface_set_vpath) \ _(SW_INTERFACE_SET_VXLAN_BYPASS, sw_interface_set_vxlan_bypass) \ _(SW_INTERFACE_SET_L2_XCONNECT, sw_interface_set_l2_xconnect) \ _(SW_INTERFACE_SET_L2_BRIDGE, sw_interface_set_l2_bridge) \ -_(SW_INTERFACE_SET_DPDK_HQOS_PIPE, sw_interface_set_dpdk_hqos_pipe) \ -_(SW_INTERFACE_SET_DPDK_HQOS_SUBPORT, sw_interface_set_dpdk_hqos_subport) \ -_(SW_INTERFACE_SET_DPDK_HQOS_TCTBL, sw_interface_set_dpdk_hqos_tctbl) \ _(BRIDGE_DOMAIN_ADD_DEL, bridge_domain_add_del) \ _(BRIDGE_DOMAIN_DUMP, bridge_domain_dump) \ _(BRIDGE_DOMAIN_DETAILS, bridge_domain_details) \ @@ -697,152 +690,6 @@ static void REPLY_MACRO (VL_API_SW_INTERFACE_SET_L2_BRIDGE_REPLY); } -static void - vl_api_sw_interface_set_dpdk_hqos_pipe_t_handler - (vl_api_sw_interface_set_dpdk_hqos_pipe_t * mp) -{ - vl_api_sw_interface_set_dpdk_hqos_pipe_reply_t *rmp; - int rv = 0; - -#if DPDK > 0 - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd; - - u32 sw_if_index = ntohl (mp->sw_if_index); - u32 subport = ntohl (mp->subport); - u32 pipe = ntohl (mp->pipe); - u32 profile = ntohl (mp->profile); - vnet_hw_interface_t *hw; - - VALIDATE_SW_IF_INDEX (mp); - - /* hw_if & dpdk device */ - hw = 
vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); - - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - rv = rte_sched_pipe_config (xd->hqos_ht->hqos, subport, pipe, profile); - - BAD_SW_IF_INDEX_LABEL; -#else - clib_warning ("setting HQoS pipe parameters without DPDK not implemented"); - rv = VNET_API_ERROR_UNIMPLEMENTED; -#endif /* DPDK */ - - REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_PIPE_REPLY); -} - -static void - vl_api_sw_interface_set_dpdk_hqos_subport_t_handler - (vl_api_sw_interface_set_dpdk_hqos_subport_t * mp) -{ - vl_api_sw_interface_set_dpdk_hqos_subport_reply_t *rmp; - int rv = 0; - -#if DPDK > 0 - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd; - struct rte_sched_subport_params p; - - u32 sw_if_index = ntohl (mp->sw_if_index); - u32 subport = ntohl (mp->subport); - p.tb_rate = ntohl (mp->tb_rate); - p.tb_size = ntohl (mp->tb_size); - p.tc_rate[0] = ntohl (mp->tc_rate[0]); - p.tc_rate[1] = ntohl (mp->tc_rate[1]); - p.tc_rate[2] = ntohl (mp->tc_rate[2]); - p.tc_rate[3] = ntohl (mp->tc_rate[3]); - p.tc_period = ntohl (mp->tc_period); - - vnet_hw_interface_t *hw; - - VALIDATE_SW_IF_INDEX (mp); - - /* hw_if & dpdk device */ - hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); - - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - rv = rte_sched_subport_config (xd->hqos_ht->hqos, subport, &p); - - BAD_SW_IF_INDEX_LABEL; -#else - clib_warning - ("setting HQoS subport parameters without DPDK not implemented"); - rv = VNET_API_ERROR_UNIMPLEMENTED; -#endif /* DPDK */ - - REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_SUBPORT_REPLY); -} - -static void - vl_api_sw_interface_set_dpdk_hqos_tctbl_t_handler - (vl_api_sw_interface_set_dpdk_hqos_tctbl_t * mp) -{ - vl_api_sw_interface_set_dpdk_hqos_tctbl_reply_t *rmp; - int rv = 0; - -#if DPDK > 0 - dpdk_main_t *dm = &dpdk_main; - vlib_thread_main_t *tm = vlib_get_thread_main (); - dpdk_device_t *xd; - - u32 sw_if_index = ntohl (mp->sw_if_index); - u32 entry = ntohl (mp->entry); - u32 tc = ntohl (mp->tc); - u32 queue = ntohl (mp->queue); - u32 val, i; - - vnet_hw_interface_t *hw; - - VALIDATE_SW_IF_INDEX (mp); - - /* hw_if & dpdk device */ - hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); - - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - if (tc >= RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE) - { - clib_warning ("invalid traffic class !!"); - rv = VNET_API_ERROR_INVALID_VALUE; - goto done; - } - if (queue >= RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS) - { - clib_warning ("invalid queue !!"); - rv = VNET_API_ERROR_INVALID_VALUE; - goto done; - } - - /* Detect the set of worker threads */ - uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers"); - - if (p == 0) - { - clib_warning ("worker thread registration AWOL !!"); - rv = VNET_API_ERROR_INVALID_VALUE_2; - goto done; - } - - vlib_thread_registration_t *tr = (vlib_thread_registration_t *) p[0]; - int worker_thread_first = tr->first_index; - int worker_thread_count = tr->count; - - val = tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + queue; - for (i = 0; i < worker_thread_count; i++) - xd->hqos_wt[worker_thread_first + i].hqos_tc_table[entry] = val; - - BAD_SW_IF_INDEX_LABEL; -done: -#else - clib_warning ("setting HQoS DSCP table entry without DPDK not implemented"); - rv = VNET_API_ERROR_UNIMPLEMENTED; -#endif /* DPDK */ - - REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_TCTBL_REPLY); -} - static void vl_api_bridge_domain_add_del_t_handler (vl_api_bridge_domain_add_del_t * mp) { diff --git a/src/vpp/api/custom_dump.c 
b/src/vpp/api/custom_dump.c index 1964533e..c2cd3d15 100644 --- a/src/vpp/api/custom_dump.c +++ b/src/vpp/api/custom_dump.c @@ -238,6 +238,7 @@ static void *vl_api_sw_interface_set_l2_bridge_t_print FINISH; } +#if DPDK > 0 static void *vl_api_sw_interface_set_dpdk_hqos_pipe_t_print (vl_api_sw_interface_set_dpdk_hqos_pipe_t * mp, void *handle) { @@ -287,6 +288,7 @@ static void *vl_api_sw_interface_set_dpdk_hqos_tctbl_t_print FINISH; } +#endif static void *vl_api_bridge_domain_add_del_t_print (vl_api_bridge_domain_add_del_t * mp, void *handle) @@ -3002,9 +3004,6 @@ _(BRIDGE_FLAGS, bridge_flags) \ _(CLASSIFY_ADD_DEL_TABLE, classify_add_del_table) \ _(CLASSIFY_ADD_DEL_SESSION, classify_add_del_session) \ _(SW_INTERFACE_SET_L2_BRIDGE, sw_interface_set_l2_bridge) \ -_(SW_INTERFACE_SET_DPDK_HQOS_PIPE, sw_interface_set_dpdk_hqos_pipe) \ -_(SW_INTERFACE_SET_DPDK_HQOS_SUBPORT, sw_interface_set_dpdk_hqos_subport)\ -_(SW_INTERFACE_SET_DPDK_HQOS_TCTBL, sw_interface_set_dpdk_hqos_tctbl) \ _(BRIDGE_DOMAIN_ADD_DEL, bridge_domain_add_del) \ _(BRIDGE_DOMAIN_DUMP, bridge_domain_dump) \ _(CLASSIFY_SET_INTERFACE_IP_TABLE, classify_set_interface_ip_table) \ @@ -3128,6 +3127,18 @@ vl_msg_api_custom_dump_configure (api_main_t * am) = (void *) vl_api_##f##_t_print; foreach_custom_print_function; #undef _ + +#if DPDK > 0 + /* + * manually add DPDK hqos print handlers + */ + am->msg_print_handlers[VL_API_SW_INTERFACE_SET_DPDK_HQOS_PIPE] = + (void *) vl_api_sw_interface_set_dpdk_hqos_pipe_t_print; + am->msg_print_handlers[VL_API_SW_INTERFACE_SET_DPDK_HQOS_SUBPORT] = + (void *) vl_api_sw_interface_set_dpdk_hqos_subport_t_print; + am->msg_print_handlers[VL_API_SW_INTERFACE_SET_DPDK_HQOS_TCTBL] = + (void *) vl_api_sw_interface_set_dpdk_hqos_tctbl_t_print; +#endif } /* diff --git a/src/vpp/api/vpe.api b/src/vpp/api/vpe.api index abd0e8f1..3e4bcdf9 100644 --- a/src/vpp/api/vpe.api +++ b/src/vpp/api/vpe.api @@ -37,6 +37,7 @@ * IPSEC-GRE APIs: see .../vnet/vnet/ipsec-gre/{ipsec_gre.api, ipsec_gre_api.c} * LISP APIs: see .../vnet/vnet/lisp/{lisp.api, lisp_api.c} * LISP-GPE APIs: see .../vnet/vnet/lisp-gpe/{lisp_gpe.api, lisp_gpe_api.c} + * DPDK APIs: ... see /src/vnet/devices/dpdk/{dpdk.api, dpdk_api.c} */ /** \brief Create a new subinterface with the given vlan id @@ -2606,88 +2607,6 @@ define delete_subif_reply { i32 retval; }; -/** \brief DPDK interface HQoS pipe profile set request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param sw_if_index - the interface - @param subport - subport ID - @param pipe - pipe ID within its subport - @param profile - pipe profile ID -*/ -define sw_interface_set_dpdk_hqos_pipe { - u32 client_index; - u32 context; - u32 sw_if_index; - u32 subport; - u32 pipe; - u32 profile; -}; - -/** \brief DPDK interface HQoS pipe profile set reply - @param context - sender context, to match reply w/ request - @param retval - request return code -*/ -define sw_interface_set_dpdk_hqos_pipe_reply { - u32 context; - i32 retval; -}; - -/** \brief DPDK interface HQoS subport parameters set request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param sw_if_index - the interface - @param subport - subport ID - @param tb_rate - subport token bucket rate (measured in bytes/second) - @param tb_size - subport token bucket size (measured in credits) - @param tc_rate - subport traffic class 0 .. 
3 rates (measured in bytes/second) - @param tc_period - enforcement period for rates (measured in milliseconds) -*/ -define sw_interface_set_dpdk_hqos_subport { - u32 client_index; - u32 context; - u32 sw_if_index; - u32 subport; - u32 tb_rate; - u32 tb_size; - u32 tc_rate[4]; - u32 tc_period; -}; - -/** \brief DPDK interface HQoS subport parameters set reply - @param context - sender context, to match reply w/ request - @param retval - request return code -*/ -define sw_interface_set_dpdk_hqos_subport_reply { - u32 context; - i32 retval; -}; - -/** \brief DPDK interface HQoS tctbl entry set request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param sw_if_index - the interface - @param entry - entry index ID - @param tc - traffic class (0 .. 3) - @param queue - traffic class queue (0 .. 3) -*/ -define sw_interface_set_dpdk_hqos_tctbl { - u32 client_index; - u32 context; - u32 sw_if_index; - u32 entry; - u32 tc; - u32 queue; -}; - -/** \brief DPDK interface HQoS tctbl entry set reply - @param context - sender context, to match reply w/ request - @param retval - request return code -*/ -define sw_interface_set_dpdk_hqos_tctbl_reply { - u32 context; - i32 retval; -}; - /** \brief L2 interface pbb tag rewrite configure request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request -- cgit 1.2.3-korg From 75152289284aaf1116d62c6cdef5a3b0c793fa15 Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Mon, 9 Jan 2017 01:00:45 -0800 Subject: IPv6 NS RS tests and fixes includes Fix for VPP-584 with API change to remove prefix length from LL programming Change-Id: If860751c35e60255fb977f73bc33e8c2649e728e Signed-off-by: Neale Ranns --- src/vat/api_format.c | 5 +- src/vnet/interface_funcs.h | 8 ++ src/vnet/ip/icmp6.h | 4 - src/vnet/ip/ip.api | 2 - src/vnet/ip/ip6.h | 3 +- src/vnet/ip/ip6_forward.c | 6 +- src/vnet/ip/ip6_neighbor.c | 42 ++------ src/vnet/ip/ip_api.c | 3 +- src/vnet/rewrite.c | 7 +- src/vpp/api/custom_dump.c | 3 +- src/vpp/api/test_client.c | 2 - test/test_ip6.py | 237 ++++++++++++++++++++++++++++++++++++++++++++- test/vpp_interface.py | 14 +++ test/vpp_papi_provider.py | 16 +++ test/vpp_pg_interface.py | 5 +- 15 files changed, 300 insertions(+), 57 deletions(-) (limited to 'src/vpp/api') diff --git a/src/vat/api_format.c b/src/vat/api_format.c index c00104de..75148207 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -7671,7 +7671,6 @@ api_sw_interface_ip6_set_link_local_address (vat_main_t * vam) f64 timeout; u32 sw_if_index; u8 sw_if_index_set = 0; - u32 address_length = 0; u8 v6_address_set = 0; ip6_address_t v6address; @@ -7682,8 +7681,7 @@ api_sw_interface_ip6_set_link_local_address (vat_main_t * vam) sw_if_index_set = 1; else if (unformat (i, "sw_if_index %d", &sw_if_index)) sw_if_index_set = 1; - else if (unformat (i, "%U/%d", - unformat_ip6_address, &v6address, &address_length)) + else if (unformat (i, "%U", unformat_ip6_address, &v6address)) v6_address_set = 1; else break; @@ -7706,7 +7704,6 @@ api_sw_interface_ip6_set_link_local_address (vat_main_t * vam) mp->sw_if_index = ntohl (sw_if_index); clib_memcpy (mp->address, &v6address, sizeof (v6address)); - mp->address_length = address_length; /* send it... 
*/ S; diff --git a/src/vnet/interface_funcs.h b/src/vnet/interface_funcs.h index b84d151c..ab808dfa 100644 --- a/src/vnet/interface_funcs.h +++ b/src/vnet/interface_funcs.h @@ -52,6 +52,14 @@ vnet_get_sw_interface (vnet_main_t * vnm, u32 sw_if_index) return pool_elt_at_index (vnm->interface_main.sw_interfaces, sw_if_index); } +always_inline vnet_sw_interface_t * +vnet_get_sw_interface_safe (vnet_main_t * vnm, u32 sw_if_index) +{ + if (!pool_is_free_index (vnm->interface_main.sw_interfaces, sw_if_index)) + return pool_elt_at_index (vnm->interface_main.sw_interfaces, sw_if_index); + return (NULL); +} + always_inline vnet_sw_interface_t * vnet_get_hw_sw_interface (vnet_main_t * vnm, u32 hw_if_index) { diff --git a/src/vnet/ip/icmp6.h b/src/vnet/ip/icmp6.h index a426512e..9a3487b1 100644 --- a/src/vnet/ip/icmp6.h +++ b/src/vnet/ip/icmp6.h @@ -37,10 +37,6 @@ "neighbor discovery unsupported interface") \ _ (ROUTER_SOLICITATION_RADV_NOT_CONFIG, \ "neighbor discovery not configured") \ - _ (ROUTER_SOLICITATION_DEST_UNKNOWN, \ - "router solicitations for unknown destination") \ - _ (ROUTER_SOLICITATION_SOURCE_UNKNOWN, \ - "router solicitations for unknown source") \ _ (ROUTER_ADVERTISEMENT_SOURCE_NOT_LINK_LOCAL, \ "router advertisement source not link local") \ _ (ROUTER_ADVERTISEMENTS_TX, "router advertisements sent") \ diff --git a/src/vnet/ip/ip.api b/src/vnet/ip/ip.api index c811e465..f2444805 100644 --- a/src/vnet/ip/ip.api +++ b/src/vnet/ip/ip.api @@ -311,7 +311,6 @@ define sw_interface_ip6_enable_disable_reply @param context - sender context, to match reply w/ request @param sw_if_index - interface to set link local on @param address[] - the new link local address - @param address_length - link local address length */ define sw_interface_ip6_set_link_local_address { @@ -319,7 +318,6 @@ define sw_interface_ip6_set_link_local_address u32 context; u32 sw_if_index; u8 address[16]; - u8 address_length; }; /** \brief IPv6 set link local address on interface response diff --git a/src/vnet/ip/ip6.h b/src/vnet/ip/ip6.h index 586b7c1b..f493db01 100644 --- a/src/vnet/ip/ip6.h +++ b/src/vnet/ip/ip6.h @@ -376,8 +376,7 @@ int ip6_interface_enabled (vlib_main_t * vm, u32 sw_if_index); clib_error_t *set_ip6_link_local_address (vlib_main_t * vm, u32 sw_if_index, - ip6_address_t * address, - u8 address_length); + ip6_address_t * address); void vnet_register_ip6_neighbor_resolution_event (vnet_main_t * vnm, void *address_arg, diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c index b5c79552..866a44e6 100644 --- a/src/vnet/ip/ip6_forward.c +++ b/src/vnet/ip/ip6_forward.c @@ -404,8 +404,10 @@ ip6_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable) } else { - ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0); - if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index]) + /* The ref count is 0 when an address is removed from an interface that has + * no address - this is not a ciritical error */ + if (0 == im->ip_enabled_by_sw_if_index[sw_if_index] || + 0 != --im->ip_enabled_by_sw_if_index[sw_if_index]) return; } diff --git a/src/vnet/ip/ip6_neighbor.c b/src/vnet/ip/ip6_neighbor.c index 5a1c9e86..46c0e316 100644 --- a/src/vnet/ip/ip6_neighbor.c +++ b/src/vnet/ip/ip6_neighbor.c @@ -155,8 +155,6 @@ typedef struct /* Link local address to use (defaults to underlying physical for logical interfaces */ ip6_address_t link_local_address; - u8 link_local_prefix_len; - } ip6_radv_t; typedef struct @@ -1316,7 +1314,8 @@ icmp6_router_solicitation (vlib_main_t * vm, /* for solicited adverts - need 
to rate limit */ if (is_solicitation) { - if ((now - radv_info->last_radv_time) < + if (0 != radv_info->last_radv_time && + (now - radv_info->last_radv_time) < MIN_DELAY_BETWEEN_RAS) is_dropped = 1; else @@ -1523,16 +1522,6 @@ icmp6_router_solicitation (vlib_main_t * vm, error0 = ICMP6_ERROR_DST_LOOKUP_MISS; else { - ip_adjacency_t *adj0 = - ip_get_adjacency (&im->lookup_main, - adj_index0); - error0 = - ((adj0->rewrite_header.sw_if_index != - sw_if_index0 - || adj0->lookup_next_index != - IP_LOOKUP_NEXT_REWRITE) ? - ICMP6_ERROR_ROUTER_SOLICITATION_DEST_UNKNOWN - : error0); next0 = is_dropped ? next0 : ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_RW; @@ -2022,7 +2011,6 @@ ip6_neighbor_sw_interface_add_del (vnet_main_t * vnm, /* fill in default link-local address (this may be overridden) */ ip6_link_local_address_from_ethernet_address (&a->link_local_address, eth_if0->address); - a->link_local_prefix_len = 64; mhash_init (&a->address_to_prefix_index, sizeof (uword), sizeof (ip6_address_t)); @@ -3266,9 +3254,7 @@ disable_ip6_interface (vlib_main_t * vm, u32 sw_if_index) /* essentially "disables" ipv6 on this interface */ error = ip6_add_del_interface_address (vm, sw_if_index, &radv_info-> - link_local_address, - radv_info-> - link_local_prefix_len, + link_local_address, 128, 1 /* is_del */ ); ip6_neighbor_sw_interface_add_del (vnm, sw_if_index, @@ -3372,7 +3358,6 @@ enable_ip6_interface (vlib_main_t * vm, u32 sw_if_index) else { radv_info->link_local_address = link_local_address; - radv_info->link_local_prefix_len = 64; } } } @@ -3585,8 +3570,7 @@ VLIB_CLI_COMMAND (ip6_nd_command, static) = clib_error_t * set_ip6_link_local_address (vlib_main_t * vm, - u32 sw_if_index, - ip6_address_t * address, u8 address_length) + u32 sw_if_index, ip6_address_t * address) { clib_error_t *error = 0; ip6_neighbor_main_t *nm = &ip6_neighbor_main; @@ -3615,22 +3599,18 @@ set_ip6_link_local_address (vlib_main_t * vm, /* delete the old one */ error = ip6_add_del_interface_address (vm, sw_if_index, &radv_info->link_local_address, - radv_info->link_local_prefix_len - /* address width */ , - 1 /* is_del */ ); + 128, 1 /* is_del */ ); if (!error) { /* add the new one */ error = ip6_add_del_interface_address (vm, sw_if_index, - address, address_length - /* address width */ , + address, 128, 0 /* is_del */ ); if (!error) { radv_info->link_local_address = *address; - radv_info->link_local_prefix_len = address_length; } } } @@ -3652,21 +3632,19 @@ set_ip6_link_local_address_cmd (vlib_main_t * vm, clib_error_t *error = 0; u32 sw_if_index; ip6_address_t ip6_addr; - u32 addr_len = 0; if (unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) { /* get the rest of the command */ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { - if (unformat (input, "%U/%d", - unformat_ip6_address, &ip6_addr, &addr_len)) + if (unformat (input, "%U", unformat_ip6_address, &ip6_addr)) break; else return (unformat_parse_error (input)); } } - error = set_ip6_link_local_address (vm, sw_if_index, &ip6_addr, addr_len); + error = set_ip6_link_local_address (vm, sw_if_index, &ip6_addr); return error; } @@ -3678,13 +3656,13 @@ set_ip6_link_local_address_cmd (vlib_main_t * vm, * * @cliexpar * Example of how to assign an IPv6 Link-local address to an interface: - * @cliexcmd{set ip6 link-local address GigabitEthernet2/0/0 FE80::AB8/64} + * @cliexcmd{set ip6 link-local address GigabitEthernet2/0/0 FE80::AB8} ?*/ /* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_ip6_link_local_address_command, static) = { .path = "set ip6 link-local address", 
- .short_help = "set ip6 link-local address /", + .short_help = "set ip6 link-local address ", .function = set_ip6_link_local_address_cmd, }; /* *INDENT-ON* */ diff --git a/src/vnet/ip/ip_api.c b/src/vnet/ip/ip_api.c index cd9b7397..aafde464 100644 --- a/src/vnet/ip/ip_api.c +++ b/src/vnet/ip/ip_api.c @@ -1132,8 +1132,7 @@ static void error = set_ip6_link_local_address (vm, ntohl (mp->sw_if_index), - (ip6_address_t *) mp->address, - mp->address_length); + (ip6_address_t *) mp->address); if (error) { clib_error_report (error); diff --git a/src/vnet/rewrite.c b/src/vnet/rewrite.c index 53d548bc..8925ad61 100644 --- a/src/vnet/rewrite.c +++ b/src/vnet/rewrite.c @@ -79,8 +79,11 @@ format_vnet_rewrite (u8 * s, va_list * args) if (rw->sw_if_index != ~0) { vnet_sw_interface_t *si; - si = vnet_get_sw_interface (vnm, rw->sw_if_index); - s = format (s, "%U: ", format_vnet_sw_interface_name, vnm, si); + si = vnet_get_sw_interface_safe (vnm, rw->sw_if_index); + if (NULL != si) + s = format (s, "%U: ", format_vnet_sw_interface_name, vnm, si); + else + s = format (s, "DELETED"); } else s = format (s, "%v: ", next->name); diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c index c2cd3d15..f14a031d 100644 --- a/src/vpp/api/custom_dump.c +++ b/src/vpp/api/custom_dump.c @@ -911,8 +911,7 @@ static void *vl_api_sw_interface_ip6_set_link_local_address_t_print s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); - s = format (s, "%U/%d ", format_ip6_address, mp->address, - mp->address_length); + s = format (s, "%U ", format_ip6_address, mp->address); FINISH; } diff --git a/src/vpp/api/test_client.c b/src/vpp/api/test_client.c index 5c568950..ceafc357 100644 --- a/src/vpp/api/test_client.c +++ b/src/vpp/api/test_client.c @@ -1196,8 +1196,6 @@ ip6_set_link_local_address (test_main_t * tm) clib_memcpy (mp->address, &tmp[0], 8); clib_memcpy (&mp->address[8], &tmp[1], 8); - mp->address_length = 64; - mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_IP6_SET_LINK_LOCAL_ADDRESS); vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp); diff --git a/test/test_ip6.py b/test/test_ip6.py index e8b12f68..cd9c4b95 100644 --- a/test/test_ip6.py +++ b/test/test_ip6.py @@ -8,8 +8,18 @@ from vpp_sub_interface import VppSubInterface, VppDot1QSubint from scapy.packet import Raw from scapy.layers.l2 import Ether, Dot1Q -from scapy.layers.inet6 import IPv6, UDP +from scapy.layers.inet6 import IPv6, UDP, ICMPv6ND_NS, ICMPv6ND_RS, ICMPv6ND_RA, \ + ICMPv6NDOptSrcLLAddr, getmacbyip6, ICMPv6MRD_Solicitation from util import ppp +from scapy.utils6 import in6_getnsma, in6_getnsmac, in6_ptop, in6_islladdr, \ + in6_mactoifaceid +from scapy.utils import inet_pton, inet_ntop + + +def mk_ll_addr(mac): + euid = in6_mactoifaceid(mac) + addr = "fe80::" + euid + return addr class TestIPv6(VppTestCase): @@ -76,6 +86,12 @@ class TestIPv6(VppTestCase): def tearDown(self): """Run standard test teardown and log ``show ip6 neighbors``.""" + for i in self.sub_interfaces: + i.unconfig_ip6() + i.ip6_disable() + i.admin_down() + i.remove_vpp_config() + super(TestIPv6, self).tearDown() if not self.vpp_dead: self.logger.info(self.vapi.cli("show ip6 neighbors")) @@ -206,6 +222,225 @@ class TestIPv6(VppTestCase): pkts = i.parent.get_capture() self.verify_capture(i, pkts) + def send_and_assert_no_replies(self, intf, pkts, remark): + intf.add_stream(pkts) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + intf.assert_nothing_captured(remark=remark) + + def test_ns(self): + """ IPv6 Neighbour Soliciatation Exceptions + + Test 
scenario: + - Send an NS sourced from an address not covered by the link sub-net + - Send an NS to an mcast address the router has not joined + - Send NS for a target address the router does not own. + """ + + # + # An NS from a non link source address + # + nsma = in6_getnsma(inet_pton(socket.AF_INET6, self.pg0.local_ip6)) + d = inet_ntop(socket.AF_INET6, nsma) + + p = (Ether(dst=in6_getnsmac(nsma)) / + IPv6(dst=d, src="2002::2") / + ICMPv6ND_NS(tgt=self.pg0.local_ip6) / + ICMPv6NDOptSrcLLAddr(lladdr=self.pg0.remote_mac)) + pkts = [p] + + self.send_and_assert_no_replies(self.pg0, pkts, + "No response to NS sourced by address not on sub-net") + + # + # An NS sent to a solicited mcast group the router is not a member of + # FAILS + # + if 0: + nsma = in6_getnsma(inet_pton(socket.AF_INET6, "fd::ffff")) + d = inet_ntop(socket.AF_INET6, nsma) + + p = (Ether(dst=in6_getnsmac(nsma)) / + IPv6(dst=d, src=self.pg0.remote_ip6) / + ICMPv6ND_NS(tgt=self.pg0.local_ip6) / + ICMPv6NDOptSrcLLAddr(lladdr=self.pg0.remote_mac)) + pkts = [p] + + self.send_and_assert_no_replies(self.pg0, pkts, + "No response to NS sent to unjoined mcast address") + + # + # An NS whose target address is one the router does not own + # + nsma = in6_getnsma(inet_pton(socket.AF_INET6, self.pg0.local_ip6)) + d = inet_ntop(socket.AF_INET6, nsma) + + p = (Ether(dst=in6_getnsmac(nsma)) / + IPv6(dst=d, src=self.pg0.remote_ip6) / + ICMPv6ND_NS(tgt="fd::ffff") / + ICMPv6NDOptSrcLLAddr(lladdr=self.pg0.remote_mac)) + pkts = [p] + + self.send_and_assert_no_replies(self.pg0, pkts, + "No response to NS for unknown target") + + def send_and_expect_ra(self, intf, pkts, remark, src_ip=None): + if not src_ip: + src_ip = intf.remote_ip6 + intf.add_stream(pkts) + self.pg0.add_stream(pkts) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + rx = intf.get_capture(1) + + self.assertEqual(len(rx), 1) + rx = rx[0] + + # the rx'd RA should be addressed to the sender's source + self.assertTrue(rx.haslayer(ICMPv6ND_RA)) + self.assertEqual(in6_ptop(rx[IPv6].dst), + in6_ptop(src_ip)) + + # and come from the router's link local + self.assertTrue(in6_islladdr(rx[IPv6].src)) + self.assertEqual(in6_ptop(rx[IPv6].src), + in6_ptop(mk_ll_addr(intf.local_mac))) + + def test_rs(self): + """ IPv6 Router Solicitation Exceptions + + Test scenario: + """ + + # + # Before we begin, change the IPv6 RA responses to use the unicast address + # that way we will not confuse them with the periodic RAs which go to the Mcast + # address + # + self.pg0.ip6_ra_config(send_unicast=1) + + # + # An RS from a link source address + # - expect an RA in return + # + p = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) / + IPv6(dst=self.pg0.local_ip6, src=self.pg0.remote_ip6) / + ICMPv6ND_RS()) + pkts = [p] + self.send_and_expect_ra(self.pg0, pkts, "Genuine RS") + + # + # For the next RS sent the RA should be rate limited + # + self.send_and_assert_no_replies(self.pg0, pkts, "RA rate limited") + + # + # When we reconfigure the IPv6 RA config, we reset the RA rate limiting, + # so we need to do this before each test below so as not to drop packets for + # rate limiting reasons. Test this works here.
+ # + self.pg0.ip6_ra_config(send_unicast=1) + self.send_and_expect_ra(self.pg0, pkts, "Rate limit reset RS") + + # + # An RS sent from a non-link local source + # + self.pg0.ip6_ra_config(send_unicast=1) + p = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) / + IPv6(dst=self.pg0.local_ip6, src="2002::ffff") / + ICMPv6ND_RS()) + pkts = [p] + self.send_and_assert_no_replies(self.pg0, pkts, + "RS from non-link source") + + # + # Source an RS from a link local address + # + self.pg0.ip6_ra_config(send_unicast=1) + ll = mk_ll_addr(self.pg0.remote_mac) + p = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) / + IPv6(dst=self.pg0.local_ip6, src=ll) / + ICMPv6ND_RS()) + pkts = [p] + self.send_and_expect_ra( + self.pg0, pkts, "RS sourced from link-local", src_ip=ll) + + # + # Source from the unspecified address ::. This happens when the RS is sent before + # the host has a configured address/sub-net, i.e. auto-config. + # Since the sender has no IP address, the reply comes back mcast - so the + # capture needs to not filter this. + # If we happen to pick up the periodic RA at this point then so be it, it's not + # an error. + # + self.pg0.ip6_ra_config(send_unicast=1) + p = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) / + IPv6(dst=self.pg0.local_ip6, src="::") / + ICMPv6ND_RS()) + pkts = [p] + + self.pg0.add_stream(pkts) + self.pg0.add_stream(pkts) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + capture = self.pg0.get_capture(1, filter_out_fn=None) + found = 0 + for rx in capture: + if (rx.haslayer(ICMPv6ND_RA)): + # and come from the router's link local + self.assertTrue(in6_islladdr(rx[IPv6].src)) + self.assertEqual(in6_ptop(rx[IPv6].src), + in6_ptop(mk_ll_addr(self.pg0.local_mac))) + # sent to the all hosts mcast + self.assertEqual(in6_ptop(rx[IPv6].dst), "ff02::1") + + found = 1 + self.assertTrue(found) + + @unittest.skip("Unsupported") + def test_mrs(self): + """ IPv6 Multicast Router Solicitation Exceptions + + Test scenario: + """ + + # + # An RS from a link source address + # - expect an RA in return + # + nsma = in6_getnsma(inet_pton(socket.AF_INET6, self.pg0.local_ip6)) + d = inet_ntop(socket.AF_INET6, nsma) + + p = (Ether(dst=getmacbyip6("ff02::2")) / + IPv6(dst=d, src=self.pg0.remote_ip6) / + ICMPv6MRD_Solicitation()) + pkts = [p] + + self.pg0.add_stream(pkts) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + self.pg0.assert_nothing_captured( + remark="No response to NS sourced by address not on sub-net") + + # + # An RS from a non link source address + # + nsma = in6_getnsma(inet_pton(socket.AF_INET6, self.pg0.local_ip6)) + d = inet_ntop(socket.AF_INET6, nsma) + + p = (Ether(dst=getmacbyip6("ff02::2")) / + IPv6(dst=d, src="2002::2") / + ICMPv6MRD_Solicitation()) + pkts = [p] + + self.send_and_assert_no_replies(self.pg0, pkts, + "RA rate limited") + self.pg0.add_stream(pkts) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + self.pg0.assert_nothing_captured( + remark="No response to NS sourced by address not on sub-net") + if __name__ == '__main__': unittest.main(testRunner=VppTestRunner) diff --git a/test/vpp_interface.py b/test/vpp_interface.py index 6ccb92bb..856a5cc2 100644 --- a/test/vpp_interface.py +++ b/test/vpp_interface.py @@ -249,6 +249,12 @@ class VppInterface(object): """Configure IPv6 RA suppress on the VPP interface.""" self.test.vapi.sw_interface_ra_suppress(self.sw_if_index) + def ip6_ra_config(self, suppress=0, send_unicast=0): + """Configure IPv6 RA options on the VPP interface.""" +
self.test.vapi.ip6_sw_interface_ra_config(self.sw_if_index, + suppress, + send_unicast) + def admin_up(self): """Put interface ADMIN-UP.""" self.test.vapi.sw_interface_set_flags(self.sw_if_index, admin_up_down=1) @@ -257,6 +263,14 @@ class VppInterface(object): """Put interface ADMIN-down.""" self.test.vapi.sw_interface_set_flags(self.sw_if_index, admin_up_down=0) + def ip6_enable(self): + """Enable IPv6 on this interface.""" + self.test.vapi.ip6_sw_interface_enable_disable(self.sw_if_index, enable=1) + + def ip6_disable(self): + """Disable IPv6 on this interface.""" + self.test.vapi.ip6_sw_interface_enable_disable(self.sw_if_index, enable=0) + def add_sub_if(self, sub_if): """Register a sub-interface with this interface. diff --git a/test/vpp_papi_provider.py b/test/vpp_papi_provider.py index db530d98..0e4c0cdb 100644 --- a/test/vpp_papi_provider.py +++ b/test/vpp_papi_provider.py @@ -219,6 +219,22 @@ class VppPapiProvider(object): return self.api(self.papi.sw_interface_ip6nd_ra_config, {'sw_if_index': sw_if_index}) + def ip6_sw_interface_ra_config(self, sw_if_index, + suppress, + send_unicast,): + return self.api(self.papi.sw_interface_ip6nd_ra_config, + {'sw_if_index': sw_if_index, + 'suppress' : suppress, + 'send_unicast' : send_unicast}) + + def ip6_sw_interface_enable_disable(self, sw_if_index, enable): + """ + Enable/Disable an interface for IPv6 + """ + return self.api(self.papi.sw_interface_ip6_enable_disable, + {'sw_if_index': sw_if_index, + 'enable': enable}) + def vxlan_add_del_tunnel( self, src_addr, diff --git a/test/vpp_pg_interface.py b/test/vpp_pg_interface.py index aef0052e..b5929a4b 100644 --- a/test/vpp_pg_interface.py +++ b/test/vpp_pg_interface.py @@ -10,13 +10,14 @@ from scapy.layers.inet6 import IPv6, ICMPv6ND_NS, ICMPv6ND_NA,\ ICMPv6NDOptSrcLLAddr, ICMPv6NDOptDstLLAddr, ICMPv6ND_RA, RouterAlert, \ IPv6ExtHdrHopByHop from util import ppp, ppc -from scapy.utils6 import in6_getnsma, in6_getnsmac +from scapy.utils6 import in6_getnsma, in6_getnsmac, in6_ismaddr from scapy.utils import inet_pton, inet_ntop def is_ipv6_misc(p): """ Is packet one of uninteresting IPv6 broadcasts?
""" if p.haslayer(ICMPv6ND_RA): - return True + if in6_ismaddr(p[IPv6].dst): + return True if p.haslayer(IPv6ExtHdrHopByHop): for o in p[IPv6ExtHdrHopByHop].options: if isinstance(o, RouterAlert): -- cgit 1.2.3-korg From 0f971d8c22adf89d3f8592ac0d207727f2b1a23a Mon Sep 17 00:00:00 2001 From: Pavel Kotucek Date: Tue, 3 Jan 2017 10:48:54 +0100 Subject: API refactoring : l2, mpls, sr Change-Id: Ic5f273dae607a1d3902489e65734c76f027dc30f Signed-off-by: Pavel Kotucek --- src/vnet.am | 15 +- src/vnet/l2/l2.api | 229 +++++++++++ src/vnet/l2/l2_api.c | 373 +++++++++++++++++- src/vnet/mpls/mpls.api | 246 ++++++++++++ src/vnet/mpls/mpls_api.c | 497 ++++++++++++++++++++++++ src/vnet/sr/sr.api | 119 ++++++ src/vnet/sr/sr_api.c | 279 ++++++++++++++ src/vnet/vnet_all_api_h.h | 2 + src/vpp/api/api.c | 947 +--------------------------------------------- src/vpp/api/vpe.api | 574 +--------------------------- 10 files changed, 1770 insertions(+), 1511 deletions(-) create mode 100644 src/vnet/mpls/mpls.api create mode 100644 src/vnet/mpls/mpls_api.c create mode 100644 src/vnet/sr/sr.api create mode 100644 src/vnet/sr/sr_api.c (limited to 'src/vpp/api') diff --git a/src/vnet.am b/src/vnet.am index bc0820a3..bca56227 100644 --- a/src/vnet.am +++ b/src/vnet.am @@ -498,15 +498,18 @@ libvnet_la_SOURCES += \ vnet/mpls/node.c \ vnet/mpls/interface.c \ vnet/mpls/mpls_tunnel.c \ - vnet/mpls/pg.c + vnet/mpls/pg.c \ + vnet/mpls/mpls_api.c nobase_include_HEADERS += \ vnet/mpls/mpls.h \ vnet/mpls/mpls_types.h \ vnet/mpls/mpls_tunnel.h \ vnet/mpls/packet.h \ - vnet/mpls/error.def + vnet/mpls/error.def \ + vnet/mpls/mpls.api.h +API_FILES += vnet/mpls/mpls.api ######################################## # Tunnel protocol: vxlan-gpe @@ -666,13 +669,17 @@ nobase_include_HEADERS += \ if WITH_IPV6SR libvnet_la_SOURCES += \ vnet/sr/sr.c \ - vnet/sr/sr_replicate.c + vnet/sr/sr_replicate.c \ + vnet/sr/sr_api.c endif nobase_include_HEADERS += \ vnet/sr/sr_packet.h \ vnet/sr/sr_error.def \ - vnet/sr/sr.h + vnet/sr/sr.h \ + vnet/sr/sr.api.h + +API_FILES += vnet/sr/sr.api ######################################## # DHCPv6 proxy diff --git a/src/vnet/l2/l2.api b/src/vnet/l2/l2.api index 5fce7944..5b24f259 100644 --- a/src/vnet/l2/l2.api +++ b/src/vnet/l2/l2.api @@ -36,3 +36,232 @@ define l2_xconnect_dump u32 context; }; +/** \brief l2 fib table entry structure + @param bd_id - the l2 fib / bridge domain table id + @param mac - the entry's mac address + @param sw_if_index - index of the interface + @param static_mac - the entry is statically configured. + @param filter_mac - the entry is a mac filter entry. 
+ @param bvi_mac - the mac address is a bridge virtual interface +*/ +define l2_fib_table_entry +{ + u32 context; + u32 bd_id; + u64 mac; + u32 sw_if_index; + u8 static_mac; + u8 filter_mac; + u8 bvi_mac; +}; + +/** \brief Dump l2 fib (aka bridge domain) table + @param client_index - opaque cookie to identify the sender + @param bd_id - the l2 fib / bridge domain table identifier +*/ +define l2_fib_table_dump +{ + u32 client_index; + u32 context; + u32 bd_id; +}; + +/** \brief L2 fib clear table request, clear all mac entries in the l2 fib + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define l2_fib_clear_table +{ + u32 client_index; + u32 context; +}; + +/** \brief L2 fib clear table response + @param context - sender context, to match reply w/ request + @param retval - return code for the request +*/ +define l2_fib_clear_table_reply +{ + u32 context; + i32 retval; +}; + +/** \brief L2 FIB add entry request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param mac - the entry's mac address + @param bd_id - the entry's bridge domain id + @param sw_if_index - the interface + @param is_add - If non zero add the entry, else delete it + @param static_mac - + @param filter_mac - +*/ +define l2fib_add_del +{ + u32 client_index; + u32 context; + u64 mac; + u32 bd_id; + u32 sw_if_index; + u8 is_add; + u8 static_mac; + u8 filter_mac; + u8 bvi_mac; +}; + +/** \brief L2 FIB add entry response + @param context - sender context, to match reply w/ request + @param retval - return code for the add l2fib entry request +*/ +define l2fib_add_del_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Set L2 flags request !!! 
TODO - need more info, feature bits in l2_input.h + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - interface + @param is_set - if non-zero, set the bits, else clear them + @param feature_bitmap - non-zero bits to set or clear +*/ +define l2_flags +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u8 is_set; + u32 feature_bitmap; +}; + +/** \brief Set L2 bits response + @param context - sender context, to match reply w/ request + @param retval - return code for the set l2 bits request +*/ +define l2_flags_reply +{ + u32 context; + i32 retval; + u32 resulting_feature_bitmap; +}; + +/** \brief L2 bridge domain add or delete request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param bd_id - the bridge domain to create + @param flood - enable/disable bcast/mcast flooding in the bd + @param uu_flood - enable/disable uknown unicast flood in the bd + @param forward - enable/disable forwarding on all interfaces in the bd + @param learn - enable/disable learning on all interfaces in the bd + @param arp_term - enable/disable arp termination in the bd + @param mac_age - mac aging time in min, 0 for disabled + @param is_add - add or delete flag +*/ +define bridge_domain_add_del +{ + u32 client_index; + u32 context; + u32 bd_id; + u8 flood; + u8 uu_flood; + u8 forward; + u8 learn; + u8 arp_term; + u8 mac_age; + u8 is_add; +}; + +/** \brief L2 bridge domain add or delete response + @param context - sender context, to match reply w/ request + @param retval - return code for the set bridge flags request +*/ +define bridge_domain_add_del_reply +{ + u32 context; + i32 retval; +}; + +/** \brief L2 bridge domain request operational state details + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param bd_id - the bridge domain id desired or ~0 to request all bds +*/ +define bridge_domain_dump +{ + u32 client_index; + u32 context; + u32 bd_id; +}; + +/** \brief L2 bridge domain operational state response + @param bd_id - the bridge domain id + @param flood - bcast/mcast flooding state on all interfaces in the bd + @param uu_flood - uknown unicast flooding state on all interfaces in the bd + @param forward - forwarding state on all interfaces in the bd + @param learn - learning state on all interfaces in the bd + @param arp_term - arp termination state on all interfaces in the bd + @param mac_age - mac aging time in min, 0 for disabled + @param n_sw_ifs - number of sw_if_index's in the domain +*/ +define bridge_domain_details +{ + u32 context; + u32 bd_id; + u8 flood; + u8 uu_flood; + u8 forward; + u8 learn; + u8 arp_term; + u8 mac_age; + u32 bvi_sw_if_index; + u32 n_sw_ifs; +}; + +/** \brief L2 bridge domain sw interface operational state response + @param bd_id - the bridge domain id + @param sw_if_index - sw_if_index in the domain + @param shg - split horizon group for the interface +*/ +define bridge_domain_sw_if_details +{ + u32 context; + u32 bd_id; + u32 sw_if_index; + u8 shg; +}; + +/** \brief Set bridge flags (such as L2_LEARN, L2_FWD, L2_FLOOD, + L2_UU_FLOOD, or L2_ARP_TERM) request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param bd_id - the bridge domain to set the flags for + @param is_set - if non-zero, set the flags, else clear them + @param feature_bitmap - bits 
that are non-zero to set or clear +*/ +define bridge_flags +{ + u32 client_index; + u32 context; + u32 bd_id; + u8 is_set; + u32 feature_bitmap; +}; + +/** \brief Set bridge flags response + @param context - sender context, to match reply w/ request + @param retval - return code for the set bridge flags request + @param resulting_feature_bitmap - the feature bitmap value after the request is implemented +*/ +define bridge_flags_reply +{ + u32 context; + i32 retval; + u32 resulting_feature_bitmap; +}; + +/* + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/l2/l2_api.c b/src/vnet/l2/l2_api.c index ca4f593f..ef33509c 100644 --- a/src/vnet/l2/l2_api.c +++ b/src/vnet/l2/l2_api.c @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -42,8 +43,18 @@ #include -#define foreach_vpe_api_msg \ -_(L2_XCONNECT_DUMP, l2_xconnect_dump) +#define foreach_vpe_api_msg \ +_(L2_XCONNECT_DUMP, l2_xconnect_dump) \ +_(L2_FIB_CLEAR_TABLE, l2_fib_clear_table) \ +_(L2_FIB_TABLE_DUMP, l2_fib_table_dump) \ +_(L2_FIB_TABLE_ENTRY, l2_fib_table_entry) \ +_(L2FIB_ADD_DEL, l2fib_add_del) \ +_(L2_FLAGS, l2_flags) \ +_(BRIDGE_DOMAIN_ADD_DEL, bridge_domain_add_del) \ +_(BRIDGE_DOMAIN_DUMP, bridge_domain_dump) \ +_(BRIDGE_DOMAIN_DETAILS, bridge_domain_details) \ +_(BRIDGE_DOMAIN_SW_IF_DETAILS, bridge_domain_sw_if_details) \ +_(BRIDGE_FLAGS, bridge_flags) static void send_l2_xconnect_details (unix_shared_memory_queue_t * q, u32 context, @@ -86,9 +97,365 @@ vl_api_l2_xconnect_dump_t_handler (vl_api_l2_xconnect_dump_t * mp) /* *INDENT-ON* */ } +static void +vl_api_l2_fib_clear_table_t_handler (vl_api_l2_fib_clear_table_t * mp) +{ + int rv = 0; + vl_api_l2_fib_clear_table_reply_t *rmp; + + /* DAW-FIXME: This API should only clear non-static l2fib entries, but + * that is not currently implemented. When that TODO is fixed + * this call should be changed to pass 1 instead of 0. 
+ */ + l2fib_clear_table (0); + + REPLY_MACRO (VL_API_L2_FIB_CLEAR_TABLE_REPLY); +} + +static void +send_l2fib_table_entry (vpe_api_main_t * am, + unix_shared_memory_queue_t * q, + l2fib_entry_key_t * l2fe_key, + l2fib_entry_result_t * l2fe_res, u32 context) +{ + vl_api_l2_fib_table_entry_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_L2_FIB_TABLE_ENTRY); + + mp->bd_id = + ntohl (l2input_main.bd_configs[l2fe_key->fields.bd_index].bd_id); + + mp->mac = l2fib_make_key (l2fe_key->fields.mac, 0); + mp->sw_if_index = ntohl (l2fe_res->fields.sw_if_index); + mp->static_mac = l2fe_res->fields.static_mac; + mp->filter_mac = l2fe_res->fields.filter; + mp->bvi_mac = l2fe_res->fields.bvi; + mp->context = context; + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +static void +vl_api_l2_fib_table_entry_t_handler (vl_api_l2_fib_table_entry_t * mp) +{ + clib_warning ("BUG"); +} + +static void +vl_api_l2_fib_table_dump_t_handler (vl_api_l2_fib_table_dump_t * mp) +{ + vpe_api_main_t *am = &vpe_api_main; + bd_main_t *bdm = &bd_main; + l2fib_entry_key_t *l2fe_key = NULL; + l2fib_entry_result_t *l2fe_res = NULL; + u32 ni, bd_id = ntohl (mp->bd_id); + u32 bd_index; + unix_shared_memory_queue_t *q; + uword *p; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + /* see l2fib_table_dump: ~0 means "any" */ + if (bd_id == ~0) + bd_index = ~0; + else + { + p = hash_get (bdm->bd_index_by_bd_id, bd_id); + if (p == 0) + return; + + bd_index = p[0]; + } + + l2fib_table_dump (bd_index, &l2fe_key, &l2fe_res); + + vec_foreach_index (ni, l2fe_key) + { + send_l2fib_table_entry (am, q, vec_elt_at_index (l2fe_key, ni), + vec_elt_at_index (l2fe_res, ni), mp->context); + } + vec_free (l2fe_key); + vec_free (l2fe_res); +} + +static void +vl_api_l2fib_add_del_t_handler (vl_api_l2fib_add_del_t * mp) +{ + bd_main_t *bdm = &bd_main; + l2input_main_t *l2im = &l2input_main; + vl_api_l2fib_add_del_reply_t *rmp; + int rv = 0; + u64 mac = 0; + u32 sw_if_index = ntohl (mp->sw_if_index); + u32 bd_id = ntohl (mp->bd_id); + u32 bd_index; + u32 static_mac; + u32 filter_mac; + u32 bvi_mac; + uword *p; + + mac = mp->mac; + + p = hash_get (bdm->bd_index_by_bd_id, bd_id); + if (!p) + { + rv = VNET_API_ERROR_NO_SUCH_ENTRY; + goto bad_sw_if_index; + } + bd_index = p[0]; + + if (mp->is_add) + { + filter_mac = mp->filter_mac ? 1 : 0; + if (filter_mac == 0) + { + VALIDATE_SW_IF_INDEX (mp); + if (vec_len (l2im->configs) <= sw_if_index) + { + rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; + goto bad_sw_if_index; + } + else + { + l2_input_config_t *config; + config = vec_elt_at_index (l2im->configs, sw_if_index); + if (config->bridge == 0) + { + rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; + goto bad_sw_if_index; + } + } + } + static_mac = mp->static_mac ? 1 : 0; + bvi_mac = mp->bvi_mac ? 
1 : 0; + l2fib_add_entry (mac, bd_index, sw_if_index, static_mac, filter_mac, + bvi_mac); + } + else + { + l2fib_del_entry (mac, bd_index); + } + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_L2FIB_ADD_DEL_REPLY); +} + +static void +vl_api_l2_flags_t_handler (vl_api_l2_flags_t * mp) +{ + vl_api_l2_flags_reply_t *rmp; + int rv = 0; + u32 sw_if_index = ntohl (mp->sw_if_index); + u32 flags = ntohl (mp->feature_bitmap); + u32 rbm = 0; + + VALIDATE_SW_IF_INDEX (mp); + +#define _(a,b) \ + if (flags & L2INPUT_FEAT_ ## a) \ + rbm = l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_ ## a, mp->is_set); + foreach_l2input_feat; +#undef _ + + BAD_SW_IF_INDEX_LABEL; + + /* *INDENT-OFF* */ + REPLY_MACRO2(VL_API_L2_FLAGS_REPLY, + ({ + rmp->resulting_feature_bitmap = ntohl(rbm); + })); + /* *INDENT-ON* */ +} + +static void +vl_api_bridge_domain_add_del_t_handler (vl_api_bridge_domain_add_del_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + bd_main_t *bdm = &bd_main; + vl_api_bridge_domain_add_del_reply_t *rmp; + int rv = 0; + u32 enable_flags = 0, disable_flags = 0; + u32 bd_id = ntohl (mp->bd_id); + u32 bd_index; + + if (mp->is_add) + { + bd_index = bd_find_or_add_bd_index (bdm, bd_id); + + if (mp->flood) + enable_flags |= L2_FLOOD; + else + disable_flags |= L2_FLOOD; + + if (mp->uu_flood) + enable_flags |= L2_UU_FLOOD; + else + disable_flags |= L2_UU_FLOOD; + + if (mp->forward) + enable_flags |= L2_FWD; + else + disable_flags |= L2_FWD; + + if (mp->arp_term) + enable_flags |= L2_ARP_TERM; + else + disable_flags |= L2_ARP_TERM; + + if (mp->learn) + enable_flags |= L2_LEARN; + else + disable_flags |= L2_LEARN; + + if (enable_flags) + bd_set_flags (vm, bd_index, enable_flags, 1 /* enable */ ); + + if (disable_flags) + bd_set_flags (vm, bd_index, disable_flags, 0 /* disable */ ); + + bd_set_mac_age (vm, bd_index, mp->mac_age); + } + else + rv = bd_delete_bd_index (bdm, bd_id); + + REPLY_MACRO (VL_API_BRIDGE_DOMAIN_ADD_DEL_REPLY); +} + +static void +vl_api_bridge_domain_details_t_handler (vl_api_bridge_domain_details_t * mp) +{ + clib_warning ("BUG"); +} + +static void + vl_api_bridge_domain_sw_if_details_t_handler + (vl_api_bridge_domain_sw_if_details_t * mp) +{ + clib_warning ("BUG"); +} + +static void +send_bridge_domain_details (unix_shared_memory_queue_t * q, + l2_bridge_domain_t * bd_config, + u32 n_sw_ifs, u32 context) +{ + vl_api_bridge_domain_details_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_BRIDGE_DOMAIN_DETAILS); + mp->bd_id = ntohl (bd_config->bd_id); + mp->flood = bd_feature_flood (bd_config); + mp->uu_flood = bd_feature_uu_flood (bd_config); + mp->forward = bd_feature_forward (bd_config); + mp->learn = bd_feature_learn (bd_config); + mp->arp_term = bd_feature_arp_term (bd_config); + mp->bvi_sw_if_index = ntohl (bd_config->bvi_sw_if_index); + mp->mac_age = bd_config->mac_age; + mp->n_sw_ifs = ntohl (n_sw_ifs); + mp->context = context; + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +static void +send_bd_sw_if_details (l2input_main_t * l2im, + unix_shared_memory_queue_t * q, + l2_flood_member_t * member, u32 bd_id, u32 context) +{ + vl_api_bridge_domain_sw_if_details_t *mp; + l2_input_config_t *input_cfg; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_BRIDGE_DOMAIN_SW_IF_DETAILS); + mp->bd_id = ntohl (bd_id); + mp->sw_if_index = ntohl (member->sw_if_index); + input_cfg = vec_elt_at_index (l2im->configs, member->sw_if_index); + mp->shg = input_cfg->shg; + 
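+  /* echo the sender's context so the client can match this
+   * bridge_domain_sw_if_details reply to its dump request */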
mp->context = context; + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +static void +vl_api_bridge_domain_dump_t_handler (vl_api_bridge_domain_dump_t * mp) +{ + bd_main_t *bdm = &bd_main; + l2input_main_t *l2im = &l2input_main; + unix_shared_memory_queue_t *q; + l2_bridge_domain_t *bd_config; + u32 bd_id, bd_index; + u32 end; + + q = vl_api_client_index_to_input_queue (mp->client_index); + + if (q == 0) + return; + + bd_id = ntohl (mp->bd_id); + + bd_index = (bd_id == ~0) ? 0 : bd_find_or_add_bd_index (bdm, bd_id); + end = (bd_id == ~0) ? vec_len (l2im->bd_configs) : bd_index + 1; + for (; bd_index < end; bd_index++) + { + bd_config = l2input_bd_config_from_index (l2im, bd_index); + /* skip dummy bd_id 0 */ + if (bd_config && (bd_config->bd_id > 0)) + { + u32 n_sw_ifs; + l2_flood_member_t *m; + + n_sw_ifs = vec_len (bd_config->members); + send_bridge_domain_details (q, bd_config, n_sw_ifs, mp->context); + + vec_foreach (m, bd_config->members) + { + send_bd_sw_if_details (l2im, q, m, bd_config->bd_id, mp->context); + } + } + } +} + +static void +vl_api_bridge_flags_t_handler (vl_api_bridge_flags_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + bd_main_t *bdm = &bd_main; + vl_api_bridge_flags_reply_t *rmp; + int rv = 0; + u32 bd_id = ntohl (mp->bd_id); + u32 bd_index; + u32 flags = ntohl (mp->feature_bitmap); + uword *p; + + p = hash_get (bdm->bd_index_by_bd_id, bd_id); + if (p == 0) + { + rv = VNET_API_ERROR_NO_SUCH_ENTRY; + goto out; + } + + bd_index = p[0]; + + bd_set_flags (vm, bd_index, flags, mp->is_set); + +out: + /* *INDENT-OFF* */ + REPLY_MACRO2(VL_API_BRIDGE_FLAGS_REPLY, + ({ + rmp->resulting_feature_bitmap = ntohl(flags); + })); + /* *INDENT-ON* */ +} /* - * vpe_api_hookup + * l2_api_hookup * Add vpe's API message handlers to the table. * vlib has alread mapped shared memory and * added the client registration handlers. diff --git a/src/vnet/mpls/mpls.api b/src/vnet/mpls/mpls.api new file mode 100644 index 00000000..2e3bfaf5 --- /dev/null +++ b/src/vnet/mpls/mpls.api @@ -0,0 +1,246 @@ +/* + * Copyright (c) 2015-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** \brief Bind/Unbind an MPLS local label to an IP prefix. i.e. create + a per-prefix label entry. + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param mb_mpls_table_id - The MPLS table-id the MPLS entry will be added in + @param mb_label - The MPLS label value to bind + @param mb_ip_table_id - The IP table-id of the IP prefix to bind to. + @param mb_create_table_if_needed - Create either/both tables if required. 
+ @param mb_is_bind - Bind or unbind + @param mb_is_ip4 - The prefix to bind to is IPv4 + @param mb_address_length - Length of IP prefix + @param mb_address[16] - IP prefix/ +*/ +define mpls_ip_bind_unbind +{ + u32 client_index; + u32 context; + u32 mb_mpls_table_id; + u32 mb_label; + u32 mb_ip_table_id; + u8 mb_create_table_if_needed; + u8 mb_is_bind; + u8 mb_is_ip4; + u8 mb_address_length; + u8 mb_address[16]; +}; + +/** \brief Reply for MPLS IP bind/unbind request + @param context - returned sender context, to match reply w/ request + @param retval - return code +*/ +define mpls_ip_bind_unbind_reply +{ + u32 context; + i32 retval; +}; + +/** \brief MPLS tunnel Add / del route + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param mt_is_add - Is this a route add or delete + @param mt_sw_if_index - The SW interface index of the tunnel to delete + @param mt_next_hop_proto_is_ip4 - The next-hop is IPV4 + @param mt_next_hop_weight - The weight, for UCMP + @param mt_next_hop[16] - the nextop address + @param mt_next_hop_sw_if_index - the next-hop SW interface + @param mt_next_hop_table_id - the next-hop table-id (if appropriate) + @param mt_next_hop_n_out_labels - the number of next-hop output labels + @param mt_next_hop_out_label_stack - the next-hop output label stack, outer most first +*/ +define mpls_tunnel_add_del +{ + u32 client_index; + u32 context; + u32 mt_sw_if_index; + u8 mt_is_add; + u8 mt_l2_only; + u8 mt_next_hop_proto_is_ip4; + u8 mt_next_hop_weight; + u8 mt_next_hop[16]; + u8 mt_next_hop_n_out_labels; + u32 mt_next_hop_sw_if_index; + u32 mt_next_hop_table_id; + u32 mt_next_hop_out_label_stack[mt_next_hop_n_out_labels]; +}; + +/** \brief Reply for MPLS tunnel add / del request + @param context - returned sender context, to match reply w/ request + @param retval - return code + @param sw_if_index - SW interface index of the tunnel created +*/ +define mpls_tunnel_add_del_reply +{ + u32 context; + i32 retval; + u32 sw_if_index; +}; + +/** \brief Dump mpls eth tunnel table + @param client_index - opaque cookie to identify the sender + @param tunnel_index - eth tunnel identifier or -1 in case of all tunnels +*/ +define mpls_tunnel_dump +{ + u32 client_index; + u32 context; + i32 tunnel_index; +}; + +/** \brief mpls eth tunnel operational state response + @param tunnel_index - eth tunnel identifier + @param intfc_address - interface ipv4 addr + @param mask_width - interface ipv4 addr mask + @param hw_if_index - interface id + @param l2_only - + @param tunnel_dst_mac - + @param tx_sw_if_index - + @param encap_index - reference to mpls label table + @param nlabels - number of resolved labels + @param labels - resolved labels +*/ +define mpls_tunnel_details +{ + u32 context; + u32 tunnel_index; + u8 mt_l2_only; + u8 mt_sw_if_index; + u8 mt_next_hop_proto_is_ip4; + u8 mt_next_hop[16]; + u32 mt_next_hop_sw_if_index; + u32 mt_next_hop_table_id; + u32 mt_next_hop_n_labels; + u32 mt_next_hop_out_labels[mt_next_hop_n_labels]; +}; + +/** \brief MPLS Route Add / del route + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param mr_label - The MPLS label value + @param mr_eos - The End of stack bit + @param mr_table_id - The MPLS table-id the route is added in + @param mr_classify_table_index - If this is a classify route, + this is the classify table index + @param mr_create_table_if_needed - If the MPLS or IP tables do not exist, + create them + 
@param mr_is_add - Is this a route add or delete + @param mr_is_classify - Is this route result a classify + @param mr_is_multipath - Is this route update a multipath - i.e. is this + a path addition to an existing route + @param mr_is_resolve_host - Recurse resolution constraint via a host prefix + @param mr_is_resolve_attached - Recurse resolution constraint via attached prefix + @param mr_next_hop_proto_is_ip4 - The next-hop is IPV4 + @param mr_next_hop_weight - The weight, for UCMP + @param mr_next_hop[16] - the nextop address + @param mr_next_hop_sw_if_index - the next-hop SW interface + @param mr_next_hop_table_id - the next-hop table-id (if appropriate) + @param mr_next_hop_n_out_labels - the number of labels in the label stack + @param mr_next_hop_out_label_stack - the next-hop output label stack, outer most first + @param next_hop_via_label - The next-hop is a resolved via a local label +*/ +define mpls_route_add_del +{ + u32 client_index; + u32 context; + u32 mr_label; + u8 mr_eos; + u32 mr_table_id; + u32 mr_classify_table_index; + u8 mr_create_table_if_needed; + u8 mr_is_add; + u8 mr_is_classify; + u8 mr_is_multipath; + u8 mr_is_resolve_host; + u8 mr_is_resolve_attached; + u8 mr_next_hop_proto_is_ip4; + u8 mr_next_hop_weight; + u8 mr_next_hop[16]; + u8 mr_next_hop_n_out_labels; + u32 mr_next_hop_sw_if_index; + u32 mr_next_hop_table_id; + u32 mr_next_hop_via_label; + u32 mr_next_hop_out_label_stack[mr_next_hop_n_out_labels]; +}; + +/** \brief Reply for MPLS route add / del request + @param context - returned sender context, to match reply w/ request + @param retval - return code +*/ +define mpls_route_add_del_reply +{ + u32 context; + i32 retval; +}; + +/** \brief FIB path + @param sw_if_index - index of the interface + @param weight - The weight, for UCMP + @param is_local - local if non-zero, else remote + @param is_drop - Drop the packet + @param is_unreach - Drop the packet and rate limit send ICMP unreachable + @param is_prohibit - Drop the packet and rate limit send ICMP prohibited + @param afi - the afi of the next hop, IP46_TYPE_IP4=1, IP46_TYPE_IP6=2 + @param next_hop[16] - the next hop address + + WARNING: this type is replicated, pending cleanup completion + +*/ +typeonly manual_print manual_endian define fib_path2 +{ + u32 sw_if_index; + u32 weight; + u8 is_local; + u8 is_drop; + u8 is_unreach; + u8 is_prohibit; + u8 afi; + u8 next_hop[16]; +}; + +/** \brief Dump MPLS fib table + @param client_index - opaque cookie to identify the sender +*/ +define mpls_fib_dump +{ + u32 client_index; + u32 context; +}; + +/** \brief mpls FIB table response + @param table_id - MPLS fib table id + @param s_bit - End-of-stack bit + @param label - MPLS label value + @param count - the number of fib_path in path + @param path - array of of fib_path structures +*/ +manual_endian manual_print define mpls_fib_details +{ + u32 context; + u32 table_id; + u8 eos_bit; + u32 label; + u32 count; + vl_api_fib_path2_t path[count]; +}; + +/* + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ + \ No newline at end of file diff --git a/src/vnet/mpls/mpls_api.c b/src/vnet/mpls/mpls_api.c new file mode 100644 index 00000000..ebbeba69 --- /dev/null +++ b/src/vnet/mpls/mpls_api.c @@ -0,0 +1,497 @@ +/* + *------------------------------------------------------------------ + * mpls_api.c - mpls api + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include +#undef vl_printfun + +#include + +#define foreach_vpe_api_msg \ +_(MPLS_IP_BIND_UNBIND, mpls_ip_bind_unbind) \ +_(MPLS_ROUTE_ADD_DEL, mpls_route_add_del) \ +_(MPLS_TUNNEL_ADD_DEL, mpls_tunnel_add_del) \ +_(MPLS_TUNNEL_DUMP, mpls_tunnel_dump) \ +_(MPLS_TUNNEL_DETAILS, mpls_tunnel_details) \ +_(MPLS_FIB_DUMP, mpls_fib_dump) \ +_(MPLS_FIB_DETAILS, mpls_fib_details) + +extern void stats_dslock_with_hint (int hint, int tag); +extern void stats_dsunlock (void); + +static int +mpls_ip_bind_unbind_handler (vnet_main_t * vnm, + vl_api_mpls_ip_bind_unbind_t * mp) +{ + u32 mpls_fib_index, ip_fib_index; + + mpls_fib_index = + fib_table_find (FIB_PROTOCOL_MPLS, ntohl (mp->mb_mpls_table_id)); + + if (~0 == mpls_fib_index) + { + if (mp->mb_create_table_if_needed) + { + mpls_fib_index = + fib_table_find_or_create_and_lock (FIB_PROTOCOL_MPLS, + ntohl (mp->mb_mpls_table_id)); + } + else + return VNET_API_ERROR_NO_SUCH_FIB; + } + + ip_fib_index = fib_table_find ((mp->mb_is_ip4 ? + FIB_PROTOCOL_IP4 : + FIB_PROTOCOL_IP6), + ntohl (mp->mb_ip_table_id)); + if (~0 == ip_fib_index) + return VNET_API_ERROR_NO_SUCH_FIB; + + fib_prefix_t pfx = { + .fp_len = mp->mb_address_length, + }; + + if (mp->mb_is_ip4) + { + pfx.fp_proto = FIB_PROTOCOL_IP4; + clib_memcpy (&pfx.fp_addr.ip4, mp->mb_address, + sizeof (pfx.fp_addr.ip4)); + } + else + { + pfx.fp_proto = FIB_PROTOCOL_IP6; + clib_memcpy (&pfx.fp_addr.ip6, mp->mb_address, + sizeof (pfx.fp_addr.ip6)); + } + + if (mp->mb_is_bind) + fib_table_entry_local_label_add (ip_fib_index, &pfx, + ntohl (mp->mb_label)); + else + fib_table_entry_local_label_remove (ip_fib_index, &pfx, + ntohl (mp->mb_label)); + + return (0); +} + +void +vl_api_mpls_ip_bind_unbind_t_handler (vl_api_mpls_ip_bind_unbind_t * mp) +{ + vl_api_mpls_ip_bind_unbind_reply_t *rmp; + vnet_main_t *vnm; + int rv; + + vnm = vnet_get_main (); + vnm->api_errno = 0; + + rv = mpls_ip_bind_unbind_handler (vnm, mp); + rv = (rv == 0) ? 
vnm->api_errno : rv; + + REPLY_MACRO (VL_API_MPLS_IP_BIND_UNBIND_REPLY); +} + +static int +mpls_route_add_del_t_handler (vnet_main_t * vnm, + vl_api_mpls_route_add_del_t * mp) +{ + u32 fib_index, next_hop_fib_index; + mpls_label_t *label_stack = NULL; + int rv, ii, n_labels;; + + fib_prefix_t pfx = { + .fp_len = 21, + .fp_proto = FIB_PROTOCOL_MPLS, + .fp_eos = mp->mr_eos, + .fp_label = ntohl (mp->mr_label), + }; + if (pfx.fp_eos) + { + if (mp->mr_next_hop_proto_is_ip4) + { + pfx.fp_payload_proto = DPO_PROTO_IP4; + } + else + { + pfx.fp_payload_proto = DPO_PROTO_IP6; + } + } + else + { + pfx.fp_payload_proto = DPO_PROTO_MPLS; + } + + rv = add_del_route_check (FIB_PROTOCOL_MPLS, + mp->mr_table_id, + mp->mr_next_hop_sw_if_index, + dpo_proto_to_fib (pfx.fp_payload_proto), + mp->mr_next_hop_table_id, + mp->mr_create_table_if_needed, + &fib_index, &next_hop_fib_index); + + if (0 != rv) + return (rv); + + ip46_address_t nh; + memset (&nh, 0, sizeof (nh)); + + if (mp->mr_next_hop_proto_is_ip4) + memcpy (&nh.ip4, mp->mr_next_hop, sizeof (nh.ip4)); + else + memcpy (&nh.ip6, mp->mr_next_hop, sizeof (nh.ip6)); + + n_labels = mp->mr_next_hop_n_out_labels; + if (n_labels == 0) + ; + else if (1 == n_labels) + vec_add1 (label_stack, ntohl (mp->mr_next_hop_out_label_stack[0])); + else + { + vec_validate (label_stack, n_labels - 1); + for (ii = 0; ii < n_labels; ii++) + label_stack[ii] = ntohl (mp->mr_next_hop_out_label_stack[ii]); + } + + return (add_del_route_t_handler (mp->mr_is_multipath, mp->mr_is_add, 0, // mp->is_drop, + 0, // mp->is_unreach, + 0, // mp->is_prohibit, + 0, // mp->is_local, + mp->mr_is_classify, + mp->mr_classify_table_index, + mp->mr_is_resolve_host, + mp->mr_is_resolve_attached, + fib_index, &pfx, + mp->mr_next_hop_proto_is_ip4, + &nh, ntohl (mp->mr_next_hop_sw_if_index), + next_hop_fib_index, + mp->mr_next_hop_weight, + ntohl (mp->mr_next_hop_via_label), + label_stack)); +} + +void +vl_api_mpls_route_add_del_t_handler (vl_api_mpls_route_add_del_t * mp) +{ + vl_api_mpls_route_add_del_reply_t *rmp; + vnet_main_t *vnm; + int rv; + + vnm = vnet_get_main (); + vnm->api_errno = 0; + + rv = mpls_route_add_del_t_handler (vnm, mp); + + rv = (rv == 0) ? 
vnm->api_errno : rv; + + REPLY_MACRO (VL_API_MPLS_ROUTE_ADD_DEL_REPLY); +} + +static void +vl_api_mpls_tunnel_add_del_t_handler (vl_api_mpls_tunnel_add_del_t * mp) +{ + vl_api_mpls_tunnel_add_del_reply_t *rmp; + int rv = 0; + u32 tunnel_sw_if_index; + int ii; + + stats_dslock_with_hint (1 /* release hint */ , 5 /* tag */ ); + + if (mp->mt_is_add) + { + fib_route_path_t rpath, *rpaths = NULL; + mpls_label_t *label_stack = NULL; + + memset (&rpath, 0, sizeof (rpath)); + + if (mp->mt_next_hop_proto_is_ip4) + { + rpath.frp_proto = FIB_PROTOCOL_IP4; + clib_memcpy (&rpath.frp_addr.ip4, + mp->mt_next_hop, sizeof (rpath.frp_addr.ip4)); + } + else + { + rpath.frp_proto = FIB_PROTOCOL_IP6; + clib_memcpy (&rpath.frp_addr.ip6, + mp->mt_next_hop, sizeof (rpath.frp_addr.ip6)); + } + rpath.frp_sw_if_index = ntohl (mp->mt_next_hop_sw_if_index); + + for (ii = 0; ii < mp->mt_next_hop_n_out_labels; ii++) + vec_add1 (label_stack, ntohl (mp->mt_next_hop_out_label_stack[ii])); + + vec_add1 (rpaths, rpath); + + vnet_mpls_tunnel_add (rpaths, label_stack, + mp->mt_l2_only, &tunnel_sw_if_index); + vec_free (rpaths); + vec_free (label_stack); + } + else + { + tunnel_sw_if_index = ntohl (mp->mt_sw_if_index); + vnet_mpls_tunnel_del (tunnel_sw_if_index); + } + + stats_dsunlock (); + + /* *INDENT-OFF* */ + REPLY_MACRO2(VL_API_MPLS_TUNNEL_ADD_DEL_REPLY, + ({ + rmp->sw_if_index = ntohl(tunnel_sw_if_index); + })); + /* *INDENT-ON* */ +} + +static void +vl_api_mpls_tunnel_details_t_handler (vl_api_mpls_tunnel_details_t * mp) +{ + clib_warning ("BUG"); +} + +typedef struct mpls_tunnel_send_walk_ctx_t_ +{ + unix_shared_memory_queue_t *q; + u32 index; + u32 context; +} mpls_tunnel_send_walk_ctx_t; + +static void +send_mpls_tunnel_entry (u32 mti, void *arg) +{ + mpls_tunnel_send_walk_ctx_t *ctx; + vl_api_mpls_tunnel_details_t *mp; + const mpls_tunnel_t *mt; + u32 nlabels; + + ctx = arg; + + if (~0 != ctx->index && mti != ctx->index) + return; + + mt = mpls_tunnel_get (mti); + nlabels = vec_len (mt->mt_label_stack); + + mp = vl_msg_api_alloc (sizeof (*mp) + nlabels * sizeof (u32)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_MPLS_TUNNEL_DETAILS); + mp->context = ctx->context; + + mp->tunnel_index = ntohl (mti); + memcpy (mp->mt_next_hop_out_labels, + mt->mt_label_stack, nlabels * sizeof (u32)); + + // FIXME + + vl_msg_api_send_shmem (ctx->q, (u8 *) & mp); +} + +static void +vl_api_mpls_tunnel_dump_t_handler (vl_api_mpls_tunnel_dump_t * mp) +{ + unix_shared_memory_queue_t *q; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + mpls_tunnel_send_walk_ctx_t ctx = { + .q = q, + .index = ntohl (mp->tunnel_index), + .context = mp->context, + }; + mpls_tunnel_walk (send_mpls_tunnel_entry, &ctx); +} + +static void +vl_api_mpls_fib_details_t_handler (vl_api_mpls_fib_details_t * mp) +{ + clib_warning ("BUG"); +} + +static void +vl_api_mpls_fib_details_t_endian (vl_api_mpls_fib_details_t * mp) +{ + clib_warning ("BUG"); +} + +static void +vl_api_mpls_fib_details_t_print (vl_api_mpls_fib_details_t * mp) +{ + clib_warning ("BUG"); +} + +static void +send_mpls_fib_details (vpe_api_main_t * am, + unix_shared_memory_queue_t * q, + u32 table_id, u32 label, u32 eos, + fib_route_path_encode_t * api_rpaths, u32 context) +{ + vl_api_mpls_fib_details_t *mp; + fib_route_path_encode_t *api_rpath; + vl_api_fib_path2_t *fp; + int path_count; + + path_count = vec_len (api_rpaths); + mp = vl_msg_api_alloc (sizeof (*mp) + path_count * sizeof (*fp)); + if (!mp) + return; + memset (mp, 0, sizeof 
(*mp)); + mp->_vl_msg_id = ntohs (VL_API_MPLS_FIB_DETAILS); + mp->context = context; + + mp->table_id = htonl (table_id); + mp->eos_bit = eos; + mp->label = htonl (label); + + mp->count = htonl (path_count); + fp = mp->path; + vec_foreach (api_rpath, api_rpaths) + { + memset (fp, 0, sizeof (*fp)); + fp->weight = htonl (api_rpath->rpath.frp_weight); + fp->sw_if_index = htonl (api_rpath->rpath.frp_sw_if_index); + copy_fib_next_hop (api_rpath, fp); + fp++; + } + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +static void +vl_api_mpls_fib_dump_t_handler (vl_api_mpls_fib_dump_t * mp) +{ + vpe_api_main_t *am = &vpe_api_main; + unix_shared_memory_queue_t *q; + mpls_main_t *mm = &mpls_main; + fib_table_t *fib_table; + fib_node_index_t lfei, *lfeip, *lfeis = NULL; + mpls_label_t key; + fib_prefix_t pfx; + u32 fib_index; + fib_route_path_encode_t *api_rpaths; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + /* *INDENT-OFF* */ + pool_foreach (fib_table, mm->fibs, + ({ + hash_foreach(key, lfei, fib_table->mpls.mf_entries, + ({ + vec_add1(lfeis, lfei); + })); + })); + /* *INDENT-ON* */ + vec_sort_with_function (lfeis, fib_entry_cmp_for_sort); + + vec_foreach (lfeip, lfeis) + { + fib_entry_get_prefix (*lfeip, &pfx); + fib_index = fib_entry_get_fib_index (*lfeip); + fib_table = fib_table_get (fib_index, pfx.fp_proto); + api_rpaths = NULL; + fib_entry_encode (*lfeip, &api_rpaths); + send_mpls_fib_details (am, q, + fib_table->ft_table_id, + pfx.fp_label, pfx.fp_eos, api_rpaths, mp->context); + vec_free (api_rpaths); + } + + vec_free (lfeis); +} + +/* + * mpls_api_hookup + * Add vpe's API message handlers to the table. + * vlib has alread mapped shared memory and + * added the client registration handlers. + * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process() + */ +#define vl_msg_name_crc_list +#include +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (api_main_t * am) +{ +#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id); + foreach_vl_msg_name_crc_mpls; +#undef _ +} + +static clib_error_t * +mpls_api_hookup (vlib_main_t * vm) +{ + api_main_t *am = &api_main; + +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_vpe_api_msg; +#undef _ + + /* + * Trace space for 8 MPLS encap labels + */ + am->api_trace_cfg[VL_API_MPLS_TUNNEL_ADD_DEL].size += 8 * sizeof (u32); + + /* + * Set up the (msg_name, crc, message-id) table + */ + setup_message_id_table (am); + + return 0; +} + +VLIB_API_INIT_FUNCTION (mpls_api_hookup); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/sr/sr.api b/src/vnet/sr/sr.api new file mode 100644 index 00000000..3d017ce5 --- /dev/null +++ b/src/vnet/sr/sr.api @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2015-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** \brief IPv6 segment routing tunnel add / del request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_add - add the tunnel if non-zero, else delete it + @param name[] - tunnel name (len. 64) + @param src_address[] - + @param dst_address[] - + @param dst_mask_width - + @param inner_vrf_id - + @param outer_vrf_id - + @param flags_net_byte_order - + @param n_segments - + @param n_tags - + @param segs_and_tags[] - + @param policy_name[] - name of policy to associate this tunnel to (len. 64) +*/ +define sr_tunnel_add_del +{ + u32 client_index; + u32 context; + u8 is_add; + u8 name[64]; + u8 src_address[16]; + u8 dst_address[16]; + u8 dst_mask_width; + u32 inner_vrf_id; + u32 outer_vrf_id; + u16 flags_net_byte_order; + u8 n_segments; + u8 n_tags; + u8 policy_name[64]; + u8 segs_and_tags[0]; +}; + +/** \brief IPv6 segment routing tunnel add / del response + @param context - sender context, to match reply w/ request + @param retval - return value for request +*/ +define sr_tunnel_add_del_reply +{ + u32 context; + i32 retval; +}; + +/** \brief IPv6 segment routing policy add / del request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_add - add the tunnel if non-zero, else delete it + @param name[] - policy name (len. 64) + @param tunnel_names[] - +*/ +define sr_policy_add_del +{ + u32 client_index; + u32 context; + u8 is_add; + u8 name[64]; + u8 tunnel_names[0]; +}; + +/** \brief IPv6 segment routing policy add / del response + @param context - sender context, to match reply w/ request + @param retval - return value for request + + +*/ +define sr_policy_add_del_reply +{ + u32 context; + i32 retval; +}; + +/** \brief IPv6 segment routing multicast map to policy add / del request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_add - add the tunnel if non-zero, else delete it + @param multicast_address[] - IP6 multicast address + @param policy_name[] = policy name (len.64) +*/ +define sr_multicast_map_add_del +{ + u32 client_index; + u32 context; + u8 is_add; + u8 multicast_address[16]; + u8 policy_name[64]; +}; + +/** \brief IPv6 segment routing multicast map to policy add / del response + @param context - sender context, to match reply w/ request + @param retval - return value for request +*/ +define sr_multicast_map_add_del_reply +{ + u32 context; + i32 retval; +}; + +/* + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ + \ No newline at end of file diff --git a/src/vnet/sr/sr_api.c b/src/vnet/sr/sr_api.c new file mode 100644 index 00000000..6c6eb9b6 --- /dev/null +++ b/src/vnet/sr/sr_api.c @@ -0,0 +1,279 @@ +/* + *------------------------------------------------------------------ + * sr_api.c - ipv6 segment routing api + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include +#include + +#include +#include +#include + +#include + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include +#undef vl_printfun + +#include + +#define foreach_vpe_api_msg \ +_(SR_MULTICAST_MAP_ADD_DEL, sr_multicast_map_add_del) + +static void vl_api_sr_tunnel_add_del_t_handler + (vl_api_sr_tunnel_add_del_t * mp) +{ +#if IP6SR == 0 + clib_warning ("unimplemented"); +#else + ip6_sr_add_del_tunnel_args_t _a, *a = &_a; + int rv = 0; + vl_api_sr_tunnel_add_del_reply_t *rmp; + ip6_address_t *segments = 0, *seg; + ip6_address_t *tags = 0, *tag; + ip6_address_t *this_address; + int i; + + if (mp->n_segments == 0) + { + rv = -11; + goto out; + } + + memset (a, 0, sizeof (*a)); + a->src_address = (ip6_address_t *) & mp->src_address; + a->dst_address = (ip6_address_t *) & mp->dst_address; + a->dst_mask_width = mp->dst_mask_width; + a->flags_net_byte_order = mp->flags_net_byte_order; + a->is_del = (mp->is_add == 0); + a->rx_table_id = ntohl (mp->outer_vrf_id); + a->tx_table_id = ntohl (mp->inner_vrf_id); + + a->name = format (0, "%s", mp->name); + if (!(vec_len (a->name))) + a->name = 0; + + a->policy_name = format (0, "%s", mp->policy_name); + if (!(vec_len (a->policy_name))) + a->policy_name = 0; + + /* Yank segments and tags out of the API message */ + this_address = (ip6_address_t *) mp->segs_and_tags; + for (i = 0; i < mp->n_segments; i++) + { + vec_add2 (segments, seg, 1); + clib_memcpy (seg->as_u8, this_address->as_u8, sizeof (*this_address)); + this_address++; + } + for (i = 0; i < mp->n_tags; i++) + { + vec_add2 (tags, tag, 1); + clib_memcpy (tag->as_u8, this_address->as_u8, sizeof (*this_address)); + this_address++; + } + + a->segments = segments; + a->tags = tags; + + rv = ip6_sr_add_del_tunnel (a); + +out: + + REPLY_MACRO (VL_API_SR_TUNNEL_ADD_DEL_REPLY); +#endif +} + +static void vl_api_sr_policy_add_del_t_handler + (vl_api_sr_policy_add_del_t * mp) +{ +#if IP6SR == 0 + clib_warning ("unimplemented"); +#else + ip6_sr_add_del_policy_args_t _a, *a = &_a; + int rv = 0; + vl_api_sr_policy_add_del_reply_t *rmp; + int i; + + memset (a, 0, sizeof (*a)); + a->is_del = (mp->is_add == 0); + + a->name = format (0, "%s", mp->name); + if (!(vec_len (a->name))) + { + rv = VNET_API_ERROR_NO_SUCH_NODE2; + goto out; + } + + if (!(mp->tunnel_names[0])) + { + rv = VNET_API_ERROR_NO_SUCH_NODE2; + goto out; + } + + // start deserializing tunnel_names + int num_tunnels = mp->tunnel_names[0]; //number of tunnels + u8 *deser_tun_names = mp->tunnel_names; + deser_tun_names += 1; //moving along + + u8 *tun_name = 0; + int tun_name_len = 0; + + for (i = 0; i < num_tunnels; i++) + { + tun_name_len = *deser_tun_names; + deser_tun_names += 1; + vec_resize (tun_name, tun_name_len); + memcpy (tun_name, deser_tun_names, tun_name_len); + vec_add1 
(a->tunnel_names, tun_name); + deser_tun_names += tun_name_len; + tun_name = 0; + } + + rv = ip6_sr_add_del_policy (a); + +out: + + REPLY_MACRO (VL_API_SR_POLICY_ADD_DEL_REPLY); +#endif +} + +static void vl_api_sr_multicast_map_add_del_t_handler + (vl_api_sr_multicast_map_add_del_t * mp) +{ +#if IP6SR == 0 + clib_warning ("unimplemented"); +#else + ip6_sr_add_del_multicastmap_args_t _a, *a = &_a; + int rv = 0; + vl_api_sr_multicast_map_add_del_reply_t *rmp; + + memset (a, 0, sizeof (*a)); + a->is_del = (mp->is_add == 0); + + a->multicast_address = (ip6_address_t *) & mp->multicast_address; + a->policy_name = format (0, "%s", mp->policy_name); + + if (a->multicast_address == 0) + { + rv = -1; + goto out; + } + + if (!(a->policy_name)) + { + rv = -2; + goto out; + } + +#if DPDK > 0 /* Cannot call replicate without DPDK */ + rv = ip6_sr_add_del_multicastmap (a); +#else + clib_warning ("multicast replication without DPDK not implemented"); + rv = VNET_API_ERROR_UNIMPLEMENTED; +#endif /* DPDK */ + +out: + + REPLY_MACRO (VL_API_SR_MULTICAST_MAP_ADD_DEL_REPLY); +#endif +} + +/* + * sr_api_hookup + * Add vpe's API message handlers to the table. + * vlib has alread mapped shared memory and + * added the client registration handlers. + * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process() + */ +#define vl_msg_name_crc_list +#include +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (api_main_t * am) +{ +#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id); + foreach_vl_msg_name_crc_sr; +#undef _ +} + +static clib_error_t * +sr_api_hookup (vlib_main_t * vm) +{ + api_main_t *am = &api_main; + +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_vpe_api_msg; +#undef _ + + /* + * Manually register the sr tunnel add del msg, so we trace + * enough bytes to capture a typical segment list + */ + vl_msg_api_set_handlers (VL_API_SR_TUNNEL_ADD_DEL, + "sr_tunnel_add_del", + vl_api_sr_tunnel_add_del_t_handler, + vl_noop_handler, + vl_api_sr_tunnel_add_del_t_endian, + vl_api_sr_tunnel_add_del_t_print, 256, 1); + + + /* + * Manually register the sr policy add del msg, so we trace + * enough bytes to capture a typical tunnel name list + */ + vl_msg_api_set_handlers (VL_API_SR_POLICY_ADD_DEL, + "sr_policy_add_del", + vl_api_sr_policy_add_del_t_handler, + vl_noop_handler, + vl_api_sr_policy_add_del_t_endian, + vl_api_sr_policy_add_del_t_print, 256, 1); + + /* + * Set up the (msg_name, crc, message-id) table + */ + setup_message_id_table (am); + + return 0; +} + +VLIB_API_INIT_FUNCTION (sr_api_hookup); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/vnet_all_api_h.h b/src/vnet/vnet_all_api_h.h index d48e1540..1024f92c 100644 --- a/src/vnet/vnet_all_api_h.h +++ b/src/vnet/vnet_all_api_h.h @@ -50,6 +50,8 @@ #include #include #include +#include +#include /* * fd.io coding-style-patch-verification: ON diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index 46e28e9d..3d6905dd 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -52,8 +52,6 @@ #include #include #include -#include -#include #include #include #if IPV6SR > 0 @@ -119,24 +117,14 @@ #define foreach_vpe_api_msg \ _(WANT_OAM_EVENTS, want_oam_events) \ _(OAM_ADD_DEL, oam_add_del) \ -_(MPLS_ROUTE_ADD_DEL, mpls_route_add_del) \ -_(MPLS_IP_BIND_UNBIND, mpls_ip_bind_unbind) \ 
_(IS_ADDRESS_REACHABLE, is_address_reachable) \ _(SW_INTERFACE_SET_MPLS_ENABLE, sw_interface_set_mpls_enable) \ _(SW_INTERFACE_SET_VPATH, sw_interface_set_vpath) \ _(SW_INTERFACE_SET_VXLAN_BYPASS, sw_interface_set_vxlan_bypass) \ _(SW_INTERFACE_SET_L2_XCONNECT, sw_interface_set_l2_xconnect) \ _(SW_INTERFACE_SET_L2_BRIDGE, sw_interface_set_l2_bridge) \ -_(BRIDGE_DOMAIN_ADD_DEL, bridge_domain_add_del) \ -_(BRIDGE_DOMAIN_DUMP, bridge_domain_dump) \ -_(BRIDGE_DOMAIN_DETAILS, bridge_domain_details) \ -_(BRIDGE_DOMAIN_SW_IF_DETAILS, bridge_domain_sw_if_details) \ -_(L2FIB_ADD_DEL, l2fib_add_del) \ -_(L2_FLAGS, l2_flags) \ -_(BRIDGE_FLAGS, bridge_flags) \ _(CREATE_VLAN_SUBIF, create_vlan_subif) \ _(CREATE_SUBIF, create_subif) \ -_(MPLS_TUNNEL_ADD_DEL, mpls_tunnel_add_del) \ _(PROXY_ARP_ADD_DEL, proxy_arp_add_del) \ _(PROXY_ARP_INTFC_ENABLE_DISABLE, proxy_arp_intfc_enable_disable) \ _(VNET_GET_SUMMARY_STATS, vnet_get_summary_stats) \ @@ -159,12 +147,9 @@ _(GET_NODE_INDEX, get_node_index) \ _(ADD_NODE_NEXT, add_node_next) \ _(VXLAN_ADD_DEL_TUNNEL, vxlan_add_del_tunnel) \ _(VXLAN_TUNNEL_DUMP, vxlan_tunnel_dump) \ -_(L2_FIB_CLEAR_TABLE, l2_fib_clear_table) \ _(L2_INTERFACE_EFP_FILTER, l2_interface_efp_filter) \ _(L2_INTERFACE_VLAN_TAG_REWRITE, l2_interface_vlan_tag_rewrite) \ _(SHOW_VERSION, show_version) \ -_(L2_FIB_TABLE_DUMP, l2_fib_table_dump) \ -_(L2_FIB_TABLE_ENTRY, l2_fib_table_entry) \ _(VXLAN_GPE_ADD_DEL_TUNNEL, vxlan_gpe_add_del_tunnel) \ _(VXLAN_GPE_TUNNEL_DUMP, vxlan_gpe_tunnel_dump) \ _(INTERFACE_NAME_RENUMBER, interface_name_renumber) \ @@ -178,15 +163,10 @@ _(COP_WHITELIST_ENABLE_DISABLE, cop_whitelist_enable_disable) \ _(GET_NODE_GRAPH, get_node_graph) \ _(IOAM_ENABLE, ioam_enable) \ _(IOAM_DISABLE, ioam_disable) \ -_(SR_MULTICAST_MAP_ADD_DEL, sr_multicast_map_add_del) \ _(POLICER_ADD_DEL, policer_add_del) \ _(POLICER_DUMP, policer_dump) \ _(POLICER_CLASSIFY_SET_INTERFACE, policer_classify_set_interface) \ _(POLICER_CLASSIFY_DUMP, policer_classify_dump) \ -_(MPLS_TUNNEL_DUMP, mpls_tunnel_dump) \ -_(MPLS_TUNNEL_DETAILS, mpls_tunnel_details) \ -_(MPLS_FIB_DUMP, mpls_fib_dump) \ -_(MPLS_FIB_DETAILS, mpls_fib_details) \ _(CLASSIFY_TABLE_IDS,classify_table_ids) \ _(CLASSIFY_TABLE_BY_INTERFACE, classify_table_by_interface) \ _(CLASSIFY_TABLE_INFO,classify_table_info) \ @@ -411,173 +391,6 @@ VLIB_REGISTER_NODE (vpe_resolver_process_node,static) = { }; /* *INDENT-ON* */ -static int -mpls_route_add_del_t_handler (vnet_main_t * vnm, - vl_api_mpls_route_add_del_t * mp) -{ - u32 fib_index, next_hop_fib_index; - mpls_label_t *label_stack = NULL; - int rv, ii, n_labels;; - - fib_prefix_t pfx = { - .fp_len = 21, - .fp_proto = FIB_PROTOCOL_MPLS, - .fp_eos = mp->mr_eos, - .fp_label = ntohl (mp->mr_label), - }; - if (pfx.fp_eos) - { - if (mp->mr_next_hop_proto_is_ip4) - { - pfx.fp_payload_proto = DPO_PROTO_IP4; - } - else - { - pfx.fp_payload_proto = DPO_PROTO_IP6; - } - } - else - { - pfx.fp_payload_proto = DPO_PROTO_MPLS; - } - - rv = add_del_route_check (FIB_PROTOCOL_MPLS, - mp->mr_table_id, - mp->mr_next_hop_sw_if_index, - dpo_proto_to_fib (pfx.fp_payload_proto), - mp->mr_next_hop_table_id, - mp->mr_create_table_if_needed, - &fib_index, &next_hop_fib_index); - - if (0 != rv) - return (rv); - - ip46_address_t nh; - memset (&nh, 0, sizeof (nh)); - - if (mp->mr_next_hop_proto_is_ip4) - memcpy (&nh.ip4, mp->mr_next_hop, sizeof (nh.ip4)); - else - memcpy (&nh.ip6, mp->mr_next_hop, sizeof (nh.ip6)); - - n_labels = mp->mr_next_hop_n_out_labels; - if (n_labels == 0) - ; - else if (1 == n_labels) - vec_add1 
(label_stack, ntohl (mp->mr_next_hop_out_label_stack[0])); - else - { - vec_validate (label_stack, n_labels - 1); - for (ii = 0; ii < n_labels; ii++) - label_stack[ii] = ntohl (mp->mr_next_hop_out_label_stack[ii]); - } - - return (add_del_route_t_handler (mp->mr_is_multipath, mp->mr_is_add, 0, // mp->is_drop, - 0, // mp->is_unreach, - 0, // mp->is_prohibit, - 0, // mp->is_local, - mp->mr_is_classify, - mp->mr_classify_table_index, - mp->mr_is_resolve_host, - mp->mr_is_resolve_attached, - fib_index, &pfx, - mp->mr_next_hop_proto_is_ip4, - &nh, ntohl (mp->mr_next_hop_sw_if_index), - next_hop_fib_index, - mp->mr_next_hop_weight, - ntohl (mp->mr_next_hop_via_label), - label_stack)); -} - -void -vl_api_mpls_route_add_del_t_handler (vl_api_mpls_route_add_del_t * mp) -{ - vl_api_mpls_route_add_del_reply_t *rmp; - vnet_main_t *vnm; - int rv; - - vnm = vnet_get_main (); - vnm->api_errno = 0; - - rv = mpls_route_add_del_t_handler (vnm, mp); - - rv = (rv == 0) ? vnm->api_errno : rv; - - REPLY_MACRO (VL_API_MPLS_ROUTE_ADD_DEL_REPLY); -} - -static int -mpls_ip_bind_unbind_handler (vnet_main_t * vnm, - vl_api_mpls_ip_bind_unbind_t * mp) -{ - u32 mpls_fib_index, ip_fib_index; - - mpls_fib_index = - fib_table_find (FIB_PROTOCOL_MPLS, ntohl (mp->mb_mpls_table_id)); - - if (~0 == mpls_fib_index) - { - if (mp->mb_create_table_if_needed) - { - mpls_fib_index = - fib_table_find_or_create_and_lock (FIB_PROTOCOL_MPLS, - ntohl (mp->mb_mpls_table_id)); - } - else - return VNET_API_ERROR_NO_SUCH_FIB; - } - - ip_fib_index = fib_table_find ((mp->mb_is_ip4 ? - FIB_PROTOCOL_IP4 : - FIB_PROTOCOL_IP6), - ntohl (mp->mb_ip_table_id)); - if (~0 == ip_fib_index) - return VNET_API_ERROR_NO_SUCH_FIB; - - fib_prefix_t pfx = { - .fp_len = mp->mb_address_length, - }; - - if (mp->mb_is_ip4) - { - pfx.fp_proto = FIB_PROTOCOL_IP4; - clib_memcpy (&pfx.fp_addr.ip4, mp->mb_address, - sizeof (pfx.fp_addr.ip4)); - } - else - { - pfx.fp_proto = FIB_PROTOCOL_IP6; - clib_memcpy (&pfx.fp_addr.ip6, mp->mb_address, - sizeof (pfx.fp_addr.ip6)); - } - - if (mp->mb_is_bind) - fib_table_entry_local_label_add (ip_fib_index, &pfx, - ntohl (mp->mb_label)); - else - fib_table_entry_local_label_remove (ip_fib_index, &pfx, - ntohl (mp->mb_label)); - - return (0); -} - -void -vl_api_mpls_ip_bind_unbind_t_handler (vl_api_mpls_ip_bind_unbind_t * mp) -{ - vl_api_mpls_route_add_del_reply_t *rmp; - vnet_main_t *vnm; - int rv; - - vnm = vnet_get_main (); - vnm->api_errno = 0; - - rv = mpls_ip_bind_unbind_handler (vnm, mp); - - rv = (rv == 0) ? 
vnm->api_errno : rv; - - REPLY_MACRO (VL_API_MPLS_ROUTE_ADD_DEL_REPLY); -} - static void vl_api_sw_interface_set_vpath_t_handler (vl_api_sw_interface_set_vpath_t * mp) { @@ -690,278 +503,6 @@ static void REPLY_MACRO (VL_API_SW_INTERFACE_SET_L2_BRIDGE_REPLY); } -static void -vl_api_bridge_domain_add_del_t_handler (vl_api_bridge_domain_add_del_t * mp) -{ - vlib_main_t *vm = vlib_get_main (); - bd_main_t *bdm = &bd_main; - vl_api_bridge_domain_add_del_reply_t *rmp; - int rv = 0; - u32 enable_flags = 0, disable_flags = 0; - u32 bd_id = ntohl (mp->bd_id); - u32 bd_index; - - if (mp->is_add) - { - bd_index = bd_find_or_add_bd_index (bdm, bd_id); - - if (mp->flood) - enable_flags |= L2_FLOOD; - else - disable_flags |= L2_FLOOD; - - if (mp->uu_flood) - enable_flags |= L2_UU_FLOOD; - else - disable_flags |= L2_UU_FLOOD; - - if (mp->forward) - enable_flags |= L2_FWD; - else - disable_flags |= L2_FWD; - - if (mp->arp_term) - enable_flags |= L2_ARP_TERM; - else - disable_flags |= L2_ARP_TERM; - - if (mp->learn) - enable_flags |= L2_LEARN; - else - disable_flags |= L2_LEARN; - - if (enable_flags) - bd_set_flags (vm, bd_index, enable_flags, 1 /* enable */ ); - - if (disable_flags) - bd_set_flags (vm, bd_index, disable_flags, 0 /* disable */ ); - - bd_set_mac_age (vm, bd_index, mp->mac_age); - } - else - rv = bd_delete_bd_index (bdm, bd_id); - - REPLY_MACRO (VL_API_BRIDGE_DOMAIN_ADD_DEL_REPLY); -} - -static void -vl_api_bridge_domain_details_t_handler (vl_api_bridge_domain_details_t * mp) -{ - clib_warning ("BUG"); -} - -static void - vl_api_bridge_domain_sw_if_details_t_handler - (vl_api_bridge_domain_sw_if_details_t * mp) -{ - clib_warning ("BUG"); -} - -static void -send_bridge_domain_details (unix_shared_memory_queue_t * q, - l2_bridge_domain_t * bd_config, - u32 n_sw_ifs, u32 context) -{ - vl_api_bridge_domain_details_t *mp; - - mp = vl_msg_api_alloc (sizeof (*mp)); - memset (mp, 0, sizeof (*mp)); - mp->_vl_msg_id = ntohs (VL_API_BRIDGE_DOMAIN_DETAILS); - mp->bd_id = ntohl (bd_config->bd_id); - mp->flood = bd_feature_flood (bd_config); - mp->uu_flood = bd_feature_uu_flood (bd_config); - mp->forward = bd_feature_forward (bd_config); - mp->learn = bd_feature_learn (bd_config); - mp->arp_term = bd_feature_arp_term (bd_config); - mp->bvi_sw_if_index = ntohl (bd_config->bvi_sw_if_index); - mp->mac_age = bd_config->mac_age; - mp->n_sw_ifs = ntohl (n_sw_ifs); - mp->context = context; - - vl_msg_api_send_shmem (q, (u8 *) & mp); -} - -static void -send_bd_sw_if_details (l2input_main_t * l2im, - unix_shared_memory_queue_t * q, - l2_flood_member_t * member, u32 bd_id, u32 context) -{ - vl_api_bridge_domain_sw_if_details_t *mp; - l2_input_config_t *input_cfg; - - mp = vl_msg_api_alloc (sizeof (*mp)); - memset (mp, 0, sizeof (*mp)); - mp->_vl_msg_id = ntohs (VL_API_BRIDGE_DOMAIN_SW_IF_DETAILS); - mp->bd_id = ntohl (bd_id); - mp->sw_if_index = ntohl (member->sw_if_index); - input_cfg = vec_elt_at_index (l2im->configs, member->sw_if_index); - mp->shg = input_cfg->shg; - mp->context = context; - - vl_msg_api_send_shmem (q, (u8 *) & mp); -} - -static void -vl_api_bridge_domain_dump_t_handler (vl_api_bridge_domain_dump_t * mp) -{ - bd_main_t *bdm = &bd_main; - l2input_main_t *l2im = &l2input_main; - unix_shared_memory_queue_t *q; - l2_bridge_domain_t *bd_config; - u32 bd_id, bd_index; - u32 end; - - q = vl_api_client_index_to_input_queue (mp->client_index); - - if (q == 0) - return; - - bd_id = ntohl (mp->bd_id); - - bd_index = (bd_id == ~0) ? 0 : bd_find_or_add_bd_index (bdm, bd_id); - end = (bd_id == ~0) ? 
vec_len (l2im->bd_configs) : bd_index + 1; - for (; bd_index < end; bd_index++) - { - bd_config = l2input_bd_config_from_index (l2im, bd_index); - /* skip dummy bd_id 0 */ - if (bd_config && (bd_config->bd_id > 0)) - { - u32 n_sw_ifs; - l2_flood_member_t *m; - - n_sw_ifs = vec_len (bd_config->members); - send_bridge_domain_details (q, bd_config, n_sw_ifs, mp->context); - - vec_foreach (m, bd_config->members) - { - send_bd_sw_if_details (l2im, q, m, bd_config->bd_id, mp->context); - } - } - } -} - -static void -vl_api_l2fib_add_del_t_handler (vl_api_l2fib_add_del_t * mp) -{ - bd_main_t *bdm = &bd_main; - l2input_main_t *l2im = &l2input_main; - vl_api_l2fib_add_del_reply_t *rmp; - int rv = 0; - u64 mac = 0; - u32 sw_if_index = ntohl (mp->sw_if_index); - u32 bd_id = ntohl (mp->bd_id); - u32 bd_index; - u32 static_mac; - u32 filter_mac; - u32 bvi_mac; - uword *p; - - mac = mp->mac; - - p = hash_get (bdm->bd_index_by_bd_id, bd_id); - if (!p) - { - rv = VNET_API_ERROR_NO_SUCH_ENTRY; - goto bad_sw_if_index; - } - bd_index = p[0]; - - if (mp->is_add) - { - filter_mac = mp->filter_mac ? 1 : 0; - if (filter_mac == 0) - { - VALIDATE_SW_IF_INDEX (mp); - if (vec_len (l2im->configs) <= sw_if_index) - { - rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; - goto bad_sw_if_index; - } - else - { - l2_input_config_t *config; - config = vec_elt_at_index (l2im->configs, sw_if_index); - if (config->bridge == 0) - { - rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; - goto bad_sw_if_index; - } - } - } - static_mac = mp->static_mac ? 1 : 0; - bvi_mac = mp->bvi_mac ? 1 : 0; - l2fib_add_entry (mac, bd_index, sw_if_index, static_mac, filter_mac, - bvi_mac); - } - else - { - l2fib_del_entry (mac, bd_index); - } - - BAD_SW_IF_INDEX_LABEL; - - REPLY_MACRO (VL_API_L2FIB_ADD_DEL_REPLY); -} - -static void -vl_api_l2_flags_t_handler (vl_api_l2_flags_t * mp) -{ - vl_api_l2_flags_reply_t *rmp; - int rv = 0; - u32 sw_if_index = ntohl (mp->sw_if_index); - u32 flags = ntohl (mp->feature_bitmap); - u32 rbm = 0; - - VALIDATE_SW_IF_INDEX (mp); - -#define _(a,b) \ - if (flags & L2INPUT_FEAT_ ## a) \ - rbm = l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_ ## a, mp->is_set); - foreach_l2input_feat; -#undef _ - - BAD_SW_IF_INDEX_LABEL; - - /* *INDENT-OFF* */ - REPLY_MACRO2(VL_API_L2_FLAGS_REPLY, - ({ - rmp->resulting_feature_bitmap = ntohl(rbm); - })); - /* *INDENT-ON* */ -} - -static void -vl_api_bridge_flags_t_handler (vl_api_bridge_flags_t * mp) -{ - vlib_main_t *vm = vlib_get_main (); - bd_main_t *bdm = &bd_main; - vl_api_bridge_flags_reply_t *rmp; - int rv = 0; - u32 bd_id = ntohl (mp->bd_id); - u32 bd_index; - u32 flags = ntohl (mp->feature_bitmap); - uword *p; - - p = hash_get (bdm->bd_index_by_bd_id, bd_id); - if (p == 0) - { - rv = VNET_API_ERROR_NO_SUCH_ENTRY; - goto out; - } - - bd_index = p[0]; - - bd_set_flags (vm, bd_index, flags, mp->is_set); - -out: - /* *INDENT-OFF* */ - REPLY_MACRO2(VL_API_BRIDGE_FLAGS_REPLY, - ({ - rmp->resulting_feature_bitmap = ntohl(flags); - })); - /* *INDENT-ON* */ -} - static void vl_api_bd_ip_mac_add_del_t_handler (vl_api_bd_ip_mac_add_del_t * mp) { @@ -1147,64 +688,6 @@ out: /* *INDENT-ON* */ } -static void -vl_api_mpls_tunnel_add_del_t_handler (vl_api_mpls_tunnel_add_del_t * mp) -{ - vl_api_mpls_tunnel_add_del_reply_t *rmp; - int rv = 0; - stats_main_t *sm = &stats_main; - u32 tunnel_sw_if_index; - int ii; - - dslock (sm, 1 /* release hint */ , 5 /* tag */ ); - - if (mp->mt_is_add) - { - fib_route_path_t rpath, *rpaths = NULL; - mpls_label_t *label_stack = NULL; - - memset (&rpath, 0, sizeof 
(rpath)); - - if (mp->mt_next_hop_proto_is_ip4) - { - rpath.frp_proto = FIB_PROTOCOL_IP4; - clib_memcpy (&rpath.frp_addr.ip4, - mp->mt_next_hop, sizeof (rpath.frp_addr.ip4)); - } - else - { - rpath.frp_proto = FIB_PROTOCOL_IP6; - clib_memcpy (&rpath.frp_addr.ip6, - mp->mt_next_hop, sizeof (rpath.frp_addr.ip6)); - } - rpath.frp_sw_if_index = ntohl (mp->mt_next_hop_sw_if_index); - - for (ii = 0; ii < mp->mt_next_hop_n_out_labels; ii++) - vec_add1 (label_stack, ntohl (mp->mt_next_hop_out_label_stack[ii])); - - vec_add1 (rpaths, rpath); - - vnet_mpls_tunnel_add (rpaths, label_stack, - mp->mt_l2_only, &tunnel_sw_if_index); - vec_free (rpaths); - vec_free (label_stack); - } - else - { - tunnel_sw_if_index = ntohl (mp->mt_sw_if_index); - vnet_mpls_tunnel_del (tunnel_sw_if_index); - } - - dsunlock (sm); - - /* *INDENT-OFF* */ - REPLY_MACRO2(VL_API_MPLS_TUNNEL_ADD_DEL_REPLY, - ({ - rmp->sw_if_index = ntohl(tunnel_sw_if_index); - })); - /* *INDENT-ON* */ -} - static void vl_api_proxy_arp_add_del_t_handler (vl_api_proxy_arp_add_del_t * mp) { @@ -1929,164 +1412,6 @@ vl_api_set_arp_neighbor_limit_t_handler (vl_api_set_arp_neighbor_limit_t * mp) REPLY_MACRO (VL_API_SET_ARP_NEIGHBOR_LIMIT_REPLY); } -static void vl_api_sr_tunnel_add_del_t_handler - (vl_api_sr_tunnel_add_del_t * mp) -{ -#if IP6SR == 0 - clib_warning ("unimplemented"); -#else - ip6_sr_add_del_tunnel_args_t _a, *a = &_a; - int rv = 0; - vl_api_sr_tunnel_add_del_reply_t *rmp; - ip6_address_t *segments = 0, *seg; - ip6_address_t *tags = 0, *tag; - ip6_address_t *this_address; - int i; - - if (mp->n_segments == 0) - { - rv = -11; - goto out; - } - - memset (a, 0, sizeof (*a)); - a->src_address = (ip6_address_t *) & mp->src_address; - a->dst_address = (ip6_address_t *) & mp->dst_address; - a->dst_mask_width = mp->dst_mask_width; - a->flags_net_byte_order = mp->flags_net_byte_order; - a->is_del = (mp->is_add == 0); - a->rx_table_id = ntohl (mp->outer_vrf_id); - a->tx_table_id = ntohl (mp->inner_vrf_id); - - a->name = format (0, "%s", mp->name); - if (!(vec_len (a->name))) - a->name = 0; - - a->policy_name = format (0, "%s", mp->policy_name); - if (!(vec_len (a->policy_name))) - a->policy_name = 0; - - /* Yank segments and tags out of the API message */ - this_address = (ip6_address_t *) mp->segs_and_tags; - for (i = 0; i < mp->n_segments; i++) - { - vec_add2 (segments, seg, 1); - clib_memcpy (seg->as_u8, this_address->as_u8, sizeof (*this_address)); - this_address++; - } - for (i = 0; i < mp->n_tags; i++) - { - vec_add2 (tags, tag, 1); - clib_memcpy (tag->as_u8, this_address->as_u8, sizeof (*this_address)); - this_address++; - } - - a->segments = segments; - a->tags = tags; - - rv = ip6_sr_add_del_tunnel (a); - -out: - - REPLY_MACRO (VL_API_SR_TUNNEL_ADD_DEL_REPLY); -#endif -} - -static void vl_api_sr_policy_add_del_t_handler - (vl_api_sr_policy_add_del_t * mp) -{ -#if IP6SR == 0 - clib_warning ("unimplemented"); -#else - ip6_sr_add_del_policy_args_t _a, *a = &_a; - int rv = 0; - vl_api_sr_policy_add_del_reply_t *rmp; - int i; - - memset (a, 0, sizeof (*a)); - a->is_del = (mp->is_add == 0); - - a->name = format (0, "%s", mp->name); - if (!(vec_len (a->name))) - { - rv = VNET_API_ERROR_NO_SUCH_NODE2; - goto out; - } - - if (!(mp->tunnel_names[0])) - { - rv = VNET_API_ERROR_NO_SUCH_NODE2; - goto out; - } - - // start deserializing tunnel_names - int num_tunnels = mp->tunnel_names[0]; //number of tunnels - u8 *deser_tun_names = mp->tunnel_names; - deser_tun_names += 1; //moving along - - u8 *tun_name = 0; - int tun_name_len = 0; - - for (i = 0; i < 
num_tunnels; i++) - { - tun_name_len = *deser_tun_names; - deser_tun_names += 1; - vec_resize (tun_name, tun_name_len); - memcpy (tun_name, deser_tun_names, tun_name_len); - vec_add1 (a->tunnel_names, tun_name); - deser_tun_names += tun_name_len; - tun_name = 0; - } - - rv = ip6_sr_add_del_policy (a); - -out: - - REPLY_MACRO (VL_API_SR_POLICY_ADD_DEL_REPLY); -#endif -} - -static void vl_api_sr_multicast_map_add_del_t_handler - (vl_api_sr_multicast_map_add_del_t * mp) -{ -#if IP6SR == 0 - clib_warning ("unimplemented"); -#else - ip6_sr_add_del_multicastmap_args_t _a, *a = &_a; - int rv = 0; - vl_api_sr_multicast_map_add_del_reply_t *rmp; - - memset (a, 0, sizeof (*a)); - a->is_del = (mp->is_add == 0); - - a->multicast_address = (ip6_address_t *) & mp->multicast_address; - a->policy_name = format (0, "%s", mp->policy_name); - - if (a->multicast_address == 0) - { - rv = -1; - goto out; - } - - if (!(a->policy_name)) - { - rv = -2; - goto out; - } - -#if DPDK > 0 /* Cannot call replicate without DPDK */ - rv = ip6_sr_add_del_multicastmap (a); -#else - clib_warning ("multicast replication without DPDK not implemented"); - rv = VNET_API_ERROR_UNIMPLEMENTED; -#endif /* DPDK */ - -out: - - REPLY_MACRO (VL_API_SR_MULTICAST_MAP_ADD_DEL_REPLY); -#endif -} - #define foreach_classify_add_del_table_field \ _(table_index) \ _(nbuckets) \ @@ -2246,21 +1571,6 @@ static void vl_api_classify_set_interface_l2_tables_t_handler REPLY_MACRO (VL_API_CLASSIFY_SET_INTERFACE_L2_TABLES_REPLY); } -static void -vl_api_l2_fib_clear_table_t_handler (vl_api_l2_fib_clear_table_t * mp) -{ - int rv = 0; - vl_api_l2_fib_clear_table_reply_t *rmp; - - /* DAW-FIXME: This API should only clear non-static l2fib entries, but - * that is not currently implemented. When that TODO is fixed - * this call should be changed to pass 1 instead of 0. 
- */ - l2fib_clear_table (0); - - REPLY_MACRO (VL_API_L2_FIB_CLEAR_TABLE_REPLY); -} - extern void l2_efp_filter_configure (vnet_main_t * vnet_main, u32 sw_if_index, u32 enable); @@ -2321,76 +1631,6 @@ static void REPLY_MACRO (VL_API_L2_INTERFACE_VLAN_TAG_REWRITE_REPLY); } -static void -vl_api_l2_fib_table_entry_t_handler (vl_api_l2_fib_table_entry_t * mp) -{ - clib_warning ("BUG"); -} - -static void -send_l2fib_table_entry (vpe_api_main_t * am, - unix_shared_memory_queue_t * q, - l2fib_entry_key_t * l2fe_key, - l2fib_entry_result_t * l2fe_res, u32 context) -{ - vl_api_l2_fib_table_entry_t *mp; - - mp = vl_msg_api_alloc (sizeof (*mp)); - memset (mp, 0, sizeof (*mp)); - mp->_vl_msg_id = ntohs (VL_API_L2_FIB_TABLE_ENTRY); - - mp->bd_id = - ntohl (l2input_main.bd_configs[l2fe_key->fields.bd_index].bd_id); - - mp->mac = l2fib_make_key (l2fe_key->fields.mac, 0); - mp->sw_if_index = ntohl (l2fe_res->fields.sw_if_index); - mp->static_mac = l2fe_res->fields.static_mac; - mp->filter_mac = l2fe_res->fields.filter; - mp->bvi_mac = l2fe_res->fields.bvi; - mp->context = context; - - vl_msg_api_send_shmem (q, (u8 *) & mp); -} - -static void -vl_api_l2_fib_table_dump_t_handler (vl_api_l2_fib_table_dump_t * mp) -{ - vpe_api_main_t *am = &vpe_api_main; - bd_main_t *bdm = &bd_main; - l2fib_entry_key_t *l2fe_key = NULL; - l2fib_entry_result_t *l2fe_res = NULL; - u32 ni, bd_id = ntohl (mp->bd_id); - u32 bd_index; - unix_shared_memory_queue_t *q; - uword *p; - - q = vl_api_client_index_to_input_queue (mp->client_index); - if (q == 0) - return; - - /* see l2fib_table_dump: ~0 means "any" */ - if (bd_id == ~0) - bd_index = ~0; - else - { - p = hash_get (bdm->bd_index_by_bd_id, bd_id); - if (p == 0) - return; - - bd_index = p[0]; - } - - l2fib_table_dump (bd_index, &l2fe_key, &l2fe_res); - - vec_foreach_index (ni, l2fe_key) - { - send_l2fib_table_entry (am, q, vec_elt_at_index (l2fe_key, ni), - vec_elt_at_index (l2fe_res, ni), mp->context); - } - vec_free (l2fe_key); - vec_free (l2fe_res); -} - static void vl_api_show_version_t_handler (vl_api_show_version_t * mp) { @@ -3357,167 +2597,6 @@ vl_api_policer_classify_dump_t_handler (vl_api_policer_classify_dump_t * mp) } } -static void -vl_api_mpls_tunnel_details_t_handler (vl_api_mpls_fib_details_t * mp) -{ - clib_warning ("BUG"); -} - -typedef struct mpls_tunnel_send_walk_ctx_t_ -{ - unix_shared_memory_queue_t *q; - u32 index; - u32 context; -} mpls_tunnel_send_walk_ctx_t; - -static void -send_mpls_tunnel_entry (u32 mti, void *arg) -{ - mpls_tunnel_send_walk_ctx_t *ctx; - vl_api_mpls_tunnel_details_t *mp; - const mpls_tunnel_t *mt; - u32 nlabels; - - ctx = arg; - - if (~0 != ctx->index && mti != ctx->index) - return; - - mt = mpls_tunnel_get (mti); - nlabels = vec_len (mt->mt_label_stack); - - mp = vl_msg_api_alloc (sizeof (*mp) + nlabels * sizeof (u32)); - memset (mp, 0, sizeof (*mp)); - mp->_vl_msg_id = ntohs (VL_API_MPLS_TUNNEL_DETAILS); - mp->context = ctx->context; - - mp->tunnel_index = ntohl (mti); - memcpy (mp->mt_next_hop_out_labels, - mt->mt_label_stack, nlabels * sizeof (u32)); - - // FIXME - - vl_msg_api_send_shmem (ctx->q, (u8 *) & mp); -} - -static void -vl_api_mpls_tunnel_dump_t_handler (vl_api_mpls_tunnel_dump_t * mp) -{ - unix_shared_memory_queue_t *q; - - q = vl_api_client_index_to_input_queue (mp->client_index); - if (q == 0) - return; - - mpls_tunnel_send_walk_ctx_t ctx = { - .q = q, - .index = ntohl (mp->tunnel_index), - .context = mp->context, - }; - mpls_tunnel_walk (send_mpls_tunnel_entry, &ctx); -} - -static void 
-vl_api_mpls_fib_details_t_handler (vl_api_mpls_fib_details_t * mp) -{ - clib_warning ("BUG"); -} - -static void -vl_api_mpls_fib_details_t_endian (vl_api_mpls_fib_details_t * mp) -{ - clib_warning ("BUG"); -} - -static void -vl_api_mpls_fib_details_t_print (vl_api_mpls_fib_details_t * mp) -{ - clib_warning ("BUG"); -} - -static void -send_mpls_fib_details (vpe_api_main_t * am, - unix_shared_memory_queue_t * q, - u32 table_id, u32 label, u32 eos, - fib_route_path_encode_t * api_rpaths, u32 context) -{ - vl_api_mpls_fib_details_t *mp; - fib_route_path_encode_t *api_rpath; - vl_api_fib_path2_t *fp; - int path_count; - - path_count = vec_len (api_rpaths); - mp = vl_msg_api_alloc (sizeof (*mp) + path_count * sizeof (*fp)); - if (!mp) - return; - memset (mp, 0, sizeof (*mp)); - mp->_vl_msg_id = ntohs (VL_API_MPLS_FIB_DETAILS); - mp->context = context; - - mp->table_id = htonl (table_id); - mp->eos_bit = eos; - mp->label = htonl (label); - - mp->count = htonl (path_count); - fp = mp->path; - vec_foreach (api_rpath, api_rpaths) - { - memset (fp, 0, sizeof (*fp)); - fp->weight = htonl (api_rpath->rpath.frp_weight); - fp->sw_if_index = htonl (api_rpath->rpath.frp_sw_if_index); - copy_fib_next_hop (api_rpath, fp); - fp++; - } - - vl_msg_api_send_shmem (q, (u8 *) & mp); -} - -static void -vl_api_mpls_fib_dump_t_handler (vl_api_mpls_fib_dump_t * mp) -{ - vpe_api_main_t *am = &vpe_api_main; - unix_shared_memory_queue_t *q; - mpls_main_t *mm = &mpls_main; - fib_table_t *fib_table; - fib_node_index_t lfei, *lfeip, *lfeis = NULL; - mpls_label_t key; - fib_prefix_t pfx; - u32 fib_index; - fib_route_path_encode_t *api_rpaths; - - q = vl_api_client_index_to_input_queue (mp->client_index); - if (q == 0) - return; - - /* *INDENT-OFF* */ - pool_foreach (fib_table, mm->fibs, - ({ - hash_foreach(key, lfei, fib_table->mpls.mf_entries, - ({ - vec_add1(lfeis, lfei); - })); - })); - vec_sort_with_function(lfeis, fib_entry_cmp_for_sort); - - vec_foreach(lfeip, lfeis) - { - fib_entry_get_prefix(*lfeip, &pfx); - fib_index = fib_entry_get_fib_index(*lfeip); - fib_table = fib_table_get(fib_index, pfx.fp_proto); - api_rpaths = NULL; - fib_entry_encode(*lfeip, &api_rpaths); - send_mpls_fib_details (am, q, - fib_table->ft_table_id, - pfx.fp_label, - pfx.fp_eos, - api_rpaths, - mp->context); - vec_free(api_rpaths); - } - - vec_free (lfeis); -} - static void vl_api_classify_table_ids_t_handler (vl_api_classify_table_ids_t * mp) { @@ -4487,32 +3566,8 @@ vpe_api_hookup (vlib_main_t * vm) #undef _ /* - * Manually register the sr tunnel add del msg, so we trace - * enough bytes to capture a typical segment list - */ - vl_msg_api_set_handlers (VL_API_SR_TUNNEL_ADD_DEL, - "sr_tunnel_add_del", - vl_api_sr_tunnel_add_del_t_handler, - vl_noop_handler, - vl_api_sr_tunnel_add_del_t_endian, - vl_api_sr_tunnel_add_del_t_print, 256, 1); - - - /* - * Manually register the sr policy add del msg, so we trace - * enough bytes to capture a typical tunnel name list - */ - vl_msg_api_set_handlers (VL_API_SR_POLICY_ADD_DEL, - "sr_policy_add_del", - vl_api_sr_policy_add_del_t_handler, - vl_noop_handler, - vl_api_sr_policy_add_del_t_endian, - vl_api_sr_policy_add_del_t_print, 256, 1); - - /* - * Trace space for 8 MPLS encap labels, classifier mask+match + * Trace space for classifier mask+match */ - am->api_trace_cfg[VL_API_MPLS_TUNNEL_ADD_DEL].size += 8 * sizeof (u32); am->api_trace_cfg[VL_API_CLASSIFY_ADD_DEL_TABLE].size += 5 * sizeof (u32x4); am->api_trace_cfg[VL_API_CLASSIFY_ADD_DEL_SESSION].size += 5 * sizeof (u32x4); diff --git 
a/src/vpp/api/vpe.api b/src/vpp/api/vpe.api index 3e4bcdf9..e784fa01 100644 --- a/src/vpp/api/vpe.api +++ b/src/vpp/api/vpe.api @@ -22,21 +22,24 @@ /* * Note: API placement cleanup in progress * If you're looking for interface APIs, please - * see .../vnet/vnet/{interface.api,interface_api.c} - * IP APIs: see .../vnet/vnet/ip/{ip.api, ip_api.c} - * TAP APIs: see .../vnet/vnet/unix/{tap.api, tap_api.c} - * VXLAN APIs: see .../vnet/vnet/vxlan/{vxlan.api, vxlan_api.c} + * see .../src/vnet/{interface.api,interface_api.c} + * IP APIs: see .../src/vnet/ip/{ip.api, ip_api.c} + * TAP APIs: see .../src/vnet/unix/{tap.api, tap_api.c} + * VXLAN APIs: see .../src/vnet/vxlan/{vxlan.api, vxlan_api.c} * AF-PACKET APIs: ... see /vnet/devices/af_packet/{af_packet.api, af_packet_api.c} - * NETMAP APIs: see ... /vnet/vnet/devices/netmap/{netmap.api, netmap_api.c} + * NETMAP APIs: see ... /src/vnet/devices/netmap/{netmap.api, netmap_api.c} * VHOST-USER APIs: see .../vnet/devices/virtio/{vhost_user.api, vhost_user_api.c} - * VXLAN GPE APIs: see .../vnet/vnet/vxlan-gpe/{vxlan_gpe.api, vxlan_gpe_api.c} - * GRE APIs: see .../vnet/vnet/gre/{gre.api, gre_api.c} - * L2TP APIs: see .../vnet/vnet/l2tp/{l2tp.api, l2tp_api.c} - * BFD APIs: see .../vnet/vnet/bfd/{bfd.api, bfd_api.c} - * IPSEC APIs: see .../vnet/vnet/ipsec/{ipsec.api, ipsec_api.c} - * IPSEC-GRE APIs: see .../vnet/vnet/ipsec-gre/{ipsec_gre.api, ipsec_gre_api.c} - * LISP APIs: see .../vnet/vnet/lisp/{lisp.api, lisp_api.c} - * LISP-GPE APIs: see .../vnet/vnet/lisp-gpe/{lisp_gpe.api, lisp_gpe_api.c} + * VXLAN GPE APIs: see .../src/vnet/vxlan-gpe/{vxlan_gpe.api, vxlan_gpe_api.c} + * GRE APIs: see .../src/vnet/gre/{gre.api, gre_api.c} + * L2 APIs: see .../src/vnet/l2/{l2.api, l2_api.c} + * L2TP APIs: see .../src/vnet/l2tp/{l2tp.api, l2tp_api.c} + * BFD APIs: see .../src/vnet/bfd/{bfd.api, bfd_api.c} + * IPSEC APIs: see .../src/vnet/ipsec/{ipsec.api, ipsec_api.c} + * IPSEC-GRE APIs: see .../src/vnet/ipsec-gre/{ipsec_gre.api, ipsec_gre_api.c} + * LISP APIs: see .../src/vnet/lisp/{lisp.api, lisp_api.c} + * LISP-GPE APIs: see .../src/vnet/lisp-gpe/{lisp_gpe.api, lisp_gpe_api.c} + * MPLS APIs: see .../src/vnet/mpls/{mpls.api, mpls_api.c} + * SR APIs: see .../src/vnet/sr/{sr.api, sr_api.c} * DPDK APIs: ... see /src/vnet/devices/dpdk/{dpdk.api, dpdk_api.c} */ @@ -90,231 +93,6 @@ define sw_interface_set_mpls_enable_reply i32 retval; }; -/** \brief MPLS Route Add / del route - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param mr_label - The MPLS label value - @param mr_eos - The End of stack bit - @param mr_table_id - The MPLS table-id the route is added in - @param mr_classify_table_index - If this is a classify route, - this is the classify table index - @param mr_create_table_if_needed - If the MPLS or IP tables do not exist, - create them - @param mr_is_add - Is this a route add or delete - @param mr_is_classify - Is this route result a classify - @param mr_is_multipath - Is this route update a multipath - i.e. 
is this - a path addition to an existing route - @param mr_is_resolve_host - Recurse resolution constraint via a host prefix - @param mr_is_resolve_attached - Recurse resolution constraint via attached prefix - @param mr_next_hop_proto_is_ip4 - The next-hop is IPV4 - @param mr_next_hop_weight - The weight, for UCMP - @param mr_next_hop[16] - the nextop address - @param mr_next_hop_sw_if_index - the next-hop SW interface - @param mr_next_hop_table_id - the next-hop table-id (if appropriate) - @param mr_next_hop_n_out_labels - the number of labels in the label stack - @param mr_next_hop_out_label_stack - the next-hop output label stack, outer most first - @param next_hop_via_label - The next-hop is a resolved via a local label -*/ -define mpls_route_add_del -{ - u32 client_index; - u32 context; - u32 mr_label; - u8 mr_eos; - u32 mr_table_id; - u32 mr_classify_table_index; - u8 mr_create_table_if_needed; - u8 mr_is_add; - u8 mr_is_classify; - u8 mr_is_multipath; - u8 mr_is_resolve_host; - u8 mr_is_resolve_attached; - u8 mr_next_hop_proto_is_ip4; - u8 mr_next_hop_weight; - u8 mr_next_hop[16]; - u8 mr_next_hop_n_out_labels; - u32 mr_next_hop_sw_if_index; - u32 mr_next_hop_table_id; - u32 mr_next_hop_via_label; - u32 mr_next_hop_out_label_stack[mr_next_hop_n_out_labels]; -}; - -/** \brief Reply for MPLS route add / del request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define mpls_route_add_del_reply -{ - u32 context; - i32 retval; -}; - -/** \brief Dump MPLS fib table - @param client_index - opaque cookie to identify the sender -*/ -define mpls_fib_dump -{ - u32 client_index; - u32 context; -}; - -/** \brief FIB path - @param sw_if_index - index of the interface - @param weight - The weight, for UCMP - @param is_local - local if non-zero, else remote - @param is_drop - Drop the packet - @param is_unreach - Drop the packet and rate limit send ICMP unreachable - @param is_prohibit - Drop the packet and rate limit send ICMP prohibited - @param afi - the afi of the next hop, IP46_TYPE_IP4=1, IP46_TYPE_IP6=2 - @param next_hop[16] - the next hop address - - WARNING: this type is replicated, pending cleanup completion - -*/ -typeonly manual_print manual_endian define fib_path2 -{ - u32 sw_if_index; - u32 weight; - u8 is_local; - u8 is_drop; - u8 is_unreach; - u8 is_prohibit; - u8 afi; - u8 next_hop[16]; -}; - -/** \brief mpls FIB table response - @param table_id - MPLS fib table id - @param s_bit - End-of-stack bit - @param label - MPLS label value - @param count - the number of fib_path in path - @param path - array of of fib_path structures -*/ -manual_endian manual_print define mpls_fib_details -{ - u32 context; - u32 table_id; - u8 eos_bit; - u32 label; - u32 count; - vl_api_fib_path2_t path[count]; -}; - -/** \brief Bind/Unbind an MPLS local label to an IP prefix. i.e. create - a per-prefix label entry. - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param mb_mpls_table_id - The MPLS table-id the MPLS entry will be added in - @param mb_label - The MPLS label value to bind - @param mb_ip_table_id - The IP table-id of the IP prefix to bind to. - @param mb_create_table_if_needed - Create either/both tables if required. 
- @param mb_is_bind - Bind or unbind - @param mb_is_ip4 - The prefix to bind to is IPv4 - @param mb_address_length - Length of IP prefix - @param mb_address[16] - IP prefix/ -*/ -define mpls_ip_bind_unbind -{ - u32 client_index; - u32 context; - u32 mb_mpls_table_id; - u32 mb_label; - u32 mb_ip_table_id; - u8 mb_create_table_if_needed; - u8 mb_is_bind; - u8 mb_is_ip4; - u8 mb_address_length; - u8 mb_address[16]; -}; - -/** \brief Reply for MPLS IP bind/unbind request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define mpls_ip_bind_unbind_reply -{ - u32 context; - i32 retval; -}; - -/** \brief MPLS tunnel Add / del route - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param mt_is_add - Is this a route add or delete - @param mt_sw_if_index - The SW interface index of the tunnel to delete - @param mt_next_hop_proto_is_ip4 - The next-hop is IPV4 - @param mt_next_hop_weight - The weight, for UCMP - @param mt_next_hop[16] - the nextop address - @param mt_next_hop_sw_if_index - the next-hop SW interface - @param mt_next_hop_table_id - the next-hop table-id (if appropriate) - @param mt_next_hop_n_out_labels - the number of next-hop output labels - @param mt_next_hop_out_label_stack - the next-hop output label stack, outer most first -*/ -define mpls_tunnel_add_del -{ - u32 client_index; - u32 context; - u32 mt_sw_if_index; - u8 mt_is_add; - u8 mt_l2_only; - u8 mt_next_hop_proto_is_ip4; - u8 mt_next_hop_weight; - u8 mt_next_hop[16]; - u8 mt_next_hop_n_out_labels; - u32 mt_next_hop_sw_if_index; - u32 mt_next_hop_table_id; - u32 mt_next_hop_out_label_stack[mt_next_hop_n_out_labels]; -}; - -/** \brief Reply for MPLS tunnel add / del request - @param context - returned sender context, to match reply w/ request - @param retval - return code - @param sw_if_index - SW interface index of the tunnel created -*/ -define mpls_tunnel_add_del_reply -{ - u32 context; - i32 retval; - u32 sw_if_index; -}; - -/** \brief Dump mpls eth tunnel table - @param client_index - opaque cookie to identify the sender - @param tunnel_index - eth tunnel identifier or -1 in case of all tunnels -*/ -define mpls_tunnel_dump -{ - u32 client_index; - u32 context; - i32 tunnel_index; -}; - -/** \brief mpls eth tunnel operational state response - @param tunnel_index - eth tunnel identifier - @param intfc_address - interface ipv4 addr - @param mask_width - interface ipv4 addr mask - @param hw_if_index - interface id - @param l2_only - - @param tunnel_dst_mac - - @param tx_sw_if_index - - @param encap_index - reference to mpls label table - @param nlabels - number of resolved labels - @param labels - resolved labels -*/ -define mpls_tunnel_details -{ - u32 context; - u32 tunnel_index; - u8 mt_l2_only; - u8 mt_sw_if_index; - u8 mt_next_hop_proto_is_ip4; - u8 mt_next_hop[16]; - u32 mt_next_hop_sw_if_index; - u32 mt_next_hop_table_id; - u32 mt_next_hop_n_labels; - u32 mt_next_hop_out_labels[mt_next_hop_n_labels]; -}; - /** \brief Proxy ARP add / del request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -797,102 +575,6 @@ define l2_patch_add_del_reply i32 retval; }; -/** \brief IPv6 segment routing tunnel add / del request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param is_add - add the tunnel if non-zero, else delete it - @param name[] - tunnel name 
(len. 64) - @param src_address[] - - @param dst_address[] - - @param dst_mask_width - - @param inner_vrf_id - - @param outer_vrf_id - - @param flags_net_byte_order - - @param n_segments - - @param n_tags - - @param segs_and_tags[] - - @param policy_name[] - name of policy to associate this tunnel to (len. 64) -*/ -define sr_tunnel_add_del -{ - u32 client_index; - u32 context; - u8 is_add; - u8 name[64]; - u8 src_address[16]; - u8 dst_address[16]; - u8 dst_mask_width; - u32 inner_vrf_id; - u32 outer_vrf_id; - u16 flags_net_byte_order; - u8 n_segments; - u8 n_tags; - u8 policy_name[64]; - u8 segs_and_tags[0]; -}; - -/** \brief IPv6 segment routing tunnel add / del response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define sr_tunnel_add_del_reply -{ - u32 context; - i32 retval; -}; - -/** \brief IPv6 segment routing policy add / del request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param is_add - add the tunnel if non-zero, else delete it - @param name[] - policy name (len. 64) - @param tunnel_names[] - -*/ -define sr_policy_add_del -{ - u32 client_index; - u32 context; - u8 is_add; - u8 name[64]; - u8 tunnel_names[0]; -}; - -/** \brief IPv6 segment routing policy add / del response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define sr_policy_add_del_reply -{ - u32 context; - i32 retval; -}; - -/** \brief IPv6 segment routing multicast map to policy add / del request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param is_add - add the tunnel if non-zero, else delete it - @param multicast_address[] - IP6 multicast address - @param policy_name[] = policy name (len.64) -*/ -define sr_multicast_map_add_del -{ - u32 client_index; - u32 context; - u8 is_add; - u8 multicast_address[16]; - u8 policy_name[64]; -}; - -/** \brief IPv6 segment routing multicast map to policy add / del response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define sr_multicast_map_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Interface set vpath request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -973,95 +655,6 @@ define sw_interface_set_l2_bridge_reply i32 retval; }; -/** \brief L2 FIB add entry request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param mac - the entry's mac address - @param bd_id - the entry's bridge domain id - @param sw_if_index - the interface - @param is_add - If non zero add the entry, else delete it - @param static_mac - - @param filter_mac - -*/ -define l2fib_add_del -{ - u32 client_index; - u32 context; - u64 mac; - u32 bd_id; - u32 sw_if_index; - u8 is_add; - u8 static_mac; - u8 filter_mac; - u8 bvi_mac; -}; - -/** \brief L2 FIB add entry response - @param context - sender context, to match reply w/ request - @param retval - return code for the add l2fib entry request -*/ -define l2fib_add_del_reply -{ - u32 context; - i32 retval; -}; - -/** \brief Set L2 flags request !!! 
TODO - need more info, feature bits in l2_input.h - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param sw_if_index - interface - @param is_set - if non-zero, set the bits, else clear them - @param feature_bitmap - non-zero bits to set or clear -*/ -define l2_flags -{ - u32 client_index; - u32 context; - u32 sw_if_index; - u8 is_set; - u32 feature_bitmap; -}; - -/** \brief Set L2 bits response - @param context - sender context, to match reply w/ request - @param retval - return code for the set l2 bits request -*/ -define l2_flags_reply -{ - u32 context; - i32 retval; - u32 resulting_feature_bitmap; -}; - -/** \brief Set bridge flags (such as L2_LEARN, L2_FWD, L2_FLOOD, - L2_UU_FLOOD, or L2_ARP_TERM) request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param bd_id - the bridge domain to set the flags for - @param is_set - if non-zero, set the flags, else clear them - @param feature_bitmap - bits that are non-zero to set or clear -*/ -define bridge_flags -{ - u32 client_index; - u32 context; - u32 bd_id; - u8 is_set; - u32 feature_bitmap; -}; - -/** \brief Set bridge flags response - @param context - sender context, to match reply w/ request - @param retval - return code for the set bridge flags request - @param resulting_feature_bitmap - the feature bitmap value after the request is implemented -*/ -define bridge_flags_reply -{ - u32 context; - i32 retval; - u32 resulting_feature_bitmap; -}; - /** \brief Set bridge domain ip to mac entry request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -1335,26 +928,6 @@ define dhcp_proxy_config_2_reply i32 retval; }; -/** \brief L2 fib clear table request, clear all mac entries in the l2 fib - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request -*/ -define l2_fib_clear_table -{ - u32 client_index; - u32 context; -}; - -/** \brief L2 fib clear table response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2_fib_clear_table_reply -{ - u32 context; - i32 retval; -}; - /** \brief L2 interface ethernet flow point filtering enable/disable request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -1463,36 +1036,6 @@ define show_version_reply u8 build_directory[256]; }; -/** \brief l2 fib table entry structure - @param bd_id - the l2 fib / bridge domain table id - @param mac - the entry's mac address - @param sw_if_index - index of the interface - @param static_mac - the entry is statically configured. - @param filter_mac - the entry is a mac filter entry. 
- @param bvi_mac - the mac address is a bridge virtual interface -*/ -define l2_fib_table_entry -{ - u32 context; - u32 bd_id; - u64 mac; - u32 sw_if_index; - u8 static_mac; - u8 filter_mac; - u8 bvi_mac; -}; - -/** \brief Dump l2 fib (aka bridge domain) table - @param client_index - opaque cookie to identify the sender - @param bd_id - the l2 fib / bridge domain table identifier -*/ -define l2_fib_table_dump -{ - u32 client_index; - u32 context; - u32 bd_id; -}; - /* Gross kludge, DGMS */ define interface_name_renumber { @@ -1600,91 +1143,6 @@ define ip6_nd_event u8 mac_ip; }; -/** \brief L2 bridge domain add or delete request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param bd_id - the bridge domain to create - @param flood - enable/disable bcast/mcast flooding in the bd - @param uu_flood - enable/disable uknown unicast flood in the bd - @param forward - enable/disable forwarding on all interfaces in the bd - @param learn - enable/disable learning on all interfaces in the bd - @param arp_term - enable/disable arp termination in the bd - @param mac_age - mac aging time in min, 0 for disabled - @param is_add - add or delete flag -*/ -define bridge_domain_add_del -{ - u32 client_index; - u32 context; - u32 bd_id; - u8 flood; - u8 uu_flood; - u8 forward; - u8 learn; - u8 arp_term; - u8 mac_age; - u8 is_add; -}; - -/** \brief L2 bridge domain add or delete response - @param context - sender context, to match reply w/ request - @param retval - return code for the set bridge flags request -*/ -define bridge_domain_add_del_reply -{ - u32 context; - i32 retval; -}; - -/** \brief L2 bridge domain request operational state details - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param bd_id - the bridge domain id desired or ~0 to request all bds -*/ -define bridge_domain_dump -{ - u32 client_index; - u32 context; - u32 bd_id; -}; - -/** \brief L2 bridge domain operational state response - @param bd_id - the bridge domain id - @param flood - bcast/mcast flooding state on all interfaces in the bd - @param uu_flood - uknown unicast flooding state on all interfaces in the bd - @param forward - forwarding state on all interfaces in the bd - @param learn - learning state on all interfaces in the bd - @param arp_term - arp termination state on all interfaces in the bd - @param mac_age - mac aging time in min, 0 for disabled - @param n_sw_ifs - number of sw_if_index's in the domain -*/ -define bridge_domain_details -{ - u32 context; - u32 bd_id; - u8 flood; - u8 uu_flood; - u8 forward; - u8 learn; - u8 arp_term; - u8 mac_age; - u32 bvi_sw_if_index; - u32 n_sw_ifs; -}; - -/** \brief L2 bridge domain sw interface operational state response - @param bd_id - the bridge domain id - @param sw_if_index - sw_if_index in the domain - @param shg - split horizon group for the interface -*/ -define bridge_domain_sw_if_details -{ - u32 context; - u32 bd_id; - u32 sw_if_index; - u8 shg; -}; - /** \brief DHCP Client config add / del request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request -- cgit 1.2.3-korg From c3af7bf104919be668d696fc1ac05893712dae06 Mon Sep 17 00:00:00 2001 From: Filip Tehlar Date: Fri, 13 Jan 2017 14:13:09 +0100 Subject: LISP: Fix gpe API Change-Id: Iba076fc13e3f870c49fc5ca971dc7b8799188a27 Signed-off-by: Filip Tehlar --- src/vat/api_format.c | 185 
++++++--------------------------- src/vnet/lisp-gpe/lisp_gpe.api | 54 ++++------ src/vnet/lisp-gpe/lisp_gpe.c | 45 ++++---- src/vnet/lisp-gpe/lisp_gpe_api.c | 113 +++++++------------- src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c | 2 +- src/vpp/api/custom_dump.c | 4 +- 6 files changed, 113 insertions(+), 290 deletions(-) (limited to 'src/vpp/api') diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 75148207..176fe836 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -2452,103 +2452,6 @@ static void vat_json_object_add_uint (node, "vni", clib_net_to_host_u32 (mp->vni)); } -static u8 * -format_decap_next (u8 * s, va_list * args) -{ - u32 next_index = va_arg (*args, u32); - - switch (next_index) - { - case LISP_GPE_INPUT_NEXT_DROP: - return format (s, "drop"); - case LISP_GPE_INPUT_NEXT_IP4_INPUT: - return format (s, "ip4"); - case LISP_GPE_INPUT_NEXT_IP6_INPUT: - return format (s, "ip6"); - default: - return format (s, "unknown %d", next_index); - } - return s; -} - -static void -vl_api_lisp_gpe_tunnel_details_t_handler (vl_api_lisp_gpe_tunnel_details_t * - mp) -{ - vat_main_t *vam = &vat_main; - u8 *iid_str; - u8 *flag_str = NULL; - - iid_str = format (0, "%d (0x%x)", ntohl (mp->iid), ntohl (mp->iid)); - -#define _(n,v) if (mp->flags & v) flag_str = format (flag_str, "%s-bit ", #n); - foreach_lisp_gpe_flag_bit; -#undef _ - - print (vam->ofp, "%=20d%=30U%=16U%=16d%=16d%=16U" - "%=16d%=16d%=16sd=16d%=16s%=16s", - mp->tunnels, - mp->is_ipv6 ? format_ip6_address : format_ip4_address, - mp->source_ip, - mp->is_ipv6 ? format_ip6_address : format_ip4_address, - mp->destination_ip, - ntohl (mp->encap_fib_id), - ntohl (mp->decap_fib_id), - format_decap_next, ntohl (mp->dcap_next), - mp->ver_res >> 6, - flag_str, mp->next_protocol, mp->ver_res, mp->res, iid_str); - - vec_free (iid_str); -} - -static void - vl_api_lisp_gpe_tunnel_details_t_handler_json - (vl_api_lisp_gpe_tunnel_details_t * mp) -{ - vat_main_t *vam = &vat_main; - vat_json_node_t *node = NULL; - struct in6_addr ip6; - struct in_addr ip4; - u8 *next_decap_str; - - next_decap_str = format (0, "%U", format_decap_next, htonl (mp->dcap_next)); - - if (VAT_JSON_ARRAY != vam->json_tree.type) - { - ASSERT (VAT_JSON_NONE == vam->json_tree.type); - vat_json_init_array (&vam->json_tree); - } - node = vat_json_array_add (&vam->json_tree); - - vat_json_init_object (node); - vat_json_object_add_uint (node, "tunel", mp->tunnels); - if (mp->is_ipv6) - { - clib_memcpy (&ip6, mp->source_ip, sizeof (ip6)); - vat_json_object_add_ip6 (node, "source address", ip6); - clib_memcpy (&ip6, mp->destination_ip, sizeof (ip6)); - vat_json_object_add_ip6 (node, "destination address", ip6); - } - else - { - clib_memcpy (&ip4, mp->source_ip, sizeof (ip4)); - vat_json_object_add_ip4 (node, "source address", ip4); - clib_memcpy (&ip4, mp->destination_ip, sizeof (ip4)); - vat_json_object_add_ip4 (node, "destination address", ip4); - } - vat_json_object_add_uint (node, "fib encap", ntohl (mp->encap_fib_id)); - vat_json_object_add_uint (node, "fib decap", ntohl (mp->decap_fib_id)); - vat_json_object_add_string_copy (node, "decap next", next_decap_str); - vat_json_object_add_uint (node, "lisp version", mp->ver_res >> 6); - vat_json_object_add_uint (node, "flags", mp->flags); - vat_json_object_add_uint (node, "next protocol", mp->next_protocol); - vat_json_object_add_uint (node, "ver_res", mp->ver_res); - vat_json_object_add_uint (node, "res", mp->res); - vat_json_object_add_uint (node, "iid", ntohl (mp->iid)); - - vec_free (next_decap_str); -} - static void 
vl_api_show_lisp_map_register_state_reply_t_handler (vl_api_show_lisp_map_register_state_reply_t * mp) @@ -3895,7 +3798,6 @@ _(LISP_LOCATOR_DETAILS, lisp_locator_details) \ _(LISP_EID_TABLE_DETAILS, lisp_eid_table_details) \ _(LISP_EID_TABLE_MAP_DETAILS, lisp_eid_table_map_details) \ _(LISP_EID_TABLE_VNI_DETAILS, lisp_eid_table_vni_details) \ -_(LISP_GPE_TUNNEL_DETAILS, lisp_gpe_tunnel_details) \ _(LISP_MAP_RESOLVER_DETAILS, lisp_map_resolver_details) \ _(LISP_MAP_SERVER_DETAILS, lisp_map_server_details) \ _(LISP_ADJACENCIES_GET_REPLY, lisp_adjacencies_get_reply) \ @@ -13265,6 +13167,7 @@ typedef CLIB_PACKED(struct static int api_lisp_gpe_add_del_fwd_entry (vat_main_t * vam) { + u32 dp_table = 0, vni = 0;; unformat_input_t *input = vam->input; vl_api_lisp_gpe_add_del_fwd_entry_t *mp; f64 timeout = ~0; @@ -13272,10 +13175,11 @@ api_lisp_gpe_add_del_fwd_entry (vat_main_t * vam) lisp_eid_vat_t _rmt_eid, *rmt_eid = &_rmt_eid; lisp_eid_vat_t _lcl_eid, *lcl_eid = &_lcl_eid; u8 rmt_eid_set = 0, lcl_eid_set = 0; - u32 action = ~0, p, w; + u32 action = ~0, w; ip4_address_t rmt_rloc4, lcl_rloc4; ip6_address_t rmt_rloc6, lcl_rloc6; - rloc_t *rmt_locs = 0, *lcl_locs = 0, rloc, *curr_rloc = 0; + vl_api_lisp_gpe_locator_t *rmt_locs = 0, *lcl_locs = 0, rloc, *curr_rloc = + 0; memset (&rloc, 0, sizeof (rloc)); @@ -13283,25 +13187,30 @@ api_lisp_gpe_add_del_fwd_entry (vat_main_t * vam) while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { if (unformat (input, "del")) - { - is_add = 0; - } - else if (unformat (input, "rmt_eid %U", unformat_lisp_eid_vat, rmt_eid)) + is_add = 0; + else if (unformat (input, "add")) + is_add = 1; + else if (unformat (input, "reid %U", unformat_lisp_eid_vat, rmt_eid)) { rmt_eid_set = 1; } - else if (unformat (input, "lcl_eid %U", unformat_lisp_eid_vat, lcl_eid)) + else if (unformat (input, "leid %U", unformat_lisp_eid_vat, lcl_eid)) { lcl_eid_set = 1; } - else if (unformat (input, "p %d w %d", &p, &w)) + else if (unformat (input, "vrf %d", &dp_table)) + ; + else if (unformat (input, "bd %d", &dp_table)) + ; + else if (unformat (input, "vni %d", &vni)) + ; + else if (unformat (input, "w %d", &w)) { if (!curr_rloc) { errmsg ("No RLOC configured for setting priority/weight!"); return -99; } - curr_rloc->priority = p; curr_rloc->weight = w; } else if (unformat (input, "loc-pair %U %U", unformat_ip4_address, @@ -13310,12 +13219,12 @@ api_lisp_gpe_add_del_fwd_entry (vat_main_t * vam) rloc.is_ip4 = 1; clib_memcpy (&rloc.addr, &lcl_rloc4, sizeof (lcl_rloc4)); - rloc.priority = rloc.weight = 0; + rloc.weight = 0; vec_add1 (lcl_locs, rloc); clib_memcpy (&rloc.addr, &rmt_rloc4, sizeof (rmt_rloc4)); vec_add1 (rmt_locs, rloc); - /* priority and weight saved in rmt loc */ + /* weight saved in rmt loc */ curr_rloc = &rmt_locs[vec_len (rmt_locs) - 1]; } else if (unformat (input, "loc-pair %U %U", unformat_ip6_address, @@ -13323,12 +13232,12 @@ api_lisp_gpe_add_del_fwd_entry (vat_main_t * vam) { rloc.is_ip4 = 0; clib_memcpy (&rloc.addr, &lcl_rloc6, sizeof (lcl_rloc6)); - rloc.priority = rloc.weight = 0; + rloc.weight = 0; vec_add1 (lcl_locs, rloc); clib_memcpy (&rloc.addr, &rmt_rloc6, sizeof (rmt_rloc6)); vec_add1 (rmt_locs, rloc); - /* priority and weight saved in rmt loc */ + /* weight saved in rmt loc */ curr_rloc = &rmt_locs[vec_len (rmt_locs) - 1]; } else if (unformat (input, "action %d", &action)) @@ -13361,23 +13270,28 @@ api_lisp_gpe_add_del_fwd_entry (vat_main_t * vam) } /* Construct the API message */ - M (LISP_GPE_ADD_DEL_FWD_ENTRY, lisp_gpe_add_del_fwd_entry); + M2 
(LISP_GPE_ADD_DEL_FWD_ENTRY, lisp_gpe_add_del_fwd_entry, + sizeof (vl_api_lisp_gpe_locator_t) * vec_len (rmt_locs) * 2); mp->is_add = is_add; lisp_eid_put_vat (mp->rmt_eid, rmt_eid->addr, rmt_eid->type); lisp_eid_put_vat (mp->lcl_eid, lcl_eid->addr, lcl_eid->type); mp->eid_type = rmt_eid->type; + mp->dp_table = clib_host_to_net_u32 (dp_table); + mp->vni = clib_host_to_net_u32 (vni); mp->rmt_len = rmt_eid->len; mp->lcl_len = lcl_eid->len; mp->action = action; if (0 != rmt_locs && 0 != lcl_locs) { - mp->loc_num = vec_len (rmt_locs); - clib_memcpy (mp->lcl_locs, lcl_locs, - (sizeof (rloc_t) * vec_len (lcl_locs))); - clib_memcpy (mp->rmt_locs, rmt_locs, - (sizeof (rloc_t) * vec_len (rmt_locs))); + mp->loc_num = clib_host_to_net_u32 (vec_len (rmt_locs) * 2); + clib_memcpy (mp->locs, lcl_locs, + (sizeof (vl_api_lisp_gpe_locator_t) * vec_len (lcl_locs))); + + u32 offset = sizeof (vl_api_lisp_gpe_locator_t) * vec_len (lcl_locs); + clib_memcpy (((u8 *) mp->locs) + offset, rmt_locs, + (sizeof (vl_api_lisp_gpe_locator_t) * vec_len (rmt_locs))); } vec_free (lcl_locs); vec_free (rmt_locs); @@ -14666,38 +14580,6 @@ api_lisp_eid_table_dump (vat_main_t * vam) return 0; } -static int -api_lisp_gpe_tunnel_dump (vat_main_t * vam) -{ - vl_api_lisp_gpe_tunnel_dump_t *mp; - f64 timeout = ~0; - - if (!vam->json_output) - { - print (vam->ofp, "%=20s%=30s%=16s%=16s%=16s%=16s" - "%=16s%=16s%=16s%=16s%=16s", - "Tunel", "Source", "Destination", "Fib encap", "Fib decap", - "Decap next", "Lisp version", "Flags", "Next protocol", - "ver_res", "res", "iid"); - } - - M (LISP_GPE_TUNNEL_DUMP, lisp_gpe_tunnel_dump); - /* send it... */ - S; - - /* Use a control ping for synchronization */ - { - vl_api_control_ping_t *mp; - M (CONTROL_PING, control_ping); - S; - } - /* Wait for a reply... */ - W; - - /* NOTREACHED */ - return 0; -} - static int api_lisp_adjacencies_get (vat_main_t * vam) { @@ -17676,8 +17558,8 @@ _(lisp_add_del_local_eid,"vni eid " \ "/ | " \ "locator-set [del]" \ "[key-id sha1|sha256 secret-key ]") \ -_(lisp_gpe_add_del_fwd_entry, "rmt_eid [lcl_eid ] vni " \ - "dp_table loc-pair ... [del]") \ +_(lisp_gpe_add_del_fwd_entry, "reid [leid ] vni " \ + "vrf/bd loc-pair w ... [del]") \ _(lisp_add_del_map_resolver, " [del]") \ _(lisp_add_del_map_server, " [del]") \ _(lisp_gpe_enable_disable, "enable|disable") \ @@ -17702,7 +17584,6 @@ _(lisp_eid_table_dump, "[eid / | ] [vni] " \ "[local] | [remote]") \ _(lisp_eid_table_vni_dump, "") \ _(lisp_eid_table_map_dump, "l2|l3") \ -_(lisp_gpe_tunnel_dump, "") \ _(lisp_map_resolver_dump, "") \ _(lisp_map_server_dump, "") \ _(lisp_adjacencies_get, "vni ") \ diff --git a/src/vnet/lisp-gpe/lisp_gpe.api b/src/vnet/lisp-gpe/lisp_gpe.api index 3956b97d..2a79bece 100644 --- a/src/vnet/lisp-gpe/lisp_gpe.api +++ b/src/vnet/lisp-gpe/lisp_gpe.api @@ -13,6 +13,18 @@ * limitations under the License. 
*/ +/** \brief LISP locator structure + @param is_ip4 - whether addr is IPv4 or v6 + @param weight - locator weight + @param addr - IPv4/6 address +*/ +typeonly manual_print manual_endian define lisp_gpe_locator +{ + u8 is_ip4; + u8 weight; + u8 addr[16]; +}; + /** \brief add or delete lisp gpe tunnel @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -27,12 +39,11 @@ @param lcl_len - local prefix len @param vni - virtual network identifier @param dp_table - vrf/bridge domain id - @param loc_num - number of locators - @param lcl_locs - array of local locators - @param rmt_locs - array of remote locators @param action - negative action when 0 locators configured + @param loc_num - number of locators + @param locs - array of remote locators */ -define lisp_gpe_add_del_fwd_entry +manual_print manual_endian define lisp_gpe_add_del_fwd_entry { u32 client_index; u32 context; @@ -44,10 +55,9 @@ define lisp_gpe_add_del_fwd_entry u8 lcl_len; u32 vni; u32 dp_table; - u32 loc_num; - u8 lcl_locs[loc_num]; - u8 rmt_locs[loc_num]; u8 action; + u32 loc_num; + vl_api_lisp_gpe_locator_t locs[loc_num]; }; /** \brief Reply for gpe_fwd_entry add/del @@ -107,37 +117,9 @@ define lisp_gpe_add_del_iface_reply i32 retval; }; -define lisp_gpe_tunnel_details -{ - u32 context; - u32 tunnels; - u8 is_ipv6; - u8 source_ip[16]; - u8 destination_ip[16]; - u32 encap_fib_id; - u32 decap_fib_id; - u32 dcap_next; - u8 lisp_ver; - u8 next_protocol; - u8 flags; - u8 ver_res; - u8 res; - u32 iid; -}; - -/** \brief Request for gpe tunnel summary status - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - */ -define lisp_gpe_tunnel_dump -{ - u32 client_index; - u32 context; -}; - /* * Local Variables: * eval: (c-set-style "gnu") * End: */ - \ No newline at end of file + diff --git a/src/vnet/lisp-gpe/lisp_gpe.c b/src/vnet/lisp-gpe/lisp_gpe.c index fbda8687..3fd78c6a 100644 --- a/src/vnet/lisp-gpe/lisp_gpe.c +++ b/src/vnet/lisp-gpe/lisp_gpe.c @@ -38,11 +38,15 @@ lisp_gpe_add_del_fwd_entry_command_fn (vlib_main_t * vm, ip_address_t lloc, rloc; clib_error_t *error = 0; gid_address_t _reid, *reid = &_reid, _leid, *leid = &_leid; - u8 reid_set = 0, leid_set = 0, is_negative = 0, vrf_set = 0, vni_set = 0; - u32 vni, vrf, action = ~0, p, w; + u8 reid_set = 0, leid_set = 0, is_negative = 0, dp_table_set = 0, + vni_set = 0; + u32 vni = 0, dp_table = 0, action = ~0, w; locator_pair_t pair, *pairs = 0; int rv; + memset (leid, 0, sizeof (*leid)); + memset (reid, 0, sizeof (*reid)); + /* Get a line of input. 
*/ if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -67,46 +71,46 @@ lisp_gpe_add_del_fwd_entry_command_fn (vlib_main_t * vm, gid_address_vni (reid) = vni; vni_set = 1; } - else if (unformat (line_input, "vrf %u", &vrf)) + else if (unformat (line_input, "vrf %u", &dp_table)) { - vrf_set = 1; + dp_table_set = 1; } - else if (unformat (line_input, "bd %u", &vrf)) + else if (unformat (line_input, "bd %u", &dp_table)) { - vrf_set = 1; + dp_table_set = 1; } else if (unformat (line_input, "negative action %U", unformat_negative_mapping_action, &action)) { is_negative = 1; } - else if (unformat (line_input, "loc-pair %U %U p %d w %d", + else if (unformat (line_input, "loc-pair %U %U w %d", unformat_ip_address, &lloc, - unformat_ip_address, &rloc, &p, &w)) + unformat_ip_address, &rloc, &w)) { pair.lcl_loc = lloc; pair.rmt_loc = rloc; - pair.priority = p; pair.weight = w; vec_add1 (pairs, pair); } else { error = unformat_parse_error (line_input); + vlib_cli_output (vm, "parse error: '%U'", + format_unformat_error, line_input); goto done; } } - unformat_free (line_input); - if (!vni_set || !vrf_set) + if (!vni_set || !dp_table_set) { - error = clib_error_return (0, "vni and vrf must be set!"); + vlib_cli_output (vm, "vni and vrf/bd must be set!"); goto done; } if (!reid_set) { - error = clib_error_return (0, "remote eid must be set!"); + vlib_cli_output (vm, "remote eid must be set!"); goto done; } @@ -114,7 +118,7 @@ lisp_gpe_add_del_fwd_entry_command_fn (vlib_main_t * vm, { if (~0 == action) { - error = clib_error_return (0, "no action set for negative tunnel!"); + vlib_cli_output (vm, "no action set for negative tunnel!"); goto done; } } @@ -122,7 +126,7 @@ lisp_gpe_add_del_fwd_entry_command_fn (vlib_main_t * vm, { if (vec_len (pairs) == 0) { - error = clib_error_return (0, "expected ip4/ip6 locators."); + vlib_cli_output (vm, "expected ip4/ip6 locators"); goto done; } } @@ -142,7 +146,7 @@ lisp_gpe_add_del_fwd_entry_command_fn (vlib_main_t * vm, a->is_add = is_add; a->is_negative = is_negative; a->vni = vni; - a->table_id = vrf; + a->table_id = dp_table; gid_address_copy (&a->lcl_eid, leid); gid_address_copy (&a->rmt_eid, reid); a->locator_pairs = pairs; @@ -150,11 +154,12 @@ lisp_gpe_add_del_fwd_entry_command_fn (vlib_main_t * vm, rv = vnet_lisp_gpe_add_del_fwd_entry (a, 0); if (0 != rv) { - error = clib_error_return (0, "failed to %s gpe tunnel!", - is_add ? "add" : "delete"); + vlib_cli_output (vm, "failed to %s gpe tunnel!", + is_add ? 
"add" : "delete"); } done: + unformat_free (line_input); vec_free (pairs); return error; } @@ -162,8 +167,8 @@ done: /* *INDENT-OFF* */ VLIB_CLI_COMMAND (lisp_gpe_add_del_fwd_entry_command, static) = { .path = "lisp gpe entry", - .short_help = "lisp gpe entry add/del vni vrf [leid ]" - "reid [loc-pair p w ] " + .short_help = "lisp gpe entry add/del vni vrf/bd [leid ]" + "reid [loc-pair w ] " "[negative action ]", .function = lisp_gpe_add_del_fwd_entry_command_fn, }; diff --git a/src/vnet/lisp-gpe/lisp_gpe_api.c b/src/vnet/lisp-gpe/lisp_gpe_api.c index 176ded50..93b60532 100644 --- a/src/vnet/lisp-gpe/lisp_gpe_api.c +++ b/src/vnet/lisp-gpe/lisp_gpe_api.c @@ -28,6 +28,11 @@ #include +#define vl_api_lisp_gpe_locator_pair_t_endian vl_noop_handler +#define vl_api_lisp_gpe_locator_pair_t_print vl_noop_handler +#define vl_api_lisp_gpe_add_del_fwd_entry_t_endian vl_noop_handler +#define vl_api_lisp_gpe_add_del_fwd_entry_t_print vl_noop_handler + #define vl_typedefs /* define message structures */ #include #undef vl_typedefs @@ -47,43 +52,33 @@ #define foreach_vpe_api_msg \ _(LISP_GPE_ADD_DEL_FWD_ENTRY, lisp_gpe_add_del_fwd_entry) \ _(LISP_GPE_ENABLE_DISABLE, lisp_gpe_enable_disable) \ -_(LISP_GPE_ADD_DEL_IFACE, lisp_gpe_add_del_iface) \ -_(LISP_GPE_TUNNEL_DUMP, lisp_gpe_tunnel_dump) - -/** Used for transferring locators via VPP API */ -/* *INDENT-OFF* */ -typedef CLIB_PACKED (struct { - u8 is_ip4; /**< is locator an IPv4 address */ - u8 priority; /**< locator priority */ - u8 weight; /**< locator weight */ - u8 addr[16]; /**< IPv4/IPv6 address */ -}) rloc_t; -/* *INDENT-ON* */ +_(LISP_GPE_ADD_DEL_IFACE, lisp_gpe_add_del_iface) static locator_pair_t * -unformat_lisp_loc_pairs (void *lcl_locs, void *rmt_locs, u32 rloc_num) +unformat_lisp_loc_pairs (void *locs, u32 rloc_num) { u32 i; - locator_pair_t *pairs = 0, pair; - rloc_t *r; + locator_pair_t *pairs = 0, pair, *p; + vl_api_lisp_gpe_locator_t *r; for (i = 0; i < rloc_num; i++) { /* local locator */ - r = &((rloc_t *) lcl_locs)[i]; - memset (&pair.lcl_loc, 0, sizeof (pair.lcl_loc)); + r = &((vl_api_lisp_gpe_locator_t *) locs)[i]; + memset (&pair, 0, sizeof (pair)); ip_address_set (&pair.lcl_loc, &r->addr, r->is_ip4 ? IP4 : IP6); - /* remote locators */ - r = &((rloc_t *) rmt_locs)[i]; - memset (&pair.rmt_loc, 0, sizeof (pair.rmt_loc)); - ip_address_set (&pair.rmt_loc, &r->addr, r->is_ip4 ? IP4 : IP6); - - pair.priority = r->priority; pair.weight = r->weight; - vec_add1 (pairs, pair); } + + for (i = rloc_num; i < rloc_num * 2; i++) + { + /* remote locators */ + r = &((vl_api_lisp_gpe_locator_t *) locs)[i]; + p = &pairs[i - rloc_num]; + ip_address_set (&p->rmt_loc, &r->addr, r->is_ip4 ? 
IP4 : IP6); + } return pairs; } @@ -119,6 +114,15 @@ unformat_lisp_eid_api (gid_address_t * dst, u32 vni, u8 type, void *src, return 0; } +static void + lisp_gpe_add_del_fwd_entry_t_net_to_host + (vl_api_lisp_gpe_add_del_fwd_entry_t * mp) +{ + mp->vni = clib_net_to_host_u32 (mp->vni); + mp->dp_table = clib_net_to_host_u32 (mp->dp_table); + mp->loc_num = clib_net_to_host_u32 (mp->loc_num); +} + static void vl_api_lisp_gpe_add_del_fwd_entry_t_handler (vl_api_lisp_gpe_add_del_fwd_entry_t * mp) @@ -128,6 +132,7 @@ static void locator_pair_t *pairs = 0; int rv = 0; + lisp_gpe_add_del_fwd_entry_t_net_to_host (mp); memset (a, 0, sizeof (a[0])); rv = unformat_lisp_eid_api (&a->rmt_eid, mp->vni, mp->eid_type, @@ -135,7 +140,12 @@ static void rv |= unformat_lisp_eid_api (&a->lcl_eid, mp->vni, mp->eid_type, mp->lcl_eid, mp->lcl_len); - pairs = unformat_lisp_loc_pairs (mp->lcl_locs, mp->rmt_locs, mp->loc_num); + if (mp->loc_num % 2 != 0) + { + rv = -1; + goto send_reply; + } + pairs = unformat_lisp_loc_pairs (mp->locs, mp->loc_num / 2); if (rv || 0 == pairs) goto send_reply; @@ -198,59 +208,6 @@ vl_api_lisp_gpe_add_del_iface_t_handler (vl_api_lisp_gpe_add_del_iface_t * mp) REPLY_MACRO (VL_API_LISP_GPE_ADD_DEL_IFACE_REPLY); } -static void -send_lisp_gpe_fwd_entry_details (lisp_gpe_fwd_entry_t * lfe, - unix_shared_memory_queue_t * q, u32 context) -{ - vl_api_lisp_gpe_tunnel_details_t *rmp; - lisp_gpe_main_t *lgm = &lisp_gpe_main; - - rmp = vl_msg_api_alloc (sizeof (*rmp)); - memset (rmp, 0, sizeof (*rmp)); - rmp->_vl_msg_id = ntohs (VL_API_LISP_GPE_TUNNEL_DETAILS); - - rmp->tunnels = lfe - lgm->lisp_fwd_entry_pool; - - rmp->is_ipv6 = ip_prefix_version (&(lfe->key->rmt.ippref)) == IP6 ? 1 : 0; - ip_address_copy_addr (rmp->source_ip, - &ip_prefix_addr (&(lfe->key->rmt.ippref))); - ip_address_copy_addr (rmp->destination_ip, - &ip_prefix_addr (&(lfe->key->rmt.ippref))); - - rmp->encap_fib_id = htonl (0); - rmp->decap_fib_id = htonl (lfe->eid_fib_index); - rmp->iid = htonl (lfe->key->vni); - rmp->context = context; - - vl_msg_api_send_shmem (q, (u8 *) & rmp); -} - -static void -vl_api_lisp_gpe_tunnel_dump_t_handler (vl_api_lisp_gpe_tunnel_dump_t * mp) -{ - unix_shared_memory_queue_t *q = NULL; - lisp_gpe_main_t *lgm = &lisp_gpe_main; - lisp_gpe_fwd_entry_t *lfe = NULL; - - if (pool_elts (lgm->lisp_fwd_entry_pool) == 0) - { - return; - } - - q = vl_api_client_index_to_input_queue (mp->client_index); - if (q == 0) - { - return; - } - - /* *INDENT-OFF* */ - pool_foreach(lfe, lgm->lisp_fwd_entry_pool, - ({ - send_lisp_gpe_fwd_entry_details(lfe, q, mp->context); - })); - /* *INDENT-ON* */ -} - /* * lisp_gpe_api_hookup * Add vpe's API message handlers to the table. 
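The locator layout implied by the VAT and handler changes above is worth spelling out: the locs[] array carries all local locators first, then the matching remote locators, loc_num is the total (even) count in network byte order, and the handler rejects an odd loc_num before splitting the array in half via unformat_lisp_loc_pairs. A minimal client-side sketch, assuming mp has already been allocated with room for 2 * n_pairs locators and that lcl_locs/rmt_locs are vectors built as in api_lisp_gpe_add_del_fwd_entry (variable names here are illustrative):

  u32 n_pairs = vec_len (rmt_locs);
  u32 sz = sizeof (vl_api_lisp_gpe_locator_t);

  /* total locator count, local + remote, in network byte order */
  mp->loc_num = clib_host_to_net_u32 (2 * n_pairs);
  /* first half of locs[]: the local locators */
  clib_memcpy (mp->locs, lcl_locs, n_pairs * sz);
  /* second half: the corresponding remote locators, in the same order */
  clib_memcpy ((u8 *) mp->locs + n_pairs * sz, rmt_locs, n_pairs * sz);
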
diff --git a/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c b/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c index 26a93a87..e05f0e01 100644 --- a/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c +++ b/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c @@ -913,7 +913,7 @@ format_lisp_fwd_path (u8 * s, va_list ap) { lisp_fwd_path_t *lfp = va_arg (ap, lisp_fwd_path_t *); - s = format (s, "priority:%d weight:%d ", lfp->priority, lfp->weight); + s = format (s, "weight:%d ", lfp->weight); s = format (s, "adj:[%U]\n", format_lisp_gpe_adjacency, lisp_gpe_adjacency_get (lfp->lisp_adj), diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c index f14a031d..4cbb7082 100644 --- a/src/vpp/api/custom_dump.c +++ b/src/vpp/api/custom_dump.c @@ -2948,8 +2948,7 @@ _(lisp_map_resolver_dump) \ _(lisp_map_server_dump) \ _(show_lisp_rloc_probe_state) \ _(show_lisp_map_register_state) \ -_(show_lisp_map_request_mode) \ -_(lisp_gpe_tunnel_dump) +_(show_lisp_map_request_mode) #define _(f) \ static void * vl_api_ ## f ## _t_print \ @@ -3094,7 +3093,6 @@ _(LISP_ADD_DEL_LOCATOR, lisp_add_del_locator) \ _(LISP_EID_TABLE_DUMP, lisp_eid_table_dump) \ _(LISP_EID_TABLE_MAP_DUMP, lisp_eid_table_map_dump) \ _(LISP_EID_TABLE_VNI_DUMP, lisp_eid_table_vni_dump) \ -_(LISP_GPE_TUNNEL_DUMP, lisp_gpe_tunnel_dump) \ _(LISP_MAP_RESOLVER_DUMP, lisp_map_resolver_dump) \ _(LISP_MAP_SERVER_DUMP, lisp_map_server_dump) \ _(LISP_LOCATOR_SET_DUMP, lisp_locator_set_dump) \ -- cgit 1.2.3-korg From 2feaffcb4af8e311b56328015bcfd82f5b15626c Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Sat, 14 Jan 2017 10:30:50 -0500 Subject: Provision linux stack ip4 and ip6 addresses for tap interfaces To simplify system configuration. Converted existing code to use an argument structure, instead of [one or two too many] discrete parameters. 
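The new calling convention looks roughly like this (a sketch only: the tap name, address value and local variable names are illustrative, while the vnet_tap_connect_args_t fields and the vnet_tap_connect signature are the ones used in the diff below):

  vlib_main_t *vm = vlib_get_main ();
  vnet_tap_connect_args_t _a, *ap = &_a;
  u32 sw_if_index = ~0;
  ip4_address_t ip4;
  int rv;

  ip4.as_u32 = clib_host_to_net_u32 (0x0a000001);  /* 10.0.0.1, example only */

  memset (ap, 0, sizeof (*ap));
  ap->intfc_name = (u8 *) "tap0";      /* illustrative tap name */
  ap->sw_if_indexp = &sw_if_index;
  ap->ip4_address_set = 1;             /* also provision the linux-side address */
  ap->ip4_address = &ip4;
  ap->ip4_mask_width = 24;

  rv = vnet_tap_connect (vm, ap);
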
Change-Id: I3eddfa74eeed918c1b04a6285fba494651594332 Signed-off-by: Dave Barach --- src/vat/api_format.c | 25 +++++ src/vnet/api_errno.h | 2 +- src/vnet/unix/tap.api | 6 ++ src/vnet/unix/tap_api.c | 43 ++++++-- src/vnet/unix/tapcli.c | 258 +++++++++++++++++++++++++++++++++------------- src/vnet/unix/tuntap.h | 51 +++++++-- src/vpp/api/custom_dump.c | 7 +- 7 files changed, 301 insertions(+), 91 deletions(-) (limited to 'src/vpp/api') diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 176fe836..1babaf40 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -5790,6 +5790,12 @@ api_tap_connect (vat_main_t * vam) u8 name_set = 0; u8 *tap_name; u8 *tag = 0; + ip4_address_t ip4_address; + u32 ip4_mask_width; + int ip4_address_set = 0; + ip6_address_t ip6_address; + u32 ip6_mask_width; + int ip6_address_set = 0; memset (mac_address, 0, sizeof (mac_address)); @@ -5806,6 +5812,12 @@ api_tap_connect (vat_main_t * vam) name_set = 1; else if (unformat (i, "tag %s", &tag)) ; + else if (unformat (i, "address %U/%d", + unformat_ip4_address, &ip4_address, &ip4_mask_width)) + ip4_address_set = 1; + else if (unformat (i, "address %U/%d", + unformat_ip6_address, &ip6_address, &ip6_mask_width)) + ip6_address_set = 1; else break; } @@ -5837,6 +5849,19 @@ api_tap_connect (vat_main_t * vam) if (tag) clib_memcpy (mp->tag, tag, vec_len (tag)); + if (ip4_address_set) + { + mp->ip4_address_set = 1; + clib_memcpy (mp->ip4_address, &ip4_address, sizeof (mp->ip4_address)); + mp->ip4_mask_width = ip4_mask_width; + } + if (ip6_address_set) + { + mp->ip6_address_set = 1; + clib_memcpy (mp->ip6_address, &ip6_address, sizeof (mp->ip6_address)); + mp->ip6_mask_width = ip6_mask_width; + } + vec_free (tap_name); vec_free (tag); diff --git a/src/vnet/api_errno.h b/src/vnet/api_errno.h index 65e3e591..7166da67 100644 --- a/src/vnet/api_errno.h +++ b/src/vnet/api_errno.h @@ -35,7 +35,7 @@ _(SYSCALL_ERROR_6, -16, "System call error #6") \ _(SYSCALL_ERROR_7, -17, "System call error #7") \ _(SYSCALL_ERROR_8, -18, "System call error #8") \ _(SYSCALL_ERROR_9, -19, "System call error #9") \ -_(SYSCALL_ERROR_10, -20, "System call error #9") \ +_(SYSCALL_ERROR_10, -20, "System call error #10") \ _(FEATURE_DISABLED, -30, "Feature disabled by configuration") \ _(INVALID_REGISTRATION, -31, "Invalid registration") \ _(NEXT_HOP_NOT_IN_FIB, -50, "Next hop not in FIB") \ diff --git a/src/vnet/unix/tap.api b/src/vnet/unix/tap.api index 9b16eadb..1fd0bb09 100644 --- a/src/vnet/unix/tap.api +++ b/src/vnet/unix/tap.api @@ -35,6 +35,12 @@ define tap_connect u8 mac_address[6]; u8 renumber; u32 custom_dev_instance; + u8 ip4_address_set; + u8 ip4_address[4]; + u8 ip4_mask_width; + u8 ip6_address_set; + u8 ip6_address[16]; + u8 ip6_mask_width; u8 tag[64]; }; diff --git a/src/vnet/unix/tap_api.c b/src/vnet/unix/tap_api.c index 99b79ba2..9b8d52a6 100644 --- a/src/vnet/unix/tap_api.c +++ b/src/vnet/unix/tap_api.c @@ -86,11 +86,30 @@ vl_api_tap_connect_t_handler (vl_api_tap_connect_t * mp) unix_shared_memory_queue_t *q; u32 sw_if_index = (u32) ~ 0; u8 *tag; + vnet_tap_connect_args_t _a, *ap = &_a; - rv = vnet_tap_connect_renumber (vm, mp->tap_name, - mp->use_random_mac ? 
0 : mp->mac_address, - &sw_if_index, mp->renumber, - ntohl (mp->custom_dev_instance)); + memset (ap, 0, sizeof (*ap)); + + ap->intfc_name = mp->tap_name; + if (!mp->use_random_mac) + ap->hwaddr_arg = mp->mac_address; + ap->renumber = mp->renumber; + ap->sw_if_indexp = &sw_if_index; + ap->custom_dev_instance = ntohl (mp->custom_dev_instance); + if (mp->ip4_address_set) + { + ap->ip4_address = (ip4_address_t *) mp->ip4_address; + ap->ip4_mask_width = mp->ip4_mask_width; + ap->ip4_address_set = 1; + } + if (mp->ip6_address_set) + { + ap->ip6_address = (ip6_address_t *) mp->ip6_address; + ap->ip6_mask_width = mp->ip6_mask_width; + ap->ip6_address_set = 1; + } + + rv = vnet_tap_connect_renumber (vm, ap); /* Add tag if supplied */ if (rv == 0 && mp->tag[0]) @@ -121,11 +140,19 @@ vl_api_tap_modify_t_handler (vl_api_tap_modify_t * mp) unix_shared_memory_queue_t *q; u32 sw_if_index = (u32) ~ 0; vlib_main_t *vm = vlib_get_main (); + vnet_tap_connect_args_t _a, *ap = &_a; + + memset (ap, 0, sizeof (*ap)); + + ap->orig_sw_if_index = ntohl (mp->sw_if_index); + ap->intfc_name = mp->tap_name; + if (!mp->use_random_mac) + ap->hwaddr_arg = mp->mac_address; + ap->sw_if_indexp = &sw_if_index; + ap->renumber = mp->renumber; + ap->custom_dev_instance = ntohl (mp->custom_dev_instance); - rv = vnet_tap_modify (vm, ntohl (mp->sw_if_index), mp->tap_name, - mp->use_random_mac ? 0 : mp->mac_address, - &sw_if_index, mp->renumber, - ntohl (mp->custom_dev_instance)); + rv = vnet_tap_modify (vm, ap); q = vl_api_client_index_to_input_queue (mp->client_index); if (!q) diff --git a/src/vnet/unix/tapcli.c b/src/vnet/unix/tapcli.c index 9862a2bd..2d3082cb 100644 --- a/src/vnet/unix/tapcli.c +++ b/src/vnet/unix/tapcli.c @@ -41,6 +41,7 @@ #include #include +#include #include static vnet_device_class_t tapcli_dev_class; @@ -780,19 +781,23 @@ static tapcli_interface_t *tapcli_get_new_tapif() return ti; } +typedef struct +{ + ip6_address_t addr; + u32 mask_width; + unsigned int ifindex; +} ip6_ifreq_t; + /** * @brief Connect a TAP interface * * @param vm - vlib_main_t - * @param intfc_name - u8 - * @param hwaddr_arg - u8 - * @param sw_if_indexp - u32 + * @param ap - vnet_tap_connect_args_t * * @return rc - int * */ -int vnet_tap_connect (vlib_main_t * vm, u8 * intfc_name, u8 *hwaddr_arg, - u32 * sw_if_indexp) +int vnet_tap_connect (vlib_main_t * vm, vnet_tap_connect_args_t *ap) { tapcli_main_t * tm = &tapcli_main; tapcli_interface_t * ti = NULL; @@ -815,7 +820,7 @@ int vnet_tap_connect (vlib_main_t * vm, u8 * intfc_name, u8 *hwaddr_arg, return VNET_API_ERROR_SYSCALL_ERROR_1; memset (&ifr, 0, sizeof (ifr)); - strncpy(ifr.ifr_name, (char *) intfc_name, sizeof (ifr.ifr_name)-1); + strncpy(ifr.ifr_name, (char *) ap->intfc_name, sizeof (ifr.ifr_name)-1); ifr.ifr_flags = flags; if (ioctl (dev_net_tun_fd, TUNSETIFF, (void *)&ifr) < 0) { @@ -837,7 +842,7 @@ int vnet_tap_connect (vlib_main_t * vm, u8 * intfc_name, u8 *hwaddr_arg, struct sockaddr_ll sll; memset (&ifr, 0, sizeof(ifr)); - strncpy (ifr.ifr_name, (char *) intfc_name, sizeof (ifr.ifr_name)-1); + strncpy (ifr.ifr_name, (char *) ap->intfc_name, sizeof (ifr.ifr_name)-1); if (ioctl (dev_tap_fd, SIOCGIFINDEX, &ifr) < 0 ) { rv = VNET_API_ERROR_SYSCALL_ERROR_4; @@ -888,11 +893,84 @@ int vnet_tap_connect (vlib_main_t * vm, u8 * intfc_name, u8 *hwaddr_arg, goto error; } + if (ap->ip4_address_set) + { + struct sockaddr_in sin; + /* ip4: mask defaults to /24 */ + u32 mask = clib_host_to_net_u32 (0xFFFFFF00); + + sin.sin_family = AF_INET; + sin.sin_port = 0; + sin.sin_addr.s_addr = 
ap->ip4_address->as_u32; + memcpy (&ifr.ifr_ifru.ifru_addr, &sin, sizeof (sin)); + + if (ioctl (dev_tap_fd, SIOCSIFADDR, &ifr) < 0) + { + rv = VNET_API_ERROR_SYSCALL_ERROR_10; + goto error; + } + + if (ap->ip4_mask_width > 0 && ap->ip4_mask_width < 33) + { + mask = ~0; + mask <<= (32 - ap->ip4_mask_width); + } + + mask = clib_host_to_net_u32(mask); + sin.sin_family = AF_INET; + sin.sin_port = 0; + sin.sin_addr.s_addr = mask; + memcpy (&ifr.ifr_ifru.ifru_addr, &sin, sizeof (sin)); + + if (ioctl (dev_tap_fd, SIOCSIFNETMASK, &ifr) < 0) + { + rv = VNET_API_ERROR_SYSCALL_ERROR_10; + goto error; + } + } + + if (ap->ip6_address_set) + { + struct ifreq ifr2; + ip6_ifreq_t ifr6; + int sockfd6; + + sockfd6 = socket(AF_INET6, SOCK_DGRAM, IPPROTO_IP); + if (sockfd6 < 0) + { + rv = VNET_API_ERROR_SYSCALL_ERROR_10; + goto error; + } + + memset (&ifr2, 0, sizeof(ifr)); + strncpy (ifr2.ifr_name, (char *) ap->intfc_name, + sizeof (ifr2.ifr_name)-1); + if (ioctl (sockfd6, SIOCGIFINDEX, &ifr2) < 0 ) + { + close (sockfd6); + rv = VNET_API_ERROR_SYSCALL_ERROR_4; + goto error; + } + + memcpy (&ifr6.addr, ap->ip6_address, sizeof (ip6_address_t)); + ifr6.mask_width = ap->ip6_mask_width; + ifr6.ifindex = ifr2.ifr_ifindex; + + if (ioctl (sockfd6, SIOCSIFADDR, &ifr6) < 0) + { + close (sockfd6); + clib_unix_warning ("ifr6"); + rv = VNET_API_ERROR_SYSCALL_ERROR_10; + goto error; + } + close (sockfd6); + } + ti = tapcli_get_new_tapif(); ti->per_interface_next_index = ~0; - if (hwaddr_arg != 0) - clib_memcpy(hwaddr, hwaddr_arg, 6); + if (ap->hwaddr_arg != 0) + clib_memcpy(hwaddr, ap->hwaddr_arg, 6); else { f64 now = vlib_time_now(vm); @@ -937,8 +1015,8 @@ int vnet_tap_connect (vlib_main_t * vm, u8 * intfc_name, u8 *hwaddr_arg, hw->max_supported_packet_bytes = TAP_MTU_MAX; hw->max_l3_packet_bytes[VLIB_RX] = hw->max_l3_packet_bytes[VLIB_TX] = hw->max_supported_packet_bytes - sizeof(ethernet_header_t); ti->sw_if_index = hw->sw_if_index; - if (sw_if_indexp) - *sw_if_indexp = hw->sw_if_index; + if (ap->sw_if_indexp) + *(ap->sw_if_indexp) = hw->sw_if_index; } ti->active = 1; @@ -972,16 +1050,15 @@ int vnet_tap_connect (vlib_main_t * vm, u8 * intfc_name, u8 *hwaddr_arg, * @return rc - int * */ -int vnet_tap_connect_renumber (vlib_main_t * vm, u8 * intfc_name, - u8 *hwaddr_arg, u32 * sw_if_indexp, - u8 renumber, u32 custom_dev_instance) +int vnet_tap_connect_renumber (vlib_main_t * vm, + vnet_tap_connect_args_t *ap) { - int rv = vnet_tap_connect(vm, intfc_name, hwaddr_arg, sw_if_indexp); + int rv = vnet_tap_connect(vm, ap); - if (!rv && renumber) - vnet_interface_name_renumber (*sw_if_indexp, custom_dev_instance); + if (!rv && ap->renumber) + vnet_interface_name_renumber (*(ap->sw_if_indexp), ap->custom_dev_instance); - return rv; + return rv; } /** @@ -1118,18 +1195,14 @@ VLIB_CLI_COMMAND (tap_delete_command, static) = { * @return rc - int * */ -int vnet_tap_modify (vlib_main_t * vm, u32 orig_sw_if_index, - u8 * intfc_name, u8 *hwaddr_arg, - u32 * sw_if_indexp, - u8 renumber, u32 custom_dev_instance) +int vnet_tap_modify (vlib_main_t * vm, vnet_tap_connect_args_t *ap) { - int rv = vnet_tap_delete (vm, orig_sw_if_index); + int rv = vnet_tap_delete (vm, ap->orig_sw_if_index); if (rv) - return rv; + return rv; - rv = vnet_tap_connect_renumber(vm, intfc_name, hwaddr_arg, sw_if_indexp, - renumber, custom_dev_instance); + rv = vnet_tap_connect_renumber(vm, ap); return rv; } @@ -1155,6 +1228,7 @@ tap_modify_command_fn (vlib_main_t * vm, u32 new_sw_if_index = ~0; int user_hwaddr = 0; u8 hwaddr[6]; + vnet_tap_connect_args_t _a, *ap= 
&_a; if (tm->is_disabled) { @@ -1179,14 +1253,19 @@ tap_modify_command_fn (vlib_main_t * vm, user_hwaddr = 1; - int rc = vnet_tap_modify (vm, sw_if_index, intfc_name, - (user_hwaddr == 1 ? hwaddr : 0), - &new_sw_if_index, 0, 0); + memset (ap, 0, sizeof(*ap)); + ap->orig_sw_if_index = sw_if_index; + ap->intfc_name = intfc_name; + ap->sw_if_indexp = &new_sw_if_index; + if (user_hwaddr) + ap->hwaddr_arg = hwaddr; + + int rc = vnet_tap_modify (vm, ap); if (!rc) { vlib_cli_output (vm, "Modified %U for Linux tap '%s'", format_vnet_sw_if_index_name, tm->vnet_main, - new_sw_if_index, intfc_name); + *(ap->sw_if_indexp), ap->intfc_name); } else { vlib_cli_output (vm, "Error during modification of tap interface. (rc: %d)", rc); } @@ -1216,82 +1295,117 @@ tap_connect_command_fn (vlib_main_t * vm, vlib_cli_command_t * cmd) { u8 * intfc_name; + unformat_input_t _line_input, *line_input = &_line_input; + vnet_tap_connect_args_t _a, *ap= &_a; tapcli_main_t * tm = &tapcli_main; u8 hwaddr[6]; u8 *hwaddr_arg = 0; u32 sw_if_index; + ip4_address_t ip4_address; + int ip4_address_set = 0; + ip6_address_t ip6_address; + int ip6_address_set = 0; + u32 ip4_mask_width = 0; + u32 ip6_mask_width = 0; if (tm->is_disabled) + return clib_error_return (0, "device disabled..."); + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { - return clib_error_return (0, "device disabled..."); + if (unformat(line_input, "hwaddr %U", unformat_ethernet_address, + &hwaddr)) + hwaddr_arg = hwaddr; + + /* It is here for backward compatibility */ + else if (unformat(line_input, "hwaddr random")) + ; + + else if (unformat (line_input, "address %U/%d", + unformat_ip4_address, &ip4_address, &ip4_mask_width)) + ip4_address_set = 1; + + else if (unformat (line_input, "address %U/%d", + unformat_ip6_address, &ip6_address, &ip6_mask_width)) + ip6_address_set = 1; + + else if (unformat (line_input, "%s", &intfc_name)) + ; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); } - - if (unformat (input, "%s", &intfc_name)) - ; - else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - if (unformat(input, "hwaddr %U", unformat_ethernet_address, - &hwaddr)) - hwaddr_arg = hwaddr; + memset (ap, 0, sizeof (*ap)); - /* It is here for backward compatibility */ - if (unformat(input, "hwaddr random")) - ; + ap->intfc_name = intfc_name; + ap->hwaddr_arg = hwaddr_arg; + if (ip4_address_set) + { + ap->ip4_address = &ip4_address; + ap->ip4_mask_width = ip4_mask_width; + ap->ip4_address_set = 1; + } + if (ip6_address_set) + { + ap->ip6_address = &ip6_address; + ap->ip6_mask_width = ip6_mask_width; + ap->ip6_address_set = 1; + } - int rv = vnet_tap_connect(vm, intfc_name, hwaddr_arg, &sw_if_index); - if (rv) { - switch (rv) { - case VNET_API_ERROR_SYSCALL_ERROR_1: - vlib_cli_output (vm, "Couldn't open /dev/net/tun"); - break; + ap->sw_if_indexp = &sw_if_index; + int rv = vnet_tap_connect(vm, ap); + + switch (rv) + { + case VNET_API_ERROR_SYSCALL_ERROR_1: + return clib_error_return (0, "Couldn't open /dev/net/tun"); + case VNET_API_ERROR_SYSCALL_ERROR_2: - vlib_cli_output (vm, "Error setting flags on '%s'", intfc_name); - break; + return clib_error_return (0, "Error setting flags on '%s'", intfc_name); case VNET_API_ERROR_SYSCALL_ERROR_3: - vlib_cli_output (vm, "Couldn't open provisioning socket"); - break; + return clib_error_return (0, "Couldn't open provisioning socket"); case 
VNET_API_ERROR_SYSCALL_ERROR_4: - vlib_cli_output (vm, "Couldn't get if_index"); - break; + return clib_error_return (0, "Couldn't get if_index"); case VNET_API_ERROR_SYSCALL_ERROR_5: - vlib_cli_output (vm, "Couldn't bind provisioning socket"); - break; + return clib_error_return (0, "Couldn't bind provisioning socket"); case VNET_API_ERROR_SYSCALL_ERROR_6: - vlib_cli_output (0, "Couldn't set device non-blocking flag"); - break; + return clib_error_return (0, "Couldn't set device non-blocking flag"); case VNET_API_ERROR_SYSCALL_ERROR_7: - vlib_cli_output (0, "Couldn't set device MTU"); - break; + return clib_error_return (0, "Couldn't set device MTU"); case VNET_API_ERROR_SYSCALL_ERROR_8: - vlib_cli_output (0, "Couldn't get interface flags"); - break; + return clib_error_return (0, "Couldn't get interface flags"); case VNET_API_ERROR_SYSCALL_ERROR_9: - vlib_cli_output (0, "Couldn't set intfc admin state up"); - break; + return clib_error_return (0, "Couldn't set intfc admin state up"); + + case VNET_API_ERROR_SYSCALL_ERROR_10: + return clib_error_return (0, "Couldn't set intfc address/mask"); case VNET_API_ERROR_INVALID_REGISTRATION: - vlib_cli_output (0, "Invalid registration"); + return clib_error_return (0, "Invalid registration"); + + case 0: break; + default: - vlib_cli_output (0, "Unknown error: %d", rv); - break; + return clib_error_return (0, "Unknown error: %d", rv); } - return 0; - } - vlib_cli_output(vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main(), sw_if_index); + vlib_cli_output(vm, "%U\n", format_vnet_sw_if_index_name, + vnet_get_main(), sw_if_index); return 0; - } +} VLIB_CLI_COMMAND (tap_connect_command, static) = { .path = "tap connect", diff --git a/src/vnet/unix/tuntap.h b/src/vnet/unix/tuntap.h index d7f96cae..7c2d5510 100644 --- a/src/vnet/unix/tuntap.h +++ b/src/vnet/unix/tuntap.h @@ -22,15 +22,48 @@ */ void register_tuntap_inject_node_name (char *name); -int vnet_tap_connect (vlib_main_t * vm, u8 * intfc_name, - u8 *hwaddr_arg, u32 * sw_if_indexp); -int vnet_tap_connect_renumber (vlib_main_t * vm, u8 * intfc_name, - u8 *hwaddr_arg, u32 * sw_if_indexp, - u8 renumber, u32 custom_dev_instance); +/** arguments structure for vnet_tap_connect, vnet_tap_connect_renumber, etc. 
+ */ + +typedef struct +{ + /** Interface name */ + u8 *intfc_name; + /** Mac address */ + u8 *hwaddr_arg; + /** Please set the indicated ip4 address/mask on the interface */ + u8 ip4_address_set; + /** Please set the indicated ip4 address/mask on the interface */ + u8 ip6_address_set; + /** Renumber the (existing) interface */ + u8 renumber; + /** (optional) ip4 address to set */ + ip4_address_t *ip4_address; + /** (optional) ip4 mask width to set */ + u32 ip4_mask_width; + /** (optional) ip6 address to set */ + ip6_address_t *ip6_address; + /** (optional) ip6 mask width to set */ + u32 ip6_mask_width; + /** Output parameter: result sw_if_index */ + u32 *sw_if_indexp; + /** Custom device instance */ + u32 custom_dev_instance; + /** original sw_if_index (renumber) */ + u32 orig_sw_if_index; +} vnet_tap_connect_args_t; + +/** Connect a tap interface */ +int vnet_tap_connect (vlib_main_t * vm, vnet_tap_connect_args_t *args); +/** Connect / renumber a tap interface */ +int vnet_tap_connect_renumber (vlib_main_t * vm, + vnet_tap_connect_args_t *args); + +/** Modify a tap interface */ +int vnet_tap_modify (vlib_main_t * vm, vnet_tap_connect_args_t *args); + +/** delete a tap interface */ int vnet_tap_delete(vlib_main_t *vm, u32 sw_if_index); -int vnet_tap_modify (vlib_main_t * vm, u32 orig_sw_if_index, - u8 * intfc_name, u8 *hwaddr_arg, - u32 * sw_if_indexp, - u8 renumber, u32 custom_dev_instance); + diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c index 4cbb7082..8f59f5ee 100644 --- a/src/vpp/api/custom_dump.c +++ b/src/vpp/api/custom_dump.c @@ -438,7 +438,12 @@ static void *vl_api_tap_connect_t_print s = format (s, "tag %s ", mp->tag); if (memcmp (mp->mac_address, null_mac, 6)) s = format (s, "mac %U ", format_ethernet_address, mp->mac_address); - + if (mp->ip4_address_set) + s = format (s, "address %U/%d ", format_ip4_address, mp->ip4_address, + mp->ip4_mask_width); + if (mp->ip6_address_set) + s = format (s, "address %U/%d ", format_ip6_address, mp->ip6_address, + mp->ip6_mask_width); FINISH; } -- cgit 1.2.3-korg From a9a951f8e5ed6e172fbfbdbb6cb690c67fa2f715 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Mon, 16 Jan 2017 22:06:10 +0100 Subject: Add --without-libssl configure parameter This replaces --without-ipsec and --without-ipv6sr and allows other parts of the code to be disabled if libssl is not available. 
Change-Id: Id97ff3685a7924d7f86622952e0405d94ceb5957 Signed-off-by: Damjan Marion --- build-data/platforms/arm32.mk | 4 ++-- build-data/platforms/dpaa2.mk | 8 ++++---- build-data/platforms/qppc.mk | 4 ++-- build-data/platforms/thunder.mk | 4 ++-- src/configure.ac | 6 ++---- src/vnet.am | 6 +++--- src/vnet/ipsec/ipsec_api.c | 24 ++++++++++++------------ src/vnet/ipsec/ipsec_output.c | 2 +- src/vpp/api/api.c | 2 +- 9 files changed, 29 insertions(+), 31 deletions(-) (limited to 'src/vpp/api') diff --git a/build-data/platforms/arm32.mk b/build-data/platforms/arm32.mk index 7b80061b..47d4ad5a 100644 --- a/build-data/platforms/arm32.mk +++ b/build-data/platforms/arm32.mk @@ -22,8 +22,8 @@ arm32_root_packages = vpp vlib vlib-api vnet svm vpp-api-test \ jvpp gmod vlib_configure_args_arm32 = --with-pre-data=128 -vnet_configure_args_arm32 = --with-dpdk --without-ipsec --without-ipv6sr -vpp_configure_args_arm32 = --with-dpdk --without-ipsec --without-ipv6sr +vnet_configure_args_arm32 = --with-dpdk --without-libssl +vpp_configure_args_arm32 = --with-dpdk --without-libssl arm32_dpdk_arch = "armv7a" arm32_dpdk_target = "arm-armv7a-linuxapp-gcc" diff --git a/build-data/platforms/dpaa2.mk b/build-data/platforms/dpaa2.mk index 0ec627a4..2d4745ac 100644 --- a/build-data/platforms/dpaa2.mk +++ b/build-data/platforms/dpaa2.mk @@ -42,10 +42,10 @@ dpaa2_dpdk_make_extra_args = "CROSS=$(dpaa2_target)- DPDK_PKTMBUF_HEADROOM=256" endif endif -vpp_configure_args_dpaa2 = --with-dpdk --without-ipsec \ - --without-ipv6sr --with-sysroot=$(SYSROOT) -vnet_configure_args_dpaa2 = --with-dpdk --without-ipsec \ - --without-ipv6sr --with-sysroot=$(SYSROOT) +vpp_configure_args_dpaa2 = --with-dpdk --without-libssl \ + --with-sysroot=$(SYSROOT) +vnet_configure_args_dpaa2 = --with-dpdk --without-libssl \ + --with-sysroot=$(SYSROOT) # Set these parameters carefully. The vlib_buffer_t is 256 bytes, i.e. 
vlib_configure_args_dpaa2 = --with-pre-data=256 diff --git a/build-data/platforms/qppc.mk b/build-data/platforms/qppc.mk index 244747e7..983684fc 100644 --- a/build-data/platforms/qppc.mk +++ b/build-data/platforms/qppc.mk @@ -11,10 +11,10 @@ qppc_root_packages = vppinfra vlib vlib-api vnet svm \ vpp vpp-api-test vnet_configure_args_qppc = \ - --without-ipsec --without-ipv6sr + --without-libssl vpp_configure_args_qppc = \ - --without-ipsec --without-ipv6sr + --without-libssl vlib_configure_args_qppc = --with-pre-data=128 diff --git a/build-data/platforms/thunder.mk b/build-data/platforms/thunder.mk index f891f4a1..31b6a510 100644 --- a/build-data/platforms/thunder.mk +++ b/build-data/platforms/thunder.mk @@ -15,10 +15,10 @@ thunder_root_packages = vppinfra vlib-cavium-dpdk vnet-cavium-dpdk cavium-dpdk \ vpp-cavium-dpdk vpp-api-test-cavium-dpdk vnet-cavium-dpdk_configure_args_thunder = \ - --with-dpdk --without-ipsec --without-ipv6sr + --with-dpdk --without-libssl vpp-cavium-dpdk_configure_args_thunder = \ - --with-dpdk --without-ipsec --without-ipv6sr + --with-dpdk --without-libssl cavium-dpdk_configure_args_thunder = --with-headroom=256 diff --git a/src/configure.ac b/src/configure.ac index eb380d8b..b2234448 100644 --- a/src/configure.ac +++ b/src/configure.ac @@ -101,8 +101,7 @@ WITH_ARG(dpdk_crypto, [Use DPDK cryptodev]) WITH_ARG(dpdk_mlx5_pmd, [Use DPDK with mlx5 PMD]) # --without-X -WITHOUT_ARG(ipsec, [Disable IPSec]) -WITHOUT_ARG(ipv6sr, [Disable IPv6 SR]) +WITHOUT_ARG(libssl, [Disable libssl]) WITHOUT_ARG(apicli, [Disable binary api CLI]) AC_ARG_WITH(unix, @@ -133,8 +132,7 @@ AC_SUBST(APICLI, [-DVPP_API_TEST_BUILTIN=${n_with_apicli}]) AC_DEFINE_UNQUOTED(DPDK, [${n_with_dpdk}]) AC_DEFINE_UNQUOTED(DPDK_SHARED_LIB, [${n_enable_dpdk_shared}]) AC_DEFINE_UNQUOTED(DPDK_CRYPTO, [${n_with_dpdk_crypto}]) -AC_DEFINE_UNQUOTED(IPSEC, [${n_with_ipsec}]) -AC_DEFINE_UNQUOTED(IPV6SR, [${n_with_ipv6sr}]) +AC_DEFINE_UNQUOTED(WITH_LIBSSL, [${n_with_libssl}]) # Silence following noise: diff --git a/src/vnet.am b/src/vnet.am index 3b2a25e8..93dd1e6c 100644 --- a/src/vnet.am +++ b/src/vnet.am @@ -26,7 +26,7 @@ libvnet_la_DEPENDENCIES = \ libvnet_la_LIBADD = $(libvnet_la_DEPENDENCIES) -lm -lpthread -ldl -lrt $(DPDK_LD_ADD) libvnet_la_LDFLAGS = $(DPDK_LD_FLAGS) -if WITH_IPV6SR +if WITH_LIBSSL libvnet_la_LIBADD += -lcrypto endif @@ -372,7 +372,7 @@ API_FILES += vnet/bfd/bfd.api ######################################## # Layer 3 protocol: IPSec ######################################## -if WITH_IPSEC +if WITH_LIBSSL libvnet_la_SOURCES += \ vnet/ipsec/ipsec.c \ vnet/ipsec/ipsec_cli.c \ @@ -673,7 +673,7 @@ nobase_include_HEADERS += \ # ipv6 segment routing ######################################## -if WITH_IPV6SR +if WITH_LIBSSL libvnet_la_SOURCES += \ vnet/sr/sr.c \ vnet/sr/sr_replicate.c \ diff --git a/src/vnet/ipsec/ipsec_api.c b/src/vnet/ipsec/ipsec_api.c index 30cc5bd2..9bcf63b4 100644 --- a/src/vnet/ipsec/ipsec_api.c +++ b/src/vnet/ipsec/ipsec_api.c @@ -26,7 +26,7 @@ #include -#if IPSEC > 0 +#if WITH_LIBSSL > 0 #include #include #endif /* IPSEC */ @@ -63,7 +63,7 @@ _(IKEV2_SET_LOCAL_KEY, ikev2_set_local_key) static void vl_api_ipsec_spd_add_del_t_handler (vl_api_ipsec_spd_add_del_t * mp) { -#if IPSEC == 0 +#if WITH_LIBSSL == 0 clib_warning ("unimplemented"); #else @@ -95,7 +95,7 @@ static void vl_api_ipsec_interface_add_del_spd_t_handler VALIDATE_SW_IF_INDEX (mp); -#if IPSEC > 0 +#if WITH_LIBSSL > 0 rv = ipsec_set_interface_spd (vm, sw_if_index, spd_id, mp->is_add); #else rv = VNET_API_ERROR_UNIMPLEMENTED; 
@@ -113,7 +113,7 @@ static void vl_api_ipsec_spd_add_del_entry_t_handler vl_api_ipsec_spd_add_del_entry_reply_t *rmp; int rv; -#if IPSEC > 0 +#if WITH_LIBSSL > 0 ipsec_policy_t p; memset (&p, 0, sizeof (p)); @@ -176,7 +176,7 @@ static void vl_api_ipsec_sad_add_del_entry_t_handler vlib_main_t *vm __attribute__ ((unused)) = vlib_get_main (); vl_api_ipsec_sad_add_del_entry_reply_t *rmp; int rv; -#if IPSEC > 0 +#if WITH_LIBSSL > 0 ipsec_sa_t sa; memset (&sa, 0, sizeof (sa)); @@ -324,7 +324,7 @@ vl_api_ipsec_spd_dump_t_handler (vl_api_ipsec_spd_dump_t * mp) ipsec_spd_t *spd; uword *p; u32 spd_index; -#if IPSEC > 0 +#if WITH_LIBSSL > 0 q = vl_api_client_index_to_input_queue (mp->client_index); if (q == 0) return; @@ -355,7 +355,7 @@ vl_api_ipsec_sa_set_key_t_handler (vl_api_ipsec_sa_set_key_t * mp) vlib_main_t *vm __attribute__ ((unused)) = vlib_get_main (); vl_api_ipsec_sa_set_key_reply_t *rmp; int rv; -#if IPSEC > 0 +#if WITH_LIBSSL > 0 ipsec_sa_t sa; sa.id = ntohl (mp->sa_id); sa.crypto_key_len = mp->crypto_key_length; @@ -377,7 +377,7 @@ vl_api_ikev2_profile_add_del_t_handler (vl_api_ikev2_profile_add_del_t * mp) vl_api_ikev2_profile_add_del_reply_t *rmp; int rv = 0; -#if IPSEC > 0 +#if WITH_LIBSSL > 0 vlib_main_t *vm = vlib_get_main (); clib_error_t *error; u8 *tmp = format (0, "%s", mp->name); @@ -399,7 +399,7 @@ static void vl_api_ikev2_profile_set_auth_reply_t *rmp; int rv = 0; -#if IPSEC > 0 +#if WITH_LIBSSL > 0 vlib_main_t *vm = vlib_get_main (); clib_error_t *error; u8 *tmp = format (0, "%s", mp->name); @@ -423,7 +423,7 @@ vl_api_ikev2_profile_set_id_t_handler (vl_api_ikev2_profile_set_id_t * mp) vl_api_ikev2_profile_add_del_reply_t *rmp; int rv = 0; -#if IPSEC > 0 +#if WITH_LIBSSL > 0 vlib_main_t *vm = vlib_get_main (); clib_error_t *error; u8 *tmp = format (0, "%s", mp->name); @@ -447,7 +447,7 @@ vl_api_ikev2_profile_set_ts_t_handler (vl_api_ikev2_profile_set_ts_t * mp) vl_api_ikev2_profile_set_ts_reply_t *rmp; int rv = 0; -#if IPSEC > 0 +#if WITH_LIBSSL > 0 vlib_main_t *vm = vlib_get_main (); clib_error_t *error; u8 *tmp = format (0, "%s", mp->name); @@ -470,7 +470,7 @@ vl_api_ikev2_set_local_key_t_handler (vl_api_ikev2_set_local_key_t * mp) vl_api_ikev2_profile_set_ts_reply_t *rmp; int rv = 0; -#if IPSEC > 0 +#if WITH_LIBSSL > 0 vlib_main_t *vm = vlib_get_main (); clib_error_t *error; diff --git a/src/vnet/ipsec/ipsec_output.c b/src/vnet/ipsec/ipsec_output.c index 97977899..df93b5e4 100644 --- a/src/vnet/ipsec/ipsec_output.c +++ b/src/vnet/ipsec/ipsec_output.c @@ -27,7 +27,7 @@ #define ESP_NODE "esp-encrypt" #endif -#if IPSEC > 0 +#if WITH_LIBSSL > 0 #define foreach_ipsec_output_next \ _(DROP, "error-drop") \ diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index 3d6905dd..3afc3383 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -54,7 +54,7 @@ #include #include #include -#if IPV6SR > 0 +#if WITH_LIBSSL > 0 #include #endif #include -- cgit 1.2.3-korg From 8f544964a3df144a441b136c2a01427eca731eea Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Wed, 18 Jan 2017 10:23:22 -0500 Subject: Fix coverity warnings, VPP-608 Change-Id: Ib0144ba3a9a09971d3946c932e8fed6d5c1ad278 Signed-off-by: Dave Barach --- src/plugins/snat/in2out.c | 8 ++++++-- src/plugins/snat/out2in.c | 8 ++++++-- src/vnet/devices/virtio/vhost-user.c | 8 ++++++-- src/vnet/unix/tapcli.c | 5 +++-- src/vpp/api/api_main.c | 10 +++++----- src/vppinfra/bihash_template.c | 11 ++++++----- 6 files changed, 32 insertions(+), 18 deletions(-) (limited to 'src/vpp/api') diff --git a/src/plugins/snat/in2out.c 
b/src/plugins/snat/in2out.c index 76a6a12c..ba752cf0 100644 --- a/src/plugins/snat/in2out.c +++ b/src/plugins/snat/in2out.c @@ -1232,8 +1232,12 @@ snat_in2out_worker_handoff_fn (vlib_main_t * vm, if (clib_bihash_search_8_8 (&sm->worker_by_in, &kv0, &value0)) { /* No, assign next available worker (RR) */ - next_worker_index = sm->first_worker_index + - sm->workers[sm->next_worker++ % vec_len (sm->workers)]; + next_worker_index = sm->first_worker_index; + if (vec_len (sm->workers)) + { + next_worker_index += + sm->workers[sm->next_worker++ % _vec_len (sm->workers)]; + } /* add non-traslated packets worker lookup */ kv0.value = next_worker_index; diff --git a/src/plugins/snat/out2in.c b/src/plugins/snat/out2in.c index f1329733..855e9efb 100644 --- a/src/plugins/snat/out2in.c +++ b/src/plugins/snat/out2in.c @@ -901,8 +901,12 @@ snat_out2in_worker_handoff_fn (vlib_main_t * vm, if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0)) { /* No, assign next available worker (RR) */ - next_worker_index = sm->first_worker_index + - sm->workers[sm->next_worker++ % vec_len (sm->workers)]; + next_worker_index = sm->first_worker_index; + if (vec_len (sm->workers)) + { + next_worker_index += + sm->workers[sm->next_worker++ % _vec_len (sm->workers)]; + } } else { diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index 9a7c1dc0..ac142867 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -2326,12 +2326,16 @@ vhost_user_process (vlib_main_t * vm, sizeof (sun.sun_path) - 1); /* Avoid hanging VPP if the other end does not accept */ - fcntl(sockfd, F_SETFL, O_NONBLOCK); + if (fcntl(sockfd, F_SETFL, O_NONBLOCK) < 0) + clib_unix_warning ("fcntl"); + if (connect (sockfd, (struct sockaddr *) &sun, sizeof (struct sockaddr_un)) == 0) { /* Set the socket to blocking as it was before */ - fcntl(sockfd, F_SETFL, 0); + if (fcntl(sockfd, F_SETFL, 0) < 0) + clib_unix_warning ("fcntl2"); + vui->sock_errno = 0; template.file_descriptor = sockfd; template.private_data = diff --git a/src/vnet/unix/tapcli.c b/src/vnet/unix/tapcli.c index 2d3082cb..e9dbf729 100644 --- a/src/vnet/unix/tapcli.c +++ b/src/vnet/unix/tapcli.c @@ -899,8 +899,9 @@ int vnet_tap_connect (vlib_main_t * vm, vnet_tap_connect_args_t *ap) /* ip4: mask defaults to /24 */ u32 mask = clib_host_to_net_u32 (0xFFFFFF00); + memset(&sin, 0, sizeof(sin)); sin.sin_family = AF_INET; - sin.sin_port = 0; + /* sin.sin_port = 0; */ sin.sin_addr.s_addr = ap->ip4_address->as_u32; memcpy (&ifr.ifr_ifru.ifru_addr, &sin, sizeof (sin)); @@ -1294,7 +1295,7 @@ tap_connect_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { - u8 * intfc_name; + u8 * intfc_name = 0; unformat_input_t _line_input, *line_input = &_line_input; vnet_tap_connect_args_t _a, *ap= &_a; tapcli_main_t * tm = &tapcli_main; diff --git a/src/vpp/api/api_main.c b/src/vpp/api/api_main.c index db532061..fd6998f4 100644 --- a/src/vpp/api/api_main.c +++ b/src/vpp/api/api_main.c @@ -139,11 +139,11 @@ api_command_fn (vlib_main_t * vm, "%s error: %U\n", cmdp, format_api_error, vam, rv); - if (vam->regenerate_interface_table) - { - vam->regenerate_interface_table = 0; - api_sw_interface_dump (vam); - } + } + if (vam->regenerate_interface_table) + { + vam->regenerate_interface_table = 0; + api_sw_interface_dump (vam); } unformat_free (vam->input); return 0; diff --git a/src/vppinfra/bihash_template.c b/src/vppinfra/bihash_template.c index 7c817a20..d8b97b5f 100644 --- a/src/vppinfra/bihash_template.c 
+++ b/src/vppinfra/bihash_template.c @@ -199,7 +199,7 @@ BV (split_and_rehash_linear) /* Find a free slot in the new linear scan bucket */ for (; j < new_length; j++) { - /* Old value in use? Forget it. */ + /* Old value not in use? Forget it. */ if (BV (clib_bihash_is_free) (&(old_values->kvp[i]))) goto doublebreak; @@ -212,11 +212,12 @@ BV (split_and_rehash_linear) j++; goto doublebreak; } - /* This should never happen... */ - clib_warning ("BUG: linear rehash failed!"); - BV (value_free) (h, new_values); - return 0; } + /* This should never happen... */ + clib_warning ("BUG: linear rehash failed!"); + BV (value_free) (h, new_values); + return 0; + doublebreak:; } return new_values; -- cgit 1.2.3-korg From fe6bdfd84573cd8813a211f9094ee734f088ce16 Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Fri, 20 Jan 2017 19:50:09 -0500 Subject: binary-api debug CLI works with plugins Change-Id: I81f33f5153d5afac94b66b5a8cb91da77463af79 Signed-off-by: Dave Barach --- src/examples/sample-plugin/sample/sample_test.c | 37 +--- src/plugins/acl/acl_test.c | 51 +----- src/plugins/flowperpkt/flowperpkt_test.c | 45 +---- .../export-vxlan-gpe/vxlan_gpe_ioam_export_test.c | 46 +---- src/plugins/ioam/export/ioam_export_test.c | 44 +---- src/plugins/ioam/lib-pot/pot_test.c | 48 +---- src/plugins/ioam/lib-trace/trace_test.c | 48 +---- src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_test.c | 56 +----- src/plugins/lb/lb_test.c | 35 +--- src/plugins/snat/snat_test.c | 57 +----- src/vat/api_format.c | 95 ++++------ src/vat/vat.h | 33 +--- src/vlib/unix/plugin.c | 9 +- src/vlibapi/api_helper_macros.h | 1 - src/vlibapi/vat_helper_macros.h | 76 ++++++++ src/vpp-api-test.am | 3 + src/vpp.am | 6 +- src/vpp/api/api_main.c | 39 ++++ src/vpp/api/plugin.c | 201 +++++++++++++++++++++ src/vpp/api/plugin.h | 61 +++++++ 20 files changed, 476 insertions(+), 515 deletions(-) create mode 100644 src/vlibapi/vat_helper_macros.h create mode 100644 src/vpp/api/plugin.c create mode 100644 src/vpp/api/plugin.h (limited to 'src/vpp/api') diff --git a/src/examples/sample-plugin/sample/sample_test.c b/src/examples/sample-plugin/sample/sample_test.c index dd1b0215..a47710ee 100644 --- a/src/examples/sample-plugin/sample/sample_test.c +++ b/src/examples/sample-plugin/sample/sample_test.c @@ -23,6 +23,7 @@ #include #include #include +#include uword unformat_sw_if_index (unformat_input_t * input, va_list * args); @@ -87,42 +88,6 @@ foreach_standard_reply_retval_handler; _(SAMPLE_MACSWAP_ENABLE_DISABLE_REPLY, sample_macswap_enable_disable_reply) -/* M: construct, but don't yet send a message */ - -#define M(T,t) \ -do { \ - vam->result_ready = 0; \ - mp = vl_msg_api_alloc(sizeof(*mp)); \ - memset (mp, 0, sizeof (*mp)); \ - mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \ - mp->client_index = vam->my_client_index; \ -} while(0); - -#define M2(T,t,n) \ -do { \ - vam->result_ready = 0; \ - mp = vl_msg_api_alloc(sizeof(*mp)+(n)); \ - memset (mp, 0, sizeof (*mp)); \ - mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \ - mp->client_index = vam->my_client_index; \ -} while(0); - -/* S: send a message */ -#define S (vl_msg_api_send_shmem (vam->vl_input_queue, (u8 *)&mp)) - -/* W: wait for results, with timeout */ -#define W \ -do { \ - timeout = vat_time_now (vam) + 1.0; \ - \ - while (vat_time_now (vam) < timeout) { \ - if (vam->result_ready == 1) { \ - return (vam->retval); \ - } \ - } \ - return -99; \ -} while(0); - static int api_sample_macswap_enable_disable (vat_main_t * vam) { sample_test_main_t * sm = &sample_test_main; diff --git 
a/src/plugins/acl/acl_test.c b/src/plugins/acl/acl_test.c index a0e413e1..5cacf716 100644 --- a/src/plugins/acl/acl_test.c +++ b/src/plugins/acl/acl_test.c @@ -25,6 +25,7 @@ #include #include #include +#include uword unformat_sw_if_index (unformat_input_t * input, va_list * args); @@ -259,42 +260,6 @@ _(MACIP_ACL_INTERFACE_ADD_DEL_REPLY, macip_acl_interface_add_del_reply) \ _(MACIP_ACL_INTERFACE_GET_REPLY, macip_acl_interface_get_reply) \ _(ACL_PLUGIN_GET_VERSION_REPLY, acl_plugin_get_version_reply) -/* M: construct, but don't yet send a message */ - -#define M(T,t) \ -do { \ - vam->result_ready = 0; \ - mp = vl_msg_api_alloc(sizeof(*mp)); \ - memset (mp, 0, sizeof (*mp)); \ - mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \ - mp->client_index = vam->my_client_index; \ -} while(0); - -#define M2(T,t,n) \ -do { \ - vam->result_ready = 0; \ - mp = vl_msg_api_alloc(sizeof(*mp)+(n)); \ - memset (mp, 0, sizeof (*mp)); \ - mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \ - mp->client_index = vam->my_client_index; \ -} while(0); - -/* S: send a message */ -#define S (vl_msg_api_send_shmem (vam->vl_input_queue, (u8 *)&mp)) - -/* W: wait for results, with timeout */ -#define W \ -do { \ - timeout = vat_time_now (vam) + 1.0; \ - \ - while (vat_time_now (vam) < timeout) { \ - if (vam->result_ready == 1) { \ - return (vam->retval); \ - } \ - } \ - return -99; \ -} while(0); - static int api_acl_plugin_get_version (vat_main_t * vam) { acl_test_main_t * sm = &acl_test_main; @@ -520,7 +485,6 @@ static int api_acl_add_replace (vat_main_t * vam) static int api_acl_del (vat_main_t * vam) { - acl_test_main_t * sm = &acl_test_main; unformat_input_t * i = vam->input; f64 timeout; vl_api_acl_del_t * mp; @@ -544,7 +508,6 @@ static int api_acl_del (vat_main_t * vam) static int api_macip_acl_del (vat_main_t * vam) { - acl_test_main_t * sm = &acl_test_main; unformat_input_t * i = vam->input; f64 timeout; vl_api_acl_del_t * mp; @@ -568,7 +531,6 @@ static int api_macip_acl_del (vat_main_t * vam) static int api_acl_interface_add_del (vat_main_t * vam) { - acl_test_main_t * sm = &acl_test_main; unformat_input_t * i = vam->input; f64 timeout; vl_api_acl_interface_add_del_t * mp; @@ -636,7 +598,6 @@ static int api_acl_interface_add_del (vat_main_t * vam) static int api_macip_acl_interface_add_del (vat_main_t * vam) { - acl_test_main_t * sm = &acl_test_main; unformat_input_t * i = vam->input; f64 timeout; vl_api_macip_acl_interface_add_del_t * mp; @@ -687,7 +648,6 @@ static int api_macip_acl_interface_add_del (vat_main_t * vam) static int api_acl_interface_set_acl_list (vat_main_t * vam) { - acl_test_main_t * sm = &acl_test_main; unformat_input_t * i = vam->input; f64 timeout; vl_api_acl_interface_set_acl_list_t * mp; @@ -746,7 +706,6 @@ static int api_acl_interface_set_acl_list (vat_main_t * vam) static int api_acl_interface_list_dump (vat_main_t * vam) { - acl_test_main_t * sm = &acl_test_main; unformat_input_t * i = vam->input; f64 timeout; u32 sw_if_index = ~0; @@ -775,7 +734,6 @@ static int api_acl_interface_list_dump (vat_main_t * vam) static int api_acl_dump (vat_main_t * vam) { - acl_test_main_t * sm = &acl_test_main; unformat_input_t * i = vam->input; f64 timeout; u32 acl_index = ~0; @@ -802,7 +760,6 @@ static int api_acl_dump (vat_main_t * vam) static int api_macip_acl_dump (vat_main_t * vam) { - acl_test_main_t * sm = &acl_test_main; unformat_input_t * i = vam->input; f64 timeout; u32 acl_index = ~0; @@ -978,8 +935,8 @@ _(macip_acl_interface_add_del, " | sw_if_index [add|del] acl msg_id_base = 
vl_client_get_first_plugin_msg_id ((char *) name); if (sm->msg_id_base != (u16) ~0) - vat_api_hookup (vam); + acl_vat_api_hookup (vam); vec_free(name); diff --git a/src/plugins/flowperpkt/flowperpkt_test.c b/src/plugins/flowperpkt/flowperpkt_test.c index 716818ff..9211ebe3 100644 --- a/src/plugins/flowperpkt/flowperpkt_test.c +++ b/src/plugins/flowperpkt/flowperpkt_test.c @@ -19,6 +19,7 @@ #include #include #include +#include /** * @file vpp_api_test plugin @@ -88,47 +89,9 @@ foreach_standard_reply_retval_handler; _(FLOWPERPKT_TX_INTERFACE_ADD_DEL_REPLY, \ flowperpkt_tx_interface_add_del_reply) - -/* M: construct, but don't yet send a message */ - -#define M(T,t) \ -do { \ - vam->result_ready = 0; \ - mp = vl_msg_api_alloc(sizeof(*mp)); \ - memset (mp, 0, sizeof (*mp)); \ - mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \ - mp->client_index = vam->my_client_index; \ -} while(0); - -#define M2(T,t,n) \ -do { \ - vam->result_ready = 0; \ - mp = vl_msg_api_alloc(sizeof(*mp)+(n)); \ - memset (mp, 0, sizeof (*mp)); \ - mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \ - mp->client_index = vam->my_client_index; \ -} while(0); - -/* S: send a message */ -#define S (vl_msg_api_send_shmem (vam->vl_input_queue, (u8 *)&mp)) - -/* W: wait for results, with timeout */ -#define W \ -do { \ - timeout = vat_time_now (vam) + 1.0; \ - \ - while (vat_time_now (vam) < timeout) { \ - if (vam->result_ready == 1) { \ - return (vam->retval); \ - } \ - } \ - return -99; \ -} while(0); - static int api_flowperpkt_tx_interface_add_del (vat_main_t * vam) { - flowperpkt_test_main_t *sm = &flowperpkt_test_main; unformat_input_t *i = vam->input; f64 timeout; int enable_disable = 1; @@ -177,8 +140,8 @@ api_flowperpkt_tx_interface_add_del (vat_main_t * vam) #define foreach_vpe_api_msg \ _(flowperpkt_tx_interface_add_del, " [disable]") -void -vat_api_hookup (vat_main_t * vam) +static void +flowperpkt_vat_api_hookup (vat_main_t * vam) { flowperpkt_test_main_t *sm = &flowperpkt_test_main; /* Hook up handlers for replies from the data plane plug-in */ @@ -218,7 +181,7 @@ vat_plugin_register (vat_main_t * vam) /* Don't attempt to hook up API messages if the data plane plugin is AWOL */ if (sm->msg_id_base != (u16) ~ 0) - vat_api_hookup (vam); + flowperpkt_vat_api_hookup (vam); vec_free (name); diff --git a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_test.c b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_test.c index 494263d9..5b641cc7 100644 --- a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_test.c +++ b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_test.c @@ -23,7 +23,7 @@ #include #include #include - +#include /* Declare message IDs */ #include @@ -86,47 +86,9 @@ foreach_standard_reply_retval_handler; #define foreach_vpe_api_reply_msg \ _(VXLAN_GPE_IOAM_EXPORT_ENABLE_DISABLE_REPLY, vxlan_gpe_ioam_export_enable_disable_reply) - -/* M: construct, but don't yet send a message */ - -#define M(T,t) \ -do { \ - vam->result_ready = 0; \ - mp = vl_msg_api_alloc(sizeof(*mp)); \ - memset (mp, 0, sizeof (*mp)); \ - mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \ - mp->client_index = vam->my_client_index; \ -} while(0); - -#define M2(T,t,n) \ -do { \ - vam->result_ready = 0; \ - mp = vl_msg_api_alloc(sizeof(*mp)+(n)); \ - memset (mp, 0, sizeof (*mp)); \ - mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \ - mp->client_index = vam->my_client_index; \ -} while(0); - -/* S: send a message */ -#define S (vl_msg_api_send_shmem (vam->vl_input_queue, (u8 *)&mp)) - -/* W: 
wait for results, with timeout */ -#define W \ -do { \ - timeout = vat_time_now (vam) + 1.0; \ - \ - while (vat_time_now (vam) < timeout) { \ - if (vam->result_ready == 1) { \ - return (vam->retval); \ - } \ - } \ - return -99; \ -} while(0); - static int api_vxlan_gpe_ioam_export_enable_disable (vat_main_t * vam) { - export_test_main_t *sm = &export_test_main; unformat_input_t *i = vam->input; f64 timeout; int is_disable = 0; @@ -160,8 +122,8 @@ api_vxlan_gpe_ioam_export_enable_disable (vat_main_t * vam) #define foreach_vpe_api_msg \ _(vxlan_gpe_ioam_export_enable_disable, " [disable]") -void -vat_api_hookup (vat_main_t * vam) +static void +vxlan_gpe_ioam_vat_api_hookup (vat_main_t * vam) { export_test_main_t *sm = &export_test_main; /* Hook up handlers for replies from the data plane plug-in */ @@ -199,7 +161,7 @@ vat_plugin_register (vat_main_t * vam) sm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name); if (sm->msg_id_base != (u16) ~ 0) - vat_api_hookup (vam); + vxlan_gpe_ioam_vat_api_hookup (vam); vec_free (name); diff --git a/src/plugins/ioam/export/ioam_export_test.c b/src/plugins/ioam/export/ioam_export_test.c index f991fc0c..a4ec80d0 100644 --- a/src/plugins/ioam/export/ioam_export_test.c +++ b/src/plugins/ioam/export/ioam_export_test.c @@ -23,6 +23,7 @@ #include #include #include +#include /* Declare message IDs */ @@ -87,46 +88,9 @@ foreach_standard_reply_retval_handler; _(IOAM_EXPORT_IP6_ENABLE_DISABLE_REPLY, ioam_export_ip6_enable_disable_reply) -/* M: construct, but don't yet send a message */ - -#define M(T,t) \ -do { \ - vam->result_ready = 0; \ - mp = vl_msg_api_alloc(sizeof(*mp)); \ - memset (mp, 0, sizeof (*mp)); \ - mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \ - mp->client_index = vam->my_client_index; \ -} while(0); - -#define M2(T,t,n) \ -do { \ - vam->result_ready = 0; \ - mp = vl_msg_api_alloc(sizeof(*mp)+(n)); \ - memset (mp, 0, sizeof (*mp)); \ - mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \ - mp->client_index = vam->my_client_index; \ -} while(0); - -/* S: send a message */ -#define S (vl_msg_api_send_shmem (vam->vl_input_queue, (u8 *)&mp)) - -/* W: wait for results, with timeout */ -#define W \ -do { \ - timeout = vat_time_now (vam) + 1.0; \ - \ - while (vat_time_now (vam) < timeout) { \ - if (vam->result_ready == 1) { \ - return (vam->retval); \ - } \ - } \ - return -99; \ -} while(0); - static int api_ioam_export_ip6_enable_disable (vat_main_t * vam) { - export_test_main_t *sm = &export_test_main; unformat_input_t *i = vam->input; f64 timeout; int is_disable = 0; @@ -159,8 +123,8 @@ api_ioam_export_ip6_enable_disable (vat_main_t * vam) #define foreach_vpe_api_msg \ _(ioam_export_ip6_enable_disable, " [disable]") -void -vat_api_hookup (vat_main_t * vam) +static void +ioam_export_vat_api_hookup (vat_main_t * vam) { export_test_main_t *sm = &export_test_main; /* Hook up handlers for replies from the data plane plug-in */ @@ -198,7 +162,7 @@ vat_plugin_register (vat_main_t * vam) sm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name); if (sm->msg_id_base != (u16) ~ 0) - vat_api_hookup (vam); + ioam_export_vat_api_hookup (vam); vec_free (name); diff --git a/src/plugins/ioam/lib-pot/pot_test.c b/src/plugins/ioam/lib-pot/pot_test.c index 2e870238..9f9d0c99 100644 --- a/src/plugins/ioam/lib-pot/pot_test.c +++ b/src/plugins/ioam/lib-pot/pot_test.c @@ -23,6 +23,7 @@ #include #include #include +#include /* Declare message IDs */ #include @@ -118,48 +119,9 @@ _(POT_PROFILE_ACTIVATE_REPLY, pot_profile_activate_reply) \ 
_(POT_PROFILE_DEL_REPLY, pot_profile_del_reply) \ _(POT_PROFILE_SHOW_CONFIG_DETAILS, pot_profile_show_config_details) - -/* M: construct, but don't yet send a message */ - -#define M(T,t) \ -do { \ - vam->result_ready = 0; \ - mp = vl_msg_api_alloc(sizeof(*mp)); \ - memset (mp, 0, sizeof (*mp)); \ - mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \ - mp->client_index = vam->my_client_index; \ -} while(0); - -#define M2(T,t,n) \ -do { \ - vam->result_ready = 0; \ - mp = vl_msg_api_alloc(sizeof(*mp)+(n)); \ - memset (mp, 0, sizeof (*mp)); \ - mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \ - mp->client_index = vam->my_client_index; \ -} while(0); - -/* S: send a message */ -#define S (vl_msg_api_send_shmem (vam->vl_input_queue, (u8 *)&mp)) - -/* W: wait for results, with timeout */ -#define W \ -do { \ - timeout = vat_time_now (vam) + 1.0; \ - \ - while (vat_time_now (vam) < timeout) { \ - if (vam->result_ready == 1) { \ - return (vam->retval); \ - } \ - } \ - return -99; \ -} while(0); - - static int api_pot_profile_add (vat_main_t *vam) { #define MAX_BITS 64 - pot_test_main_t * sm = &pot_test_main; unformat_input_t *input = vam->input; vl_api_pot_profile_add_t *mp; u8 *name = NULL; @@ -234,7 +196,6 @@ OUT: static int api_pot_profile_activate (vat_main_t *vam) { #define MAX_BITS 64 - pot_test_main_t * sm = &pot_test_main; unformat_input_t *input = vam->input; vl_api_pot_profile_activate_t *mp; u8 *name = NULL; @@ -275,7 +236,6 @@ OUT: static int api_pot_profile_del (vat_main_t *vam) { - pot_test_main_t * sm = &pot_test_main; vl_api_pot_profile_del_t *mp; f64 timeout; @@ -287,7 +247,6 @@ static int api_pot_profile_del (vat_main_t *vam) static int api_pot_profile_show_config_dump (vat_main_t *vam) { - pot_test_main_t * sm = &pot_test_main; unformat_input_t *input = vam->input; vl_api_pot_profile_show_config_dump_t *mp; f64 timeout; @@ -320,7 +279,8 @@ _(pot_profile_activate, "name id [0-1] ") \ _(pot_profile_del, "[id ]") \ _(pot_profile_show_config_dump, "id [0-1]") -void vat_api_hookup (vat_main_t *vam) +static void +pot_vat_api_hookup (vat_main_t *vam) { pot_test_main_t * sm = &pot_test_main; /* Hook up handlers for replies from the data plane plug-in */ @@ -357,7 +317,7 @@ clib_error_t * vat_plugin_register (vat_main_t *vam) sm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name); if (sm->msg_id_base != (u16) ~0) - vat_api_hookup (vam); + pot_vat_api_hookup (vam); vec_free(name); diff --git a/src/plugins/ioam/lib-trace/trace_test.c b/src/plugins/ioam/lib-trace/trace_test.c index 111dd461..f4f1d4d5 100644 --- a/src/plugins/ioam/lib-trace/trace_test.c +++ b/src/plugins/ioam/lib-trace/trace_test.c @@ -23,6 +23,7 @@ #include #include #include +#include /* Declare message IDs */ #include @@ -115,48 +116,9 @@ _(TRACE_PROFILE_ADD_REPLY, trace_profile_add_reply) \ _(TRACE_PROFILE_DEL_REPLY, trace_profile_del_reply) \ _(TRACE_PROFILE_SHOW_CONFIG_REPLY, trace_profile_show_config_reply) - -/* M: construct, but don't yet send a message */ - -#define M(T,t) \ -do { \ - vam->result_ready = 0; \ - mp = vl_msg_api_alloc(sizeof(*mp)); \ - memset (mp, 0, sizeof (*mp)); \ - mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \ - mp->client_index = vam->my_client_index; \ -} while(0); - -#define M2(T,t,n) \ -do { \ - vam->result_ready = 0; \ - mp = vl_msg_api_alloc(sizeof(*mp)+(n)); \ - memset (mp, 0, sizeof (*mp)); \ - mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \ - mp->client_index = vam->my_client_index; \ -} while(0); - -/* S: send a message */ -#define S 
(vl_msg_api_send_shmem (vam->vl_input_queue, (u8 *)&mp)) - -/* W: wait for results, with timeout */ -#define W \ -do { \ - timeout = vat_time_now (vam) + 1.0; \ - \ - while (vat_time_now (vam) < timeout) { \ - if (vam->result_ready == 1) { \ - return (vam->retval); \ - } \ - } \ - return -99; \ -} while(0); - - static int api_trace_profile_add (vat_main_t * vam) { - trace_test_main_t *sm = &trace_test_main; unformat_input_t *input = vam->input; vl_api_trace_profile_add_t *mp; u8 trace_type = 0; @@ -204,7 +166,6 @@ api_trace_profile_add (vat_main_t * vam) static int api_trace_profile_del (vat_main_t * vam) { - trace_test_main_t *sm = &trace_test_main; vl_api_trace_profile_del_t *mp; f64 timeout; @@ -217,7 +178,6 @@ api_trace_profile_del (vat_main_t * vam) static int api_trace_profile_show_config (vat_main_t * vam) { - trace_test_main_t *sm = &trace_test_main; vl_api_trace_profile_show_config_t *mp; f64 timeout; M (TRACE_PROFILE_SHOW_CONFIG, trace_profile_show_config); @@ -237,8 +197,8 @@ _(trace_profile_del, "[id ]") \ _(trace_profile_show_config, "[id ]") -void -vat_api_hookup (vat_main_t * vam) +static void +ioam_trace_vat_api_hookup (vat_main_t * vam) { trace_test_main_t *sm = &trace_test_main; /* Hook up handlers for replies from the data plane plug-in */ @@ -276,7 +236,7 @@ vat_plugin_register (vat_main_t * vam) sm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name); if (sm->msg_id_base != (u16) ~ 0) - vat_api_hookup (vam); + ioam_trace_vat_api_hookup (vam); vec_free (name); diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_test.c b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_test.c index b5fee724..7c4088ee 100644 --- a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_test.c +++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_test.c @@ -23,6 +23,7 @@ #include #include #include +#include /* Declare message IDs */ #include @@ -96,49 +97,9 @@ _(VXLAN_GPE_IOAM_VNI_DISABLE_REPLY, vxlan_gpe_ioam_vni_disable_reply) \ _(VXLAN_GPE_IOAM_TRANSIT_ENABLE_REPLY, vxlan_gpe_ioam_transit_enable_reply) \ _(VXLAN_GPE_IOAM_TRANSIT_DISABLE_REPLY, vxlan_gpe_ioam_transit_disable_reply) \ - -/* M: construct, but don't yet send a message */ - -#define M(T,t) \ -do { \ - vam->result_ready = 0; \ - mp = vl_msg_api_alloc(sizeof(*mp)); \ - memset (mp, 0, sizeof (*mp)); \ - mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \ - mp->client_index = vam->my_client_index; \ -} while(0); - -#define M2(T,t,n) \ -do { \ - vam->result_ready = 0; \ - mp = vl_msg_api_alloc(sizeof(*mp)+(n)); \ - memset (mp, 0, sizeof (*mp)); \ - mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \ - mp->client_index = vam->my_client_index; \ -} while(0); - -/* S: send a message */ -#define S (vl_msg_api_send_shmem (vam->vl_input_queue, (u8 *)&mp)) - -/* W: wait for results, with timeout */ -#define W \ -do { \ - timeout = vat_time_now (vam) + 1.0; \ - \ - while (vat_time_now (vam) < timeout) { \ - if (vam->result_ready == 1) { \ - return (vam->retval); \ - } \ - } \ - return -99; \ -} while(0); - - static int api_vxlan_gpe_ioam_enable (vat_main_t * vam) { - vxlan_gpe_test_main_t *sm = &vxlan_gpe_test_main; - unformat_input_t *input = vam->input; vl_api_vxlan_gpe_ioam_enable_t *mp; f64 timeout; @@ -179,7 +140,6 @@ api_vxlan_gpe_ioam_enable (vat_main_t * vam) static int api_vxlan_gpe_ioam_disable (vat_main_t * vam) { - vxlan_gpe_test_main_t *sm = &vxlan_gpe_test_main; vl_api_vxlan_gpe_ioam_disable_t *mp; f64 timeout; @@ -192,8 +152,6 @@ api_vxlan_gpe_ioam_disable (vat_main_t * vam) static int api_vxlan_gpe_ioam_vni_enable (vat_main_t * 
vam) { - vxlan_gpe_test_main_t *sm = &vxlan_gpe_test_main; - unformat_input_t *line_input = vam->input; vl_api_vxlan_gpe_ioam_vni_enable_t *mp; ip4_address_t local4, remote4; @@ -289,8 +247,6 @@ api_vxlan_gpe_ioam_vni_enable (vat_main_t * vam) static int api_vxlan_gpe_ioam_vni_disable (vat_main_t * vam) { - vxlan_gpe_test_main_t *sm = &vxlan_gpe_test_main; - unformat_input_t *line_input = vam->input; vl_api_vxlan_gpe_ioam_vni_disable_t *mp; ip4_address_t local4, remote4; @@ -386,8 +342,6 @@ api_vxlan_gpe_ioam_vni_disable (vat_main_t * vam) static int api_vxlan_gpe_ioam_transit_enable (vat_main_t * vam) { - vxlan_gpe_test_main_t *sm = &vxlan_gpe_test_main; - unformat_input_t *line_input = vam->input; vl_api_vxlan_gpe_ioam_transit_enable_t *mp; ip4_address_t local4; @@ -458,8 +412,6 @@ api_vxlan_gpe_ioam_transit_enable (vat_main_t * vam) static int api_vxlan_gpe_ioam_transit_disable (vat_main_t * vam) { - vxlan_gpe_test_main_t *sm = &vxlan_gpe_test_main; - unformat_input_t *line_input = vam->input; vl_api_vxlan_gpe_ioam_transit_disable_t *mp; ip4_address_t local4; @@ -545,8 +497,8 @@ _(vxlan_gpe_ioam_transit_disable, ""\ "dst-ip [outer-fib-index ]") \ -void -vat_api_hookup (vat_main_t * vam) +static void +vxlan_gpe_vat_api_hookup (vat_main_t * vam) { vxlan_gpe_test_main_t *sm = &vxlan_gpe_test_main; /* Hook up handlers for replies from the data plane plug-in */ @@ -584,7 +536,7 @@ vat_plugin_register (vat_main_t * vam) sm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name); if (sm->msg_id_base != (u16) ~ 0) - vat_api_hookup (vam); + vxlan_gpe_vat_api_hookup (vam); vec_free (name); diff --git a/src/plugins/lb/lb_test.c b/src/plugins/lb/lb_test.c index 8c2eaa91..7e3519a8 100644 --- a/src/plugins/lb/lb_test.c +++ b/src/plugins/lb/lb_test.c @@ -19,6 +19,7 @@ #include #include #include +#include //TODO: Move that to vat/plugin_api.c ////////////////////////// @@ -127,35 +128,8 @@ foreach_standard_reply_retval_handler; _(LB_ADD_DEL_VIP_REPLY, lb_add_del_vip_reply) \ _(LB_ADD_DEL_AS_REPLY, lb_add_del_as_reply) -/* M: construct, but don't yet send a message */ -#define M(T,t) \ -do { \ - vam->result_ready = 0; \ - mp = vl_msg_api_alloc(sizeof(*mp)); \ - memcpy (mp, &mps, sizeof (*mp)); \ - mp->_vl_msg_id = ntohs (VL_API_##T + lbtm->msg_id_base); \ - mp->client_index = vam->my_client_index; \ -} while(0); - -/* S: send a message */ -#define S (vl_msg_api_send_shmem (vam->vl_input_queue, (u8 *)&mp)) - -/* W: wait for results, with timeout */ -#define W \ -do { \ - timeout = vat_time_now (vam) + 1.0; \ - \ - while (vat_time_now (vam) < timeout) { \ - if (vam->result_ready == 1) { \ - return (vam->retval); \ - } \ - } \ - return -99; \ -} while(0); - static int api_lb_conf (vat_main_t * vam) { - lb_test_main_t *lbtm = &lb_test_main; unformat_input_t *i = vam->input; f64 timeout; vl_api_lb_conf_t mps, *mp; @@ -177,7 +151,6 @@ static int api_lb_conf (vat_main_t * vam) static int api_lb_add_del_vip (vat_main_t * vam) { - lb_test_main_t *lbtm = &lb_test_main; unformat_input_t * i = vam->input; f64 timeout; vl_api_lb_add_del_vip_t mps, *mp; @@ -215,7 +188,6 @@ static int api_lb_add_del_vip (vat_main_t * vam) static int api_lb_add_del_as (vat_main_t * vam) { - lb_test_main_t *lbtm = &lb_test_main; unformat_input_t * i = vam->input; f64 timeout; vl_api_lb_add_del_as_t mps, *mp; @@ -246,7 +218,8 @@ _(lb_conf, " [gre4|gre6] [del]") \ _(lb_add_del_as, "
[del]") -void vat_api_hookup (vat_main_t *vam) +static void +lb_vat_api_hookup (vat_main_t *vam) { lb_test_main_t * lbtm = &lb_test_main; /* Hook up handlers for replies from the data plane plug-in */ @@ -285,7 +258,7 @@ clib_error_t * vat_plugin_register (vat_main_t *vam) lbtm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name); if (lbtm->msg_id_base != (u16) ~0) - vat_api_hookup (vam); + lb_vat_api_hookup (vam); vec_free(name); diff --git a/src/plugins/snat/snat_test.c b/src/plugins/snat/snat_test.c index 013d7d9b..b601d7d9 100644 --- a/src/plugins/snat/snat_test.c +++ b/src/plugins/snat/snat_test.c @@ -21,6 +21,7 @@ #include #include #include +#include uword unformat_sw_if_index (unformat_input_t * input, va_list * args); @@ -103,44 +104,8 @@ _(SNAT_INTERFACE_ADDR_DETAILS, snat_interface_addr_details) \ _(SNAT_IPFIX_ENABLE_DISABLE_REPLY, \ snat_ipfix_enable_disable_reply) -/* M: construct, but don't yet send a message */ -#define M(T,t) \ -do { \ - vam->result_ready = 0; \ - mp = vl_msg_api_alloc(sizeof(*mp)); \ - memset (mp, 0, sizeof (*mp)); \ - mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \ - mp->client_index = vam->my_client_index; \ -} while(0); - -#define M2(T,t,n) \ -do { \ - vam->result_ready = 0; \ - mp = vl_msg_api_alloc(sizeof(*mp)+(n)); \ - memset (mp, 0, sizeof (*mp)); \ - mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \ - mp->client_index = vam->my_client_index; \ -} while(0); - -/* S: send a message */ -#define S (vl_msg_api_send_shmem (vam->vl_input_queue, (u8 *)&mp)) - -/* W: wait for results, with timeout */ -#define W \ -do { \ - timeout = vat_time_now (vam) + 1.0; \ - \ - while (vat_time_now (vam) < timeout) { \ - if (vam->result_ready == 1) { \ - return (vam->retval); \ - } \ - } \ - return -99; \ -} while(0); - static int api_snat_add_address_range (vat_main_t * vam) { - snat_test_main_t * sm = &snat_test_main; unformat_input_t * i = vam->input; f64 timeout; ip4_address_t start_addr, end_addr; @@ -200,7 +165,6 @@ static int api_snat_add_address_range (vat_main_t * vam) static int api_snat_interface_add_del_feature (vat_main_t * vam) { - snat_test_main_t * sm = &snat_test_main; unformat_input_t * i = vam->input; f64 timeout; vl_api_snat_interface_add_del_feature_t * mp; @@ -246,7 +210,6 @@ static int api_snat_interface_add_del_feature (vat_main_t * vam) static int api_snat_add_static_mapping(vat_main_t * vam) { - snat_test_main_t * sm = &snat_test_main; unformat_input_t * i = vam->input; f64 timeout; vl_api_snat_add_static_mapping_t * mp; @@ -338,7 +301,6 @@ static void vl_api_snat_static_mapping_details_t_handler static int api_snat_static_mapping_dump(vat_main_t * vam) { - snat_test_main_t * sm = &snat_test_main; f64 timeout; vl_api_snat_static_mapping_dump_t * mp; @@ -398,7 +360,6 @@ static void vl_api_snat_show_config_reply_t_handler static int api_snat_show_config(vat_main_t * vam) { - snat_test_main_t * sm = &snat_test_main; f64 timeout; vl_api_snat_show_config_t * mp; @@ -425,7 +386,6 @@ static void vl_api_snat_address_details_t_handler static int api_snat_address_dump(vat_main_t * vam) { - snat_test_main_t * sm = &snat_test_main; f64 timeout; vl_api_snat_address_dump_t * mp; @@ -460,7 +420,6 @@ static void vl_api_snat_interface_details_t_handler static int api_snat_interface_dump(vat_main_t * vam) { - snat_test_main_t * sm = &snat_test_main; f64 timeout; vl_api_snat_interface_dump_t * mp; @@ -485,7 +444,6 @@ static int api_snat_interface_dump(vat_main_t * vam) static int api_snat_set_workers (vat_main_t * vam) { - 
snat_test_main_t * sm = &snat_test_main; unformat_input_t * i = vam->input; f64 timeout; vl_api_snat_set_workers_t * mp; @@ -523,7 +481,6 @@ static void vl_api_snat_worker_details_t_handler static int api_snat_worker_dump(vat_main_t * vam) { - snat_test_main_t * sm = &snat_test_main; f64 timeout; vl_api_snat_worker_dump_t * mp; @@ -548,7 +505,6 @@ static int api_snat_worker_dump(vat_main_t * vam) static int api_snat_ipfix_enable_disable (vat_main_t * vam) { - snat_test_main_t * sm = &snat_test_main; unformat_input_t * i = vam->input; f64 timeout; vl_api_snat_add_del_interface_addr_t * mp; @@ -597,7 +553,6 @@ static void vl_api_snat_interface_addr_details_t_handler static int api_snat_interface_addr_dump(vat_main_t * vam) { - snat_test_main_t * sm = &snat_test_main; f64 timeout; vl_api_snat_interface_addr_dump_t * mp; @@ -622,7 +577,6 @@ static int api_snat_interface_addr_dump(vat_main_t * vam) static int api_snat_add_del_interface_addr (vat_main_t * vam) { - snat_test_main_t * sm = &snat_test_main; unformat_input_t * i = vam->input; f64 timeout; vl_api_snat_ipfix_enable_disable_t * mp; @@ -677,7 +631,8 @@ _(snat_interface_addr_dump, "") \ _(snat_ipfix_enable_disable, "[domain ] [src_port ] " \ "[disable]") -void vat_api_hookup (vat_main_t *vam) +static void +snat_vat_api_hookup (vat_main_t *vam) { snat_test_main_t * sm __attribute__((unused)) = &snat_test_main; /* Hook up handlers for replies from the data plane plug-in */ @@ -693,7 +648,9 @@ void vat_api_hookup (vat_main_t *vam) #undef _ /* API messages we can send */ -#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n); +#define _(n,h) \ + hash_set_mem (vam->function_by_name, #n, api_##n); \ + clib_warning ("vam %llx add '%s' handler %llx", vam, #n, api_##n); foreach_vpe_api_msg; #undef _ @@ -715,7 +672,7 @@ clib_error_t * vat_plugin_register (vat_main_t *vam) sm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name); if (sm->msg_id_base != (u16) ~0) - vat_api_hookup (vam); + snat_vat_api_hookup (vam); vec_free(name); diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 1babaf40..653cf79f 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -70,6 +70,46 @@ #include #undef vl_printfun +#include + +f64 +vat_time_now (vat_main_t * vam) +{ +#if VPP_API_TEST_BUILTIN + return vlib_time_now (vam->vlib_main); +#else + return clib_time_now (&vam->clib_time); +#endif +} + +void +errmsg (char *fmt, ...) +{ + vat_main_t *vam = &vat_main; + va_list va; + u8 *s; + + va_start (va, fmt); + s = va_format (0, fmt, &va); + va_end (va); + + vec_add1 (s, 0); + +#if VPP_API_TEST_BUILTIN + vlib_cli_output (vam->vlib_main, (char *) s); +#else + { + if (vam->ifp != stdin) + fformat (vam->ofp, "%s(%d): \n", vam->current_file, + vam->input_line_number); + fformat (vam->ofp, (char *) s); + fflush (vam->ofp); + } +#endif + + vec_free (s); +} + static uword api_unformat_sw_if_index (unformat_input_t * input, va_list * args) { @@ -88,8 +128,6 @@ api_unformat_sw_if_index (unformat_input_t * input, va_list * args) return 1; } -void vat_suspend (vlib_main_t * vm, f64 interval); - #if VPP_API_TEST_BUILTIN == 0 /* Parse an IP4 address %d.%d.%d.%d. 
*/ uword @@ -3867,59 +3905,6 @@ _(SW_INTERFACE_SET_DPDK_HQOS_TCTBL_REPLY, \ sw_interface_set_dpdk_hqos_tctbl_reply) #endif -/* M: construct, but don't yet send a message */ - -#define M(T,t) \ -do { \ - vam->result_ready = 0; \ - mp = vl_msg_api_alloc_as_if_client(sizeof(*mp)); \ - memset (mp, 0, sizeof (*mp)); \ - mp->_vl_msg_id = ntohs (VL_API_##T); \ - mp->client_index = vam->my_client_index; \ -} while(0); - -#define M2(T,t,n) \ -do { \ - vam->result_ready = 0; \ - mp = vl_msg_api_alloc_as_if_client(sizeof(*mp)+(n)); \ - memset (mp, 0, sizeof (*mp)); \ - mp->_vl_msg_id = ntohs (VL_API_##T); \ - mp->client_index = vam->my_client_index; \ -} while(0); - - -/* S: send a message */ -#define S (vl_msg_api_send_shmem (vam->vl_input_queue, (u8 *)&mp)) - -/* W: wait for results, with timeout */ -#define W \ -do { \ - timeout = vat_time_now (vam) + 1.0; \ - \ - while (vat_time_now (vam) < timeout) { \ - if (vam->result_ready == 1) { \ - return (vam->retval); \ - } \ - vat_suspend (vam->vlib_main, 1e-3); \ - } \ - return -99; \ -} while(0); - -/* W2: wait for results, with timeout */ -#define W2(body) \ -do { \ - timeout = vat_time_now (vam) + 1.0; \ - \ - while (vat_time_now (vam) < timeout) { \ - if (vam->result_ready == 1) { \ - (body); \ - return (vam->retval); \ - } \ - vat_suspend (vam->vlib_main, 1e-3); \ - } \ - return -99; \ -} while(0); - typedef struct { u8 *name; diff --git a/src/vat/vat.h b/src/vat/vat.h index 64be2f7f..3d7d96ae 100644 --- a/src/vat/vat.h +++ b/src/vat/vat.h @@ -190,36 +190,11 @@ typedef struct vlib_main_t *vlib_main; } vat_main_t; -vat_main_t vat_main; - -static inline f64 -vat_time_now (vat_main_t * vam) -{ -#if VPP_API_TEST_BUILTIN - return vlib_time_now (vam->vlib_main); -#else - return clib_time_now (&vam->clib_time); -#endif -} - -#if VPP_API_TEST_BUILTIN -#define errmsg(fmt,args...) \ -do { \ - vat_main_t *__vam = &vat_main; \ - vlib_cli_output (__vam->vlib_main, fmt, ##args); \ - } while(0); -#else -#define errmsg(fmt,args...) 
\ -do { \ - vat_main_t *__vam = &vat_main; \ - if(__vam->ifp != stdin) \ - fformat(__vam->ofp,"%s(%d): \n", __vam->current_file, \ - __vam->input_line_number); \ - fformat(__vam->ofp, fmt "\n", ##args); \ - fflush(__vam->ofp); \ -} while(0); -#endif +extern vat_main_t vat_main; +void vat_suspend (vlib_main_t * vm, f64 interval); +f64 vat_time_now (vat_main_t * vam); +void errmsg (char *fmt, ...); void vat_api_hookup (vat_main_t * vam); int api_sw_interface_dump (vat_main_t * vam); void do_one_file (vat_main_t * vam); diff --git a/src/vlib/unix/plugin.c b/src/vlib/unix/plugin.c index 987f7d05..40399090 100644 --- a/src/vlib/unix/plugin.c +++ b/src/vlib/unix/plugin.c @@ -264,10 +264,15 @@ vlib_plugins_show_cmd_fn (vlib_main_t * vm, return 0; } +/* *INDENT-OFF* */ VLIB_CLI_COMMAND (plugins_show_cmd, static) = { -.path = "show plugins",.short_help = "show loaded plugins",.function = - vlib_plugins_show_cmd_fn,}; + .path = "show plugins", + .short_help = "show loaded plugins", + .function = vlib_plugins_show_cmd_fn, +}; +/* *INDENT-ON* */ + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vlibapi/api_helper_macros.h b/src/vlibapi/api_helper_macros.h index 16f34cfc..7f94e446 100644 --- a/src/vlibapi/api_helper_macros.h +++ b/src/vlibapi/api_helper_macros.h @@ -17,7 +17,6 @@ *------------------------------------------------------------------ */ - #ifndef __api_helper_macros_h__ #define __api_helper_macros_h__ diff --git a/src/vlibapi/vat_helper_macros.h b/src/vlibapi/vat_helper_macros.h new file mode 100644 index 00000000..7199364a --- /dev/null +++ b/src/vlibapi/vat_helper_macros.h @@ -0,0 +1,76 @@ +/* + *------------------------------------------------------------------ + * vat_helper_macros.h - collect api client helper macros in one place + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ +#ifndef __vat_helper_macros_h__ +#define __vat_helper_macros_h__ + +/* M: construct, but don't yet send a message */ + +#define M(T,t) \ +do { \ + vam->result_ready = 0; \ + mp = vl_msg_api_alloc_as_if_client(sizeof(*mp)); \ + memset (mp, 0, sizeof (*mp)); \ + mp->_vl_msg_id = ntohs (VL_API_##T); \ + mp->client_index = vam->my_client_index; \ +} while(0); + +#define M2(T,t,n) \ +do { \ + vam->result_ready = 0; \ + mp = vl_msg_api_alloc_as_if_client(sizeof(*mp)+(n)); \ + memset (mp, 0, sizeof (*mp)); \ + mp->_vl_msg_id = ntohs (VL_API_##T); \ + mp->client_index = vam->my_client_index; \ +} while(0); + + +/* S: send a message */ +#define S (vl_msg_api_send_shmem (vam->vl_input_queue, (u8 *)&mp)) + +/* W: wait for results, with timeout */ +#define W \ +do { \ + timeout = vat_time_now (vam) + 1.0; \ + \ + while (vat_time_now (vam) < timeout) { \ + if (vam->result_ready == 1) { \ + return (vam->retval); \ + } \ + vat_suspend (vam->vlib_main, 1e-5); \ + } \ + return -99; \ +} while(0); + +/* W2: wait for results, with timeout */ +#define W2(body) \ +do { \ + timeout = vat_time_now (vam) + 1.0; \ + \ + while (vat_time_now (vam) < timeout) { \ + if (vam->result_ready == 1) { \ + (body); \ + return (vam->retval); \ + } \ + vat_suspend (vam->vlib_main, 1e-5); \ + } \ + return -99; \ +} while(0); + + +#endif /* __vat_helper_macros_h__ */ diff --git a/src/vpp-api-test.am b/src/vpp-api-test.am index 32610056..f0d5df62 100644 --- a/src/vpp-api-test.am +++ b/src/vpp-api-test.am @@ -44,7 +44,10 @@ vpp_api_test_LDADD = \ libvnet.la \ -lpthread -lm -lrt -ldl -lcrypto +vpp_api_test_LDFLAGS = -Wl,--export-dynamic + vpp_json_test_LDADD = libvppinfra.la -lm +vpp_json_test_LDFLAGS = -Wl,--export-dynamic nobase_include_HEADERS += \ vat/vat.h \ diff --git a/src/vpp.am b/src/vpp.am index 425f1e32..0b605ec5 100644 --- a/src/vpp.am +++ b/src/vpp.am @@ -28,7 +28,9 @@ bin_vpp_SOURCES += \ if WITH_APICLI bin_vpp_SOURCES += \ vpp/api/api_format.c \ - vpp/api/api_main.c + vpp/api/api_main.c \ + vpp/api/plugin.c \ + vpp/api/plugin.h endif # comment out to disable stats upload to gmond @@ -78,6 +80,8 @@ bin_vpp_LDADD = \ libvppinfra.la \ -lrt -lm -lpthread -ldl +bin_vpp_LDFLAGS = -Wl,--export-dynamic + if ENABLE_TESTS noinst_PROGRAMS += bin/test_client diff --git a/src/vpp/api/api_main.c b/src/vpp/api/api_main.c index fd6998f4..7bf7e8b7 100644 --- a/src/vpp/api/api_main.c +++ b/src/vpp/api/api_main.c @@ -42,11 +42,17 @@ static clib_error_t * api_main_init (vlib_main_t * vm) { vat_main_t *vam = &vat_main; + int rv; + int vat_plugin_init (vat_main_t * vam); vam->vlib_main = vm; vam->my_client_index = (u32) ~ 0; init_error_string_table (vam); vat_api_hookup (vam); + clib_warning ("vam %llx", vam); + rv = vat_plugin_init (vam); + if (rv) + clib_warning ("vat_plugin_init returned %d", rv); return 0; } @@ -183,6 +189,39 @@ api_cli_output (void *notused, const char *fmt, ...) 
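/*
 * How an api_xxx command routine is expected to use the shared M / S / W
 * helpers collected in vat_helper_macros.h above.  "foo_enable" and its
 * vl_api_foo_enable_t message are hypothetical, used only to show the
 * allocate / fill / send / wait calling convention; errmsg, the unformat
 * machinery and the macros themselves all appear elsewhere in this patch.
 */
static int
api_foo_enable (vat_main_t * vam)
{
  unformat_input_t *i = vam->input;
  vl_api_foo_enable_t *mp;
  f64 timeout;
  u32 sw_if_index = ~0;

  /* Parse CLI arguments */
  while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
    {
      if (unformat (i, "sw_if_index %d", &sw_if_index))
        ;
      else
        break;
    }

  if (sw_if_index == ~0)
    {
      errmsg ("missing sw_if_index");
      return -99;
    }

  /* M: allocate and zero the request, stamp msg id and client index */
  M (FOO_ENABLE, foo_enable);
  mp->sw_if_index = ntohl (sw_if_index);

  /* S: send it to vpp over shared memory */
  S;

  /* W: spin (politely, via vat_suspend) until the reply handler sets
     vam->result_ready, or time out and return -99 */
  W;

  /* NOTREACHED */
  return 0;
}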
vec_free (s); } +u16 +vl_client_get_first_plugin_msg_id (char *plugin_name) +{ + api_main_t *am = &api_main; + vl_api_msg_range_t *rp; + uword *p; + + p = hash_get_mem (am->msg_range_by_name, plugin_name); + if (p == 0) + return ~0; + + rp = vec_elt_at_index (am->msg_ranges, p[0]); + + return (rp->first_msg_id); +} + +uword +unformat_sw_if_index (unformat_input_t * input, va_list * args) +{ + u32 *result = va_arg (*args, u32 *); + vnet_main_t *vnm = vnet_get_main (); + u32 sw_if_index = ~0; + u8 *if_name; + uword *p; + + if (unformat (input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + *result = sw_if_index; + return 1; + } + return 0; +} + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vpp/api/plugin.c b/src/vpp/api/plugin.c new file mode 100644 index 00000000..c1cc928c --- /dev/null +++ b/src/vpp/api/plugin.c @@ -0,0 +1,201 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * plugin.c: plugin handling + */ + +#include +#include +#include +#include + +plugin_main_t vat_plugin_main; + +static int +load_one_plugin (plugin_main_t * pm, plugin_info_t * pi) +{ + void *handle, *register_handle; + clib_error_t *(*fp) (vat_main_t *); + clib_error_t *error; + + handle = dlopen ((char *) pi->name, RTLD_LAZY); + + /* + * Note: this can happen if the plugin has an undefined symbol reference, + * so print a warning. Otherwise, the poor slob won't know what happened. + * Ask me how I know that... 
+ */ + if (handle == 0) + { + clib_warning ("%s", dlerror ()); + return -1; + } + + pi->handle = handle; + + register_handle = dlsym (pi->handle, "vat_plugin_register"); + if (register_handle == 0) + return 0; + + fp = register_handle; + + error = (*fp) (pm->vat_main); + + if (error) + { + clib_error_report (error); + dlclose (handle); + return 1; + } + + clib_warning ("Loaded plugin: %s", pi->name); + + return 0; +} + +static u8 ** +split_plugin_path (plugin_main_t * pm) +{ + int i; + u8 **rv = 0; + u8 *path = pm->plugin_path; + u8 *this = 0; + + for (i = 0; i < vec_len (pm->plugin_path); i++) + { + if (path[i] != ':') + { + vec_add1 (this, path[i]); + continue; + } + vec_add1 (this, 0); + vec_add1 (rv, this); + this = 0; + } + if (this) + { + vec_add1 (this, 0); + vec_add1 (rv, this); + } + return rv; +} + +int +vat_load_new_plugins (plugin_main_t * pm) +{ + DIR *dp; + struct dirent *entry; + struct stat statb; + uword *p; + plugin_info_t *pi; + u8 **plugin_path; + int i; + + plugin_path = split_plugin_path (pm); + + for (i = 0; i < vec_len (plugin_path); i++) + { + dp = opendir ((char *) plugin_path[i]); + + if (dp == 0) + continue; + + while ((entry = readdir (dp))) + { + u8 *plugin_name; + + if (pm->plugin_name_filter) + { + int j; + for (j = 0; j < vec_len (pm->plugin_name_filter); j++) + if (entry->d_name[j] != pm->plugin_name_filter[j]) + goto next; + } + + plugin_name = format (0, "%s/%s%c", plugin_path[i], + entry->d_name, 0); + + /* unreadable */ + if (stat ((char *) plugin_name, &statb) < 0) + { + ignore: + vec_free (plugin_name); + continue; + } + + /* a dir or other things which aren't plugins */ + if (!S_ISREG (statb.st_mode)) + goto ignore; + + p = hash_get_mem (pm->plugin_by_name_hash, plugin_name); + if (p == 0) + { + vec_add2 (pm->plugin_info, pi, 1); + pi->name = plugin_name; + pi->file_info = statb; + + if (load_one_plugin (pm, pi)) + { + vec_free (plugin_name); + _vec_len (pm->plugin_info) = vec_len (pm->plugin_info) - 1; + continue; + } + memset (pi, 0, sizeof (*pi)); + hash_set_mem (pm->plugin_by_name_hash, plugin_name, + pi - pm->plugin_info); + } + next: + ; + } + closedir (dp); + vec_free (plugin_path[i]); + } + vec_free (plugin_path); + return 0; +} + +#define QUOTE_(x) #x +#define QUOTE(x) QUOTE_(x) + +/* + * Load plugins from /usr/lib/vpp_api_test_plugins by default + */ +char *vat_plugin_path = "/usr/lib/vpp_api_test_plugins"; + +char *vat_plugin_name_filter = 0; + +int +vat_plugin_init (vat_main_t * vam) +{ + plugin_main_t *pm = &vat_plugin_main; + + + pm->plugin_path = format (0, "%s%c", vat_plugin_path, 0); + if (vat_plugin_name_filter) + pm->plugin_name_filter = format (0, "%s%c", vat_plugin_name_filter, 0); + + pm->plugin_by_name_hash = hash_create_string (0, sizeof (uword)); + pm->vat_main = vam; + + return vat_load_new_plugins (pm); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vpp/api/plugin.h b/src/vpp/api/plugin.h new file mode 100644 index 00000000..559ec52f --- /dev/null +++ b/src/vpp/api/plugin.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
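/*
 * The only symbol the loader above looks for in a shared object is
 * "vat_plugin_register".  The lb and snat test files earlier in this patch
 * are real implementations; the bare minimum a hypothetical plugin needs in
 * order to be accepted is sketched below.  Returning a clib_error_t makes
 * load_one_plugin() report the error, dlclose() the object and skip it;
 * returning 0 keeps it loaded.  Plugins resolve vpp_api_test symbols
 * (vat_main, vl_client_get_first_plugin_msg_id, ...) at load time, which is
 * why vpp and vpp_api_test are linked with -Wl,--export-dynamic elsewhere in
 * this patch.
 */
clib_error_t *
vat_plugin_register (vat_main_t * vam)
{
  clib_warning ("hypothetical plugin loaded, vat_main at %llx", vam);
  return 0;
}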
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * plugin.h: plugin handling + */ + +#ifndef __included_plugin_h__ +#define __included_plugin_h__ + +#include +#include +#include + +typedef struct +{ + u8 *name; + struct stat file_info; + void *handle; +} plugin_info_t; + +typedef struct +{ + /* loaded plugin info */ + plugin_info_t *plugin_info; + uword *plugin_by_name_hash; + + /* path and name filter */ + u8 *plugin_path; + u8 *plugin_name_filter; + + /* convenience */ + vat_main_t *vat_main; + +} plugin_main_t; + +plugin_main_t vat_plugin_main; + +int vat_plugin_init (vat_main_t * vam); +int vat_load_new_plugins (plugin_main_t * pm); + +#endif + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ -- cgit 1.2.3-korg From d2c97d988b2fbc28f0905d1826b428967d09348a Mon Sep 17 00:00:00 2001 From: Pavel Kotucek Date: Tue, 24 Jan 2017 10:58:12 +0100 Subject: API refactoring : classify Change-Id: Ib75197ef8e5057e7f0d9361a10705c3743d05333 Signed-off-by: Pavel Kotucek --- src/vnet.am | 8 +- src/vnet/classify/classify.api | 356 +++++++++++++++++++++++++ src/vnet/classify/classify_api.c | 555 +++++++++++++++++++++++++++++++++++++++ src/vnet/vnet_all_api_h.h | 1 + src/vpp/api/api.c | 456 -------------------------------- src/vpp/api/vpe.api | 340 +----------------------- 6 files changed, 921 insertions(+), 795 deletions(-) create mode 100644 src/vnet/classify/classify.api create mode 100644 src/vnet/classify/classify_api.c (limited to 'src/vpp/api') diff --git a/src/vnet.am b/src/vnet.am index 93dd1e6c..660efcf5 100644 --- a/src/vnet.am +++ b/src/vnet.am @@ -270,13 +270,17 @@ libvnet_la_SOURCES += \ vnet/classify/policer_classify.c \ vnet/classify/flow_classify.c \ vnet/classify/flow_classify_node.c \ - vnet/classify/vnet_classify.h + vnet/classify/vnet_classify.h \ + vnet/classify/classify_api.c nobase_include_HEADERS += \ vnet/classify/vnet_classify.h \ vnet/classify/input_acl.h \ vnet/classify/policer_classify.h \ - vnet/classify/flow_classify.h + vnet/classify/flow_classify.h \ + vnet/classify/classify.api.h + +API_FILES += vnet/classify/classify.api ######################################## # Layer 3 protocols go here diff --git a/src/vnet/classify/classify.api b/src/vnet/classify/classify.api new file mode 100644 index 00000000..51ebd6c8 --- /dev/null +++ b/src/vnet/classify/classify.api @@ -0,0 +1,356 @@ +/* + * Copyright (c) 2015-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** \brief Add/Delete classification table request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_add- if non-zero add the table, else delete it + @param del_chain - if non-zero delete the whole chain of tables + @param table_index - if add, reuturns index of the created table, else specifies the table to delete + @param nbuckets - number of buckets when adding a table + @param memory_size - memory size when adding a table + @param match_n_vectors - number of match vectors + @param next_table_index - index of next table + @param miss_next_index - index of miss table + @param current_data_flag - option to use current node's packet payload + as the starting point from where packets are classified, + This option is only valid for L2/L3 input ACL for now. + 0: by default, classify data from the buffer's start location + 1: classify packets from VPP node’s current data pointer + @param current_data_offset - a signed value to shift the start location of + the packet to be classified + For example, if input IP ACL node is used, L2 header’s first byte + can be accessible by configuring current_data_offset to -14 + if there is no vlan tag. + This is valid only if current_data_flag is set to 1. + @param mask[] - match mask +*/ +define classify_add_del_table +{ + u32 client_index; + u32 context; + u8 is_add; + u8 del_chain; + u32 table_index; + u32 nbuckets; + u32 memory_size; + u32 skip_n_vectors; + u32 match_n_vectors; + u32 next_table_index; + u32 miss_next_index; + u32 current_data_flag; + i32 current_data_offset; + u8 mask[0]; +}; + +/** \brief Add/Delete classification table response + @param context - sender context, to match reply w/ request + @param retval - return code for the table add/del requst + @param new_table_index - for add, returned index of the new table + @param skip_n_vectors - for add, returned value of skip_n_vectors in table + @param match_n_vectors -for add, returned value of match_n_vectors in table +*/ +define classify_add_del_table_reply +{ + u32 context; + i32 retval; + u32 new_table_index; + u32 skip_n_vectors; + u32 match_n_vectors; +}; + +/** \brief Classify add / del session request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_add - add session if non-zero, else delete + @param table_index - index of the table to add/del the session, required + @param hit_next_index - for add, hit_next_index of new session, required + @param opaque_index - for add, opaque_index of new session + @param advance -for add, advance value for session + @param action - + 0: no action (by default) + metadata is not used. + 1: Classified IP packets will be looked up from the + specified ipv4 fib table (configured by metadata as VRF id). + Only valid for L3 input ACL node + 2: Classified IP packets will be looked up from the + specified ipv6 fib table (configured by metadata as VRF id). + Only valid for L3 input ACL node + @param metadata - valid only if action != 0 + VRF id if action is 1 or 2. 
+ @param match[] - for add, match value for session, required +*/ +define classify_add_del_session +{ + u32 client_index; + u32 context; + u8 is_add; + u32 table_index; + u32 hit_next_index; + u32 opaque_index; + i32 advance; + u8 action; + u32 metadata; + u8 match[0]; +}; + +/** \brief Classify add / del session response + @param context - sender context, to match reply w/ request + @param retval - return code for the add/del session request +*/ +define classify_add_del_session_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Set/unset policer classify interface + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - interface to set/unset policer classify + @param ip4_table_index - ip4 classify table index (~0 for skip) + @param ip6_table_index - ip6 classify table index (~0 for skip) + @param l2_table_index - l2 classify table index (~0 for skip) + @param is_add - Set if non-zero, else unset + Note: User is recommended to use just one valid table_index per call. + (ip4_table_index, ip6_table_index, or l2_table_index) +*/ +define policer_classify_set_interface +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u32 ip4_table_index; + u32 ip6_table_index; + u32 l2_table_index; + u8 is_add; +}; + +/** \brief Set/unset policer classify interface response + @param context - sender context, to match reply w/ request + @param retval - return value for request +*/ +define policer_classify_set_interface_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Get list of policer classify interfaces and tables + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param type - classify table type +*/ +define policer_classify_dump +{ + u32 client_index; + u32 context; + u8 type; +}; + +/** \brief Policer classify operational state response.
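/*
 * A client-side sketch of driving classify_add_del_table (defined above)
 * with the shared M2/S/W helpers from vat_helper_macros.h; M2 is used
 * because the message ends in a variable-length mask.  The 16-byte all-zero
 * mask and the literal nbuckets/memory_size values are placeholders; a real
 * routine would parse them from vam->input like the other api_xxx routines
 * in this patch.
 */
static int
api_classify_add_del_table_sketch (vat_main_t * vam)
{
  vl_api_classify_add_del_table_t *mp;
  f64 timeout;
  u8 *mask = 0;

  vec_validate (mask, 15);      /* stands in for a parsed match mask */

  /* M2 allocates sizeof(*mp) plus room for the trailing mask bytes */
  M2 (CLASSIFY_ADD_DEL_TABLE, classify_add_del_table, vec_len (mask));

  mp->is_add = 1;
  mp->table_index = ntohl (~0);          /* ~0 => create a new table */
  mp->nbuckets = ntohl (32);
  mp->memory_size = ntohl (32 << 20);
  mp->skip_n_vectors = ntohl (0);
  mp->match_n_vectors = ntohl (1);
  mp->next_table_index = ntohl (~0);
  mp->miss_next_index = ntohl (~0);
  /* current_data_flag / current_data_offset stay 0: M2 memsets the message */
  clib_memcpy (mp->mask, mask, vec_len (mask));
  vec_free (mask);

  S;                            /* send */
  W;                            /* wait for classify_add_del_table_reply */
  /* NOTREACHED */
  return 0;
}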
+ @param context - sender context, to match reply w/ request + @param sw_if_index - software interface index + @param table_index - classify table index +*/ +define policer_classify_details +{ + u32 context; + u32 sw_if_index; + u32 table_index; +}; + +/** \brief Classify get table IDs request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define classify_table_ids +{ + u32 client_index; + u32 context; +}; + +/** \brief Reply for classify get table IDs request + @param context - sender context which was passed in the request + @param count - number of ids returned in response + @param ids - array of classify table ids +*/ +define classify_table_ids_reply +{ + u32 context; + i32 retval; + u32 count; + u32 ids[count]; +}; + +/** \brief Classify table ids by interface index request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - index of the interface +*/ +define classify_table_by_interface +{ + u32 client_index; + u32 context; + u32 sw_if_index; +}; + +/** \brief Reply for classify table id by interface index request + @param context - sender context which was passed in the request + @param count - number of ids returned in response + @param sw_if_index - index of the interface + @param l2_table_id - l2 classify table index + @param ip4_table_id - ip4 classify table index + @param ip6_table_id - ip6 classify table index +*/ +define classify_table_by_interface_reply +{ + u32 context; + i32 retval; + u32 sw_if_index; + u32 l2_table_id; + u32 ip4_table_id; + u32 ip6_table_id; +}; + +/** \brief Classify table info + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param table_id - classify table index +*/ +define classify_table_info +{ + u32 client_index; + u32 context; + u32 table_id; +}; + +/** \brief Reply for classify table info request + @param context - sender context which was passed in the request + @param count - number of ids returned in response + @param table_id - classify table index + @param nbuckets - number of buckets when adding a table + @param match_n_vectors - number of match vectors + @param skip_n_vectors - number of skip_n_vectors + @param active_sessions - number of sessions (active entries) + @param next_table_index - index of next table + @param miss_next_index - index of miss table + @param mask[] - match mask +*/ +define classify_table_info_reply +{ + u32 context; + i32 retval; + u32 table_id; + u32 nbuckets; + u32 match_n_vectors; + u32 skip_n_vectors; + u32 active_sessions; + u32 next_table_index; + u32 miss_next_index; + u32 mask_length; + u8 mask[mask_length]; +}; + +/** \brief Classify sessions dump request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param table_id - classify table index +*/ +define classify_session_dump +{ + u32 client_index; + u32 context; + u32 table_id; +}; + +/** \brief Reply for classify table session dump request + @param context - sender context which was passed in the request + @param count - number of ids returned in response + @param table_id - classify table index + @param hit_next_index - hit_next_index of session + @param opaque_index - for add, opaque_index of session + @param advance - advance value of session + @param match[] - match value for session +*/ +define classify_session_details +{ + u32 
context; + i32 retval; + u32 table_id; + u32 hit_next_index; + i32 advance; + u32 opaque_index; + u32 match_length; + u8 match[match_length]; +}; + +/** \brief Set/unset flow classify interface + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - interface to set/unset flow classify + @param ip4_table_index - ip4 classify table index (~0 for skip) + @param ip6_table_index - ip6 classify table index (~0 for skip) + @param l2_table_index - l2 classify table index (~0 for skip) + @param is_add - Set if non-zero, else unset + Note: User is recommeneded to use just one valid table_index per call. + (ip4_table_index, ip6_table_index, or l2_table_index) +*/ +define flow_classify_set_interface { + u32 client_index; + u32 context; + u32 sw_if_index; + u32 ip4_table_index; + u32 ip6_table_index; + u8 is_add; +}; + +/** \brief Set/unset flow classify interface response + @param context - sender context, to match reply w/ request + @param retval - return value for request +*/ +define flow_classify_set_interface_reply { + u32 context; + i32 retval; +}; + +/** \brief Get list of flow classify interfaces and tables + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param type - classify table type +*/ +define flow_classify_dump { + u32 client_index; + u32 context; + u8 type; +}; + +/** \brief Flow classify operational state response. + @param context - sender context, to match reply w/ request + @param sw_if_index - software interface index + @param table_index - classify table index +*/ +define flow_classify_details { + u32 context; + u32 sw_if_index; + u32 table_index; +}; + +/* + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ + \ No newline at end of file diff --git a/src/vnet/classify/classify_api.c b/src/vnet/classify/classify_api.c new file mode 100644 index 00000000..77a8b434 --- /dev/null +++ b/src/vnet/classify/classify_api.c @@ -0,0 +1,555 @@ +/* + *------------------------------------------------------------------ + * classify_api.c - classify api + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) 
vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include +#undef vl_printfun + +#include + +#define foreach_vpe_api_msg \ +_(CLASSIFY_ADD_DEL_TABLE, classify_add_del_table) \ +_(CLASSIFY_ADD_DEL_SESSION, classify_add_del_session) \ +_(CLASSIFY_TABLE_IDS,classify_table_ids) \ +_(CLASSIFY_TABLE_BY_INTERFACE, classify_table_by_interface) \ +_(CLASSIFY_TABLE_INFO,classify_table_info) \ +_(CLASSIFY_SESSION_DUMP,classify_session_dump) \ +_(CLASSIFY_SESSION_DETAILS,classify_session_details) \ +_(POLICER_CLASSIFY_SET_INTERFACE, policer_classify_set_interface) \ +_(POLICER_CLASSIFY_DUMP, policer_classify_dump) \ +_(FLOW_CLASSIFY_SET_INTERFACE, flow_classify_set_interface) \ +_(FLOW_CLASSIFY_DUMP, flow_classify_dump) + +#define foreach_classify_add_del_table_field \ +_(table_index) \ +_(nbuckets) \ +_(memory_size) \ +_(skip_n_vectors) \ +_(match_n_vectors) \ +_(next_table_index) \ +_(miss_next_index) \ +_(current_data_flag) \ +_(current_data_offset) + +static void vl_api_classify_add_del_table_t_handler + (vl_api_classify_add_del_table_t * mp) +{ + vl_api_classify_add_del_table_reply_t *rmp; + vnet_classify_main_t *cm = &vnet_classify_main; + vnet_classify_table_t *t; + int rv; + +#define _(a) u32 a; + foreach_classify_add_del_table_field; +#undef _ + +#define _(a) a = ntohl(mp->a); + foreach_classify_add_del_table_field; +#undef _ + + /* The underlying API fails silently, on purpose, so check here */ + if (mp->is_add == 0) /* delete */ + { + if (pool_is_free_index (cm->tables, table_index)) + { + rv = VNET_API_ERROR_NO_SUCH_TABLE; + goto out; + } + } + else /* add or update */ + { + if (table_index != ~0 && pool_is_free_index (cm->tables, table_index)) + table_index = ~0; + } + + rv = vnet_classify_add_del_table + (cm, mp->mask, nbuckets, memory_size, + skip_n_vectors, match_n_vectors, + next_table_index, miss_next_index, &table_index, + current_data_flag, current_data_offset, mp->is_add, mp->del_chain); + +out: + /* *INDENT-OFF* */ + REPLY_MACRO2(VL_API_CLASSIFY_ADD_DEL_TABLE_REPLY, + ({ + if (rv == 0 && mp->is_add) + { + t = pool_elt_at_index (cm->tables, table_index); + rmp->skip_n_vectors = ntohl(t->skip_n_vectors); + rmp->match_n_vectors = ntohl(t->match_n_vectors); + rmp->new_table_index = ntohl(table_index); + } + else + { + rmp->skip_n_vectors = ~0; + rmp->match_n_vectors = ~0; + rmp->new_table_index = ~0; + } + })); + /* *INDENT-ON* */ +} + +static void vl_api_classify_add_del_session_t_handler + (vl_api_classify_add_del_session_t * mp) +{ + vnet_classify_main_t *cm = &vnet_classify_main; + vl_api_classify_add_del_session_reply_t *rmp; + int rv; + u32 table_index, hit_next_index, opaque_index, metadata; + i32 advance; + u8 action; + + table_index = ntohl (mp->table_index); + hit_next_index = ntohl (mp->hit_next_index); + opaque_index = ntohl (mp->opaque_index); + advance = ntohl (mp->advance); + action = mp->action; + metadata = ntohl (mp->metadata); + + rv = vnet_classify_add_del_session + (cm, table_index, mp->match, hit_next_index, opaque_index, + advance, action, metadata, mp->is_add); + + REPLY_MACRO (VL_API_CLASSIFY_ADD_DEL_SESSION_REPLY); +} + +static void + vl_api_policer_classify_set_interface_t_handler + (vl_api_policer_classify_set_interface_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_policer_classify_set_interface_reply_t *rmp; + int rv; + u32 sw_if_index, ip4_table_index, ip6_table_index, l2_table_index; + + ip4_table_index = ntohl (mp->ip4_table_index); + ip6_table_index = ntohl (mp->ip6_table_index); + l2_table_index = ntohl (mp->l2_table_index); + 
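/*
 * The two "_()" expansions of foreach_classify_add_del_table_field at the
 * top of vl_api_classify_add_del_table_t_handler above are a small X-macro
 * trick.  Abridged to three of the nine fields, the preprocessed result is
 * simply one local per field plus one byte-swapped assignment per field:
 */
static void
classify_add_del_table_fields_expanded (vl_api_classify_add_del_table_t * mp)
{
  /* #define _(a) u32 a;  declares the locals */
  u32 table_index;
  u32 nbuckets;
  u32 memory_size;
  /* ... skip_n_vectors, match_n_vectors, next_table_index,
     miss_next_index, current_data_flag, current_data_offset ... */

  /* #define _(a) a = ntohl(mp->a);  then fills each one from the message */
  table_index = ntohl (mp->table_index);
  nbuckets = ntohl (mp->nbuckets);
  memory_size = ntohl (mp->memory_size);
  /* ... */
}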
sw_if_index = ntohl (mp->sw_if_index); + + VALIDATE_SW_IF_INDEX (mp); + + rv = vnet_set_policer_classify_intfc (vm, sw_if_index, ip4_table_index, + ip6_table_index, l2_table_index, + mp->is_add); + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_POLICER_CLASSIFY_SET_INTERFACE_REPLY); +} + +static void +send_policer_classify_details (u32 sw_if_index, + u32 table_index, + unix_shared_memory_queue_t * q, u32 context) +{ + vl_api_policer_classify_details_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_POLICER_CLASSIFY_DETAILS); + mp->context = context; + mp->sw_if_index = htonl (sw_if_index); + mp->table_index = htonl (table_index); + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +static void +vl_api_policer_classify_dump_t_handler (vl_api_policer_classify_dump_t * mp) +{ + unix_shared_memory_queue_t *q; + policer_classify_main_t *pcm = &policer_classify_main; + u32 *vec_tbl; + int i; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + vec_tbl = pcm->classify_table_index_by_sw_if_index[mp->type]; + + if (vec_len (vec_tbl)) + { + for (i = 0; i < vec_len (vec_tbl); i++) + { + if (vec_elt (vec_tbl, i) == ~0) + continue; + + send_policer_classify_details (i, vec_elt (vec_tbl, i), q, + mp->context); + } + } +} + +static void +vl_api_classify_table_ids_t_handler (vl_api_classify_table_ids_t * mp) +{ + unix_shared_memory_queue_t *q; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + vnet_classify_main_t *cm = &vnet_classify_main; + vnet_classify_table_t *t; + u32 *table_ids = 0; + u32 count; + + /* *INDENT-OFF* */ + pool_foreach (t, cm->tables, + ({ + vec_add1 (table_ids, ntohl(t - cm->tables)); + })); + /* *INDENT-ON* */ + count = vec_len (table_ids); + + vl_api_classify_table_ids_reply_t *rmp; + rmp = vl_msg_api_alloc_as_if_client (sizeof (*rmp) + count * sizeof (u32)); + rmp->_vl_msg_id = ntohs (VL_API_CLASSIFY_TABLE_IDS_REPLY); + rmp->context = mp->context; + rmp->count = ntohl (count); + clib_memcpy (rmp->ids, table_ids, count * sizeof (u32)); + rmp->retval = 0; + + vl_msg_api_send_shmem (q, (u8 *) & rmp); + + vec_free (table_ids); +} + +static void + vl_api_classify_table_by_interface_t_handler + (vl_api_classify_table_by_interface_t * mp) +{ + vl_api_classify_table_by_interface_reply_t *rmp; + int rv = 0; + + u32 sw_if_index = ntohl (mp->sw_if_index); + u32 *acl = 0; + + vec_validate (acl, INPUT_ACL_N_TABLES - 1); + vec_set (acl, ~0); + + VALIDATE_SW_IF_INDEX (mp); + + input_acl_main_t *am = &input_acl_main; + + int if_idx; + u32 type; + + for (type = 0; type < INPUT_ACL_N_TABLES; type++) + { + u32 *vec_tbl = am->classify_table_index_by_sw_if_index[type]; + if (vec_len (vec_tbl)) + { + for (if_idx = 0; if_idx < vec_len (vec_tbl); if_idx++) + { + if (vec_elt (vec_tbl, if_idx) == ~0 || sw_if_index != if_idx) + { + continue; + } + acl[type] = vec_elt (vec_tbl, if_idx); + } + } + } + + BAD_SW_IF_INDEX_LABEL; + + /* *INDENT-OFF* */ + REPLY_MACRO2(VL_API_CLASSIFY_TABLE_BY_INTERFACE_REPLY, + ({ + rmp->sw_if_index = ntohl(sw_if_index); + rmp->l2_table_id = ntohl(acl[INPUT_ACL_TABLE_L2]); + rmp->ip4_table_id = ntohl(acl[INPUT_ACL_TABLE_IP4]); + rmp->ip6_table_id = ntohl(acl[INPUT_ACL_TABLE_IP6]); + })); + /* *INDENT-ON* */ + vec_free (acl); +} + +static void +vl_api_classify_table_info_t_handler (vl_api_classify_table_info_t * mp) +{ + unix_shared_memory_queue_t *q; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + 
vl_api_classify_table_info_reply_t *rmp = 0; + + vnet_classify_main_t *cm = &vnet_classify_main; + u32 table_id = ntohl (mp->table_id); + vnet_classify_table_t *t; + + /* *INDENT-OFF* */ + pool_foreach (t, cm->tables, + ({ + if (table_id == t - cm->tables) + { + rmp = vl_msg_api_alloc_as_if_client + (sizeof (*rmp) + t->match_n_vectors * sizeof (u32x4)); + rmp->_vl_msg_id = ntohs (VL_API_CLASSIFY_TABLE_INFO_REPLY); + rmp->context = mp->context; + rmp->table_id = ntohl(table_id); + rmp->nbuckets = ntohl(t->nbuckets); + rmp->match_n_vectors = ntohl(t->match_n_vectors); + rmp->skip_n_vectors = ntohl(t->skip_n_vectors); + rmp->active_sessions = ntohl(t->active_elements); + rmp->next_table_index = ntohl(t->next_table_index); + rmp->miss_next_index = ntohl(t->miss_next_index); + rmp->mask_length = ntohl(t->match_n_vectors * sizeof (u32x4)); + clib_memcpy(rmp->mask, t->mask, t->match_n_vectors * sizeof(u32x4)); + rmp->retval = 0; + break; + } + })); + /* *INDENT-ON* */ + + if (rmp == 0) + { + rmp = vl_msg_api_alloc (sizeof (*rmp)); + rmp->_vl_msg_id = ntohs ((VL_API_CLASSIFY_TABLE_INFO_REPLY)); + rmp->context = mp->context; + rmp->retval = ntohl (VNET_API_ERROR_CLASSIFY_TABLE_NOT_FOUND); + } + + vl_msg_api_send_shmem (q, (u8 *) & rmp); +} + +static void +vl_api_classify_session_details_t_handler (vl_api_classify_session_details_t * + mp) +{ + clib_warning ("BUG"); +} + +static void +send_classify_session_details (unix_shared_memory_queue_t * q, + u32 table_id, + u32 match_length, + vnet_classify_entry_t * e, u32 context) +{ + vl_api_classify_session_details_t *rmp; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_CLASSIFY_SESSION_DETAILS); + rmp->context = context; + rmp->table_id = ntohl (table_id); + rmp->hit_next_index = ntohl (e->next_index); + rmp->advance = ntohl (e->advance); + rmp->opaque_index = ntohl (e->opaque_index); + rmp->match_length = ntohl (match_length); + clib_memcpy (rmp->match, e->key, match_length); + + vl_msg_api_send_shmem (q, (u8 *) & rmp); +} + +static void +vl_api_classify_session_dump_t_handler (vl_api_classify_session_dump_t * mp) +{ + vnet_classify_main_t *cm = &vnet_classify_main; + unix_shared_memory_queue_t *q; + + u32 table_id = ntohl (mp->table_id); + vnet_classify_table_t *t; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (!q) + return; + + /* *INDENT-OFF* */ + pool_foreach (t, cm->tables, + ({ + if (table_id == t - cm->tables) + { + vnet_classify_bucket_t * b; + vnet_classify_entry_t * v, * save_v; + int i, j, k; + + for (i = 0; i < t->nbuckets; i++) + { + b = &t->buckets [i]; + if (b->offset == 0) + continue; + + save_v = vnet_classify_get_entry (t, b->offset); + for (j = 0; j < (1<log2_pages); j++) + { + for (k = 0; k < t->entries_per_page; k++) + { + v = vnet_classify_entry_at_index + (t, save_v, j*t->entries_per_page + k); + if (vnet_classify_entry_is_free (v)) + continue; + + send_classify_session_details + (q, table_id, t->match_n_vectors * sizeof (u32x4), + v, mp->context); + } + } + } + break; + } + })); + /* *INDENT-ON* */ +} + +static void + vl_api_flow_classify_set_interface_t_handler + (vl_api_flow_classify_set_interface_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_flow_classify_set_interface_reply_t *rmp; + int rv; + u32 sw_if_index, ip4_table_index, ip6_table_index; + + ip4_table_index = ntohl (mp->ip4_table_index); + ip6_table_index = ntohl (mp->ip6_table_index); + sw_if_index = ntohl (mp->sw_if_index); + + VALIDATE_SW_IF_INDEX (mp); + + rv = 
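/*
 * Client side of the dump/details pattern used by classify_session_dump
 * above: the handler streams one classify_session_details message per
 * session back to the requesting queue, and the test client registers a
 * details handler that is invoked once per streamed reply.  A hypothetical
 * print-only sketch of such a handler (not the project's implementation):
 */
static void
vl_api_classify_session_details_t_handler (vl_api_classify_session_details_t * mp)
{
  vat_main_t *vam = &vat_main;

  fformat (vam->ofp, "table %d  hit-next %d  advance %d  opaque %d\n",
           ntohl (mp->table_id), ntohl (mp->hit_next_index),
           ntohl (mp->advance), ntohl (mp->opaque_index));
}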
vnet_set_flow_classify_intfc (vm, sw_if_index, ip4_table_index, + ip6_table_index, mp->is_add); + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_FLOW_CLASSIFY_SET_INTERFACE_REPLY); +} + +static void +send_flow_classify_details (u32 sw_if_index, + u32 table_index, + unix_shared_memory_queue_t * q, u32 context) +{ + vl_api_flow_classify_details_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_FLOW_CLASSIFY_DETAILS); + mp->context = context; + mp->sw_if_index = htonl (sw_if_index); + mp->table_index = htonl (table_index); + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +static void +vl_api_flow_classify_dump_t_handler (vl_api_flow_classify_dump_t * mp) +{ + unix_shared_memory_queue_t *q; + flow_classify_main_t *pcm = &flow_classify_main; + u32 *vec_tbl; + int i; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + vec_tbl = pcm->classify_table_index_by_sw_if_index[mp->type]; + + if (vec_len (vec_tbl)) + { + for (i = 0; i < vec_len (vec_tbl); i++) + { + if (vec_elt (vec_tbl, i) == ~0) + continue; + + send_flow_classify_details (i, vec_elt (vec_tbl, i), q, + mp->context); + } + } +} + +/* + * classify_api_hookup + * Add vpe's API message handlers to the table. + * vlib has alread mapped shared memory and + * added the client registration handlers. + * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process() + */ +#define vl_msg_name_crc_list +#include +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (api_main_t * am) +{ +#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id); + foreach_vl_msg_name_crc_classify; +#undef _ +} + +static clib_error_t * +classify_api_hookup (vlib_main_t * vm) +{ + api_main_t *am = &api_main; + +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_vpe_api_msg; +#undef _ + + /* + * Set up the (msg_name, crc, message-id) table + */ + setup_message_id_table (am); + + return 0; +} + +VLIB_API_INIT_FUNCTION (classify_api_hookup); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/vnet_all_api_h.h b/src/vnet/vnet_all_api_h.h index 1024f92c..c7789650 100644 --- a/src/vnet/vnet_all_api_h.h +++ b/src/vnet/vnet_all_api_h.h @@ -52,6 +52,7 @@ #include #include #include +#include /* * fd.io coding-style-patch-verification: ON diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index 3afc3383..6e7e9c10 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -62,10 +62,7 @@ #include #include #include -#include #include -#include -#include #include #include #include @@ -139,8 +136,6 @@ _(CLI_REQUEST, cli_request) \ _(CLI_INBAND, cli_inband) \ _(SET_ARP_NEIGHBOR_LIMIT, set_arp_neighbor_limit) \ _(L2_PATCH_ADD_DEL, l2_patch_add_del) \ -_(CLASSIFY_ADD_DEL_TABLE, classify_add_del_table) \ -_(CLASSIFY_ADD_DEL_SESSION, classify_add_del_session) \ _(CLASSIFY_SET_INTERFACE_IP_TABLE, classify_set_interface_ip_table) \ _(CLASSIFY_SET_INTERFACE_L2_TABLES, classify_set_interface_l2_tables) \ _(GET_NODE_INDEX, get_node_index) \ @@ -165,13 +160,6 @@ _(IOAM_ENABLE, ioam_enable) \ _(IOAM_DISABLE, ioam_disable) \ _(POLICER_ADD_DEL, policer_add_del) \ _(POLICER_DUMP, policer_dump) \ -_(POLICER_CLASSIFY_SET_INTERFACE, policer_classify_set_interface) \ -_(POLICER_CLASSIFY_DUMP, policer_classify_dump) \ 
-_(CLASSIFY_TABLE_IDS,classify_table_ids) \ -_(CLASSIFY_TABLE_BY_INTERFACE, classify_table_by_interface) \ -_(CLASSIFY_TABLE_INFO,classify_table_info) \ -_(CLASSIFY_SESSION_DUMP,classify_session_dump) \ -_(CLASSIFY_SESSION_DETAILS,classify_session_details) \ _(SET_IPFIX_EXPORTER, set_ipfix_exporter) \ _(IPFIX_EXPORTER_DUMP, ipfix_exporter_dump) \ _(SET_IPFIX_CLASSIFY_STREAM, set_ipfix_classify_stream) \ @@ -189,8 +177,6 @@ _(IP_SOURCE_AND_PORT_RANGE_CHECK_INTERFACE_ADD_DEL, \ _(DELETE_SUBIF, delete_subif) \ _(L2_INTERFACE_PBB_TAG_REWRITE, l2_interface_pbb_tag_rewrite) \ _(PUNT, punt) \ -_(FLOW_CLASSIFY_SET_INTERFACE, flow_classify_set_interface) \ -_(FLOW_CLASSIFY_DUMP, flow_classify_dump) \ _(FEATURE_ENABLE_DISABLE, feature_enable_disable) #define QUOTE_(x) #x @@ -1412,99 +1398,6 @@ vl_api_set_arp_neighbor_limit_t_handler (vl_api_set_arp_neighbor_limit_t * mp) REPLY_MACRO (VL_API_SET_ARP_NEIGHBOR_LIMIT_REPLY); } -#define foreach_classify_add_del_table_field \ -_(table_index) \ -_(nbuckets) \ -_(memory_size) \ -_(skip_n_vectors) \ -_(match_n_vectors) \ -_(next_table_index) \ -_(miss_next_index) \ -_(current_data_flag) \ -_(current_data_offset) - -static void vl_api_classify_add_del_table_t_handler - (vl_api_classify_add_del_table_t * mp) -{ - vl_api_classify_add_del_table_reply_t *rmp; - vnet_classify_main_t *cm = &vnet_classify_main; - vnet_classify_table_t *t; - int rv; - -#define _(a) u32 a; - foreach_classify_add_del_table_field; -#undef _ - -#define _(a) a = ntohl(mp->a); - foreach_classify_add_del_table_field; -#undef _ - - /* The underlying API fails silently, on purpose, so check here */ - if (mp->is_add == 0) /* delete */ - { - if (pool_is_free_index (cm->tables, table_index)) - { - rv = VNET_API_ERROR_NO_SUCH_TABLE; - goto out; - } - } - else /* add or update */ - { - if (table_index != ~0 && pool_is_free_index (cm->tables, table_index)) - table_index = ~0; - } - - rv = vnet_classify_add_del_table - (cm, mp->mask, nbuckets, memory_size, - skip_n_vectors, match_n_vectors, - next_table_index, miss_next_index, &table_index, - current_data_flag, current_data_offset, mp->is_add, mp->del_chain); - -out: - /* *INDENT-OFF* */ - REPLY_MACRO2(VL_API_CLASSIFY_ADD_DEL_TABLE_REPLY, - ({ - if (rv == 0 && mp->is_add) - { - t = pool_elt_at_index (cm->tables, table_index); - rmp->skip_n_vectors = ntohl(t->skip_n_vectors); - rmp->match_n_vectors = ntohl(t->match_n_vectors); - rmp->new_table_index = ntohl(table_index); - } - else - { - rmp->skip_n_vectors = ~0; - rmp->match_n_vectors = ~0; - rmp->new_table_index = ~0; - } - })); - /* *INDENT-ON* */ -} - -static void vl_api_classify_add_del_session_t_handler - (vl_api_classify_add_del_session_t * mp) -{ - vnet_classify_main_t *cm = &vnet_classify_main; - vl_api_classify_add_del_session_reply_t *rmp; - int rv; - u32 table_index, hit_next_index, opaque_index, metadata; - i32 advance; - u8 action; - - table_index = ntohl (mp->table_index); - hit_next_index = ntohl (mp->hit_next_index); - opaque_index = ntohl (mp->opaque_index); - advance = ntohl (mp->advance); - action = mp->action; - metadata = ntohl (mp->metadata); - - rv = vnet_classify_add_del_session - (cm, table_index, mp->match, hit_next_index, opaque_index, - advance, action, metadata, mp->is_add); - - REPLY_MACRO (VL_API_CLASSIFY_ADD_DEL_SESSION_REPLY); -} - static void vl_api_classify_set_interface_ip_table_t_handler (vl_api_classify_set_interface_ip_table_t * mp) { @@ -2528,288 +2421,6 @@ vl_api_policer_dump_t_handler (vl_api_policer_dump_t * mp) } } -static void - 
vl_api_policer_classify_set_interface_t_handler - (vl_api_policer_classify_set_interface_t * mp) -{ - vlib_main_t *vm = vlib_get_main (); - vl_api_policer_classify_set_interface_reply_t *rmp; - int rv; - u32 sw_if_index, ip4_table_index, ip6_table_index, l2_table_index; - - ip4_table_index = ntohl (mp->ip4_table_index); - ip6_table_index = ntohl (mp->ip6_table_index); - l2_table_index = ntohl (mp->l2_table_index); - sw_if_index = ntohl (mp->sw_if_index); - - VALIDATE_SW_IF_INDEX (mp); - - rv = vnet_set_policer_classify_intfc (vm, sw_if_index, ip4_table_index, - ip6_table_index, l2_table_index, - mp->is_add); - - BAD_SW_IF_INDEX_LABEL; - - REPLY_MACRO (VL_API_POLICER_CLASSIFY_SET_INTERFACE_REPLY); -} - -static void -send_policer_classify_details (u32 sw_if_index, - u32 table_index, - unix_shared_memory_queue_t * q, u32 context) -{ - vl_api_policer_classify_details_t *mp; - - mp = vl_msg_api_alloc (sizeof (*mp)); - memset (mp, 0, sizeof (*mp)); - mp->_vl_msg_id = ntohs (VL_API_POLICER_CLASSIFY_DETAILS); - mp->context = context; - mp->sw_if_index = htonl (sw_if_index); - mp->table_index = htonl (table_index); - - vl_msg_api_send_shmem (q, (u8 *) & mp); -} - -static void -vl_api_policer_classify_dump_t_handler (vl_api_policer_classify_dump_t * mp) -{ - unix_shared_memory_queue_t *q; - policer_classify_main_t *pcm = &policer_classify_main; - u32 *vec_tbl; - int i; - - q = vl_api_client_index_to_input_queue (mp->client_index); - if (q == 0) - return; - - vec_tbl = pcm->classify_table_index_by_sw_if_index[mp->type]; - - if (vec_len (vec_tbl)) - { - for (i = 0; i < vec_len (vec_tbl); i++) - { - if (vec_elt (vec_tbl, i) == ~0) - continue; - - send_policer_classify_details (i, vec_elt (vec_tbl, i), q, - mp->context); - } - } -} - -static void -vl_api_classify_table_ids_t_handler (vl_api_classify_table_ids_t * mp) -{ - unix_shared_memory_queue_t *q; - - q = vl_api_client_index_to_input_queue (mp->client_index); - if (q == 0) - return; - - vnet_classify_main_t *cm = &vnet_classify_main; - vnet_classify_table_t *t; - u32 *table_ids = 0; - u32 count; - - /* *INDENT-OFF* */ - pool_foreach (t, cm->tables, - ({ - vec_add1 (table_ids, ntohl(t - cm->tables)); - })); - /* *INDENT-ON* */ - count = vec_len (table_ids); - - vl_api_classify_table_ids_reply_t *rmp; - rmp = vl_msg_api_alloc_as_if_client (sizeof (*rmp) + count * sizeof (u32)); - rmp->_vl_msg_id = ntohs (VL_API_CLASSIFY_TABLE_IDS_REPLY); - rmp->context = mp->context; - rmp->count = ntohl (count); - clib_memcpy (rmp->ids, table_ids, count * sizeof (u32)); - rmp->retval = 0; - - vl_msg_api_send_shmem (q, (u8 *) & rmp); - - vec_free (table_ids); -} - -static void - vl_api_classify_table_by_interface_t_handler - (vl_api_classify_table_by_interface_t * mp) -{ - vl_api_classify_table_by_interface_reply_t *rmp; - int rv = 0; - - u32 sw_if_index = ntohl (mp->sw_if_index); - u32 *acl = 0; - - vec_validate (acl, INPUT_ACL_N_TABLES - 1); - vec_set (acl, ~0); - - VALIDATE_SW_IF_INDEX (mp); - - input_acl_main_t *am = &input_acl_main; - - int if_idx; - u32 type; - - for (type = 0; type < INPUT_ACL_N_TABLES; type++) - { - u32 *vec_tbl = am->classify_table_index_by_sw_if_index[type]; - if (vec_len (vec_tbl)) - { - for (if_idx = 0; if_idx < vec_len (vec_tbl); if_idx++) - { - if (vec_elt (vec_tbl, if_idx) == ~0 || sw_if_index != if_idx) - { - continue; - } - acl[type] = vec_elt (vec_tbl, if_idx); - } - } - } - - BAD_SW_IF_INDEX_LABEL; - - /* *INDENT-OFF* */ - REPLY_MACRO2(VL_API_CLASSIFY_TABLE_BY_INTERFACE_REPLY, - ({ - rmp->sw_if_index = ntohl(sw_if_index); - 
rmp->l2_table_id = ntohl(acl[INPUT_ACL_TABLE_L2]); - rmp->ip4_table_id = ntohl(acl[INPUT_ACL_TABLE_IP4]); - rmp->ip6_table_id = ntohl(acl[INPUT_ACL_TABLE_IP6]); - })); - /* *INDENT-ON* */ - vec_free (acl); -} - -static void -vl_api_classify_table_info_t_handler (vl_api_classify_table_info_t * mp) -{ - unix_shared_memory_queue_t *q; - - q = vl_api_client_index_to_input_queue (mp->client_index); - if (q == 0) - return; - - vl_api_classify_table_info_reply_t *rmp = 0; - - vnet_classify_main_t *cm = &vnet_classify_main; - u32 table_id = ntohl (mp->table_id); - vnet_classify_table_t *t; - - /* *INDENT-OFF* */ - pool_foreach (t, cm->tables, - ({ - if (table_id == t - cm->tables) - { - rmp = vl_msg_api_alloc_as_if_client - (sizeof (*rmp) + t->match_n_vectors * sizeof (u32x4)); - rmp->_vl_msg_id = ntohs (VL_API_CLASSIFY_TABLE_INFO_REPLY); - rmp->context = mp->context; - rmp->table_id = ntohl(table_id); - rmp->nbuckets = ntohl(t->nbuckets); - rmp->match_n_vectors = ntohl(t->match_n_vectors); - rmp->skip_n_vectors = ntohl(t->skip_n_vectors); - rmp->active_sessions = ntohl(t->active_elements); - rmp->next_table_index = ntohl(t->next_table_index); - rmp->miss_next_index = ntohl(t->miss_next_index); - rmp->mask_length = ntohl(t->match_n_vectors * sizeof (u32x4)); - clib_memcpy(rmp->mask, t->mask, t->match_n_vectors * sizeof(u32x4)); - rmp->retval = 0; - break; - } - })); - /* *INDENT-ON* */ - - if (rmp == 0) - { - rmp = vl_msg_api_alloc (sizeof (*rmp)); - rmp->_vl_msg_id = ntohs ((VL_API_CLASSIFY_TABLE_INFO_REPLY)); - rmp->context = mp->context; - rmp->retval = ntohl (VNET_API_ERROR_CLASSIFY_TABLE_NOT_FOUND); - } - - vl_msg_api_send_shmem (q, (u8 *) & rmp); -} - -static void -vl_api_classify_session_details_t_handler (vl_api_classify_session_details_t * - mp) -{ - clib_warning ("BUG"); -} - -static void -send_classify_session_details (unix_shared_memory_queue_t * q, - u32 table_id, - u32 match_length, - vnet_classify_entry_t * e, u32 context) -{ - vl_api_classify_session_details_t *rmp; - - rmp = vl_msg_api_alloc (sizeof (*rmp)); - memset (rmp, 0, sizeof (*rmp)); - rmp->_vl_msg_id = ntohs (VL_API_CLASSIFY_SESSION_DETAILS); - rmp->context = context; - rmp->table_id = ntohl (table_id); - rmp->hit_next_index = ntohl (e->next_index); - rmp->advance = ntohl (e->advance); - rmp->opaque_index = ntohl (e->opaque_index); - rmp->match_length = ntohl (match_length); - clib_memcpy (rmp->match, e->key, match_length); - - vl_msg_api_send_shmem (q, (u8 *) & rmp); -} - -static void -vl_api_classify_session_dump_t_handler (vl_api_classify_session_dump_t * mp) -{ - vnet_classify_main_t *cm = &vnet_classify_main; - unix_shared_memory_queue_t *q; - - u32 table_id = ntohl (mp->table_id); - vnet_classify_table_t *t; - - q = vl_api_client_index_to_input_queue (mp->client_index); - if (!q) - return; - - /* *INDENT-OFF* */ - pool_foreach (t, cm->tables, - ({ - if (table_id == t - cm->tables) - { - vnet_classify_bucket_t * b; - vnet_classify_entry_t * v, * save_v; - int i, j, k; - - for (i = 0; i < t->nbuckets; i++) - { - b = &t->buckets [i]; - if (b->offset == 0) - continue; - - save_v = vnet_classify_get_entry (t, b->offset); - for (j = 0; j < (1<log2_pages); j++) - { - for (k = 0; k < t->entries_per_page; k++) - { - v = vnet_classify_entry_at_index - (t, save_v, j*t->entries_per_page + k); - if (vnet_classify_entry_is_free (v)) - continue; - - send_classify_session_details - (q, table_id, t->match_n_vectors * sizeof (u32x4), - v, mp->context); - } - } - } - break; - } - })); - /* *INDENT-ON* */ -} static void 
vl_api_set_ipfix_exporter_t_handler (vl_api_set_ipfix_exporter_t * mp) @@ -3399,73 +3010,6 @@ vl_api_punt_t_handler (vl_api_punt_t * mp) REPLY_MACRO (VL_API_PUNT_REPLY); } -static void - vl_api_flow_classify_set_interface_t_handler - (vl_api_flow_classify_set_interface_t * mp) -{ - vlib_main_t *vm = vlib_get_main (); - vl_api_flow_classify_set_interface_reply_t *rmp; - int rv; - u32 sw_if_index, ip4_table_index, ip6_table_index; - - ip4_table_index = ntohl (mp->ip4_table_index); - ip6_table_index = ntohl (mp->ip6_table_index); - sw_if_index = ntohl (mp->sw_if_index); - - VALIDATE_SW_IF_INDEX (mp); - - rv = vnet_set_flow_classify_intfc (vm, sw_if_index, ip4_table_index, - ip6_table_index, mp->is_add); - - BAD_SW_IF_INDEX_LABEL; - - REPLY_MACRO (VL_API_FLOW_CLASSIFY_SET_INTERFACE_REPLY); -} - -static void -send_flow_classify_details (u32 sw_if_index, - u32 table_index, - unix_shared_memory_queue_t * q, u32 context) -{ - vl_api_flow_classify_details_t *mp; - - mp = vl_msg_api_alloc (sizeof (*mp)); - memset (mp, 0, sizeof (*mp)); - mp->_vl_msg_id = ntohs (VL_API_FLOW_CLASSIFY_DETAILS); - mp->context = context; - mp->sw_if_index = htonl (sw_if_index); - mp->table_index = htonl (table_index); - - vl_msg_api_send_shmem (q, (u8 *) & mp); -} - -static void -vl_api_flow_classify_dump_t_handler (vl_api_flow_classify_dump_t * mp) -{ - unix_shared_memory_queue_t *q; - flow_classify_main_t *pcm = &flow_classify_main; - u32 *vec_tbl; - int i; - - q = vl_api_client_index_to_input_queue (mp->client_index); - if (q == 0) - return; - - vec_tbl = pcm->classify_table_index_by_sw_if_index[mp->type]; - - if (vec_len (vec_tbl)) - { - for (i = 0; i < vec_len (vec_tbl); i++) - { - if (vec_elt (vec_tbl, i) == ~0) - continue; - - send_flow_classify_details (i, vec_elt (vec_tbl, i), q, - mp->context); - } - } -} - static void vl_api_feature_enable_disable_t_handler (vl_api_feature_enable_disable_t * mp) { diff --git a/src/vpp/api/vpe.api b/src/vpp/api/vpe.api index e784fa01..a9b34d1f 100644 --- a/src/vpp/api/vpe.api +++ b/src/vpp/api/vpe.api @@ -26,7 +26,7 @@ * IP APIs: see .../src/vnet/ip/{ip.api, ip_api.c} * TAP APIs: see .../src/vnet/unix/{tap.api, tap_api.c} * VXLAN APIs: see .../src/vnet/vxlan/{vxlan.api, vxlan_api.c} - * AF-PACKET APIs: ... see /vnet/devices/af_packet/{af_packet.api, af_packet_api.c} + * AF-PACKET APIs: see ... /vnet/devices/af_packet/{af_packet.api, af_packet_api.c} * NETMAP APIs: see ... /src/vnet/devices/netmap/{netmap.api, netmap_api.c} * VHOST-USER APIs: see .../vnet/devices/virtio/{vhost_user.api, vhost_user_api.c} * VXLAN GPE APIs: see .../src/vnet/vxlan-gpe/{vxlan_gpe.api, vxlan_gpe_api.c} @@ -40,7 +40,8 @@ * LISP-GPE APIs: see .../src/vnet/lisp-gpe/{lisp_gpe.api, lisp_gpe_api.c} * MPLS APIs: see .../src/vnet/mpls/{mpls.api, mpls_api.c} * SR APIs: see .../src/vnet/sr/{sr.api, sr_api.c} - * DPDK APIs: ... see /src/vnet/devices/dpdk/{dpdk.api, dpdk_api.c} + * DPDK APIs: see ... /src/vnet/devices/dpdk/{dpdk.api, dpdk_api.c} + * CLASSIFY APIs: see ... 
/src/vnet/classify/{classify.api, classify_api.c} */ /** \brief Create a new subinterface with the given vlan id @@ -685,109 +686,6 @@ define bd_ip_mac_add_del_reply i32 retval; }; -/** \brief Add/Delete classification table request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param is_add- if non-zero add the table, else delete it - @param del_chain - if non-zero delete the whole chain of tables - @param table_index - if add, reuturns index of the created table, else specifies the table to delete - @param nbuckets - number of buckets when adding a table - @param memory_size - memory size when adding a table - @param match_n_vectors - number of match vectors - @param next_table_index - index of next table - @param miss_next_index - index of miss table - @param current_data_flag - option to use current node's packet payload - as the starting point from where packets are classified, - This option is only valid for L2/L3 input ACL for now. - 0: by default, classify data from the buffer's start location - 1: classify packets from VPP node’s current data pointer - @param current_data_offset - a signed value to shift the start location of - the packet to be classified - For example, if input IP ACL node is used, L2 header’s first byte - can be accessible by configuring current_data_offset to -14 - if there is no vlan tag. - This is valid only if current_data_flag is set to 1. - @param mask[] - match mask -*/ -define classify_add_del_table -{ - u32 client_index; - u32 context; - u8 is_add; - u8 del_chain; - u32 table_index; - u32 nbuckets; - u32 memory_size; - u32 skip_n_vectors; - u32 match_n_vectors; - u32 next_table_index; - u32 miss_next_index; - u32 current_data_flag; - i32 current_data_offset; - u8 mask[0]; -}; - -/** \brief Add/Delete classification table response - @param context - sender context, to match reply w/ request - @param retval - return code for the table add/del requst - @param new_table_index - for add, returned index of the new table - @param skip_n_vectors - for add, returned value of skip_n_vectors in table - @param match_n_vectors -for add, returned value of match_n_vectors in table -*/ -define classify_add_del_table_reply -{ - u32 context; - i32 retval; - u32 new_table_index; - u32 skip_n_vectors; - u32 match_n_vectors; -}; - -/** \brief Classify add / del session request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param is_add - add session if non-zero, else delete - @param table_index - index of the table to add/del the session, required - @param hit_next_index - for add, hit_next_index of new session, required - @param opaque_index - for add, opaque_index of new session - @param advance -for add, advance value for session - @param action - - 0: no action (by default) - metadata is not used. - 1: Classified IP packets will be looked up from the - specified ipv4 fib table (configured by metadata as VRF id). - Only valid for L3 input ACL node - 2: Classified IP packets will be looked up from the - specified ipv6 fib table (configured by metadata as VRF id). - Only valid for L3 input ACL node - @param metadata - valid only if action != 0 - VRF id if action is 1 or 2. 
- @param match[] - for add, match value for session, required -*/ -define classify_add_del_session -{ - u32 client_index; - u32 context; - u8 is_add; - u32 table_index; - u32 hit_next_index; - u32 opaque_index; - i32 advance; - u8 action; - u32 metadata; - u8 match[0]; -}; - -/** \brief Classify add / del session response - @param context - sender context, to match reply w/ request - @param retval - return code for the add/del session request -*/ -define classify_add_del_session_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set/unset the classification table for an interface request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -1491,187 +1389,6 @@ define policer_details u64 last_update_time; }; -/** \brief Set/unset policer classify interface - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param sw_if_index - interface to set/unset policer classify - @param ip4_table_index - ip4 classify table index (~0 for skip) - @param ip6_table_index - ip6 classify table index (~0 for skip) - @param l2_table_index - l2 classify table index (~0 for skip) - @param is_add - Set if non-zero, else unset - Note: User is recommeneded to use just one valid table_index per call. - (ip4_table_index, ip6_table_index, or l2_table_index) -*/ -define policer_classify_set_interface -{ - u32 client_index; - u32 context; - u32 sw_if_index; - u32 ip4_table_index; - u32 ip6_table_index; - u32 l2_table_index; - u8 is_add; -}; - -/** \brief Set/unset policer classify interface response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define policer_classify_set_interface_reply -{ - u32 context; - i32 retval; -}; - -/** \brief Get list of policer classify interfaces and tables - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param type - classify table type -*/ -define policer_classify_dump -{ - u32 client_index; - u32 context; - u8 type; -}; - -/** \brief Policer iclassify operational state response. 
- @param context - sender context, to match reply w/ request - @param sw_if_index - software interface index - @param table_index - classify table index -*/ -define policer_classify_details -{ - u32 context; - u32 sw_if_index; - u32 table_index; -}; - -/** \brief Classify get table IDs request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request -*/ -define classify_table_ids -{ - u32 client_index; - u32 context; -}; - -/** \brief Reply for classify get table IDs request - @param context - sender context which was passed in the request - @param count - number of ids returned in response - @param ids - array of classify table ids -*/ -define classify_table_ids_reply -{ - u32 context; - i32 retval; - u32 count; - u32 ids[count]; -}; - -/** \brief Classify table ids by interface index request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param sw_if_index - index of the interface -*/ -define classify_table_by_interface -{ - u32 client_index; - u32 context; - u32 sw_if_index; -}; - -/** \brief Reply for classify table id by interface index request - @param context - sender context which was passed in the request - @param count - number of ids returned in response - @param sw_if_index - index of the interface - @param l2_table_id - l2 classify table index - @param ip4_table_id - ip4 classify table index - @param ip6_table_id - ip6 classify table index -*/ -define classify_table_by_interface_reply -{ - u32 context; - i32 retval; - u32 sw_if_index; - u32 l2_table_id; - u32 ip4_table_id; - u32 ip6_table_id; -}; - -/** \brief Classify table info - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param table_id - classify table index -*/ -define classify_table_info -{ - u32 client_index; - u32 context; - u32 table_id; -}; - -/** \brief Reply for classify table info request - @param context - sender context which was passed in the request - @param count - number of ids returned in response - @param table_id - classify table index - @param nbuckets - number of buckets when adding a table - @param match_n_vectors - number of match vectors - @param skip_n_vectors - number of skip_n_vectors - @param active_sessions - number of sessions (active entries) - @param next_table_index - index of next table - @param miss_next_index - index of miss table - @param mask[] - match mask -*/ -define classify_table_info_reply -{ - u32 context; - i32 retval; - u32 table_id; - u32 nbuckets; - u32 match_n_vectors; - u32 skip_n_vectors; - u32 active_sessions; - u32 next_table_index; - u32 miss_next_index; - u32 mask_length; - u8 mask[mask_length]; -}; - -/** \brief Classify sessions dump request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param table_id - classify table index -*/ -define classify_session_dump -{ - u32 client_index; - u32 context; - u32 table_id; -}; - -/** \brief Reply for classify table session dump request - @param context - sender context which was passed in the request - @param count - number of ids returned in response - @param table_id - classify table index - @param hit_next_index - hit_next_index of session - @param opaque_index - for add, opaque_index of session - @param advance - advance value of session - @param match[] - match value for session -*/ -define classify_session_details -{ - u32 
context; - i32 retval; - u32 table_id; - u32 hit_next_index; - i32 advance; - u32 opaque_index; - u32 match_length; - u8 match[match_length]; -}; - /** \brief Configure IPFIX exporter process request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -1825,57 +1542,6 @@ define ipfix_classify_table_details { u8 transport_protocol; }; -/** \brief Set/unset flow classify interface - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param sw_if_index - interface to set/unset flow classify - @param ip4_table_index - ip4 classify table index (~0 for skip) - @param ip6_table_index - ip6 classify table index (~0 for skip) - @param l2_table_index - l2 classify table index (~0 for skip) - @param is_add - Set if non-zero, else unset - Note: User is recommeneded to use just one valid table_index per call. - (ip4_table_index, ip6_table_index, or l2_table_index) -*/ -define flow_classify_set_interface { - u32 client_index; - u32 context; - u32 sw_if_index; - u32 ip4_table_index; - u32 ip6_table_index; - u8 is_add; -}; - -/** \brief Set/unset flow classify interface response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define flow_classify_set_interface_reply { - u32 context; - i32 retval; -}; - -/** \brief Get list of flow classify interfaces and tables - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param type - classify table type -*/ -define flow_classify_dump { - u32 client_index; - u32 context; - u8 type; -}; - -/** \brief Flow classify operational state response. - @param context - sender context, to match reply w/ request - @param sw_if_index - software interface index - @param table_index - classify table index -*/ -define flow_classify_details { - u32 context; - u32 sw_if_index; - u32 table_index; -}; - /** \brief Query relative index via node names @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request -- cgit 1.2.3-korg From eb9e666a3c7045d36e2b2ae40122f40a0b5f9c65 Mon Sep 17 00:00:00 2001 From: Pavel Kotucek Date: Tue, 24 Jan 2017 13:40:26 +0100 Subject: API refactoring : flow Change-Id: I99e913b954f8b02f347bfeff093856a1c5e96781 Signed-off-by: Pavel Kotucek --- src/vnet.am | 8 +- src/vnet/flow/flow.api | 173 ++++++++++++++++++++ src/vnet/flow/flow_api.c | 397 ++++++++++++++++++++++++++++++++++++++++++++++ src/vnet/vnet_all_api_h.h | 1 + src/vpp/api/api.c | 299 ---------------------------------- src/vpp/api/vpe.api | 154 +----------------- 6 files changed, 578 insertions(+), 454 deletions(-) create mode 100644 src/vnet/flow/flow.api create mode 100644 src/vnet/flow/flow_api.c (limited to 'src/vpp/api') diff --git a/src/vnet.am b/src/vnet.am index 660efcf5..c751c100 100644 --- a/src/vnet.am +++ b/src/vnet.am @@ -707,12 +707,16 @@ nobase_include_HEADERS += \ # IPFIX / netflow v10 ######################################## libvnet_la_SOURCES += \ - vnet/flow/flow_report.c + vnet/flow/flow_report.c \ + vnet/flow/flow_api.c nobase_include_HEADERS += \ vnet/flow/flow_report.h \ vnet/flow/ipfix_info_elements.h \ - vnet/flow/ipfix_packet.h + vnet/flow/ipfix_packet.h \ + vnet/flow/flow.api.h + +API_FILES += vnet/flow/flow.api ######################################## # IPFIX classify code diff --git a/src/vnet/flow/flow.api b/src/vnet/flow/flow.api new file mode 
100644 index 00000000..0e0f99bf --- /dev/null +++ b/src/vnet/flow/flow.api @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2015-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** \brief Configure IPFIX exporter process request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param collector_address - address of IPFIX collector + @param collector_port - port of IPFIX collector + @param src_address - address of IPFIX exporter + @param vrf_id - VRF / fib table ID + @param path_mtu - Path MTU between exporter and collector + @param template_interval - number of seconds after which to resend template + @param udp_checksum - UDP checksum calculation enable flag +*/ +define set_ipfix_exporter +{ + u32 client_index; + u32 context; + u8 collector_address[16]; + u16 collector_port; + u8 src_address[16]; + u32 vrf_id; + u32 path_mtu; + u32 template_interval; + u8 udp_checksum; +}; + +/** \brief Reply to IPFIX exporter configure request + @param context - sender context which was passed in the request +*/ +define set_ipfix_exporter_reply +{ + u32 context; + i32 retval; +}; + +/** \brief IPFIX exporter dump request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define ipfix_exporter_dump +{ + u32 client_index; + u32 context; +}; + +/** \brief Reply to IPFIX exporter dump request + @param context - sender context which was passed in the request + @param collector_address - address of IPFIX collector + @param collector_port - port of IPFIX collector + @param src_address - address of IPFIX exporter + @param fib_index - fib table index + @param path_mtu - Path MTU between exporter and collector + @param template_interval - number of seconds after which to resend template + @param udp_checksum - UDP checksum calculation enable flag +*/ +define ipfix_exporter_details +{ + u32 context; + u8 collector_address[16]; + u16 collector_port; + u8 src_address[16]; + u32 vrf_id; + u32 path_mtu; + u32 template_interval; + u8 udp_checksum; +}; + +/** \brief IPFIX classify stream configure request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param domain_id - domain ID reported in IPFIX messages for classify stream + @param src_port - source port of UDP session for classify stream +*/ +define set_ipfix_classify_stream { + u32 client_index; + u32 context; + u32 domain_id; + u16 src_port; +}; + +/** \brief IPFIX classify stream configure response + @param context - sender context, to match reply w/ request + @param retval - return value for request +*/ +define set_ipfix_classify_stream_reply { + u32 context; + i32 retval; +}; + +/** \brief IPFIX classify stream dump request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define ipfix_classify_stream_dump { + u32 
client_index; + u32 context; +}; + +/** \brief Reply to IPFIX classify stream dump request + @param context - sender context, to match reply w/ request + @param domain_id - domain ID reported in IPFIX messages for classify stream + @param src_port - source port of UDP session for classify stream +*/ +define ipfix_classify_stream_details { + u32 context; + u32 domain_id; + u16 src_port; +}; + +/** \brief IPFIX add or delete classifier table request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param table_id - classifier table ID + @param ip_version - version of IP used in the classifier table + @param transport_protocol - transport protocol used in the classifier table or 255 for unspecified +*/ +define ipfix_classify_table_add_del { + u32 client_index; + u32 context; + u32 table_id; + u8 ip_version; + u8 transport_protocol; + u8 is_add; +}; + +/** \brief IPFIX add classifier table response + @param context - sender context which was passed in the request +*/ +define ipfix_classify_table_add_del_reply { + u32 context; + i32 retval; +}; + +/** \brief IPFIX classify tables dump request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define ipfix_classify_table_dump { + u32 client_index; + u32 context; +}; + +/** \brief Reply to IPFIX classify tables dump request + @param context - sender context, to match reply w/ request + @param table_id - classifier table ID + @param ip_version - version of IP used in the classifier table + @param transport_protocol - transport protocol used in the classifier table or 255 for unspecified +*/ +define ipfix_classify_table_details { + u32 context; + u32 table_id; + u8 ip_version; + u8 transport_protocol; +}; + +/* + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/flow/flow_api.c b/src/vnet/flow/flow_api.c new file mode 100644 index 00000000..b975dda1 --- /dev/null +++ b/src/vnet/flow/flow_api.c @@ -0,0 +1,397 @@ +/* + *------------------------------------------------------------------ + * flow_api.c - flow api + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include +#include + +#include +#include + +#include +#include +#include + +#include + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) 
vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include +#undef vl_printfun + +#include + +#define foreach_vpe_api_msg \ +_(SET_IPFIX_EXPORTER, set_ipfix_exporter) \ +_(IPFIX_EXPORTER_DUMP, ipfix_exporter_dump) \ +_(SET_IPFIX_CLASSIFY_STREAM, set_ipfix_classify_stream) \ +_(IPFIX_CLASSIFY_STREAM_DUMP, ipfix_classify_stream_dump) \ +_(IPFIX_CLASSIFY_TABLE_ADD_DEL, ipfix_classify_table_add_del) \ +_(IPFIX_CLASSIFY_TABLE_DUMP, ipfix_classify_table_dump) + +static void +vl_api_set_ipfix_exporter_t_handler (vl_api_set_ipfix_exporter_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + flow_report_main_t *frm = &flow_report_main; + vl_api_set_ipfix_exporter_reply_t *rmp; + ip4_address_t collector, src; + u16 collector_port = UDP_DST_PORT_ipfix; + u32 path_mtu; + u32 template_interval; + u8 udp_checksum; + u32 fib_id; + u32 fib_index = ~0; + int rv = 0; + + memcpy (collector.data, mp->collector_address, sizeof (collector.data)); + collector_port = ntohs (mp->collector_port); + if (collector_port == (u16) ~ 0) + collector_port = UDP_DST_PORT_ipfix; + memcpy (src.data, mp->src_address, sizeof (src.data)); + fib_id = ntohl (mp->vrf_id); + + ip4_main_t *im = &ip4_main; + if (fib_id == ~0) + { + fib_index = ~0; + } + else + { + uword *p = hash_get (im->fib_index_by_table_id, fib_id); + if (!p) + { + rv = VNET_API_ERROR_NO_SUCH_FIB; + goto out; + } + fib_index = p[0]; + } + + path_mtu = ntohl (mp->path_mtu); + if (path_mtu == ~0) + path_mtu = 512; // RFC 7011 section 10.3.3. + template_interval = ntohl (mp->template_interval); + if (template_interval == ~0) + template_interval = 20; + udp_checksum = mp->udp_checksum; + + if (collector.as_u32 == 0) + { + rv = VNET_API_ERROR_INVALID_VALUE; + goto out; + } + + if (src.as_u32 == 0) + { + rv = VNET_API_ERROR_INVALID_VALUE; + goto out; + } + + if (path_mtu > 1450 /* vpp does not support fragmentation */ ) + { + rv = VNET_API_ERROR_INVALID_VALUE; + goto out; + } + + if (path_mtu < 68) + { + rv = VNET_API_ERROR_INVALID_VALUE; + goto out; + } + + /* Reset report streams if we are reconfiguring IP addresses */ + if (frm->ipfix_collector.as_u32 != collector.as_u32 || + frm->src_address.as_u32 != src.as_u32 || + frm->collector_port != collector_port) + vnet_flow_reports_reset (frm); + + frm->ipfix_collector.as_u32 = collector.as_u32; + frm->collector_port = collector_port; + frm->src_address.as_u32 = src.as_u32; + frm->fib_index = fib_index; + frm->path_mtu = path_mtu; + frm->template_interval = template_interval; + frm->udp_checksum = udp_checksum; + + /* Turn on the flow reporting process */ + vlib_process_signal_event (vm, flow_report_process_node.index, 1, 0); + +out: + REPLY_MACRO (VL_API_SET_IPFIX_EXPORTER_REPLY); +} + +static void +vl_api_ipfix_exporter_dump_t_handler (vl_api_ipfix_exporter_dump_t * mp) +{ + flow_report_main_t *frm = &flow_report_main; + unix_shared_memory_queue_t *q; + vl_api_ipfix_exporter_details_t *rmp; + ip4_main_t *im = &ip4_main; + u32 vrf_id; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (!q) + return; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_IPFIX_EXPORTER_DETAILS); + rmp->context = mp->context; + memcpy (rmp->collector_address, frm->ipfix_collector.data, + sizeof (frm->ipfix_collector.data)); + rmp->collector_port = htons (frm->collector_port); + memcpy (rmp->src_address, frm->src_address.data, + sizeof (frm->src_address.data)); + if (frm->fib_index == ~0) + vrf_id = ~0; + else + vrf_id = im->fibs[frm->fib_index].ft_table_id; + 
rmp->vrf_id = htonl (vrf_id); + rmp->path_mtu = htonl (frm->path_mtu); + rmp->template_interval = htonl (frm->template_interval); + rmp->udp_checksum = (frm->udp_checksum != 0); + + vl_msg_api_send_shmem (q, (u8 *) & rmp); +} + +static void + vl_api_set_ipfix_classify_stream_t_handler + (vl_api_set_ipfix_classify_stream_t * mp) +{ + vl_api_set_ipfix_classify_stream_reply_t *rmp; + flow_report_classify_main_t *fcm = &flow_report_classify_main; + flow_report_main_t *frm = &flow_report_main; + u32 domain_id = 0; + u32 src_port = UDP_DST_PORT_ipfix; + int rv = 0; + + domain_id = ntohl (mp->domain_id); + src_port = ntohs (mp->src_port); + + if (fcm->src_port != 0 && + (fcm->domain_id != domain_id || fcm->src_port != (u16) src_port)) + { + int rv = vnet_stream_change (frm, fcm->domain_id, fcm->src_port, + domain_id, (u16) src_port); + ASSERT (rv == 0); + } + + fcm->domain_id = domain_id; + fcm->src_port = (u16) src_port; + + REPLY_MACRO (VL_API_SET_IPFIX_CLASSIFY_STREAM_REPLY); +} + +static void + vl_api_ipfix_classify_stream_dump_t_handler + (vl_api_ipfix_classify_stream_dump_t * mp) +{ + flow_report_classify_main_t *fcm = &flow_report_classify_main; + unix_shared_memory_queue_t *q; + vl_api_ipfix_classify_stream_details_t *rmp; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (!q) + return; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_IPFIX_CLASSIFY_STREAM_DETAILS); + rmp->context = mp->context; + rmp->domain_id = htonl (fcm->domain_id); + rmp->src_port = htons (fcm->src_port); + + vl_msg_api_send_shmem (q, (u8 *) & rmp); +} + +static void + vl_api_ipfix_classify_table_add_del_t_handler + (vl_api_ipfix_classify_table_add_del_t * mp) +{ + vl_api_ipfix_classify_table_add_del_reply_t *rmp; + flow_report_classify_main_t *fcm = &flow_report_classify_main; + flow_report_main_t *frm = &flow_report_main; + vnet_flow_report_add_del_args_t args; + ipfix_classify_table_t *table; + int is_add; + u32 classify_table_index; + u8 ip_version; + u8 transport_protocol; + int rv = 0; + + classify_table_index = ntohl (mp->table_id); + ip_version = mp->ip_version; + transport_protocol = mp->transport_protocol; + is_add = mp->is_add; + + if (fcm->src_port == 0) + { + /* call set_ipfix_classify_stream first */ + rv = VNET_API_ERROR_UNSPECIFIED; + goto out; + } + + memset (&args, 0, sizeof (args)); + + table = 0; + int i; + for (i = 0; i < vec_len (fcm->tables); i++) + if (ipfix_classify_table_index_valid (i)) + if (fcm->tables[i].classify_table_index == classify_table_index) + { + table = &fcm->tables[i]; + break; + } + + if (is_add) + { + if (table) + { + rv = VNET_API_ERROR_VALUE_EXIST; + goto out; + } + table = ipfix_classify_add_table (); + table->classify_table_index = classify_table_index; + } + else + { + if (!table) + { + rv = VNET_API_ERROR_NO_SUCH_ENTRY; + goto out; + } + } + + table->ip_version = ip_version; + table->transport_protocol = transport_protocol; + + args.opaque.as_uword = table - fcm->tables; + args.rewrite_callback = ipfix_classify_template_rewrite; + args.flow_data_callback = ipfix_classify_send_flows; + args.is_add = is_add; + args.domain_id = fcm->domain_id; + args.src_port = fcm->src_port; + + rv = vnet_flow_report_add_del (frm, &args); + + /* If deleting, or add failed */ + if (is_add == 0 || (rv && is_add)) + ipfix_classify_delete_table (table - fcm->tables); + +out: + REPLY_MACRO (VL_API_SET_IPFIX_CLASSIFY_STREAM_REPLY); +} + +static void +send_ipfix_classify_table_details (u32 table_index, + 
unix_shared_memory_queue_t * q, + u32 context) +{ + flow_report_classify_main_t *fcm = &flow_report_classify_main; + vl_api_ipfix_classify_table_details_t *mp; + + ipfix_classify_table_t *table = &fcm->tables[table_index]; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_IPFIX_CLASSIFY_TABLE_DETAILS); + mp->context = context; + mp->table_id = htonl (table->classify_table_index); + mp->ip_version = table->ip_version; + mp->transport_protocol = table->transport_protocol; + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +static void + vl_api_ipfix_classify_table_dump_t_handler + (vl_api_ipfix_classify_table_dump_t * mp) +{ + flow_report_classify_main_t *fcm = &flow_report_classify_main; + unix_shared_memory_queue_t *q; + u32 i; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (!q) + return; + + for (i = 0; i < vec_len (fcm->tables); i++) + if (ipfix_classify_table_index_valid (i)) + send_ipfix_classify_table_details (i, q, mp->context); +} + +/* + * flow_api_hookup + * Add vpe's API message handlers to the table. + * vlib has alread mapped shared memory and + * added the client registration handlers. + * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process() + */ +#define vl_msg_name_crc_list +#include +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (api_main_t * am) +{ +#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id); + foreach_vl_msg_name_crc_flow; +#undef _ +} + +static clib_error_t * +flow_api_hookup (vlib_main_t * vm) +{ + api_main_t *am = &api_main; + +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_vpe_api_msg; +#undef _ + + /* + * Set up the (msg_name, crc, message-id) table + */ + setup_message_id_table (am); + + return 0; +} + +VLIB_API_INIT_FUNCTION (flow_api_hookup); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/vnet_all_api_h.h b/src/vnet/vnet_all_api_h.h index c7789650..d76eee5a 100644 --- a/src/vnet/vnet_all_api_h.h +++ b/src/vnet/vnet_all_api_h.h @@ -53,6 +53,7 @@ #include #include #include +#include /* * fd.io coding-style-patch-verification: ON diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index 6e7e9c10..9f6f260b 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -72,8 +72,6 @@ #include #include #include -#include -#include #include #include @@ -160,12 +158,6 @@ _(IOAM_ENABLE, ioam_enable) \ _(IOAM_DISABLE, ioam_disable) \ _(POLICER_ADD_DEL, policer_add_del) \ _(POLICER_DUMP, policer_dump) \ -_(SET_IPFIX_EXPORTER, set_ipfix_exporter) \ -_(IPFIX_EXPORTER_DUMP, ipfix_exporter_dump) \ -_(SET_IPFIX_CLASSIFY_STREAM, set_ipfix_classify_stream) \ -_(IPFIX_CLASSIFY_STREAM_DUMP, ipfix_classify_stream_dump) \ -_(IPFIX_CLASSIFY_TABLE_ADD_DEL, ipfix_classify_table_add_del) \ -_(IPFIX_CLASSIFY_TABLE_DUMP, ipfix_classify_table_dump) \ _(GET_NEXT_INDEX, get_next_index) \ _(PG_CREATE_INTERFACE, pg_create_interface) \ _(PG_CAPTURE, pg_capture) \ @@ -2422,297 +2414,6 @@ vl_api_policer_dump_t_handler (vl_api_policer_dump_t * mp) } -static void -vl_api_set_ipfix_exporter_t_handler (vl_api_set_ipfix_exporter_t * mp) -{ - vlib_main_t *vm = vlib_get_main (); - flow_report_main_t *frm = &flow_report_main; - vl_api_set_ipfix_exporter_reply_t *rmp; - ip4_address_t collector, src; - u16 collector_port = 
UDP_DST_PORT_ipfix; - u32 path_mtu; - u32 template_interval; - u8 udp_checksum; - u32 fib_id; - u32 fib_index = ~0; - int rv = 0; - - memcpy (collector.data, mp->collector_address, sizeof (collector.data)); - collector_port = ntohs (mp->collector_port); - if (collector_port == (u16) ~ 0) - collector_port = UDP_DST_PORT_ipfix; - memcpy (src.data, mp->src_address, sizeof (src.data)); - fib_id = ntohl (mp->vrf_id); - - ip4_main_t *im = &ip4_main; - if (fib_id == ~0) - { - fib_index = ~0; - } - else - { - uword *p = hash_get (im->fib_index_by_table_id, fib_id); - if (!p) - { - rv = VNET_API_ERROR_NO_SUCH_FIB; - goto out; - } - fib_index = p[0]; - } - - path_mtu = ntohl (mp->path_mtu); - if (path_mtu == ~0) - path_mtu = 512; // RFC 7011 section 10.3.3. - template_interval = ntohl (mp->template_interval); - if (template_interval == ~0) - template_interval = 20; - udp_checksum = mp->udp_checksum; - - if (collector.as_u32 == 0) - { - rv = VNET_API_ERROR_INVALID_VALUE; - goto out; - } - - if (src.as_u32 == 0) - { - rv = VNET_API_ERROR_INVALID_VALUE; - goto out; - } - - if (path_mtu > 1450 /* vpp does not support fragmentation */ ) - { - rv = VNET_API_ERROR_INVALID_VALUE; - goto out; - } - - if (path_mtu < 68) - { - rv = VNET_API_ERROR_INVALID_VALUE; - goto out; - } - - /* Reset report streams if we are reconfiguring IP addresses */ - if (frm->ipfix_collector.as_u32 != collector.as_u32 || - frm->src_address.as_u32 != src.as_u32 || - frm->collector_port != collector_port) - vnet_flow_reports_reset (frm); - - frm->ipfix_collector.as_u32 = collector.as_u32; - frm->collector_port = collector_port; - frm->src_address.as_u32 = src.as_u32; - frm->fib_index = fib_index; - frm->path_mtu = path_mtu; - frm->template_interval = template_interval; - frm->udp_checksum = udp_checksum; - - /* Turn on the flow reporting process */ - vlib_process_signal_event (vm, flow_report_process_node.index, 1, 0); - -out: - REPLY_MACRO (VL_API_SET_IPFIX_EXPORTER_REPLY); -} - -static void -vl_api_ipfix_exporter_dump_t_handler (vl_api_ipfix_exporter_dump_t * mp) -{ - flow_report_main_t *frm = &flow_report_main; - unix_shared_memory_queue_t *q; - vl_api_ipfix_exporter_details_t *rmp; - ip4_main_t *im = &ip4_main; - u32 vrf_id; - - q = vl_api_client_index_to_input_queue (mp->client_index); - if (!q) - return; - - rmp = vl_msg_api_alloc (sizeof (*rmp)); - memset (rmp, 0, sizeof (*rmp)); - rmp->_vl_msg_id = ntohs (VL_API_IPFIX_EXPORTER_DETAILS); - rmp->context = mp->context; - memcpy (rmp->collector_address, frm->ipfix_collector.data, - sizeof (frm->ipfix_collector.data)); - rmp->collector_port = htons (frm->collector_port); - memcpy (rmp->src_address, frm->src_address.data, - sizeof (frm->src_address.data)); - if (frm->fib_index == ~0) - vrf_id = ~0; - else - vrf_id = im->fibs[frm->fib_index].ft_table_id; - rmp->vrf_id = htonl (vrf_id); - rmp->path_mtu = htonl (frm->path_mtu); - rmp->template_interval = htonl (frm->template_interval); - rmp->udp_checksum = (frm->udp_checksum != 0); - - vl_msg_api_send_shmem (q, (u8 *) & rmp); -} - -static void - vl_api_set_ipfix_classify_stream_t_handler - (vl_api_set_ipfix_classify_stream_t * mp) -{ - vl_api_set_ipfix_classify_stream_reply_t *rmp; - flow_report_classify_main_t *fcm = &flow_report_classify_main; - flow_report_main_t *frm = &flow_report_main; - u32 domain_id = 0; - u32 src_port = UDP_DST_PORT_ipfix; - int rv = 0; - - domain_id = ntohl (mp->domain_id); - src_port = ntohs (mp->src_port); - - if (fcm->src_port != 0 && - (fcm->domain_id != domain_id || fcm->src_port != (u16) src_port)) - 
{ - int rv = vnet_stream_change (frm, fcm->domain_id, fcm->src_port, - domain_id, (u16) src_port); - ASSERT (rv == 0); - } - - fcm->domain_id = domain_id; - fcm->src_port = (u16) src_port; - - REPLY_MACRO (VL_API_SET_IPFIX_CLASSIFY_STREAM_REPLY); -} - -static void - vl_api_ipfix_classify_stream_dump_t_handler - (vl_api_ipfix_classify_stream_dump_t * mp) -{ - flow_report_classify_main_t *fcm = &flow_report_classify_main; - unix_shared_memory_queue_t *q; - vl_api_ipfix_classify_stream_details_t *rmp; - - q = vl_api_client_index_to_input_queue (mp->client_index); - if (!q) - return; - - rmp = vl_msg_api_alloc (sizeof (*rmp)); - memset (rmp, 0, sizeof (*rmp)); - rmp->_vl_msg_id = ntohs (VL_API_IPFIX_CLASSIFY_STREAM_DETAILS); - rmp->context = mp->context; - rmp->domain_id = htonl (fcm->domain_id); - rmp->src_port = htons (fcm->src_port); - - vl_msg_api_send_shmem (q, (u8 *) & rmp); -} - -static void - vl_api_ipfix_classify_table_add_del_t_handler - (vl_api_ipfix_classify_table_add_del_t * mp) -{ - vl_api_ipfix_classify_table_add_del_reply_t *rmp; - flow_report_classify_main_t *fcm = &flow_report_classify_main; - flow_report_main_t *frm = &flow_report_main; - vnet_flow_report_add_del_args_t args; - ipfix_classify_table_t *table; - int is_add; - u32 classify_table_index; - u8 ip_version; - u8 transport_protocol; - int rv = 0; - - classify_table_index = ntohl (mp->table_id); - ip_version = mp->ip_version; - transport_protocol = mp->transport_protocol; - is_add = mp->is_add; - - if (fcm->src_port == 0) - { - /* call set_ipfix_classify_stream first */ - rv = VNET_API_ERROR_UNSPECIFIED; - goto out; - } - - memset (&args, 0, sizeof (args)); - - table = 0; - int i; - for (i = 0; i < vec_len (fcm->tables); i++) - if (ipfix_classify_table_index_valid (i)) - if (fcm->tables[i].classify_table_index == classify_table_index) - { - table = &fcm->tables[i]; - break; - } - - if (is_add) - { - if (table) - { - rv = VNET_API_ERROR_VALUE_EXIST; - goto out; - } - table = ipfix_classify_add_table (); - table->classify_table_index = classify_table_index; - } - else - { - if (!table) - { - rv = VNET_API_ERROR_NO_SUCH_ENTRY; - goto out; - } - } - - table->ip_version = ip_version; - table->transport_protocol = transport_protocol; - - args.opaque.as_uword = table - fcm->tables; - args.rewrite_callback = ipfix_classify_template_rewrite; - args.flow_data_callback = ipfix_classify_send_flows; - args.is_add = is_add; - args.domain_id = fcm->domain_id; - args.src_port = fcm->src_port; - - rv = vnet_flow_report_add_del (frm, &args); - - /* If deleting, or add failed */ - if (is_add == 0 || (rv && is_add)) - ipfix_classify_delete_table (table - fcm->tables); - -out: - REPLY_MACRO (VL_API_SET_IPFIX_CLASSIFY_STREAM_REPLY); -} - -static void -send_ipfix_classify_table_details (u32 table_index, - unix_shared_memory_queue_t * q, - u32 context) -{ - flow_report_classify_main_t *fcm = &flow_report_classify_main; - vl_api_ipfix_classify_table_details_t *mp; - - ipfix_classify_table_t *table = &fcm->tables[table_index]; - - mp = vl_msg_api_alloc (sizeof (*mp)); - memset (mp, 0, sizeof (*mp)); - mp->_vl_msg_id = ntohs (VL_API_IPFIX_CLASSIFY_TABLE_DETAILS); - mp->context = context; - mp->table_id = htonl (table->classify_table_index); - mp->ip_version = table->ip_version; - mp->transport_protocol = table->transport_protocol; - - vl_msg_api_send_shmem (q, (u8 *) & mp); -} - -static void - vl_api_ipfix_classify_table_dump_t_handler - (vl_api_ipfix_classify_table_dump_t * mp) -{ - flow_report_classify_main_t *fcm = 
&flow_report_classify_main; - unix_shared_memory_queue_t *q; - u32 i; - - q = vl_api_client_index_to_input_queue (mp->client_index); - if (!q) - return; - - for (i = 0; i < vec_len (fcm->tables); i++) - if (ipfix_classify_table_index_valid (i)) - send_ipfix_classify_table_details (i, q, mp->context); -} - static void vl_api_pg_create_interface_t_handler (vl_api_pg_create_interface_t * mp) { diff --git a/src/vpp/api/vpe.api b/src/vpp/api/vpe.api index a9b34d1f..f32ba670 100644 --- a/src/vpp/api/vpe.api +++ b/src/vpp/api/vpe.api @@ -42,6 +42,7 @@ * SR APIs: see .../src/vnet/sr/{sr.api, sr_api.c} * DPDK APIs: see ... /src/vnet/devices/dpdk/{dpdk.api, dpdk_api.c} * CLASSIFY APIs: see ... /src/vnet/classify/{classify.api, classify_api.c} + * FLOW APIs: see ... /src/vnet/flow/{flow.api, flow_api.c} */ /** \brief Create a new subinterface with the given vlan id @@ -1389,159 +1390,6 @@ define policer_details u64 last_update_time; }; -/** \brief Configure IPFIX exporter process request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param collector_address - address of IPFIX collector - @param collector_port - port of IPFIX collector - @param src_address - address of IPFIX exporter - @param vrf_id - VRF / fib table ID - @param path_mtu - Path MTU between exporter and collector - @param template_interval - number of seconds after which to resend template - @param udp_checksum - UDP checksum calculation enable flag -*/ -define set_ipfix_exporter -{ - u32 client_index; - u32 context; - u8 collector_address[16]; - u16 collector_port; - u8 src_address[16]; - u32 vrf_id; - u32 path_mtu; - u32 template_interval; - u8 udp_checksum; -}; - -/** \brief Reply to IPFIX exporter configure request - @param context - sender context which was passed in the request -*/ -define set_ipfix_exporter_reply -{ - u32 context; - i32 retval; -}; - -/** \brief IPFIX exporter dump request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request -*/ -define ipfix_exporter_dump -{ - u32 client_index; - u32 context; -}; - -/** \brief Reply to IPFIX exporter dump request - @param context - sender context which was passed in the request - @param collector_address - address of IPFIX collector - @param collector_port - port of IPFIX collector - @param src_address - address of IPFIX exporter - @param fib_index - fib table index - @param path_mtu - Path MTU between exporter and collector - @param template_interval - number of seconds after which to resend template - @param udp_checksum - UDP checksum calculation enable flag -*/ -define ipfix_exporter_details -{ - u32 context; - u8 collector_address[16]; - u16 collector_port; - u8 src_address[16]; - u32 vrf_id; - u32 path_mtu; - u32 template_interval; - u8 udp_checksum; -}; - -/** \brief IPFIX classify stream configure request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param domain_id - domain ID reported in IPFIX messages for classify stream - @param src_port - source port of UDP session for classify stream -*/ -define set_ipfix_classify_stream { - u32 client_index; - u32 context; - u32 domain_id; - u16 src_port; -}; - -/** \brief IPFIX classify stream configure response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define set_ipfix_classify_stream_reply { - u32 context; - i32 retval; -}; - -/** \brief 
IPFIX classify stream dump request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request -*/ -define ipfix_classify_stream_dump { - u32 client_index; - u32 context; -}; - -/** \brief Reply to IPFIX classify stream dump request - @param context - sender context, to match reply w/ request - @param domain_id - domain ID reported in IPFIX messages for classify stream - @param src_port - source port of UDP session for classify stream -*/ -define ipfix_classify_stream_details { - u32 context; - u32 domain_id; - u16 src_port; -}; - -/** \brief IPFIX add or delete classifier table request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param table_id - classifier table ID - @param ip_version - version of IP used in the classifier table - @param transport_protocol - transport protocol used in the classifier table or 255 for unspecified -*/ -define ipfix_classify_table_add_del { - u32 client_index; - u32 context; - u32 table_id; - u8 ip_version; - u8 transport_protocol; - u8 is_add; -}; - -/** \brief IPFIX add classifier table response - @param context - sender context which was passed in the request -*/ -define ipfix_classify_table_add_del_reply { - u32 context; - i32 retval; -}; - -/** \brief IPFIX classify tables dump request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request -*/ -define ipfix_classify_table_dump { - u32 client_index; - u32 context; -}; - -/** \brief Reply to IPFIX classify tables dump request - @param context - sender context, to match reply w/ request - @param table_id - classifier table ID - @param ip_version - version of IP used in the classifier table - @param transport_protocol - transport protocol used in the classifier table or 255 for unspecified -*/ -define ipfix_classify_table_details { - u32 context; - u32 table_id; - u8 ip_version; - u8 transport_protocol; -}; - /** \brief Query relative index via node names @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request -- cgit 1.2.3-korg From 044183faacc5eb8a055658f9deebefb56b254adc Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Tue, 24 Jan 2017 01:34:25 -0800 Subject: [re]Enable per-Adjacency/neighbour counters Change-Id: I953b3888bbc6d8a5f53f684a5edc8742b382f323 Signed-off-by: Neale Ranns --- src/scripts/vnet/ip6 | 18 +- src/vat/api_format.c | 124 ++++++++++++++ src/vat/vat.h | 18 ++ src/vlib/counter.h | 14 ++ src/vnet/adj/adj.c | 4 + src/vnet/adj/adj_l2.c | 10 +- src/vnet/ip/ip4_forward.c | 42 +++-- src/vnet/ip/ip6_forward.c | 24 +-- src/vnet/mpls/mpls_output.c | 37 ++-- src/vpp/api/vpe.api | 41 +++++ src/vpp/stats/stats.c | 405 +++++++++++++++++++++++++++++++++++++++++++- 11 files changed, 675 insertions(+), 62 deletions(-) (limited to 'src/vpp/api') diff --git a/src/scripts/vnet/ip6 b/src/scripts/vnet/ip6 index 4f9f3ee5..adb27225 100644 --- a/src/scripts/vnet/ip6 +++ b/src/scripts/vnet/ip6 @@ -6,10 +6,24 @@ packet-generator new { no-recycle data { IP6: 1.2.3 -> 4.5.6 - ICMP: ::1 -> ::2 + ICMP: 3002::2 -> 3001::2 ICMP echo_request incrementing 100 } } -tr add pg-input 100 + +loop create +loop create +set int state loop0 up +set int state loop1 up + +set int ip address loop0 2001:1::1/64 +set int ip address loop1 2001:2::1/64 + +set ip6 neighbor loop0 2001:1::2 00:00:DD:EE:AA:DD +set ip6 neighbor loop1 2001:2::2 00:00:DD:EE:AA:EE + +ip route add 
3001::/64 via 2001:2::2 loop1 + +trace add pg-input 100 diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 653cf79f..839bcdaa 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -2042,6 +2042,42 @@ static void vl_api_vnet_ip4_fib_counters_t_handler_json } } +static void vl_api_vnet_ip4_nbr_counters_t_handler + (vl_api_vnet_ip4_nbr_counters_t * mp) +{ + /* not supported */ +} + +static void vl_api_vnet_ip4_nbr_counters_t_handler_json + (vl_api_vnet_ip4_nbr_counters_t * mp) +{ + vat_main_t *vam = &vat_main; + vl_api_ip4_nbr_counter_t *v; + ip4_nbr_counter_t *counter; + u32 sw_if_index; + u32 count; + int i; + + sw_if_index = ntohl (mp->sw_if_index); + count = ntohl (mp->count); + vec_validate (vam->ip4_nbr_counters, sw_if_index); + + if (mp->begin) + vec_free (vam->ip4_nbr_counters[sw_if_index]); + + v = (vl_api_ip4_nbr_counter_t *) & mp->c; + for (i = 0; i < count; i++) + { + vec_validate (vam->ip4_nbr_counters[sw_if_index], i); + counter = &vam->ip4_nbr_counters[sw_if_index][i]; + counter->address.s_addr = v->address; + counter->packets = clib_net_to_host_u64 (v->packets); + counter->bytes = clib_net_to_host_u64 (v->bytes); + counter->linkt = v->link_type; + v++; + } +} + static void vl_api_vnet_ip6_fib_counters_t_handler (vl_api_vnet_ip6_fib_counters_t * mp) { @@ -2087,6 +2123,43 @@ static void vl_api_vnet_ip6_fib_counters_t_handler_json } } +static void vl_api_vnet_ip6_nbr_counters_t_handler + (vl_api_vnet_ip6_nbr_counters_t * mp) +{ + /* not supported */ +} + +static void vl_api_vnet_ip6_nbr_counters_t_handler_json + (vl_api_vnet_ip6_nbr_counters_t * mp) +{ + vat_main_t *vam = &vat_main; + vl_api_ip6_nbr_counter_t *v; + ip6_nbr_counter_t *counter; + struct in6_addr ip6; + u32 sw_if_index; + u32 count; + int i; + + sw_if_index = ntohl (mp->sw_if_index); + count = ntohl (mp->count); + vec_validate (vam->ip6_nbr_counters, sw_if_index); + + if (mp->begin) + vec_free (vam->ip6_nbr_counters[sw_if_index]); + + v = (vl_api_ip6_nbr_counter_t *) & mp->c; + for (i = 0; i < count; i++) + { + vec_validate (vam->ip6_nbr_counters[sw_if_index], i); + counter = &vam->ip6_nbr_counters[sw_if_index][i]; + clib_memcpy (&ip6, &v->address, sizeof (ip6)); + counter->address = ip6; + counter->packets = clib_net_to_host_u64 (v->packets); + counter->bytes = clib_net_to_host_u64 (v->bytes); + v++; + } +} + static void vl_api_get_first_msg_id_reply_t_handler (vl_api_get_first_msg_id_reply_t * mp) { @@ -3490,6 +3563,10 @@ static void vl_api_flow_classify_details_t_handler_json #define vl_api_vnet_ip4_fib_counters_t_print vl_noop_handler #define vl_api_vnet_ip6_fib_counters_t_endian vl_noop_handler #define vl_api_vnet_ip6_fib_counters_t_print vl_noop_handler +#define vl_api_vnet_ip4_nbr_counters_t_endian vl_noop_handler +#define vl_api_vnet_ip4_nbr_counters_t_print vl_noop_handler +#define vl_api_vnet_ip6_nbr_counters_t_endian vl_noop_handler +#define vl_api_vnet_ip6_nbr_counters_t_print vl_noop_handler #define vl_api_lisp_adjacencies_get_reply_t_endian vl_noop_handler #define vl_api_lisp_adjacencies_get_reply_t_print vl_noop_handler @@ -3799,6 +3876,8 @@ _(DHCP_COMPL_EVENT, dhcp_compl_event) \ _(VNET_INTERFACE_COUNTERS, vnet_interface_counters) \ _(VNET_IP4_FIB_COUNTERS, vnet_ip4_fib_counters) \ _(VNET_IP6_FIB_COUNTERS, vnet_ip6_fib_counters) \ +_(VNET_IP4_NBR_COUNTERS, vnet_ip4_nbr_counters) \ +_(VNET_IP6_NBR_COUNTERS, vnet_ip6_nbr_counters) \ _(MAP_ADD_DOMAIN_REPLY, map_add_domain_reply) \ _(MAP_DEL_DOMAIN_REPLY, map_del_domain_reply) \ _(MAP_ADD_DEL_RULE_REPLY, map_add_del_rule_reply) \ @@ 
-4131,6 +4210,8 @@ dump_stats_table (vat_main_t * vam) u64 packets; ip4_fib_counter_t *c4; ip6_fib_counter_t *c6; + ip4_nbr_counter_t *n4; + ip6_nbr_counter_t *n6; int i, j; if (!vam->json_output) @@ -4226,6 +4307,49 @@ dump_stats_table (vat_main_t * vam) } } + /* ip4 nbr counters */ + msg_array = vat_json_object_add (&node, "ip4_nbr_counters"); + vat_json_init_array (msg_array); + for (i = 0; i < vec_len (vam->ip4_nbr_counters); i++) + { + msg = vat_json_array_add (msg_array); + vat_json_init_object (msg); + vat_json_object_add_uint (msg, "sw_if_index", i); + counter_array = vat_json_object_add (msg, "c"); + vat_json_init_array (counter_array); + for (j = 0; j < vec_len (vam->ip4_nbr_counters[i]); j++) + { + counter = vat_json_array_add (counter_array); + vat_json_init_object (counter); + n4 = &vam->ip4_nbr_counters[i][j]; + vat_json_object_add_ip4 (counter, "address", n4->address); + vat_json_object_add_uint (counter, "link-type", n4->linkt); + vat_json_object_add_uint (counter, "packets", n4->packets); + vat_json_object_add_uint (counter, "bytes", n4->bytes); + } + } + + /* ip6 nbr counters */ + msg_array = vat_json_object_add (&node, "ip6_nbr_counters"); + vat_json_init_array (msg_array); + for (i = 0; i < vec_len (vam->ip6_nbr_counters); i++) + { + msg = vat_json_array_add (msg_array); + vat_json_init_object (msg); + vat_json_object_add_uint (msg, "sw_if_index", i); + counter_array = vat_json_object_add (msg, "c"); + vat_json_init_array (counter_array); + for (j = 0; j < vec_len (vam->ip6_nbr_counters[i]); j++) + { + counter = vat_json_array_add (counter_array); + vat_json_init_object (counter); + n6 = &vam->ip6_nbr_counters[i][j]; + vat_json_object_add_ip6 (counter, "address", n6->address); + vat_json_object_add_uint (counter, "packets", n6->packets); + vat_json_object_add_uint (counter, "bytes", n6->bytes); + } + } + vat_json_print (vam->ofp, &node); vat_json_free (&node); diff --git a/src/vat/vat.h b/src/vat/vat.h index 3d7d96ae..831bdf50 100644 --- a/src/vat/vat.h +++ b/src/vat/vat.h @@ -95,6 +95,22 @@ typedef struct u64 bytes; } ip6_fib_counter_t; +typedef struct +{ + struct in_addr address; + vnet_link_t linkt; + u64 packets; + u64 bytes; +} ip4_nbr_counter_t; + +typedef struct +{ + struct in6_addr address; + vnet_link_t linkt; + u64 packets; + u64 bytes; +} ip6_nbr_counter_t; + typedef struct { /* vpe input queue */ @@ -185,6 +201,8 @@ typedef struct u32 *ip4_fib_counters_vrf_id_by_index; ip6_fib_counter_t **ip6_fib_counters; u32 *ip6_fib_counters_vrf_id_by_index; + ip4_nbr_counter_t **ip4_nbr_counters; + ip6_nbr_counter_t **ip6_nbr_counters; /* Convenience */ vlib_main_t *vlib_main; diff --git a/src/vlib/counter.h b/src/vlib/counter.h index a7903206..abfa89ee 100644 --- a/src/vlib/counter.h +++ b/src/vlib/counter.h @@ -273,6 +273,20 @@ vlib_increment_combined_counter (vlib_combined_counter_main_t * cm, } } +#define vlib_prefetch_combined_counter(_cm, _cpu_index, _index) \ +{ \ + vlib_mini_counter_t *_cpu_minis; \ + \ + /* \ + * This CPU's mini index is assumed to already be in cache \ + */ \ + _cpu_minis = (_cm)->minis[(_cpu_index)]; \ + CLIB_PREFETCH(_cpu_minis + (_index), \ + sizeof(*_cpu_minis), \ + STORE); \ +} + + /** Get the value of a combined counter, never called in the speed path Scrapes the entire set of mini counters. 
Innacurate unless worker threads which might increment the counter are diff --git a/src/vnet/adj/adj.c b/src/vnet/adj/adj.c index e740c4cb..d0be0f0e 100644 --- a/src/vnet/adj/adj.c +++ b/src/vnet/adj/adj.c @@ -122,6 +122,10 @@ format_ip_adjacency (u8 * s, va_list * args) if (fiaf & FORMAT_IP_ADJACENCY_DETAIL) { + vlib_counter_t counts; + + vlib_get_combined_counter(&adjacency_counters, adj_index, &counts); + s = format (s, "\n counts:[%Ld:%Ld]", counts.packets, counts.bytes); s = format (s, "\n locks:%d", adj->ia_node.fn_locks); s = format (s, " node:[%d]:%U", adj->rewrite_header.node_index, diff --git a/src/vnet/adj/adj_l2.c b/src/vnet/adj/adj_l2.c index 4d2dd708..5a083643 100644 --- a/src/vnet/adj/adj_l2.c +++ b/src/vnet/adj/adj_l2.c @@ -94,11 +94,11 @@ adj_l2_rewrite_inline (vlib_main_t * vm, rw_len0 = adj0[0].rewrite_header.data_bytes; vnet_buffer(p0)->ip.save_rewrite_length = rw_len0; - vlib_increment_combined_counter - (&adjacency_counters, - cpu_index, adj_index0, - /* packet increment */ 0, - /* byte increment */ rw_len0-sizeof(ethernet_header_t)); + vlib_increment_combined_counter(&adjacency_counters, + cpu_index, + adj_index0, + /* packet increment */ 0, + /* byte increment */ rw_len0); /* Check MTU of outgoing interface. */ if (PREDICT_TRUE((vlib_buffer_length_in_chain (vm, p0) <= diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index 6e91b9e9..87b345bd 100644 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -2402,19 +2402,12 @@ ip4_rewrite_inline (vlib_main_t * vm, error1); /* - * We've already accounted for an ethernet_header_t elsewhere + * pre-fetch the per-adjacency counters */ - if (PREDICT_FALSE (rw_len0 > sizeof (ethernet_header_t))) - vlib_increment_combined_counter - (&adjacency_counters, cpu_index, adj_index0, - /* packet increment */ 0, - /* byte increment */ rw_len0 - sizeof (ethernet_header_t)); - - if (PREDICT_FALSE (rw_len1 > sizeof (ethernet_header_t))) - vlib_increment_combined_counter - (&adjacency_counters, cpu_index, adj_index1, - /* packet increment */ 0, - /* byte increment */ rw_len1 - sizeof (ethernet_header_t)); + vlib_prefetch_combined_counter (&adjacency_counters, + cpu_index, adj_index0); + vlib_prefetch_combined_counter (&adjacency_counters, + cpu_index, adj_index1); /* Don't adjust the buffer for ttl issue; icmp-error node wants * to see the IP headerr */ @@ -2446,6 +2439,19 @@ ip4_rewrite_inline (vlib_main_t * vm, vnet_rewrite_two_headers (adj0[0], adj1[0], ip0, ip1, sizeof (ethernet_header_t)); + /* + * Bump the per-adjacency counters + */ + vlib_increment_combined_counter + (&adjacency_counters, + cpu_index, + adj_index0, 1, vlib_buffer_length_in_chain (vm, p0) + rw_len0); + + vlib_increment_combined_counter + (&adjacency_counters, + cpu_index, + adj_index1, 1, vlib_buffer_length_in_chain (vm, p1) + rw_len1); + if (is_midchain) { adj0->sub_type.midchain.fixup_func (vm, adj0, p0); @@ -2519,6 +2525,9 @@ ip4_rewrite_inline (vlib_main_t * vm, p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED; } + vlib_prefetch_combined_counter (&adjacency_counters, + cpu_index, adj_index0); + /* Guess we are only writing on simple Ethernet header. 
*/ vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t)); @@ -2526,11 +2535,10 @@ ip4_rewrite_inline (vlib_main_t * vm, rw_len0 = adj0[0].rewrite_header.data_bytes; vnet_buffer (p0)->ip.save_rewrite_length = rw_len0; - if (PREDICT_FALSE (rw_len0 > sizeof (ethernet_header_t))) - vlib_increment_combined_counter - (&adjacency_counters, cpu_index, adj_index0, - /* packet increment */ 0, - /* byte increment */ rw_len0 - sizeof (ethernet_header_t)); + vlib_increment_combined_counter + (&adjacency_counters, + cpu_index, + adj_index0, 1, vlib_buffer_length_in_chain (vm, p0) + rw_len0); /* Check MTU of outgoing interface. */ error0 = (vlib_buffer_length_in_chain (vm, p0) diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c index 197a9b79..232f7283 100644 --- a/src/vnet/ip/ip6_forward.c +++ b/src/vnet/ip/ip6_forward.c @@ -2108,14 +2108,14 @@ ip6_rewrite_inline (vlib_main_t * vm, vnet_buffer (p0)->ip.save_rewrite_length = rw_len0; vnet_buffer (p1)->ip.save_rewrite_length = rw_len1; - vlib_increment_combined_counter (&adjacency_counters, - cpu_index, adj_index0, - /* packet increment */ 0, - /* byte increment */ rw_len0); - vlib_increment_combined_counter (&adjacency_counters, - cpu_index, adj_index1, - /* packet increment */ 0, - /* byte increment */ rw_len1); + vlib_increment_combined_counter + (&adjacency_counters, + cpu_index, + adj_index0, 1, vlib_buffer_length_in_chain (vm, p0) + rw_len0); + vlib_increment_combined_counter + (&adjacency_counters, + cpu_index, adj_index1, + 1, vlib_buffer_length_in_chain (vm, p1) + rw_len1); /* Check MTU of outgoing interface. */ error0 = @@ -2233,10 +2233,10 @@ ip6_rewrite_inline (vlib_main_t * vm, rw_len0 = adj0[0].rewrite_header.data_bytes; vnet_buffer (p0)->ip.save_rewrite_length = rw_len0; - vlib_increment_combined_counter (&adjacency_counters, - cpu_index, adj_index0, - /* packet increment */ 0, - /* byte increment */ rw_len0); + vlib_increment_combined_counter + (&adjacency_counters, + cpu_index, + adj_index0, 1, vlib_buffer_length_in_chain (vm, p0) + rw_len0); /* Check MTU of outgoing interface. */ error0 = diff --git a/src/vnet/mpls/mpls_output.c b/src/vnet/mpls/mpls_output.c index 8292a0cb..c06cf917 100644 --- a/src/vnet/mpls/mpls_output.c +++ b/src/vnet/mpls/mpls_output.c @@ -128,18 +128,19 @@ mpls_output_inline (vlib_main_t * vm, rw_len0 = adj0[0].rewrite_header.data_bytes; rw_len1 = adj1[0].rewrite_header.data_bytes; - if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t))) - vlib_increment_combined_counter - (&adjacency_counters, - cpu_index, adj_index0, - /* packet increment */ 0, - /* byte increment */ rw_len0-sizeof(ethernet_header_t)); - if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t))) - vlib_increment_combined_counter - (&adjacency_counters, - cpu_index, adj_index1, - /* packet increment */ 0, - /* byte increment */ rw_len1-sizeof(ethernet_header_t)); + /* Bump the adj counters for packet and bytes */ + vlib_increment_combined_counter + (&adjacency_counters, + cpu_index, + adj_index0, + 1, + vlib_buffer_length_in_chain (vm, p0) + rw_len0); + vlib_increment_combined_counter + (&adjacency_counters, + cpu_index, + adj_index1, + 1, + vlib_buffer_length_in_chain (vm, p1) + rw_len1); /* Check MTU of outgoing interface. */ if (PREDICT_TRUE(vlib_buffer_length_in_chain (vm, p0) <= @@ -234,12 +235,12 @@ mpls_output_inline (vlib_main_t * vm, /* Update packet buffer attributes/set output interface. 
*/ rw_len0 = adj0[0].rewrite_header.data_bytes; - if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t))) - vlib_increment_combined_counter - (&adjacency_counters, - cpu_index, adj_index0, - /* packet increment */ 0, - /* byte increment */ rw_len0-sizeof(ethernet_header_t)); + vlib_increment_combined_counter + (&adjacency_counters, + cpu_index, + adj_index0, + 1, + vlib_buffer_length_in_chain (vm, p0) + rw_len0); /* Check MTU of outgoing interface. */ if (PREDICT_TRUE(vlib_buffer_length_in_chain (vm, p0) <= diff --git a/src/vpp/api/vpe.api b/src/vpp/api/vpe.api index f32ba670..a00033c5 100644 --- a/src/vpp/api/vpe.api +++ b/src/vpp/api/vpe.api @@ -230,6 +230,31 @@ manual_print manual_endian define vnet_ip4_fib_counters vl_api_ip4_fib_counter_t c[count]; }; +typeonly manual_print manual_endian define ip4_nbr_counter +{ + u32 address; + u8 link_type; + u64 packets; + u64 bytes; +}; + +/** + * @brief Per-neighbour (i.e. per-adjacency) coutners + * @param count The size of the array of counters + * @param sw_if_index The interface the adjacency is on + * @param begin Flag to indicate this is the first set of stats for this + * interface. If this flag is not set the it is a continuation of + * stats for this interface + * @param c counters + */ +manual_print manual_endian define vnet_ip4_nbr_counters +{ + u32 count; + u32 sw_if_index; + u8 begin; + vl_api_ip4_nbr_counter_t c[count]; +}; + typeonly manual_print manual_endian define ip6_fib_counter { u64 address[2]; @@ -245,6 +270,22 @@ manual_print manual_endian define vnet_ip6_fib_counters vl_api_ip6_fib_counter_t c[count]; }; +typeonly manual_print manual_endian define ip6_nbr_counter +{ + u64 address[2]; + u8 link_type; + u64 packets; + u64 bytes; +}; + +manual_print manual_endian define vnet_ip6_nbr_counters +{ + u32 count; + u32 sw_if_index; + u8 begin; + vl_api_ip6_nbr_counter_t c[count]; +}; + /** \brief Request for a single block of summary stats @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/vpp/stats/stats.c b/src/vpp/stats/stats.c index 391e02f6..5e9b0d69 100644 --- a/src/vpp/stats/stats.c +++ b/src/vpp/stats/stats.c @@ -49,7 +49,9 @@ _(WANT_STATS, want_stats) \ _(WANT_STATS_REPLY, want_stats_reply) \ _(VNET_INTERFACE_COUNTERS, vnet_interface_counters) \ _(VNET_IP4_FIB_COUNTERS, vnet_ip4_fib_counters) \ -_(VNET_IP6_FIB_COUNTERS, vnet_ip6_fib_counters) +_(VNET_IP6_FIB_COUNTERS, vnet_ip6_fib_counters) \ +_(VNET_IP4_NBR_COUNTERS, vnet_ip4_nbr_counters) \ +_(VNET_IP6_NBR_COUNTERS, vnet_ip6_nbr_counters) /* These constants ensure msg sizes <= 1024, aka ring allocation */ #define SIMPLE_COUNTER_BATCH_SIZE 126 @@ -258,6 +260,313 @@ ip46_fib_stats_delay (stats_main_t * sm, u32 sec, u32 nsec) } } +/** + * @brief The context passed when collecting adjacency counters + */ +typedef struct ip4_nbr_stats_ctx_t_ +{ + /** + * The SW IF index all these adjs belong to + */ + u32 sw_if_index; + + /** + * A vector of ip4 nbr counters + */ + vl_api_ip4_nbr_counter_t *counters; +} ip4_nbr_stats_ctx_t; + +static adj_walk_rc_t +ip4_nbr_stats_cb (adj_index_t ai, void *arg) +{ + vl_api_ip4_nbr_counter_t *vl_counter; + vlib_counter_t adj_counter; + ip4_nbr_stats_ctx_t *ctx; + ip_adjacency_t *adj; + + ctx = arg; + vlib_get_combined_counter (&adjacency_counters, ai, &adj_counter); + + if (0 != adj_counter.packets) + { + vec_add2 (ctx->counters, vl_counter, 1); + adj = adj_get (ai); + + vl_counter->packets = clib_host_to_net_u64 (adj_counter.packets); + vl_counter->bytes = 
clib_host_to_net_u64 (adj_counter.bytes); + vl_counter->address = adj->sub_type.nbr.next_hop.ip4.as_u32; + vl_counter->link_type = adj->ia_link; + } + return (ADJ_WALK_RC_CONTINUE); +} + +#define MIN(x,y) (((x)<(y))?(x):(y)) + +static void +ip4_nbr_ship (stats_main_t * sm, ip4_nbr_stats_ctx_t * ctx) +{ + api_main_t *am = sm->api_main; + vl_shmem_hdr_t *shmem_hdr = am->shmem_hdr; + unix_shared_memory_queue_t *q = shmem_hdr->vl_input_queue; + vl_api_vnet_ip4_nbr_counters_t *mp = 0; + int first = 0; + + /* + * If the walk context has counters, which may be left over from the last + * suspend, then we continue from there. + */ + while (0 != vec_len (ctx->counters)) + { + u32 n_items = MIN (vec_len (ctx->counters), + IP4_FIB_COUNTER_BATCH_SIZE); + u8 pause = 0; + + dslock (sm, 0 /* release hint */ , 1 /* tag */ ); + + mp = vl_msg_api_alloc_as_if_client (sizeof (*mp) + + (n_items * + sizeof + (vl_api_ip4_nbr_counter_t))); + mp->_vl_msg_id = ntohs (VL_API_VNET_IP4_NBR_COUNTERS); + mp->count = ntohl (n_items); + mp->sw_if_index = ntohl (ctx->sw_if_index); + mp->begin = first; + first = 0; + + /* + * copy the counters from the back of the context, then we can easily + * 'erase' them by resetting the vector length. + * The order we push the stats to the caller is not important. + */ + clib_memcpy (mp->c, + &ctx->counters[vec_len (ctx->counters) - n_items], + n_items * sizeof (*ctx->counters)); + + _vec_len (ctx->counters) = vec_len (ctx->counters) - n_items; + + /* + * send to the shm q + */ + unix_shared_memory_queue_lock (q); + pause = unix_shared_memory_queue_is_full (q); + + vl_msg_api_send_shmem_nolock (q, (u8 *) & mp); + unix_shared_memory_queue_unlock (q); + dsunlock (sm); + + if (pause) + ip46_fib_stats_delay (sm, 0 /* sec */ , + STATS_RELEASE_DELAY_NS); + } +} + +static void +do_ip4_nbrs (stats_main_t * sm) +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_interface_main_t *im = &vnm->interface_main; + vnet_sw_interface_t *si; + + ip4_nbr_stats_ctx_t ctx = { + .sw_if_index = 0, + .counters = NULL, + }; + + /* *INDENT-OFF* */ + pool_foreach (si, im->sw_interfaces, + ({ + /* + * update the interface we are now concerned with + */ + ctx.sw_if_index = si->sw_if_index; + + /* + * we are about to walk another interface, so we shouldn't have any pending + * stats to export. + */ + ASSERT(ctx.counters == NULL); + + /* + * visit each neighbour adjacency on the interface and collect + * its current stats. + * Because we hold the lock the walk is synchronous, so safe to routing + * updates. It's limited in work by the number of adjacenies on an + * interface, which is typically not huge. + */ + dslock (sm, 0 /* release hint */ , 1 /* tag */ ); + adj_nbr_walk (si->sw_if_index, + FIB_PROTOCOL_IP4, + ip4_nbr_stats_cb, + &ctx); + dsunlock (sm); + + /* + * if this interface has some adjacencies with counters then ship them, + * else continue to the next interface. 
+ */ + if (NULL != ctx.counters) + { + ip4_nbr_ship(sm, &ctx); + } + })); + /* *INDENT-OFF* */ +} + +/** + * @brief The context passed when collecting adjacency counters + */ +typedef struct ip6_nbr_stats_ctx_t_ +{ + /** + * The SW IF index all these adjs belong to + */ + u32 sw_if_index; + + /** + * A vector of ip6 nbr counters + */ + vl_api_ip6_nbr_counter_t *counters; +} ip6_nbr_stats_ctx_t; + +static adj_walk_rc_t +ip6_nbr_stats_cb (adj_index_t ai, + void *arg) +{ + vl_api_ip6_nbr_counter_t *vl_counter; + vlib_counter_t adj_counter; + ip6_nbr_stats_ctx_t *ctx; + ip_adjacency_t *adj; + + ctx = arg; + vlib_get_combined_counter(&adjacency_counters, ai, &adj_counter); + + if (0 != adj_counter.packets) + { + vec_add2(ctx->counters, vl_counter, 1); + adj = adj_get(ai); + + vl_counter->packets = clib_host_to_net_u64(adj_counter.packets); + vl_counter->bytes = clib_host_to_net_u64(adj_counter.bytes); + vl_counter->address[0] = adj->sub_type.nbr.next_hop.ip6.as_u64[0]; + vl_counter->address[1] = adj->sub_type.nbr.next_hop.ip6.as_u64[1]; + vl_counter->link_type = adj->ia_link; + } + return (ADJ_WALK_RC_CONTINUE); +} + +#define MIN(x,y) (((x)<(y))?(x):(y)) + +static void +ip6_nbr_ship (stats_main_t * sm, + ip6_nbr_stats_ctx_t *ctx) +{ + api_main_t *am = sm->api_main; + vl_shmem_hdr_t *shmem_hdr = am->shmem_hdr; + unix_shared_memory_queue_t *q = shmem_hdr->vl_input_queue; + vl_api_vnet_ip6_nbr_counters_t *mp = 0; + int first = 0; + + /* + * If the walk context has counters, which may be left over from the last + * suspend, then we continue from there. + */ + while (0 != vec_len(ctx->counters)) + { + u32 n_items = MIN (vec_len (ctx->counters), + IP6_FIB_COUNTER_BATCH_SIZE); + u8 pause = 0; + + dslock (sm, 0 /* release hint */ , 1 /* tag */ ); + + mp = vl_msg_api_alloc_as_if_client (sizeof (*mp) + + (n_items * + sizeof + (vl_api_ip6_nbr_counter_t))); + mp->_vl_msg_id = ntohs (VL_API_VNET_IP6_NBR_COUNTERS); + mp->count = ntohl (n_items); + mp->sw_if_index = ntohl (ctx->sw_if_index); + mp->begin = first; + first = 0; + + /* + * copy the counters from the back of the context, then we can easily + * 'erase' them by resetting the vector length. + * The order we push the stats to the caller is not important. + */ + clib_memcpy (mp->c, + &ctx->counters[vec_len (ctx->counters) - n_items], + n_items * sizeof (*ctx->counters)); + + _vec_len (ctx->counters) = vec_len (ctx->counters) - n_items; + + /* + * send to the shm q + */ + unix_shared_memory_queue_lock (q); + pause = unix_shared_memory_queue_is_full (q); + + vl_msg_api_send_shmem_nolock (q, (u8 *) & mp); + unix_shared_memory_queue_unlock (q); + dsunlock (sm); + + if (pause) + ip46_fib_stats_delay (sm, 0 /* sec */ , + STATS_RELEASE_DELAY_NS); + } +} + +static void +do_ip6_nbrs (stats_main_t * sm) +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_interface_main_t *im = &vnm->interface_main; + vnet_sw_interface_t *si; + + ip6_nbr_stats_ctx_t ctx = { + .sw_if_index = 0, + .counters = NULL, + }; + + /* *INDENT-OFF* */ + pool_foreach (si, im->sw_interfaces, + ({ + /* + * update the interface we are now concerned with + */ + ctx.sw_if_index = si->sw_if_index; + + /* + * we are about to walk another interface, so we shouldn't have any pending + * stats to export. + */ + ASSERT(ctx.counters == NULL); + + /* + * visit each neighbour adjacency on the interface and collect + * its current stats. + * Because we hold the lock the walk is synchronous, so safe to routing + * updates. 
It's limited in work by the number of adjacenies on an + * interface, which is typically not huge. + */ + dslock (sm, 0 /* release hint */ , 1 /* tag */ ); + adj_nbr_walk (si->sw_if_index, + FIB_PROTOCOL_IP6, + ip6_nbr_stats_cb, + &ctx); + dsunlock (sm); + + /* + * if this interface has some adjacencies with counters then ship them, + * else continue to the next interface. + */ + if (NULL != ctx.counters) + { + ip6_nbr_ship(sm, &ctx); + } + })); + /* *INDENT-OFF* */ +} + static void do_ip4_fibs (stats_main_t * sm) { @@ -318,13 +627,7 @@ again: hash_foreach_pair (p, hash, ({ x.address.data_u32 = p->key; - if (lm->fib_result_n_words > 1) - { - x.index = vec_len (results); - vec_add (results, p->value, lm->fib_result_n_words); - } - else - x.index = p->value[0]; + x.index = p->value[0]; vec_add1 (routes, x); if (sm->data_structure_lock->release_hint) @@ -631,6 +934,8 @@ stats_thread_fn (void *arg) do_combined_interface_counters (sm); do_ip4_fibs (sm); do_ip6_fibs (sm); + do_ip4_nbrs (sm); + do_ip6_nbrs (sm); } } @@ -804,6 +1109,45 @@ vl_api_vnet_ip4_fib_counters_t_handler (vl_api_vnet_ip4_fib_counters_t * mp) } } +static void +vl_api_vnet_ip4_nbr_counters_t_handler (vl_api_vnet_ip4_nbr_counters_t * mp) +{ + vpe_client_registration_t *reg; + stats_main_t *sm = &stats_main; + unix_shared_memory_queue_t *q, *q_prev = NULL; + vl_api_vnet_ip4_nbr_counters_t *mp_copy = NULL; + u32 mp_size; + + mp_size = sizeof (*mp_copy) + + ntohl (mp->count) * sizeof (vl_api_ip4_nbr_counter_t); + + /* *INDENT-OFF* */ + pool_foreach(reg, sm->stats_registrations, + ({ + q = vl_api_client_index_to_input_queue (reg->client_index); + if (q) + { + if (q_prev && (q_prev->cursize < q_prev->maxsize)) + { + mp_copy = vl_msg_api_alloc_as_if_client(mp_size); + clib_memcpy(mp_copy, mp, mp_size); + vl_msg_api_send_shmem (q_prev, (u8 *)&mp); + mp = mp_copy; + } + q_prev = q; + } + })); + /* *INDENT-ON* */ + if (q_prev && (q_prev->cursize < q_prev->maxsize)) + { + vl_msg_api_send_shmem (q_prev, (u8 *) & mp); + } + else + { + vl_msg_api_free (mp); + } +} + static void vl_api_vnet_ip6_fib_counters_t_handler (vl_api_vnet_ip6_fib_counters_t * mp) { @@ -843,6 +1187,45 @@ vl_api_vnet_ip6_fib_counters_t_handler (vl_api_vnet_ip6_fib_counters_t * mp) } } +static void +vl_api_vnet_ip6_nbr_counters_t_handler (vl_api_vnet_ip6_nbr_counters_t * mp) +{ + vpe_client_registration_t *reg; + stats_main_t *sm = &stats_main; + unix_shared_memory_queue_t *q, *q_prev = NULL; + vl_api_vnet_ip6_nbr_counters_t *mp_copy = NULL; + u32 mp_size; + + mp_size = sizeof (*mp_copy) + + ntohl (mp->count) * sizeof (vl_api_ip6_nbr_counter_t); + + /* *INDENT-OFF* */ + pool_foreach(reg, sm->stats_registrations, + ({ + q = vl_api_client_index_to_input_queue (reg->client_index); + if (q) + { + if (q_prev && (q_prev->cursize < q_prev->maxsize)) + { + mp_copy = vl_msg_api_alloc_as_if_client(mp_size); + clib_memcpy(mp_copy, mp, mp_size); + vl_msg_api_send_shmem (q_prev, (u8 *)&mp); + mp = mp_copy; + } + q_prev = q; + } + })); + /* *INDENT-ON* */ + if (q_prev && (q_prev->cursize < q_prev->maxsize)) + { + vl_msg_api_send_shmem (q_prev, (u8 *) & mp); + } + else + { + vl_msg_api_free (mp); + } +} + static void vl_api_want_stats_reply_t_handler (vl_api_want_stats_reply_t * mp) { @@ -929,6 +1312,10 @@ stats_memclnt_delete_callback (u32 client_index) #define vl_api_vnet_ip4_fib_counters_t_print vl_noop_handler #define vl_api_vnet_ip6_fib_counters_t_endian vl_noop_handler #define vl_api_vnet_ip6_fib_counters_t_print vl_noop_handler +#define vl_api_vnet_ip4_nbr_counters_t_endian 
vl_noop_handler +#define vl_api_vnet_ip4_nbr_counters_t_print vl_noop_handler +#define vl_api_vnet_ip6_nbr_counters_t_endian vl_noop_handler +#define vl_api_vnet_ip6_nbr_counters_t_print vl_noop_handler static clib_error_t * stats_init (vlib_main_t * vm) @@ -961,6 +1348,8 @@ stats_init (vlib_main_t * vm) am->message_bounce[VL_API_VNET_INTERFACE_COUNTERS] = 1; am->message_bounce[VL_API_VNET_IP4_FIB_COUNTERS] = 1; am->message_bounce[VL_API_VNET_IP6_FIB_COUNTERS] = 1; + am->message_bounce[VL_API_VNET_IP4_NBR_COUNTERS] = 1; + am->message_bounce[VL_API_VNET_IP6_NBR_COUNTERS] = 1; return 0; } -- cgit 1.2.3-korg From c8d8770a3e09c300eeff461a11ef3723b8e029cb Mon Sep 17 00:00:00 2001 From: Pavel Kotucek Date: Wed, 25 Jan 2017 07:25:32 +0100 Subject: API refactoring : dhcp Change-Id: I3829835ed2126e51e96690c907deac623dc77151 Signed-off-by: Pavel Kotucek --- src/vnet.am | 8 +- src/vnet/dhcp/dhcp.api | 166 ++++++++++++++++++++++++++++++ src/vnet/dhcp/dhcp_api.c | 253 ++++++++++++++++++++++++++++++++++++++++++++++ src/vnet/vnet_all_api_h.h | 1 + src/vpp/api/api.c | 158 ----------------------------- src/vpp/api/vpe.api | 147 +-------------------------- 6 files changed, 427 insertions(+), 306 deletions(-) create mode 100644 src/vnet/dhcp/dhcp.api create mode 100644 src/vnet/dhcp/dhcp_api.c (limited to 'src/vpp/api') diff --git a/src/vnet.am b/src/vnet.am index 669ea1ff..c6922493 100644 --- a/src/vnet.am +++ b/src/vnet.am @@ -657,10 +657,14 @@ endif ######################################## libvnet_la_SOURCES += \ vnet/dhcp/client.c \ - vnet/dhcp/client.h + vnet/dhcp/client.h \ + vnet/dhcp/dhcp_api.c nobase_include_HEADERS += \ - vnet/dhcp/client.h + vnet/dhcp/client.h \ + vnet/dhcp/dhcp.api.h + +API_FILES += vnet/dhcp/dhcp.api ######################################## # DHCP proxy diff --git a/src/vnet/dhcp/dhcp.api b/src/vnet/dhcp/dhcp.api new file mode 100644 index 00000000..c228cd04 --- /dev/null +++ b/src/vnet/dhcp/dhcp.api @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2015-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** \brief DHCP Proxy config add / del request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param vrf_id - vrf id + @param if_ipv6 - ipv6 if non-zero, else ipv4 + @param is_add - add the config if non-zero, else delete + @param insert_circuit_id - option82 suboption 1 fib number + @param dhcp_server[] - server address + @param dhcp_src_address[] - +*/ +define dhcp_proxy_config +{ + u32 client_index; + u32 context; + u32 vrf_id; + u8 is_ipv6; + u8 is_add; + u8 insert_circuit_id; + u8 dhcp_server[16]; + u8 dhcp_src_address[16]; +}; + +/** \brief DHCP Proxy config response + @param context - sender context, to match reply w/ request + @param retval - return code for the request +*/ +define dhcp_proxy_config_reply +{ + u32 context; + i32 retval; +}; + +/** \brief DHCP Proxy config 2 add / del request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param rx_vrf_id - receive vrf id + @param server_vrf_id - server vrf id + @param if_ipv6 - ipv6 if non-zero, else ipv4 + @param is_add - add the config if non-zero, else delete + @param insert_circuit_id - option82 suboption 1 fib number + @param dhcp_server[] - server address + @param dhcp_src_address[] - +*/ +define dhcp_proxy_config_2 +{ + u32 client_index; + u32 context; + u32 rx_vrf_id; + u32 server_vrf_id; + u8 is_ipv6; + u8 is_add; + u8 insert_circuit_id; + u8 dhcp_server[16]; + u8 dhcp_src_address[16]; +}; + +/** \brief DHCP Proxy config 2 add / del response + @param context - sender context, to match reply w/ request + @param retval - return code for request +*/ +define dhcp_proxy_config_2_reply +{ + u32 context; + i32 retval; +}; + +/** \brief DHCP Proxy set / unset vss request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param tbl_id - table id + @param oui - first part of vpn id + @param fib_id - second part of vpn id + @param is_ipv6 - ip6 if non-zero, else ip4 + @param is_add - set vss if non-zero, else delete +*/ +define dhcp_proxy_set_vss +{ + u32 client_index; + u32 context; + u32 tbl_id; + u32 oui; + u32 fib_id; + u8 is_ipv6; + u8 is_add; +}; + +/** \brief DHCP proxy set / unset vss response + @param context - sender context, to match reply w/ request + @param retval - return code for the request +*/ +define dhcp_proxy_set_vss_reply +{ + u32 context; + i32 retval; +}; + +/** \brief DHCP Client config add / del request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - index of the interface for DHCP client + @param hostname - hostname + @param is_add - add the config if non-zero, else delete + @param want_dhcp_event - DHCP event sent to the sender + via dhcp_compl_event API message if non-zero + @param pid - sender's pid +*/ +define dhcp_client_config +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u8 hostname[64]; + u8 is_add; + u8 want_dhcp_event; + u32 pid; +}; + +/** \brief DHCP Client config response + @param context - sender context, to match reply w/ request + @param retval - return code for the request +*/ +define dhcp_client_config_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Tell client about a DHCP completion event + @param client_index - opaque cookie to identify the sender + @param pid - client pid registered to receive notification + @param is_ipv6 - if non-zero 
the address is ipv6, else ipv4 + @param host_address - Host IP address + @param router_address - Router IP address + @param host_mac - Host MAC address +*/ +define dhcp_compl_event +{ + u32 client_index; + u32 pid; + u8 hostname[64]; + u8 is_ipv6; + u8 host_address[16]; + u8 router_address[16]; + u8 host_mac[6]; +}; + +/* + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ \ No newline at end of file diff --git a/src/vnet/dhcp/dhcp_api.c b/src/vnet/dhcp/dhcp_api.c new file mode 100644 index 00000000..88b32b24 --- /dev/null +++ b/src/vnet/dhcp/dhcp_api.c @@ -0,0 +1,253 @@ +/* + *------------------------------------------------------------------ + * dhcp_api.c - dhcp api + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include +#include + +#include +#include +#include +#include +#include + +#include + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include +#undef vl_printfun + +#include + +#define foreach_vpe_api_msg \ +_(DHCP_PROXY_CONFIG,dhcp_proxy_config) \ +_(DHCP_PROXY_CONFIG_2,dhcp_proxy_config_2) \ +_(DHCP_PROXY_SET_VSS,dhcp_proxy_set_vss) \ +_(DHCP_CLIENT_CONFIG, dhcp_client_config) + +static void +dhcpv4_proxy_config (vl_api_dhcp_proxy_config_t * mp) +{ + vl_api_dhcp_proxy_config_reply_t *rmp; + int rv; + + rv = dhcp_proxy_set_server ((ip4_address_t *) (&mp->dhcp_server), + (ip4_address_t *) (&mp->dhcp_src_address), + (u32) ntohl (mp->vrf_id), + (int) mp->insert_circuit_id, + (int) (mp->is_add == 0)); + + REPLY_MACRO (VL_API_DHCP_PROXY_CONFIG_REPLY); +} + + +static void +dhcpv6_proxy_config (vl_api_dhcp_proxy_config_t * mp) +{ + vl_api_dhcp_proxy_config_reply_t *rmp; + int rv = -1; + + rv = dhcpv6_proxy_set_server ((ip6_address_t *) (&mp->dhcp_server), + (ip6_address_t *) (&mp->dhcp_src_address), + (u32) ntohl (mp->vrf_id), + (int) mp->insert_circuit_id, + (int) (mp->is_add == 0)); + + REPLY_MACRO (VL_API_DHCP_PROXY_CONFIG_REPLY); +} + +static void +dhcpv4_proxy_config_2 (vl_api_dhcp_proxy_config_2_t * mp) +{ + vl_api_dhcp_proxy_config_reply_t *rmp; + int rv; + + rv = dhcp_proxy_set_server_2 ((ip4_address_t *) (&mp->dhcp_server), + (ip4_address_t *) (&mp->dhcp_src_address), + (u32) ntohl (mp->rx_vrf_id), + (u32) ntohl (mp->server_vrf_id), + (int) mp->insert_circuit_id, + (int) (mp->is_add == 0)); + + REPLY_MACRO (VL_API_DHCP_PROXY_CONFIG_2_REPLY); +} + + +static void +dhcpv6_proxy_config_2 (vl_api_dhcp_proxy_config_2_t * mp) +{ + vl_api_dhcp_proxy_config_reply_t *rmp; + int rv = -1; + + rv = dhcpv6_proxy_set_server_2 ((ip6_address_t *) (&mp->dhcp_server), + (ip6_address_t *) (&mp->dhcp_src_address), + (u32) ntohl (mp->rx_vrf_id), + (u32) ntohl (mp->server_vrf_id), 
+ (int) mp->insert_circuit_id, + (int) (mp->is_add == 0)); + + REPLY_MACRO (VL_API_DHCP_PROXY_CONFIG_2_REPLY); +} + + +static void +vl_api_dhcp_proxy_set_vss_t_handler (vl_api_dhcp_proxy_set_vss_t * mp) +{ + vl_api_dhcp_proxy_set_vss_reply_t *rmp; + int rv; + if (!mp->is_ipv6) + rv = dhcp_proxy_set_option82_vss (ntohl (mp->tbl_id), + ntohl (mp->oui), + ntohl (mp->fib_id), + (int) mp->is_add == 0); + else + rv = dhcpv6_proxy_set_vss (ntohl (mp->tbl_id), + ntohl (mp->oui), + ntohl (mp->fib_id), (int) mp->is_add == 0); + + REPLY_MACRO (VL_API_DHCP_PROXY_SET_VSS_REPLY); +} + + +static void vl_api_dhcp_proxy_config_t_handler + (vl_api_dhcp_proxy_config_t * mp) +{ + if (mp->is_ipv6 == 0) + dhcpv4_proxy_config (mp); + else + dhcpv6_proxy_config (mp); +} + +void +dhcp_compl_event_callback (u32 client_index, u32 pid, u8 * hostname, + u8 is_ipv6, u8 * host_address, u8 * router_address, + u8 * host_mac) +{ + unix_shared_memory_queue_t *q; + vl_api_dhcp_compl_event_t *mp; + + q = vl_api_client_index_to_input_queue (client_index); + if (!q) + return; + + mp = vl_msg_api_alloc (sizeof (*mp)); + mp->client_index = client_index; + mp->pid = pid; + mp->is_ipv6 = is_ipv6; + clib_memcpy (&mp->hostname, hostname, vec_len (hostname)); + mp->hostname[vec_len (hostname) + 1] = '\n'; + clib_memcpy (&mp->host_address[0], host_address, 16); + clib_memcpy (&mp->router_address[0], router_address, 16); + + if (NULL != host_mac) + clib_memcpy (&mp->host_mac[0], host_mac, 6); + + mp->_vl_msg_id = ntohs (VL_API_DHCP_COMPL_EVENT); + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +static void vl_api_dhcp_proxy_config_2_t_handler + (vl_api_dhcp_proxy_config_2_t * mp) +{ + if (mp->is_ipv6 == 0) + dhcpv4_proxy_config_2 (mp); + else + dhcpv6_proxy_config_2 (mp); +} + +static void vl_api_dhcp_client_config_t_handler + (vl_api_dhcp_client_config_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_dhcp_client_config_reply_t *rmp; + int rv = 0; + + VALIDATE_SW_IF_INDEX (mp); + + rv = dhcp_client_config (vm, ntohl (mp->sw_if_index), + mp->hostname, mp->is_add, mp->client_index, + mp->want_dhcp_event ? dhcp_compl_event_callback : + NULL, mp->pid); + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_DHCP_CLIENT_CONFIG_REPLY); +} + +/* + * dhcp_api_hookup + * Add vpe's API message handlers to the table. + * vlib has alread mapped shared memory and + * added the client registration handlers. 
+ * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process() + */ +#define vl_msg_name_crc_list +#include +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (api_main_t * am) +{ +#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id); + foreach_vl_msg_name_crc_dhcp; +#undef _ +} + +static clib_error_t * +dhcp_api_hookup (vlib_main_t * vm) +{ + api_main_t *am = &api_main; + +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_vpe_api_msg; +#undef _ + + /* + * Set up the (msg_name, crc, message-id) table + */ + setup_message_id_table (am); + + return 0; +} + +VLIB_API_INIT_FUNCTION (dhcp_api_hookup); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/vnet_all_api_h.h b/src/vnet/vnet_all_api_h.h index d76eee5a..4ba3a0e3 100644 --- a/src/vnet/vnet_all_api_h.h +++ b/src/vnet/vnet_all_api_h.h @@ -54,6 +54,7 @@ #include #include #include +#include /* * fd.io coding-style-patch-verification: ON diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index 9f6f260b..6317f557 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -52,12 +52,9 @@ #include #include #include -#include -#include #if WITH_LIBSSL > 0 #include #endif -#include #include #include #include @@ -124,10 +121,6 @@ _(PROXY_ARP_ADD_DEL, proxy_arp_add_del) \ _(PROXY_ARP_INTFC_ENABLE_DISABLE, proxy_arp_intfc_enable_disable) \ _(VNET_GET_SUMMARY_STATS, vnet_get_summary_stats) \ _(RESET_FIB, reset_fib) \ -_(DHCP_PROXY_CONFIG,dhcp_proxy_config) \ -_(DHCP_PROXY_CONFIG_2,dhcp_proxy_config_2) \ -_(DHCP_PROXY_SET_VSS,dhcp_proxy_set_vss) \ -_(DHCP_CLIENT_CONFIG, dhcp_client_config) \ _(CREATE_LOOPBACK, create_loopback) \ _(CONTROL_PING, control_ping) \ _(CLI_REQUEST, cli_request) \ @@ -1064,157 +1057,6 @@ vl_api_reset_fib_t_handler (vl_api_reset_fib_t * mp) REPLY_MACRO (VL_API_RESET_FIB_REPLY); } - -static void -dhcpv4_proxy_config (vl_api_dhcp_proxy_config_t * mp) -{ - vl_api_dhcp_proxy_config_reply_t *rmp; - int rv; - - rv = dhcp_proxy_set_server ((ip4_address_t *) (&mp->dhcp_server), - (ip4_address_t *) (&mp->dhcp_src_address), - (u32) ntohl (mp->vrf_id), - (int) mp->insert_circuit_id, - (int) (mp->is_add == 0)); - - REPLY_MACRO (VL_API_DHCP_PROXY_CONFIG_REPLY); -} - - -static void -dhcpv6_proxy_config (vl_api_dhcp_proxy_config_t * mp) -{ - vl_api_dhcp_proxy_config_reply_t *rmp; - int rv = -1; - - rv = dhcpv6_proxy_set_server ((ip6_address_t *) (&mp->dhcp_server), - (ip6_address_t *) (&mp->dhcp_src_address), - (u32) ntohl (mp->vrf_id), - (int) mp->insert_circuit_id, - (int) (mp->is_add == 0)); - - REPLY_MACRO (VL_API_DHCP_PROXY_CONFIG_REPLY); -} - -static void -dhcpv4_proxy_config_2 (vl_api_dhcp_proxy_config_2_t * mp) -{ - vl_api_dhcp_proxy_config_reply_t *rmp; - int rv; - - rv = dhcp_proxy_set_server_2 ((ip4_address_t *) (&mp->dhcp_server), - (ip4_address_t *) (&mp->dhcp_src_address), - (u32) ntohl (mp->rx_vrf_id), - (u32) ntohl (mp->server_vrf_id), - (int) mp->insert_circuit_id, - (int) (mp->is_add == 0)); - - REPLY_MACRO (VL_API_DHCP_PROXY_CONFIG_2_REPLY); -} - - -static void -dhcpv6_proxy_config_2 (vl_api_dhcp_proxy_config_2_t * mp) -{ - vl_api_dhcp_proxy_config_reply_t *rmp; - int rv = -1; - - rv = dhcpv6_proxy_set_server_2 ((ip6_address_t *) (&mp->dhcp_server), - (ip6_address_t *) (&mp->dhcp_src_address), - (u32) ntohl (mp->rx_vrf_id), - (u32) ntohl 
(mp->server_vrf_id), - (int) mp->insert_circuit_id, - (int) (mp->is_add == 0)); - - REPLY_MACRO (VL_API_DHCP_PROXY_CONFIG_2_REPLY); -} - - -static void -vl_api_dhcp_proxy_set_vss_t_handler (vl_api_dhcp_proxy_set_vss_t * mp) -{ - vl_api_dhcp_proxy_set_vss_reply_t *rmp; - int rv; - if (!mp->is_ipv6) - rv = dhcp_proxy_set_option82_vss (ntohl (mp->tbl_id), - ntohl (mp->oui), - ntohl (mp->fib_id), - (int) mp->is_add == 0); - else - rv = dhcpv6_proxy_set_vss (ntohl (mp->tbl_id), - ntohl (mp->oui), - ntohl (mp->fib_id), (int) mp->is_add == 0); - - REPLY_MACRO (VL_API_DHCP_PROXY_SET_VSS_REPLY); -} - - -static void vl_api_dhcp_proxy_config_t_handler - (vl_api_dhcp_proxy_config_t * mp) -{ - if (mp->is_ipv6 == 0) - dhcpv4_proxy_config (mp); - else - dhcpv6_proxy_config (mp); -} - -static void vl_api_dhcp_proxy_config_2_t_handler - (vl_api_dhcp_proxy_config_2_t * mp) -{ - if (mp->is_ipv6 == 0) - dhcpv4_proxy_config_2 (mp); - else - dhcpv6_proxy_config_2 (mp); -} - -void -dhcp_compl_event_callback (u32 client_index, u32 pid, u8 * hostname, - u8 is_ipv6, u8 * host_address, u8 * router_address, - u8 * host_mac) -{ - unix_shared_memory_queue_t *q; - vl_api_dhcp_compl_event_t *mp; - - q = vl_api_client_index_to_input_queue (client_index); - if (!q) - return; - - mp = vl_msg_api_alloc (sizeof (*mp)); - mp->client_index = client_index; - mp->pid = pid; - mp->is_ipv6 = is_ipv6; - clib_memcpy (&mp->hostname, hostname, vec_len (hostname)); - mp->hostname[vec_len (hostname) + 1] = '\n'; - clib_memcpy (&mp->host_address[0], host_address, 16); - clib_memcpy (&mp->router_address[0], router_address, 16); - - if (NULL != host_mac) - clib_memcpy (&mp->host_mac[0], host_mac, 6); - - mp->_vl_msg_id = ntohs (VL_API_DHCP_COMPL_EVENT); - - vl_msg_api_send_shmem (q, (u8 *) & mp); -} - -static void vl_api_dhcp_client_config_t_handler - (vl_api_dhcp_client_config_t * mp) -{ - vlib_main_t *vm = vlib_get_main (); - vl_api_dhcp_client_config_reply_t *rmp; - int rv = 0; - - VALIDATE_SW_IF_INDEX (mp); - - rv = dhcp_client_config (vm, ntohl (mp->sw_if_index), - mp->hostname, mp->is_add, mp->client_index, - mp->want_dhcp_event ? dhcp_compl_event_callback : - NULL, mp->pid); - - BAD_SW_IF_INDEX_LABEL; - - REPLY_MACRO (VL_API_DHCP_CLIENT_CONFIG_REPLY); -} - static void vl_api_create_loopback_t_handler (vl_api_create_loopback_t * mp) { diff --git a/src/vpp/api/vpe.api b/src/vpp/api/vpe.api index a00033c5..3a35a54a 100644 --- a/src/vpp/api/vpe.api +++ b/src/vpp/api/vpe.api @@ -43,6 +43,7 @@ * DPDK APIs: see ... /src/vnet/devices/dpdk/{dpdk.api, dpdk_api.c} * CLASSIFY APIs: see ... /src/vnet/classify/{classify.api, classify_api.c} * FLOW APIs: see ... /src/vnet/flow/{flow.api, flow_api.c} + * DHCP APIs: see ... 
/src/vnet/dhcp/{dhcpk.api, dhcp_api.c} */ /** \brief Create a new subinterface with the given vlan id @@ -398,68 +399,6 @@ define reset_fib_reply i32 retval; }; -/** \brief DHCP Proxy config add / del request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param vrf_id - vrf id - @param if_ipv6 - ipv6 if non-zero, else ipv4 - @param is_add - add the config if non-zero, else delete - @param insert_circuit_id - option82 suboption 1 fib number - @param dhcp_server[] - server address - @param dhcp_src_address[] - -*/ -define dhcp_proxy_config -{ - u32 client_index; - u32 context; - u32 vrf_id; - u8 is_ipv6; - u8 is_add; - u8 insert_circuit_id; - u8 dhcp_server[16]; - u8 dhcp_src_address[16]; -}; - -/** \brief DHCP Proxy config response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define dhcp_proxy_config_reply -{ - u32 context; - i32 retval; -}; - -/** \brief DHCP Proxy set / unset vss request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param tbl_id - table id - @param oui - first part of vpn id - @param fib_id - second part of vpn id - @param is_ipv6 - ip6 if non-zero, else ip4 - @param is_add - set vss if non-zero, else delete -*/ -define dhcp_proxy_set_vss -{ - u32 client_index; - u32 context; - u32 tbl_id; - u32 oui; - u32 fib_id; - u8 is_ipv6; - u8 is_add; -}; - -/** \brief DHCP proxy set / unset vss response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define dhcp_proxy_set_vss_reply -{ - u32 context; - i32 retval; -}; - /** \brief Create loopback interface request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -834,40 +773,6 @@ define add_node_next_reply u32 next_index; }; -/** \brief DHCP Proxy config 2 add / del request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param rx_vrf_id - receive vrf id - @param server_vrf_id - server vrf id - @param if_ipv6 - ipv6 if non-zero, else ipv4 - @param is_add - add the config if non-zero, else delete - @param insert_circuit_id - option82 suboption 1 fib number - @param dhcp_server[] - server address - @param dhcp_src_address[] - -*/ -define dhcp_proxy_config_2 -{ - u32 client_index; - u32 context; - u32 rx_vrf_id; - u32 server_vrf_id; - u8 is_ipv6; - u8 is_add; - u8 insert_circuit_id; - u8 dhcp_server[16]; - u8 dhcp_src_address[16]; -}; - -/** \brief DHCP Proxy config 2 add / del response - @param context - sender context, to match reply w/ request - @param retval - return code for request -*/ -define dhcp_proxy_config_2_reply -{ - u32 context; - i32 retval; -}; - /** \brief L2 interface ethernet flow point filtering enable/disable request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -1083,37 +988,6 @@ define ip6_nd_event u8 mac_ip; }; -/** \brief DHCP Client config add / del request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param sw_if_index - index of the interface for DHCP client - @param hostname - hostname - @param is_add - add the config if non-zero, else delete - @param want_dhcp_event - DHCP event sent to the sender - via dhcp_compl_event API 
message if non-zero - @param pid - sender's pid -*/ -define dhcp_client_config -{ - u32 client_index; - u32 context; - u32 sw_if_index; - u8 hostname[64]; - u8 is_add; - u8 want_dhcp_event; - u32 pid; -}; - -/** \brief DHCP Client config response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define dhcp_client_config_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set/unset input ACL interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -1146,25 +1020,6 @@ define input_acl_set_interface_reply i32 retval; }; -/** \brief Tell client about a DHCP completion event - @param client_index - opaque cookie to identify the sender - @param pid - client pid registered to receive notification - @param is_ipv6 - if non-zero the address is ipv6, else ipv4 - @param host_address - Host IP address - @param router_address - Router IP address - @param host_mac - Host MAC address -*/ -define dhcp_compl_event -{ - u32 client_index; - u32 pid; - u8 hostname[64]; - u8 is_ipv6; - u8 host_address[16]; - u8 router_address[16]; - u8 host_mac[6]; -}; - /** \brief cop: enable/disable junk filtration features on an interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request -- cgit 1.2.3-korg From adec5878d3ac8399a3202728b0962a350939e7d9 Mon Sep 17 00:00:00 2001 From: Pavel Kotucek Date: Wed, 25 Jan 2017 08:50:53 +0100 Subject: API refactoring : l2 (add) Change-Id: I693a73ba9a5e3b0cb5d2a6c5d363f671e19c1f24 Signed-off-by: Pavel Kotucek --- src/vnet/l2/l2.api | 65 ++++++++++++++++++++++++++++++++++++++++ src/vnet/l2/l2_api.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++- src/vpp/api/api.c | 82 --------------------------------------------------- src/vpp/api/vpe.api | 65 ---------------------------------------- 4 files changed, 147 insertions(+), 148 deletions(-) (limited to 'src/vpp/api') diff --git a/src/vnet/l2/l2.api b/src/vnet/l2/l2.api index 5b24f259..061990c0 100644 --- a/src/vnet/l2/l2.api +++ b/src/vnet/l2/l2.api @@ -260,6 +260,71 @@ define bridge_flags_reply u32 resulting_feature_bitmap; }; +/** \brief L2 interface vlan tag rewrite configure request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - interface the operation is applied to + @param vtr_op - Choose from l2_vtr_op_t enum values + @param push_dot1q - first pushed flag dot1q id set, else dot1ad + @param tag1 - Needed for any push or translate vtr op + @param tag2 - Needed for any push 2 or translate x-2 vtr ops +*/ +define l2_interface_vlan_tag_rewrite +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u32 vtr_op; + u32 push_dot1q; // ethertype of first pushed tag is dot1q/dot1ad + u32 tag1; // first pushed tag + u32 tag2; // second pushed tag +}; + +/** \brief L2 interface vlan tag rewrite response + @param context - sender context, to match reply w/ request + @param retval - return code for the request +*/ +define l2_interface_vlan_tag_rewrite_reply +{ + u32 context; + i32 retval; +}; + +/** \brief L2 interface pbb tag rewrite configure request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - interface the operation is applied to + @param vtr_op - Choose from l2_vtr_op_t enum values + @param inner_tag - needed for translate_qinq vtr 
op only + @param outer_tag - needed for translate_qinq vtr op only + @param b_dmac - B-tag remote mac address, needed for any push or translate_qinq vtr op + @param b_smac - B-tag local mac address, needed for any push or translate qinq vtr op + @param b_vlanid - B-tag vlanid, needed for any push or translate qinq vtr op + @param i_sid - I-tag service id, needed for any push or translate qinq vtr op +*/ +define l2_interface_pbb_tag_rewrite +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u32 vtr_op; + u16 outer_tag; + u8 b_dmac[6]; + u8 b_smac[6]; + u16 b_vlanid; + u32 i_sid; +}; + +/** \brief L2 interface pbb tag rewrite response + @param context - sender context, to match reply w/ request + @param retval - return code for the request +*/ +define l2_interface_pbb_tag_rewrite_reply +{ + u32 context; + i32 retval; +}; + /* * Local Variables: * eval: (c-set-style "gnu") diff --git a/src/vnet/l2/l2_api.c b/src/vnet/l2/l2_api.c index ef33509c..a3cc49bf 100644 --- a/src/vnet/l2/l2_api.c +++ b/src/vnet/l2/l2_api.c @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -54,7 +55,9 @@ _(BRIDGE_DOMAIN_ADD_DEL, bridge_domain_add_del) \ _(BRIDGE_DOMAIN_DUMP, bridge_domain_dump) \ _(BRIDGE_DOMAIN_DETAILS, bridge_domain_details) \ _(BRIDGE_DOMAIN_SW_IF_DETAILS, bridge_domain_sw_if_details) \ -_(BRIDGE_FLAGS, bridge_flags) +_(BRIDGE_FLAGS, bridge_flags) \ +_(L2_INTERFACE_VLAN_TAG_REWRITE, l2_interface_vlan_tag_rewrite) \ +_(L2_INTERFACE_PBB_TAG_REWRITE, l2_interface_pbb_tag_rewrite) static void send_l2_xconnect_details (unix_shared_memory_queue_t * q, u32 context, @@ -454,6 +457,84 @@ out: /* *INDENT-ON* */ } +static void + vl_api_l2_interface_vlan_tag_rewrite_t_handler + (vl_api_l2_interface_vlan_tag_rewrite_t * mp) +{ + int rv = 0; + vl_api_l2_interface_vlan_tag_rewrite_reply_t *rmp; + vnet_main_t *vnm = vnet_get_main (); + vlib_main_t *vm = vlib_get_main (); + u32 vtr_op; + + VALIDATE_SW_IF_INDEX (mp); + + vtr_op = ntohl (mp->vtr_op); + + /* The L2 code is unsuspicious */ + switch (vtr_op) + { + case L2_VTR_DISABLED: + case L2_VTR_PUSH_1: + case L2_VTR_PUSH_2: + case L2_VTR_POP_1: + case L2_VTR_POP_2: + case L2_VTR_TRANSLATE_1_1: + case L2_VTR_TRANSLATE_1_2: + case L2_VTR_TRANSLATE_2_1: + case L2_VTR_TRANSLATE_2_2: + break; + + default: + rv = VNET_API_ERROR_INVALID_VALUE; + goto bad_sw_if_index; + } + + rv = l2vtr_configure (vm, vnm, ntohl (mp->sw_if_index), vtr_op, + ntohl (mp->push_dot1q), ntohl (mp->tag1), + ntohl (mp->tag2)); + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_L2_INTERFACE_VLAN_TAG_REWRITE_REPLY); +} + +static void + vl_api_l2_interface_pbb_tag_rewrite_t_handler + (vl_api_l2_interface_pbb_tag_rewrite_t * mp) +{ + vl_api_l2_interface_pbb_tag_rewrite_reply_t *rmp; + vnet_main_t *vnm = vnet_get_main (); + vlib_main_t *vm = vlib_get_main (); + u32 vtr_op; + int rv = 0; + + VALIDATE_SW_IF_INDEX (mp); + + vtr_op = ntohl (mp->vtr_op); + + switch (vtr_op) + { + case L2_VTR_DISABLED: + case L2_VTR_PUSH_2: + case L2_VTR_POP_2: + case L2_VTR_TRANSLATE_2_1: + break; + + default: + rv = VNET_API_ERROR_INVALID_VALUE; + goto bad_sw_if_index; + } + + rv = l2pbb_configure (vm, vnm, ntohl (mp->sw_if_index), vtr_op, + mp->b_dmac, mp->b_smac, ntohs (mp->b_vlanid), + ntohl (mp->i_sid), ntohs (mp->outer_tag)); + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_L2_INTERFACE_PBB_TAG_REWRITE_REPLY); +} + /* * l2_api_hookup * Add vpe's API message handlers to the table. 
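As with dhcp_api_hookup earlier in this series, the hookup expands the module's foreach_vpe_api_msg list into vl_msg_api_set_handlers() registrations. A minimal sketch of that expansion (the macro body is copied from the DHCP hookup shown above; the L2 version differs only in its message list):

  #define _(N,n)                                          \
    vl_msg_api_set_handlers(VL_API_##N, #n,               \
                            vl_api_##n##_t_handler,       \
                            vl_noop_handler,              \
                            vl_api_##n##_t_endian,        \
                            vl_api_##n##_t_print,         \
                            sizeof(vl_api_##n##_t), 1);
    foreach_vpe_api_msg;
  #undef _

So moving a handler out of vpp/api/api.c only requires removing the message from the old foreach_vpe_api_msg list and adding it to the new module's list, which is exactly what the api.c hunks below do.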
diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index 6317f557..e6227a68 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -62,7 +62,6 @@ #include #include #include -#include #include #include #include @@ -134,7 +133,6 @@ _(ADD_NODE_NEXT, add_node_next) \ _(VXLAN_ADD_DEL_TUNNEL, vxlan_add_del_tunnel) \ _(VXLAN_TUNNEL_DUMP, vxlan_tunnel_dump) \ _(L2_INTERFACE_EFP_FILTER, l2_interface_efp_filter) \ -_(L2_INTERFACE_VLAN_TAG_REWRITE, l2_interface_vlan_tag_rewrite) \ _(SHOW_VERSION, show_version) \ _(VXLAN_GPE_ADD_DEL_TUNNEL, vxlan_gpe_add_del_tunnel) \ _(VXLAN_GPE_TUNNEL_DUMP, vxlan_gpe_tunnel_dump) \ @@ -160,7 +158,6 @@ _(IP_SOURCE_AND_PORT_RANGE_CHECK_ADD_DEL, \ _(IP_SOURCE_AND_PORT_RANGE_CHECK_INTERFACE_ADD_DEL, \ ip_source_and_port_range_check_interface_add_del) \ _(DELETE_SUBIF, delete_subif) \ -_(L2_INTERFACE_PBB_TAG_REWRITE, l2_interface_pbb_tag_rewrite) \ _(PUNT, punt) \ _(FEATURE_ENABLE_DISABLE, feature_enable_disable) @@ -1316,48 +1313,6 @@ vl_api_l2_interface_efp_filter_t_handler (vl_api_l2_interface_efp_filter_t * REPLY_MACRO (VL_API_L2_INTERFACE_EFP_FILTER_REPLY); } -static void - vl_api_l2_interface_vlan_tag_rewrite_t_handler - (vl_api_l2_interface_vlan_tag_rewrite_t * mp) -{ - int rv = 0; - vl_api_l2_interface_vlan_tag_rewrite_reply_t *rmp; - vnet_main_t *vnm = vnet_get_main (); - vlib_main_t *vm = vlib_get_main (); - u32 vtr_op; - - VALIDATE_SW_IF_INDEX (mp); - - vtr_op = ntohl (mp->vtr_op); - - /* The L2 code is unsuspicious */ - switch (vtr_op) - { - case L2_VTR_DISABLED: - case L2_VTR_PUSH_1: - case L2_VTR_PUSH_2: - case L2_VTR_POP_1: - case L2_VTR_POP_2: - case L2_VTR_TRANSLATE_1_1: - case L2_VTR_TRANSLATE_1_2: - case L2_VTR_TRANSLATE_2_1: - case L2_VTR_TRANSLATE_2_2: - break; - - default: - rv = VNET_API_ERROR_INVALID_VALUE; - goto bad_sw_if_index; - } - - rv = l2vtr_configure (vm, vnm, ntohl (mp->sw_if_index), vtr_op, - ntohl (mp->push_dot1q), ntohl (mp->tag1), - ntohl (mp->tag2)); - - BAD_SW_IF_INDEX_LABEL; - - REPLY_MACRO (VL_API_L2_INTERFACE_VLAN_TAG_REWRITE_REPLY); -} - static void vl_api_show_version_t_handler (vl_api_show_version_t * mp) { @@ -2497,43 +2452,6 @@ vl_api_delete_subif_t_handler (vl_api_delete_subif_t * mp) REPLY_MACRO (VL_API_DELETE_SUBIF_REPLY); } -static void - vl_api_l2_interface_pbb_tag_rewrite_t_handler - (vl_api_l2_interface_pbb_tag_rewrite_t * mp) -{ - vl_api_l2_interface_pbb_tag_rewrite_reply_t *rmp; - vnet_main_t *vnm = vnet_get_main (); - vlib_main_t *vm = vlib_get_main (); - u32 vtr_op; - int rv = 0; - - VALIDATE_SW_IF_INDEX (mp); - - vtr_op = ntohl (mp->vtr_op); - - switch (vtr_op) - { - case L2_VTR_DISABLED: - case L2_VTR_PUSH_2: - case L2_VTR_POP_2: - case L2_VTR_TRANSLATE_2_1: - break; - - default: - rv = VNET_API_ERROR_INVALID_VALUE; - goto bad_sw_if_index; - } - - rv = l2pbb_configure (vm, vnm, ntohl (mp->sw_if_index), vtr_op, - mp->b_dmac, mp->b_smac, ntohs (mp->b_vlanid), - ntohl (mp->i_sid), ntohs (mp->outer_tag)); - - BAD_SW_IF_INDEX_LABEL; - - REPLY_MACRO (VL_API_L2_INTERFACE_PBB_TAG_REWRITE_REPLY); - -} - static void vl_api_punt_t_handler (vl_api_punt_t * mp) { diff --git a/src/vpp/api/vpe.api b/src/vpp/api/vpe.api index 3a35a54a..981ae25e 100644 --- a/src/vpp/api/vpe.api +++ b/src/vpp/api/vpe.api @@ -797,36 +797,6 @@ define l2_interface_efp_filter_reply i32 retval; }; -/** \brief L2 interface vlan tag rewrite configure request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param sw_if_index - interface the operation is applied to - @param 
vtr_op - Choose from l2_vtr_op_t enum values - @param push_dot1q - first pushed flag dot1q id set, else dot1ad - @param tag1 - Needed for any push or translate vtr op - @param tag2 - Needed for any push 2 or translate x-2 vtr ops -*/ -define l2_interface_vlan_tag_rewrite -{ - u32 client_index; - u32 context; - u32 sw_if_index; - u32 vtr_op; - u32 push_dot1q; // ethertype of first pushed tag is dot1q/dot1ad - u32 tag1; // first pushed tag - u32 tag2; // second pushed tag -}; - -/** \brief L2 interface vlan tag rewrite response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2_interface_vlan_tag_rewrite_reply -{ - u32 context; - i32 retval; -}; - define create_subif { u32 client_index; @@ -1475,41 +1445,6 @@ define delete_subif_reply { i32 retval; }; -/** \brief L2 interface pbb tag rewrite configure request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param sw_if_index - interface the operation is applied to - @param vtr_op - Choose from l2_vtr_op_t enum values - @param inner_tag - needed for translate_qinq vtr op only - @param outer_tag - needed for translate_qinq vtr op only - @param b_dmac - B-tag remote mac address, needed for any push or translate_qinq vtr op - @param b_smac - B-tag local mac address, needed for any push or translate qinq vtr op - @param b_vlanid - B-tag vlanid, needed for any push or translate qinq vtr op - @param i_sid - I-tag service id, needed for any push or translate qinq vtr op -*/ -define l2_interface_pbb_tag_rewrite -{ - u32 client_index; - u32 context; - u32 sw_if_index; - u32 vtr_op; - u16 outer_tag; - u8 b_dmac[6]; - u8 b_smac[6]; - u16 b_vlanid; - u32 i_sid; -}; - -/** \brief L2 interface pbb tag rewrite response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2_interface_pbb_tag_rewrite_reply -{ - u32 context; - i32 retval; -}; - /** \brief Punt traffic to the host @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request -- cgit 1.2.3-korg From 32e1c010b0c34fd0984f7fc45fae648a182025c5 Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Tue, 22 Nov 2016 17:07:28 +0000 Subject: IP Multicast FIB (mfib) - IPv[46] mfib tables with support for (*,G/m), (*,G) and (S,G) exact and longest prefix match - Replication represented via a new replicate DPO. - RPF configuration and data-plane checking - data-plane signals sent to listening control planes. The functions of multicast forwarding entries differ from their unicast counterparts, so we introduce new mfib_table_t and mfib_entry_t objects. However, we re-use the fib_path_list to resolve and build the entry's output list. The fib_path_list provides the service to construct a replicate DPO for multicast. 'make tests' is extended with two new suites; TEST=mfib, this is an invocation of the CLI command 'test mfib' which deals with many path add/remove, flag set/unset scenarios, TEST=ip-mcast, data-plane forwarding tests. Updated applications to use the new MFIB functions; - IPv6 NS/RA. - DHCPv6. Unit tests for these are updated accordingly.
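A usage sketch, taken from the packet-generator script src/scripts/vnet/mcast/ip4 that this patch adds: an mfib entry is built from per-interface paths, each carrying ITF flags, e.g.

  ip mroute add 232.1.1.1 via pg0 Accept
  ip mroute add 232.1.1.1 via loop0 Forward
  ip mroute add 232.1.1.1 via loop1 Forward

where, roughly, the Accept path nominates the interface on which traffic for the (*,G) must arrive (the RPF check) and each Forward path becomes one leg of the replicate DPO.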
Change-Id: I49ec37b01f1b170335a5697541c8fd30e6d3a961 Signed-off-by: Neale Ranns --- src/scripts/vnet/mcast/ip4 | 22 + src/vat/api_format.c | 173 +++++ src/vnet.am | 35 +- src/vnet/adj/adj.c | 11 +- src/vnet/adj/adj.h | 1 - src/vnet/adj/adj_internal.h | 2 + src/vnet/adj/adj_mcast.c | 346 ++++++++++ src/vnet/adj/adj_mcast.h | 78 +++ src/vnet/adj/adj_nbr.c | 2 +- src/vnet/adj/adj_rewrite.c | 53 -- src/vnet/adj/adj_rewrite.h | 49 -- src/vnet/dhcpv6/proxy_node.c | 46 +- src/vnet/dpo/dpo.c | 2 + src/vnet/dpo/dpo.h | 8 +- src/vnet/dpo/load_balance.c | 13 +- src/vnet/dpo/load_balance.h | 8 + src/vnet/dpo/replicate_dpo.c | 759 ++++++++++++++++++++++ src/vnet/dpo/replicate_dpo.h | 143 +++++ src/vnet/ethernet/arp.c | 84 ++- src/vnet/ethernet/ethernet.h | 2 + src/vnet/ethernet/interface.c | 20 + src/vnet/fib/fib_attached_export.c | 4 +- src/vnet/fib/fib_entry.h | 2 +- src/vnet/fib/fib_entry_delegate.c | 3 + src/vnet/fib/fib_entry_src.c | 4 + src/vnet/fib/fib_node.h | 2 + src/vnet/fib/fib_path.c | 91 ++- src/vnet/fib/fib_path_list.c | 9 +- src/vnet/fib/fib_path_list.h | 6 + src/vnet/fib/fib_table.c | 69 +- src/vnet/fib/fib_table.h | 16 + src/vnet/fib/fib_test.c | 207 +++--- src/vnet/fib/fib_types.c | 4 + src/vnet/fib/fib_types.h | 14 + src/vnet/fib/fib_urpf_list.c | 20 +- src/vnet/fib/ip4_fib.c | 50 +- src/vnet/fib/ip4_fib.h | 9 + src/vnet/fib/ip6_fib.c | 117 ++-- src/vnet/fib/ip6_fib.h | 11 +- src/vnet/fib/mpls_fib.c | 17 +- src/vnet/fib/mpls_fib.h | 9 + src/vnet/ip/ip.api | 53 ++ src/vnet/ip/ip4.h | 24 + src/vnet/ip/ip4_forward.c | 498 +++++---------- src/vnet/ip/ip4_input.c | 4 +- src/vnet/ip/ip6.h | 27 + src/vnet/ip/ip6_forward.c | 83 ++- src/vnet/ip/ip6_input.c | 43 +- src/vnet/ip/ip6_neighbor.c | 134 ++-- src/vnet/ip/ip_api.c | 210 +++++++ src/vnet/ip/lookup.c | 171 +++++ src/vnet/ip/lookup.h | 82 +-- src/vnet/mcast/mcast.c | 565 ----------------- src/vnet/mcast/mcast.h | 50 -- src/vnet/mcast/mcast_test.c | 149 ----- src/vnet/mfib/ip4_mfib.c | 465 ++++++++++++++ src/vnet/mfib/ip4_mfib.h | 95 +++ src/vnet/mfib/ip6_mfib.c | 663 +++++++++++++++++++ src/vnet/mfib/ip6_mfib.h | 109 ++++ src/vnet/mfib/mfib_entry.c | 1096 ++++++++++++++++++++++++++++++++ src/vnet/mfib/mfib_entry.h | 172 +++++ src/vnet/mfib/mfib_forward.c | 512 +++++++++++++++ src/vnet/mfib/mfib_itf.c | 119 ++++ src/vnet/mfib/mfib_itf.h | 63 ++ src/vnet/mfib/mfib_signal.c | 201 ++++++ src/vnet/mfib/mfib_signal.h | 59 ++ src/vnet/mfib/mfib_table.c | 489 ++++++++++++++ src/vnet/mfib/mfib_table.h | 331 ++++++++++ src/vnet/mfib/mfib_test.c | 1225 ++++++++++++++++++++++++++++++++++++ src/vnet/mfib/mfib_types.c | 213 +++++++ src/vnet/mfib/mfib_types.h | 185 ++++++ src/vnet/misc.c | 3 + src/vnet/rewrite.h | 31 + src/vnet/sr/sr.c | 4 +- src/vnet/util/radix.c | 1104 ++++++++++++++++++++++++++++++++ src/vnet/util/radix.h | 147 +++++ src/vnet/vxlan/vxlan.c | 112 +++- src/vpp/api/api.c | 14 +- src/vppinfra.am | 2 +- src/vppinfra/dlist.h | 2 +- src/vppinfra/format.c | 8 +- src/vppinfra/format.h | 4 +- src/vppinfra/unformat.c | 16 +- test/test_dhcp.py | 16 - test/test_ip6.py | 131 ++-- test/test_ip_mcast.py | 612 ++++++++++++++++++ test/test_mfib.py | 23 + test/vpp_interface.py | 3 +- test/vpp_ip_route.py | 101 ++- test/vpp_papi_provider.py | 34 +- 90 files changed, 11211 insertions(+), 1767 deletions(-) create mode 100644 src/scripts/vnet/mcast/ip4 create mode 100644 src/vnet/adj/adj_mcast.c create mode 100644 src/vnet/adj/adj_mcast.h delete mode 100644 src/vnet/adj/adj_rewrite.c delete mode 100644 src/vnet/adj/adj_rewrite.h create mode 100644 
src/vnet/dpo/replicate_dpo.c create mode 100644 src/vnet/dpo/replicate_dpo.h delete mode 100644 src/vnet/mcast/mcast.c delete mode 100644 src/vnet/mcast/mcast.h delete mode 100644 src/vnet/mcast/mcast_test.c create mode 100644 src/vnet/mfib/ip4_mfib.c create mode 100644 src/vnet/mfib/ip4_mfib.h create mode 100644 src/vnet/mfib/ip6_mfib.c create mode 100644 src/vnet/mfib/ip6_mfib.h create mode 100644 src/vnet/mfib/mfib_entry.c create mode 100644 src/vnet/mfib/mfib_entry.h create mode 100644 src/vnet/mfib/mfib_forward.c create mode 100644 src/vnet/mfib/mfib_itf.c create mode 100644 src/vnet/mfib/mfib_itf.h create mode 100644 src/vnet/mfib/mfib_signal.c create mode 100644 src/vnet/mfib/mfib_signal.h create mode 100644 src/vnet/mfib/mfib_table.c create mode 100644 src/vnet/mfib/mfib_table.h create mode 100644 src/vnet/mfib/mfib_test.c create mode 100644 src/vnet/mfib/mfib_types.c create mode 100644 src/vnet/mfib/mfib_types.h create mode 100644 src/vnet/util/radix.c create mode 100644 src/vnet/util/radix.h create mode 100644 test/test_ip_mcast.py create mode 100644 test/test_mfib.py (limited to 'src/vpp/api') diff --git a/src/scripts/vnet/mcast/ip4 b/src/scripts/vnet/mcast/ip4 new file mode 100644 index 00000000..69f1ee00 --- /dev/null +++ b/src/scripts/vnet/mcast/ip4 @@ -0,0 +1,22 @@ +packet-generator new { + name x + limit 1 + node ip4-input + size 64-64 + no-recycle + data { + ICMP: 1.0.0.2 -> 232.1.1.1 + ICMP echo_request + incrementing 100 + } +} + +trace add pg-input 100 +loop create +loop create +set int state loop0 up +set int state loop1 up + +ip mroute add 232.1.1.1 via pg0 Accept +ip mroute add 232.1.1.1 via loop0 Forward +ip mroute add 232.1.1.1 via loop1 Forward diff --git a/src/vat/api_format.c b/src/vat/api_format.c index b83313de..4cfe4a58 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -48,6 +48,7 @@ #include #include #include +#include #include "vat/json_format.h" @@ -505,6 +506,53 @@ unformat_flow_classify_table_type (unformat_input_t * input, va_list * va) return 1; } +static const char *mfib_flag_names[] = MFIB_ENTRY_NAMES_SHORT; +static const char *mfib_flag_long_names[] = MFIB_ENTRY_NAMES_LONG; +static const char *mfib_itf_flag_long_names[] = MFIB_ITF_NAMES_LONG; +static const char *mfib_itf_flag_names[] = MFIB_ITF_NAMES_SHORT; + +uword +unformat_mfib_itf_flags (unformat_input_t * input, va_list * args) +{ + mfib_itf_flags_t old, *iflags = va_arg (*args, mfib_itf_flags_t *); + mfib_itf_attribute_t attr; + + old = *iflags; + FOR_EACH_MFIB_ITF_ATTRIBUTE (attr) + { + if (unformat (input, mfib_itf_flag_long_names[attr])) + *iflags |= (1 << attr); + } + FOR_EACH_MFIB_ITF_ATTRIBUTE (attr) + { + if (unformat (input, mfib_itf_flag_names[attr])) + *iflags |= (1 << attr); + } + + return (old == *iflags ? 0 : 1); +} + +uword +unformat_mfib_entry_flags (unformat_input_t * input, va_list * args) +{ + mfib_entry_flags_t old, *eflags = va_arg (*args, mfib_entry_flags_t *); + mfib_entry_attribute_t attr; + + old = *eflags; + FOR_EACH_MFIB_ATTRIBUTE (attr) + { + if (unformat (input, mfib_flag_long_names[attr])) + *eflags |= (1 << attr); + } + FOR_EACH_MFIB_ATTRIBUTE (attr) + { + if (unformat (input, mfib_flag_names[attr])) + *eflags |= (1 << attr); + } + + return (old == *eflags ? 
0 : 1); +} + #if (VPP_API_TEST_BUILTIN==0) u8 * format_ip4_address (u8 * s, va_list * args) @@ -3592,6 +3640,7 @@ _(bridge_domain_add_del_reply) \ _(sw_interface_set_l2_xconnect_reply) \ _(l2fib_add_del_reply) \ _(ip_add_del_route_reply) \ +_(ip_mroute_add_del_reply) \ _(mpls_route_add_del_reply) \ _(mpls_ip_bind_unbind_reply) \ _(proxy_arp_add_del_reply) \ @@ -3792,6 +3841,7 @@ _(TAP_MODIFY_REPLY, tap_modify_reply) \ _(TAP_DELETE_REPLY, tap_delete_reply) \ _(SW_INTERFACE_TAP_DETAILS, sw_interface_tap_details) \ _(IP_ADD_DEL_ROUTE_REPLY, ip_add_del_route_reply) \ +_(IP_MROUTE_ADD_DEL_REPLY, ip_mroute_add_del_reply) \ _(MPLS_ROUTE_ADD_DEL_REPLY, mpls_route_add_del_reply) \ _(MPLS_IP_BIND_UNBIND_REPLY, mpls_ip_bind_unbind_reply) \ _(PROXY_ARP_ADD_DEL_REPLY, proxy_arp_add_del_reply) \ @@ -6383,6 +6433,126 @@ api_ip_add_del_route (vat_main_t * vam) return (vam->retval); } +static int +api_ip_mroute_add_del (vat_main_t * vam) +{ + unformat_input_t *i = vam->input; + vl_api_ip_mroute_add_del_t *mp; + f64 timeout; + u32 sw_if_index = ~0, vrf_id = 0; + u8 is_ipv6 = 0; + u8 is_local = 0; + u8 create_vrf_if_needed = 0; + u8 is_add = 1; + u8 address_set = 0; + u32 grp_address_length = 0; + ip4_address_t v4_grp_address, v4_src_address; + ip6_address_t v6_grp_address, v6_src_address; + mfib_itf_flags_t iflags = 0; + mfib_entry_flags_t eflags = 0; + + /* Parse args required to build the message */ + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "sw_if_index %d", &sw_if_index)) + ; + else if (unformat (i, "%U %U", + unformat_ip4_address, &v4_src_address, + unformat_ip4_address, &v4_grp_address)) + { + grp_address_length = 64; + address_set = 1; + is_ipv6 = 0; + } + else if (unformat (i, "%U %U", + unformat_ip6_address, &v6_src_address, + unformat_ip6_address, &v6_grp_address)) + { + grp_address_length = 256; + address_set = 1; + is_ipv6 = 1; + } + else if (unformat (i, "%U", unformat_ip4_address, &v4_grp_address)) + { + memset (&v4_src_address, 0, sizeof (v4_src_address)); + grp_address_length = 32; + address_set = 1; + is_ipv6 = 0; + } + else if (unformat (i, "%U", unformat_ip6_address, &v6_grp_address)) + { + memset (&v6_src_address, 0, sizeof (v6_src_address)); + grp_address_length = 128; + address_set = 1; + is_ipv6 = 1; + } + else if (unformat (i, "/%d", &grp_address_length)) + ; + else if (unformat (i, "local")) + { + is_local = 1; + } + else if (unformat (i, "del")) + is_add = 0; + else if (unformat (i, "add")) + is_add = 1; + else if (unformat (i, "vrf %d", &vrf_id)) + ; + else if (unformat (i, "create-vrf")) + create_vrf_if_needed = 1; + else if (unformat (i, "%U", unformat_mfib_itf_flags, &iflags)) + ; + else if (unformat (i, "%U", unformat_mfib_entry_flags, &eflags)) + ; + else + { + clib_warning ("parse error '%U'", format_unformat_error, i); + return -99; + } + } + + if (address_set == 0) + { + errmsg ("missing addresses\n"); + return -99; + } + + /* Construct the API message */ + M (IP_MROUTE_ADD_DEL, ip_mroute_add_del); + + mp->next_hop_sw_if_index = ntohl (sw_if_index); + mp->table_id = ntohl (vrf_id); + mp->create_vrf_if_needed = create_vrf_if_needed; + + mp->is_add = is_add; + mp->is_ipv6 = is_ipv6; + mp->is_local = is_local; + mp->itf_flags = ntohl (iflags); + mp->entry_flags = ntohl (eflags); + mp->grp_address_length = grp_address_length; + mp->grp_address_length = ntohs (mp->grp_address_length); + + if (is_ipv6) + { + clib_memcpy (mp->grp_address, &v6_grp_address, sizeof (v6_grp_address)); + clib_memcpy (mp->src_address, &v6_src_address, sizeof 
(v6_src_address)); + } + else + { + clib_memcpy (mp->grp_address, &v4_grp_address, sizeof (v4_grp_address)); + clib_memcpy (mp->src_address, &v4_src_address, sizeof (v4_src_address)); + + } + + /* send it... */ + S; + /* Wait for a reply... */ + W; + + /* Return the good/bad news */ + return (vam->retval); +} + static int api_mpls_route_add_del (vat_main_t * vam) { @@ -17512,6 +17682,9 @@ _(ip_add_del_route, \ "[ | sw_if_index ] [resolve-attempts ]\n" \ "[weight ] [drop] [local] [classify ] [del]\n" \ "[multipath] [count ]") \ +_(ip_mroute_add_del, \ + " / [table-id ]\n" \ + "[ | sw_if_index ] [local] [del]") \ _(mpls_route_add_del, \ "
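For context (not part of the patch): the VAT help entry registered above corresponds to the new api_ip_mroute_add_del handler. A minimal, illustrative VAT invocation, using the Accept/Forward flag names that appear in the mcast/ip4 script earlier in this change (assumed here to match the names parsed by unformat_mfib_itf_flags), might look like:

    ip_mroute_add_del 232.1.1.1 sw_if_index 1 Accept
    ip_mroute_add_del 1.1.1.2 232.1.1.1 sw_if_index 2 Forward
    ip_mroute_add_del 232.1.1.1 sw_if_index 1 Accept del

The parser accepts either a group address alone or a source and group pair, an optional prefix length, "vrf" to select a table, "create-vrf", "local", and the MFIB entry/interface flag names; if no address is given the handler fails with "missing addresses". The addresses and interface indices shown here are placeholders.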
oui vpn-id ", + .function = dhcp_option_82_vss_fn, +}; + +static clib_error_t * +dhcp_vss_show_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) + +{ + dhcp_vss_walk(FIB_PROTOCOL_IP4, dhcp_vss_show_walk, vm); + + return (NULL); +} + +VLIB_CLI_COMMAND (dhcp_proxy_vss_show_command, static) = { + .path = "show dhcp vss", + .short_help = "show dhcp VSS", + .function = dhcp_vss_show_command_fn, +}; + +static clib_error_t * +dhcp_option_82_address_show_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) + +{ + vnet_main_t *vnm = vnet_get_main(); + u32 sw_if_index0=0, sw_if_index; + vnet_sw_interface_t *swif; + ip4_address_t *ia0; + + while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) + { + + if (unformat(input, "%U", + unformat_vnet_sw_interface, vnm, &sw_if_index0)) + { + swif = vnet_get_sw_interface (vnm, sw_if_index0); + sw_if_index = (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) ? + swif->unnumbered_sw_if_index : sw_if_index0; + ia0 = ip4_interface_first_address(&ip4_main, sw_if_index, 0); + if (ia0) + { + vlib_cli_output (vm, "%=20s%=20s", "interface", + "source IP address"); + + vlib_cli_output (vm, "%=20U%=20U", + format_vnet_sw_if_index_name, + vnm, sw_if_index0, + format_ip4_address, ia0); + } + else + vlib_cli_output (vm, "%=34s %=20U", + "No IPv4 address configured on", + format_vnet_sw_if_index_name, + vnm, sw_if_index); + } + else + break; + } + + return 0; +} + +VLIB_CLI_COMMAND (dhcp_proxy_address_show_command,static) = { + .path = "show dhcp option-82-address interface", + .short_help = "show dhcp option-82-address interface ", + .function = dhcp_option_82_address_show_command_fn, +}; diff --git a/src/vnet/dhcp/dhcp6_packet.h b/src/vnet/dhcp/dhcp6_packet.h new file mode 100644 index 00000000..ddcde7a0 --- /dev/null +++ b/src/vnet/dhcp/dhcp6_packet.h @@ -0,0 +1,183 @@ +#ifndef included_vnet_dhcp6_packet_h +#define included_vnet_dhcp6_packet_h + +/* + * DHCP packet format + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include + +// #define DHCP_VRF_NAME_MAX_LEN L3VM_MAX_NAME_STR_LEN +// #define DHCPV6_MAX_VRF_NAME_LEN L3VM_MAX_NAME_STR_LEN +#define DHCP_MAX_RELAY_ADDR 16 +#define PROTO_UDP 17 +#define DHCPV6_CLIENT_PORT 546 +#define DHCPV6_SERVER_PORT 547 +#define HOP_COUNT_LIMIT 32 +#define DHCPV6_CISCO_ENT_NUM 9 + +/* + * DHCPv6 message types + */ +typedef enum dhcpv6_msg_type_{ + DHCPV6_MSG_SOLICIT = 1, + DHCPV6_MSG_ADVERTISE = 2, + DHCPV6_MSG_REQUEST = 3, + DHCPV6_MSG_CONFIRM = 4, + DHCPV6_MSG_RENEW = 5, + DHCPV6_MSG_REBIND = 6, + DHCPV6_MSG_REPLY = 7, + DHCPV6_MSG_RELEASE = 8, + DHCPV6_MSG_DECLINE = 9, + DHCPV6_MSG_RECONFIGURE = 10, + DHCPV6_MSG_INFORMATION_REQUEST = 11, + DHCPV6_MSG_RELAY_FORW = 12, + DHCPV6_MSG_RELAY_REPL = 13, +} dhcpv6_msg_type_t; + +/* + * DHCPv6 options types + */ +enum { + DHCPV6_OPTION_CLIENTID = 1, + DHCPV6_OPTION_SERVERID = 2, + DHCPV6_OPTION_IA_NA = 3, + DHCPV6_OPTION_IA_TA = 4, + DHCPV6_OPTION_IAADDR = 5, + DHCPV6_OPTION_ORO = 6, + DHCPV6_OPTION_PREFERENCE = 7, + DHCPV6_OPTION_ELAPSED_TIME = 8, + DHCPV6_OPTION_RELAY_MSG = 9, + DHCPV6_OPTION_AUTH = 11, + DHCPV6_OPTION_UNICAST = 12, + DHCPV6_OPTION_STATUS_CODE = 13, + DHCPV6_OPTION_RAPID_COMMIT = 14, + DHCPV6_OPTION_USER_CLASS = 15, + DHCPV6_OPTION_VENDOR_CLASS = 16, + DHCPV6_OPTION_VENDOR_OPTS = 17, + DHCPV6_OPTION_INTERFACE_ID = 18, // relay agent fills this + DHCPV6_OPTION_RECONF_MSG = 19, + DHCPV6_OPTION_RECONF_ACCEPT = 20, + DHCPV6_OPTION_REMOTEID = 37, // relay agent fills this + DHCPV6_OPTION_VSS = 68, // relay agent fills this + DHCPV6_OPTION_CLIENT_LINK_LAYER_ADDRESS = 79, + DHCPV6_OPTION_MAX +}; + +/* +* DHCPv6 status codes + */ +enum { + DHCPV6_STATUS_SUCCESS = 0, + DHCPV6_STATUS_UNSPEC_FAIL = 1, + DHCPV6_STATUS_NOADDRS_AVAIL = 2, + DHCPV6_STATUS_NO_BINDING = 3, + DHCPV6_STATUS_NOT_ONLINK = 4, + DHCPV6_STATUS_USE_MULTICAST = 5, +}; + +/* + * DHCPv6 DUID types + */ +enum { + DHCPV6_DUID_LLT = 1, /* DUID Based on Link-layer Address Plus Time */ + DHCPV6_DUID_EN = 2, /* DUID Based on Enterprise Number */ + DHCPV6_DUID_LL = 3, /* DUID Based on Link-layer Address */ +}; + +//Structure for DHCPv6 payload from client +typedef struct dhcpv6_hdr_ { + union { + u8 msg_type; //DHCP msg type + u32 xid; // transaction id + }u; + u8 data[0]; +} dhcpv6_header_t; + + + +typedef CLIB_PACKED (struct dhcpv6_relay_ctx_ { + dhcpv6_header_t *pkt; + u32 pkt_len; + u32 dhcpv6_len; //DHCPv6 payload load +// if_ordinal iod; + u32 if_index; + u32 ctx_id; + char ctx_name[32+1]; + u8 dhcp_msg_type; +}) dhcpv6_relay_ctx_t; + +//Structure for DHCPv6 RELAY-FORWARD and DHCPv6 RELAY-REPLY pkts +typedef CLIB_PACKED (struct dhcpv6_relay_hdr_ { + u8 msg_type; + u8 hop_count; + ip6_address_t link_addr; + ip6_address_t peer_addr; + u8 data[0]; +}) dhcpv6_relay_hdr_t; + +typedef enum dhcp_stats_action_type_ { + DHCP_STATS_ACTION_FORWARDED=1, + DHCP_STATS_ACTION_RECEIVED, + DHCP_STATS_ACTION_DROPPED +} dhcp_stats_action_type_t; +//Generic counters for a packet +typedef struct dhcp_stats_counters_ { + u64 rx_pkts; //counter for received pkts + u64 tx_pkts; //counter for forwarded pkts + u64 drops; //counter for dropped pkts +} dhcp_stats_counters_t; + + +typedef enum dhcpv6_stats_drop_reason_ { + DHCPV6_RELAY_PKT_DROP_RELAYDISABLE = 1, + DHCPV6_RELAY_PKT_DROP_MAX_HOPS, + DHCPV6_RELAY_PKT_DROP_VALIDATION_FAIL, + DHCPV6_RELAY_PKT_DROP_UNKNOWN_OP_INTF, + DHCPV6_RELAY_PKT_DROP_BAD_CONTEXT, + DHCPV6_RELAY_PKT_DROP_OPT_INSERT_FAIL, + DHCPV6_RELAY_PKT_DROP_REPLY_FROM_CLIENT, +} dhcpv6_stats_drop_reason_t; + +typedef CLIB_PACKED (struct { + u16 option; 
+ u16 length; + u8 data[0]; +}) dhcpv6_option_t; + +typedef CLIB_PACKED (struct { + dhcpv6_option_t opt; + u32 int_idx; +}) dhcpv6_int_id_t; + +typedef CLIB_PACKED (struct { + dhcpv6_option_t opt; + u8 data[8]; // data[0]:type, data[1..7]: VPN ID +}) dhcpv6_vss_t; + +typedef CLIB_PACKED (struct { + dhcpv6_option_t opt; + u32 ent_num; + u32 rmt_id; +}) dhcpv6_rmt_id_t; + +typedef CLIB_PACKED (struct { + dhcpv6_option_t opt; + u16 link_type; + u8 data[6]; // data[0]:data[5]: MAC address +}) dhcpv6_client_mac_t; + + +#endif /* included_vnet_dhcp6_packet_h */ diff --git a/src/vnet/dhcp/dhcp6_proxy_error.def b/src/vnet/dhcp/dhcp6_proxy_error.def new file mode 100644 index 00000000..55fa7317 --- /dev/null +++ b/src/vnet/dhcp/dhcp6_proxy_error.def @@ -0,0 +1,29 @@ +/* + * dhcp_proxy_error.def: dhcp proxy errors + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +dhcpv6_proxy_error (NONE, "no error") +dhcpv6_proxy_error (NO_SERVER, "no dhcpv6 server configured") +dhcpv6_proxy_error (RELAY_TO_SERVER, "DHCPV6 packets relayed to the server") +dhcpv6_proxy_error (RELAY_TO_CLIENT, "DHCPV6 packets relayed to clients") +dhcpv6_proxy_error (NO_INTERFACE_ADDRESS, "DHCPV6 no interface address") +dhcpv6_proxy_error (WRONG_MESSAGE_TYPE, "DHCPV6 wrong message type.") +dhcpv6_proxy_error (NO_SRC_ADDRESS, "DHCPV6 no srouce IPv6 address configured.") +dhcpv6_proxy_error (NO_CIRCUIT_ID_OPTION, "DHCPv6 reply packets without circuit ID option") +dhcpv6_proxy_error (NO_RELAY_MESSAGE_OPTION, "DHCPv6 reply packets without relay message option") +dhcpv6_proxy_error (BAD_SVR_FIB_OR_ADDRESS, "DHCPv6 packets not from DHCPv6 server or server FIB.") +dhcpv6_proxy_error (PKT_TOO_BIG, "DHCPv6 packets which are too big.") +dhcpv6_proxy_error (WRONG_INTERFACE_ID_OPTION, "DHCPv6 reply to invalid interface.") diff --git a/src/vnet/dhcp/dhcp6_proxy_node.c b/src/vnet/dhcp/dhcp6_proxy_node.c new file mode 100644 index 00000000..ed44977d --- /dev/null +++ b/src/vnet/dhcp/dhcp6_proxy_node.c @@ -0,0 +1,1065 @@ +/* + * dhcp6_proxy_node.c: dhcpv6 proxy node processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +static char * dhcpv6_proxy_error_strings[] = { +#define dhcpv6_proxy_error(n,s) s, +#include +#undef dhcpv6_proxy_error +}; + +#define foreach_dhcpv6_proxy_to_server_input_next \ + _ (DROP, "error-drop") \ + _ (LOOKUP, "ip6-lookup") \ + _ (SEND_TO_CLIENT, "dhcpv6-proxy-to-client") + + +typedef enum { +#define _(s,n) DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_##s, + foreach_dhcpv6_proxy_to_server_input_next +#undef _ + DHCPV6_PROXY_TO_SERVER_INPUT_N_NEXT, +} dhcpv6_proxy_to_server_input_next_t; + +typedef struct { + /* 0 => to server, 1 => to client */ + int which; + u8 packet_data[64]; + u32 error; + u32 sw_if_index; + u32 original_sw_if_index; +} dhcpv6_proxy_trace_t; + +static vlib_node_registration_t dhcpv6_proxy_to_server_node; +static vlib_node_registration_t dhcpv6_proxy_to_client_node; + +/* all DHCP servers address */ +static ip6_address_t all_dhcpv6_server_address; +static ip6_address_t all_dhcpv6_server_relay_agent_address; + +static u8 * +format_dhcpv6_proxy_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + dhcpv6_proxy_trace_t * t = va_arg (*args, dhcpv6_proxy_trace_t *); + + if (t->which == 0) + s = format (s, "DHCPV6 proxy: sent to server %U", + format_ip6_address, &t->packet_data, sizeof (ip6_address_t)); + else + s = format (s, "DHCPV6 proxy: sent to client from %U", + format_ip6_address, &t->packet_data, sizeof (ip6_address_t)); + if (t->error != (u32)~0) + s = format (s, " error: %s\n", dhcpv6_proxy_error_strings[t->error]); + + s = format (s, " original_sw_if_index: %d, sw_if_index: %d\n", + t->original_sw_if_index, t->sw_if_index); + + return s; +} + +static u8 * +format_dhcpv6_proxy_header_with_length (u8 * s, va_list * args) +{ + dhcpv6_header_t * h = va_arg (*args, dhcpv6_header_t *); + u32 max_header_bytes = va_arg (*args, u32); + u32 header_bytes; + + header_bytes = sizeof (h[0]); + if (max_header_bytes != 0 && header_bytes > max_header_bytes) + return format (s, "dhcpv6 header truncated"); + + s = format (s, "DHCPV6 Proxy"); + + return s; +} +/* get first interface address */ +static ip6_address_t * +ip6_interface_first_global_or_site_address (ip6_main_t * im, u32 sw_if_index) +{ + ip_lookup_main_t * lm = &im->lookup_main; + ip_interface_address_t * ia = 0; + ip6_address_t * result = 0; + + foreach_ip_interface_address (lm, ia, sw_if_index, + 1 /* honor unnumbered */, + ({ + ip6_address_t * a = ip_interface_address_get_address (lm, ia); + if ((a->as_u8[0] & 0xe0) == 0x20 || + (a->as_u8[0] & 0xfe) == 0xfc) { + result = a; + break; + } + })); + return result; +} + +static inline void copy_ip6_address (ip6_address_t *dst, + ip6_address_t *src) +{ + dst->as_u64[0] = src->as_u64[0]; + dst->as_u64[1] = src->as_u64[1]; +} + +static uword +dhcpv6_proxy_to_server_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, * from, * to_next; + dhcp_proxy_main_t * dpm = &dhcp_proxy_main; + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + u32 pkts_to_server=0, pkts_to_client=0, pkts_no_server=0; + u32 pkts_no_interface_address=0, pkts_no_exceeding_max_hop=0; + u32 pkts_no_src_address=0; + u32 pkts_wrong_msg_type=0; + u32 pkts_too_big=0; + ip6_main_t * im = &ip6_main; + ip6_address_t * src; + int bogus_length; + dhcp_server_t * server; + u32 rx_fib_idx = 0, server_fib_idx = 0; + + next_index = 
node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + vnet_main_t *vnm = vnet_get_main(); + u32 sw_if_index = 0; + u32 rx_sw_if_index = 0; + vnet_sw_interface_t *swif; + u32 bi0; + vlib_buffer_t * b0; + udp_header_t * u0, *u1; + dhcpv6_header_t * h0; // client msg hdr + ip6_header_t * ip0, *ip1; + ip6_address_t _ia0, *ia0=&_ia0; + u32 next0; + u32 error0 = (u32) ~0; + dhcpv6_option_t *fwd_opt; + dhcpv6_relay_hdr_t *r1; + u16 len; + dhcpv6_int_id_t *id1; + dhcpv6_vss_t *vss1; + dhcpv6_client_mac_t *cmac; // client mac + ethernet_header_t * e_h0; + u8 client_src_mac[6]; + vlib_buffer_free_list_t *fl; + dhcp_vss_t *vss; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + h0 = vlib_buffer_get_current (b0); + + /* + * udp_local hands us the DHCPV6 header. + */ + u0 = (void *)h0 -(sizeof(*u0)); + ip0 = (void *)u0 -(sizeof(*ip0)); + e_h0 = (void *)ip0 - ethernet_buffer_header_size(b0); + + clib_memcpy(client_src_mac, e_h0->src_address, 6); + + switch (h0->u.msg_type) { + case DHCPV6_MSG_SOLICIT: + case DHCPV6_MSG_REQUEST: + case DHCPV6_MSG_CONFIRM: + case DHCPV6_MSG_RENEW: + case DHCPV6_MSG_REBIND: + case DHCPV6_MSG_RELEASE: + case DHCPV6_MSG_DECLINE: + case DHCPV6_MSG_INFORMATION_REQUEST: + case DHCPV6_MSG_RELAY_FORW: + /* send to server */ + break; + case DHCPV6_MSG_RELAY_REPL: + /* send to client */ + next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_SEND_TO_CLIENT; + error0 = 0; + pkts_to_client++; + goto do_enqueue; + default: + /* drop the packet */ + pkts_wrong_msg_type++; + error0 = DHCPV6_PROXY_ERROR_WRONG_MESSAGE_TYPE; + next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP; + goto do_trace; + + } + + /* Send to DHCPV6 server via the configured FIB */ + rx_sw_if_index = sw_if_index = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + rx_fib_idx = im->fib_index_by_sw_if_index [rx_sw_if_index]; + server = dhcp_get_server(dpm, rx_fib_idx, FIB_PROTOCOL_IP6); + + if (PREDICT_FALSE (NULL == server)) + { + error0 = DHCPV6_PROXY_ERROR_NO_SERVER; + next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP; + pkts_no_server++; + goto do_trace; + } + + server_fib_idx = server->server_fib_index; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = server_fib_idx; + + + /* relay-option header pointer */ + vlib_buffer_advance(b0, -(sizeof(*fwd_opt))); + fwd_opt = vlib_buffer_get_current(b0); + /* relay message header pointer */ + vlib_buffer_advance(b0, -(sizeof(*r1))); + r1 = vlib_buffer_get_current(b0); + + vlib_buffer_advance(b0, -(sizeof(*u1))); + u1 = vlib_buffer_get_current(b0); + + vlib_buffer_advance(b0, -(sizeof(*ip1))); + ip1 = vlib_buffer_get_current(b0); + + /* fill in all that rubbish... */ + len = clib_net_to_host_u16(u0->length) - sizeof(udp_header_t); + copy_ip6_address(&r1->peer_addr, &ip0->src_address); + + r1->msg_type = DHCPV6_MSG_RELAY_FORW; + fwd_opt->length = clib_host_to_net_u16(len); + fwd_opt->option = clib_host_to_net_u16(DHCPV6_OPTION_RELAY_MSG); + + r1->hop_count++; + r1->hop_count = (h0->u.msg_type != DHCPV6_MSG_RELAY_FORW) ? 
0 : r1->hop_count; + + if (PREDICT_FALSE(r1->hop_count >= HOP_COUNT_LIMIT)) + { + error0 = DHCPV6_RELAY_PKT_DROP_MAX_HOPS; + next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP; + pkts_no_exceeding_max_hop++; + goto do_trace; + } + + + /* If relay-fwd and src address is site or global unicast address */ + if (h0->u.msg_type == DHCPV6_MSG_RELAY_FORW && + ((ip0->src_address.as_u8[0] & 0xe0) == 0x20 || + (ip0->src_address.as_u8[0] & 0xfe) == 0xfc)) + { + /* Set link address to zero */ + r1->link_addr.as_u64[0] = 0; + r1->link_addr.as_u64[1] = 0; + goto link_address_set; + } + + /* if receiving interface is unnumbered, use receiving interface + * IP address as link address, otherwise use the loopback interface + * IP address as link address. + */ + + swif = vnet_get_sw_interface (vnm, rx_sw_if_index); + if (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) + sw_if_index = swif->unnumbered_sw_if_index; + + ia0 = ip6_interface_first_global_or_site_address(&ip6_main, sw_if_index); + if (ia0 == 0) + { + error0 = DHCPV6_PROXY_ERROR_NO_INTERFACE_ADDRESS; + next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP; + pkts_no_interface_address++; + goto do_trace; + } + + copy_ip6_address(&r1->link_addr, ia0); + + link_address_set: + fl = vlib_buffer_get_free_list (vm, b0->free_list_index); + + if ((b0->current_length+sizeof(*id1)+sizeof(*vss1)+sizeof(*cmac)) + > fl->n_data_bytes) + { + error0 = DHCPV6_PROXY_ERROR_PKT_TOO_BIG; + next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP; + pkts_too_big++; + goto do_trace; + } + + id1 = (dhcpv6_int_id_t *) (((uword) ip1) + b0->current_length); + b0->current_length += (sizeof (*id1)); + + id1->opt.option = clib_host_to_net_u16(DHCPV6_OPTION_INTERFACE_ID); + id1->opt.length = clib_host_to_net_u16(sizeof(rx_sw_if_index)); + id1->int_idx = clib_host_to_net_u32(rx_sw_if_index); + + u1->length =0; + if (h0->u.msg_type != DHCPV6_MSG_RELAY_FORW) + { + cmac = (dhcpv6_client_mac_t *) (((uword) ip1) + b0->current_length); + b0->current_length += (sizeof (*cmac)); + cmac->opt.length =clib_host_to_net_u16(sizeof(*cmac) - + sizeof(cmac->opt)); + cmac->opt.option = clib_host_to_net_u16(DHCPV6_OPTION_CLIENT_LINK_LAYER_ADDRESS); + cmac->link_type = clib_host_to_net_u16(1); // ethernet + clib_memcpy(cmac->data, client_src_mac, 6); + u1->length += sizeof(*cmac); + } + + vss = dhcp_get_vss_info(dpm, rx_fib_idx, FIB_PROTOCOL_IP6); + + if (NULL != vss) { + vss1 = (dhcpv6_vss_t *) (((uword) ip1) + b0->current_length); + b0->current_length += (sizeof (*vss1)); + vss1->opt.length =clib_host_to_net_u16(sizeof(*vss1) - + sizeof(vss1->opt)); + vss1->opt.option = clib_host_to_net_u16(DHCPV6_OPTION_VSS); + vss1->data[0] = 1; // type + vss1->data[1] = vss->oui >>16 & 0xff; + vss1->data[2] = vss->oui >>8 & 0xff; + vss1->data[3] = vss->oui & 0xff; + vss1->data[4] = vss->fib_id >> 24 & 0xff; + vss1->data[5] = vss->fib_id >> 16 & 0xff; + vss1->data[6] = vss->fib_id >> 8 & 0xff; + vss1->data[7] = vss->fib_id & 0xff; + u1->length += sizeof(*vss1); + } + + pkts_to_server++; + u1->checksum = 0; + u1->src_port = clib_host_to_net_u16(UDP_DST_PORT_dhcpv6_to_client); + u1->dst_port = clib_host_to_net_u16(UDP_DST_PORT_dhcpv6_to_server); + + u1->length = + clib_host_to_net_u16( clib_net_to_host_u16(fwd_opt->length) + + sizeof(*r1) + sizeof(*fwd_opt) + + sizeof(*u1) + sizeof(*id1) + u1->length); + + memset(ip1, 0, sizeof(*ip1)); + ip1->ip_version_traffic_class_and_flow_label = 0x60; + ip1->payload_length = u1->length; + ip1->protocol = PROTO_UDP; + ip1->hop_limit = HOP_COUNT_LIMIT; + src = (server->dhcp_server.ip6.as_u64[0] || 
+ server->dhcp_server.ip6.as_u64[1]) ? + &server->dhcp_server.ip6 : &all_dhcpv6_server_address; + copy_ip6_address(&ip1->dst_address, src); + + + ia0 = ip6_interface_first_global_or_site_address + (&ip6_main, vnet_buffer(b0)->sw_if_index[VLIB_RX]); + + src = (server->dhcp_src_address.ip6.as_u64[0] || + server->dhcp_src_address.ip6.as_u64[1]) ? + &server->dhcp_src_address.ip6 : ia0; + if (ia0 == 0) + { + error0 = DHCPV6_PROXY_ERROR_NO_SRC_ADDRESS; + next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP; + pkts_no_src_address++; + goto do_trace; + } + + copy_ip6_address (&ip1->src_address, src); + + + u1->checksum = ip6_tcp_udp_icmp_compute_checksum(vm, b0, ip1, + &bogus_length); + ASSERT(bogus_length == 0); + + next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_LOOKUP; + + do_trace: + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + dhcpv6_proxy_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->which = 0; /* to server */ + tr->error = error0; + tr->original_sw_if_index = rx_sw_if_index; + tr->sw_if_index = sw_if_index; + if (DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_LOOKUP == next0) + copy_ip6_address((ip6_address_t *)&tr->packet_data[0], &server->dhcp_server.ip6); + } + + do_enqueue: + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, dhcpv6_proxy_to_server_node.index, + DHCPV6_PROXY_ERROR_RELAY_TO_CLIENT, + pkts_to_client); + vlib_node_increment_counter (vm, dhcpv6_proxy_to_server_node.index, + DHCPV6_PROXY_ERROR_RELAY_TO_SERVER, + pkts_to_server); + vlib_node_increment_counter (vm, dhcpv6_proxy_to_server_node.index, + DHCPV6_PROXY_ERROR_NO_INTERFACE_ADDRESS, + pkts_no_interface_address); + vlib_node_increment_counter (vm, dhcpv6_proxy_to_server_node.index, + DHCPV6_PROXY_ERROR_WRONG_MESSAGE_TYPE, + pkts_wrong_msg_type); + vlib_node_increment_counter (vm, dhcpv6_proxy_to_server_node.index, + DHCPV6_PROXY_ERROR_NO_SRC_ADDRESS, + pkts_no_src_address); + vlib_node_increment_counter (vm, dhcpv6_proxy_to_server_node.index, + DHCPV6_PROXY_ERROR_PKT_TOO_BIG, + pkts_too_big); + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (dhcpv6_proxy_to_server_node, static) = { + .function = dhcpv6_proxy_to_server_input, + .name = "dhcpv6-proxy-to-server", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + + .n_errors = DHCPV6_PROXY_N_ERROR, + .error_strings = dhcpv6_proxy_error_strings, + + .n_next_nodes = DHCPV6_PROXY_TO_SERVER_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_##s] = n, + foreach_dhcpv6_proxy_to_server_input_next +#undef _ + }, + + .format_buffer = format_dhcpv6_proxy_header_with_length, + .format_trace = format_dhcpv6_proxy_trace, +#if 0 + .unformat_buffer = unformat_dhcpv6_proxy_header, +#endif +}; + +static uword +dhcpv6_proxy_to_client_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + + u32 n_left_from, * from; + ethernet_main_t *em = ethernet_get_main (vm); + dhcp_proxy_main_t * dm = &dhcp_proxy_main; + dhcp_server_t * server; + vnet_main_t * vnm = vnet_get_main(); + int bogus_length; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + while (n_left_from > 0) + { + u32 bi0; + vlib_buffer_t * b0; + udp_header_t * u0, *u1=0; + dhcpv6_relay_hdr_t * h0; + ip6_header_t * ip1 = 0, *ip0; + ip6_address_t _ia0, * ia0 = &_ia0; + ip6_address_t client_address; + ethernet_interface_t *ei0; + ethernet_header_t *mac0; + vnet_hw_interface_t *hi0; + vlib_frame_t *f0; + u32 * to_next0; + u32 sw_if_index = ~0; + u32 original_sw_if_index = ~0; + vnet_sw_interface_t *si0; + u32 error0 = (u32)~0; + vnet_sw_interface_t *swif; + dhcpv6_option_t *r0 = 0, *o; + u16 len = 0; + u8 interface_opt_flag = 0; + u8 relay_msg_opt_flag = 0; + ip6_main_t * im = &ip6_main; + u32 server_fib_idx, client_fib_idx; + + bi0 = from[0]; + from += 1; + n_left_from -= 1; + + b0 = vlib_get_buffer (vm, bi0); + h0 = vlib_buffer_get_current (b0); + + if (DHCPV6_MSG_RELAY_REPL != h0->msg_type) + { + error0 = DHCPV6_PROXY_ERROR_WRONG_MESSAGE_TYPE; + + drop_packet: + vlib_node_increment_counter (vm, dhcpv6_proxy_to_client_node.index, + error0, 1); + + f0 = vlib_get_frame_to_node (vm, dm->error_drop_node_index); + to_next0 = vlib_frame_vector_args (f0); + to_next0[0] = bi0; + f0->n_vectors = 1; + vlib_put_frame_to_node (vm, dm->error_drop_node_index, f0); + goto do_trace; + } + /* hop count seems not need to be checked */ + if (HOP_COUNT_LIMIT < h0->hop_count) + { + error0 = DHCPV6_RELAY_PKT_DROP_MAX_HOPS; + goto drop_packet; + } + u0 = (void *)h0 -(sizeof(*u0)); + ip0 = (void *)u0 -(sizeof(*ip0)); + + vlib_buffer_advance (b0, sizeof(*h0)); + o = vlib_buffer_get_current (b0); + + /* Parse through TLVs looking for option 18 (DHCPV6_OPTION_INTERFACE_ID) + _and_ option 9 (DHCPV6_OPTION_RELAY_MSG) option which must be there. 
+ Currently assuming no other options need to be processed + The interface-ID is the FIB number we need + to track down the client-facing interface */ + + while ((u8 *) o < (b0->data + b0->current_data + b0->current_length)) + { + if (DHCPV6_OPTION_INTERFACE_ID == clib_net_to_host_u16(o->option)) + { + interface_opt_flag = 1; + if (clib_net_to_host_u16(o->length) == sizeof(sw_if_index)) + sw_if_index = clib_net_to_host_u32(((dhcpv6_int_id_t*)o)->int_idx); + if (sw_if_index >= vec_len (im->fib_index_by_sw_if_index)) + { + error0 = DHCPV6_PROXY_ERROR_WRONG_INTERFACE_ID_OPTION; + goto drop_packet; + } + } + if (DHCPV6_OPTION_RELAY_MSG == clib_net_to_host_u16(o->option)) + { + relay_msg_opt_flag = 1; + r0 = vlib_buffer_get_current (b0); + } + if ((relay_msg_opt_flag == 1) && (interface_opt_flag == 1)) + break; + vlib_buffer_advance (b0, sizeof(*o) + clib_net_to_host_u16(o->length)); + o = (dhcpv6_option_t *) (((uword) o) + clib_net_to_host_u16(o->length) + sizeof(*o)); + } + + if ((relay_msg_opt_flag == 0) || (r0 == 0)) + { + error0 = DHCPV6_PROXY_ERROR_NO_RELAY_MESSAGE_OPTION; + goto drop_packet; + } + + if ((u32)~0 == sw_if_index) + { + error0 = DHCPV6_PROXY_ERROR_NO_CIRCUIT_ID_OPTION; + goto drop_packet; + } + + //Advance buffer to start of encapsulated DHCPv6 message + vlib_buffer_advance (b0, sizeof(*r0)); + + client_fib_idx = im->fib_index_by_sw_if_index[sw_if_index]; + server = dhcp_get_server(dm, client_fib_idx, FIB_PROTOCOL_IP6); + + if (NULL == server) + { + error0 = DHCPV6_PROXY_ERROR_NO_SERVER; + goto drop_packet; + } + + server_fib_idx = im->fib_index_by_sw_if_index + [vnet_buffer(b0)->sw_if_index[VLIB_RX]]; + + if (server_fib_idx != server->server_fib_index || + ip0->src_address.as_u64[0] != server->dhcp_server.ip6.as_u64[0] || + ip0->src_address.as_u64[1] != server->dhcp_server.ip6.as_u64[1]) + { + //drop packet if not from server with configured address or FIB + error0 = DHCPV6_PROXY_ERROR_BAD_SVR_FIB_OR_ADDRESS; + goto drop_packet; + } + + vnet_buffer (b0)->sw_if_index[VLIB_TX] = original_sw_if_index + = sw_if_index; + + swif = vnet_get_sw_interface (vnm, original_sw_if_index); + if (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) + sw_if_index = swif->unnumbered_sw_if_index; + + + /* + * udp_local hands us the DHCPV6 header, need udp hdr, + * ip hdr to relay to client + */ + vlib_buffer_advance (b0, -(sizeof(*u1))); + u1 = vlib_buffer_get_current (b0); + + vlib_buffer_advance (b0, -(sizeof(*ip1))); + ip1 = vlib_buffer_get_current (b0); + + copy_ip6_address(&client_address, &h0->peer_addr); + + ia0 = ip6_interface_first_address (&ip6_main, sw_if_index); + if (ia0 == 0) + { + error0 = DHCPV6_PROXY_ERROR_NO_INTERFACE_ADDRESS; + goto drop_packet; + } + + len = clib_net_to_host_u16(r0->length); + memset(ip1, 0, sizeof(*ip1)); + copy_ip6_address(&ip1->dst_address, &client_address); + u1->checksum = 0; + u1->src_port = clib_net_to_host_u16 (UDP_DST_PORT_dhcpv6_to_server); + u1->dst_port = clib_net_to_host_u16 (UDP_DST_PORT_dhcpv6_to_client); + u1->length = clib_host_to_net_u16 (len + sizeof(udp_header_t)); + + ip1->ip_version_traffic_class_and_flow_label = + ip0->ip_version_traffic_class_and_flow_label & + 0x00000fff; + ip1->payload_length = u1->length; + ip1->protocol = PROTO_UDP; + ip1->hop_limit = HOP_COUNT_LIMIT; + copy_ip6_address(&ip1->src_address, ia0); + + u1->checksum = ip6_tcp_udp_icmp_compute_checksum(vm, b0, ip1, + &bogus_length); + ASSERT(bogus_length == 0); + + vlib_buffer_advance (b0, -(sizeof(ethernet_header_t))); + si0 = vnet_get_sw_interface (vnm, 
original_sw_if_index); + if (si0->type == VNET_SW_INTERFACE_TYPE_SUB) + vlib_buffer_advance (b0, -4 /* space for VLAN tag */); + + mac0 = vlib_buffer_get_current (b0); + + hi0 = vnet_get_sup_hw_interface (vnm, original_sw_if_index); + ei0 = pool_elt_at_index (em->interfaces, hi0->hw_instance); + clib_memcpy (mac0->src_address, ei0->address, sizeof (ei0->address)); + memset (&mac0->dst_address, 0xff, sizeof (mac0->dst_address)); + mac0->type = (si0->type == VNET_SW_INTERFACE_TYPE_SUB) ? + clib_net_to_host_u16(0x8100) : clib_net_to_host_u16 (0x86dd); + + if (si0->type == VNET_SW_INTERFACE_TYPE_SUB) + { + u32 * vlan_tag = (u32 *)(mac0+1); + u32 tmp; + tmp = (si0->sub.id << 16) | 0x0800; + *vlan_tag = clib_host_to_net_u32 (tmp); + } + + /* $$$ consider adding a dynamic next to the graph node, for performance */ + f0 = vlib_get_frame_to_node (vm, hi0->output_node_index); + to_next0 = vlib_frame_vector_args (f0); + to_next0[0] = bi0; + f0->n_vectors = 1; + vlib_put_frame_to_node (vm, hi0->output_node_index, f0); + + do_trace: + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + dhcpv6_proxy_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->which = 1; /* to client */ + if (ia0) + copy_ip6_address((ip6_address_t*)tr->packet_data, ia0); + tr->error = error0; + tr->original_sw_if_index = original_sw_if_index; + tr->sw_if_index = sw_if_index; + } + } + return from_frame->n_vectors; + +} + +VLIB_REGISTER_NODE (dhcpv6_proxy_to_client_node, static) = { + .function = dhcpv6_proxy_to_client_input, + .name = "dhcpv6-proxy-to-client", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + + .n_errors = DHCPV6_PROXY_N_ERROR, + .error_strings = dhcpv6_proxy_error_strings, + .format_buffer = format_dhcpv6_proxy_header_with_length, + .format_trace = format_dhcpv6_proxy_trace, +#if 0 + .unformat_buffer = unformat_dhcpv6_proxy_header, +#endif +}; + +static clib_error_t * +dhcp6_proxy_init (vlib_main_t * vm) +{ + dhcp_proxy_main_t * dm = &dhcp_proxy_main; + vlib_node_t * error_drop_node; + + error_drop_node = vlib_get_node_by_name (vm, (u8 *) "error-drop"); + dm->error_drop_node_index = error_drop_node->index; + + /* RFC says this is the dhcpv6 server address */ + all_dhcpv6_server_address.as_u64[0] = clib_host_to_net_u64 (0xFF05000000000000); + all_dhcpv6_server_address.as_u64[1] = clib_host_to_net_u64 (0x00010003); + + /* RFC says this is the server and agent address */ + all_dhcpv6_server_relay_agent_address.as_u64[0] = clib_host_to_net_u64 (0xFF02000000000000); + all_dhcpv6_server_relay_agent_address.as_u64[1] = clib_host_to_net_u64 (0x00010002); + + udp_register_dst_port (vm, UDP_DST_PORT_dhcpv6_to_client, + dhcpv6_proxy_to_client_node.index, 0 /* is_ip6 */); + + udp_register_dst_port (vm, UDP_DST_PORT_dhcpv6_to_server, + dhcpv6_proxy_to_server_node.index, 0 /* is_ip6 */); + + return 0; +} + +VLIB_INIT_FUNCTION (dhcp6_proxy_init); + +int +dhcp6_proxy_set_server (ip46_address_t *addr, + ip46_address_t *src_addr, + u32 rx_table_id, + u32 server_table_id, + int is_del) +{ + u32 rx_fib_index = 0; + int rc = 0; + + const mfib_prefix_t all_dhcp_servers = { + .fp_len = 128, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_grp_addr = { + .ip6 = all_dhcpv6_server_relay_agent_address, + } + }; + + if (ip46_address_is_zero(addr)) + return VNET_API_ERROR_INVALID_DST_ADDRESS; + + if (ip46_address_is_zero(src_addr)) + return VNET_API_ERROR_INVALID_SRC_ADDRESS; + + rx_fib_index = mfib_table_find_or_create_and_lock(FIB_PROTOCOL_IP6, + rx_table_id); + + if (is_del) + { + rc = dhcp_proxy_server_del 
(FIB_PROTOCOL_IP6, rx_fib_index); + + if (0 == rc) + { + mfib_table_entry_delete(rx_fib_index, + &all_dhcp_servers, + MFIB_SOURCE_DHCP); + mfib_table_unlock(rx_fib_index, FIB_PROTOCOL_IP6); + } + } + else + { + const fib_route_path_t path_for_us = { + .frp_proto = FIB_PROTOCOL_IP6, + .frp_addr = zero_addr, + .frp_sw_if_index = 0xffffffff, + .frp_fib_index = ~0, + .frp_weight = 0, + .frp_flags = FIB_ROUTE_PATH_LOCAL, + }; + if (dhcp_proxy_server_add (FIB_PROTOCOL_IP6, addr, src_addr, + rx_fib_index, server_table_id)) + { + mfib_table_entry_path_update(rx_fib_index, + &all_dhcp_servers, + MFIB_SOURCE_DHCP, + &path_for_us, + MFIB_ITF_FLAG_FORWARD); + /* + * Each interface that is enabled in this table, needs to be added + * as an accepting interface, but this is not easily doable in VPP. + * So we cheat. Add a flag to the entry that indicates accept form + * any interface. + * We will still only accept on v6 enabled interfaces, since the + * input feature ensures this. + */ + mfib_table_entry_update(rx_fib_index, + &all_dhcp_servers, + MFIB_SOURCE_DHCP, + MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF); + mfib_table_lock(rx_fib_index, FIB_PROTOCOL_IP6); + } + } + + mfib_table_unlock(rx_fib_index, FIB_PROTOCOL_IP6); + + return (rc); +} + +static clib_error_t * +dhcpv6_proxy_set_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ip46_address_t addr, src_addr; + int set_server = 0, set_src_address = 0; + u32 rx_table_id = 0, server_table_id = 0; + int is_del = 0; + + while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "server %U", + unformat_ip6_address, &addr.ip6)) + set_server = 1; + else if (unformat(input, "src-address %U", + unformat_ip6_address, &src_addr.ip6)) + set_src_address =1; + else if (unformat (input, "server-fib-id %d", &server_table_id)) + ; + else if (unformat (input, "rx-fib-id %d", &rx_table_id)) + ; + else if (unformat (input, "delete") || + unformat (input, "del")) + is_del = 1; + else + break; + } + + if (is_del || (set_server && set_src_address)) + { + int rv; + + rv = dhcp6_proxy_set_server (&addr, &src_addr, rx_table_id, + server_table_id, is_del); + + //TODO: Complete the errors + switch (rv) + { + case 0: + return 0; + + case VNET_API_ERROR_INVALID_DST_ADDRESS: + return clib_error_return (0, "Invalid server address"); + + case VNET_API_ERROR_INVALID_SRC_ADDRESS: + return clib_error_return (0, "Invalid src address"); + + case VNET_API_ERROR_NO_SUCH_ENTRY: + return clib_error_return + (0, "Fib id %d: no per-fib DHCP server configured", rx_table_id); + + default: + return clib_error_return (0, "BUG: rv %d", rv); + } + } + else + return clib_error_return (0, "parse error`%U'", + format_unformat_error, input); +} + +VLIB_CLI_COMMAND (dhcpv6_proxy_set_command, static) = { + .path = "set dhcpv6 proxy", + .short_help = "set dhcpv6 proxy [del] server src-address " + "[server-fib-id ] [rx-fib-id ] ", + .function = dhcpv6_proxy_set_command_fn, +}; + +static u8 * +format_dhcp6_proxy_server (u8 * s, va_list * args) +{ + dhcp_server_t * server = va_arg (*args, dhcp_server_t *); + ip6_fib_t * rx_fib, * server_fib; + + if (NULL == server) + { + s = format (s, "%=40s%=40s%=14s%=14s", "Server Address", "Source Address", + "Server FIB", "RX FIB"); + return s; + } + + server_fib = ip6_fib_get(server->server_fib_index); + rx_fib = ip6_fib_get(server->rx_fib_index); + + + s = format (s, "%=40U%=40U%=14u%=14u", + format_ip46_address, &server->dhcp_server, IP46_TYPE_ANY, + format_ip46_address, &server->dhcp_src_address, 
IP46_TYPE_ANY, + server_fib->table_id, rx_fib->table_id); + return s; +} + +static int +dhcp6_proxy_show_walk (dhcp_server_t *server, + void *ctx) +{ + vlib_main_t * vm = ctx; + + vlib_cli_output (vm, "%U", format_dhcp6_proxy_server, server); + + return (1); +} + +static clib_error_t * +dhcpv6_proxy_show_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vlib_cli_output (vm, "%U", format_dhcp6_proxy_server, NULL /* header line */); + + dhcp_proxy_walk(FIB_PROTOCOL_IP6, dhcp6_proxy_show_walk, vm); + + return (NULL); +} + +VLIB_CLI_COMMAND (dhcpv6_proxy_show_command, static) = { + .path = "show dhcpv6 proxy", + .short_help = "Display dhcpv6 proxy info", + .function = dhcpv6_proxy_show_command_fn, +}; + +static clib_error_t * +dhcpv6_vss_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + int is_del = 0, got_new_vss=0; + u32 oui=0; + u32 fib_id=0, tbl_id=~0; + + while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "oui %d", &oui)) + got_new_vss = 1; + else if (unformat (input, "vpn-id %d", &fib_id)) + got_new_vss = 1; + else if (unformat (input, "table %d", &tbl_id)) + got_new_vss = 1; + else if (unformat(input, "delete") || unformat(input, "del")) + is_del = 1; + else + break; + } + + if (tbl_id ==~0) + return clib_error_return (0, "no table ID specified."); + + if (is_del || got_new_vss) + { + int rv; + + rv = dhcp_proxy_set_vss(FIB_PROTOCOL_IP6, tbl_id, oui, fib_id, is_del); + switch (rv) + { + case 0: + return 0; + + case VNET_API_ERROR_NO_SUCH_FIB: + return clib_error_return (0, "vss info (oui:%d, vpn-id:%d) not found in table %d.", + oui, fib_id, tbl_id); + + case VNET_API_ERROR_NO_SUCH_ENTRY: + return clib_error_return (0, "vss for table %d not found in pool.", + tbl_id); + + default: + return clib_error_return (0, "BUG: rv %d", rv); + } + } + else + return clib_error_return (0, "parse error`%U'", + format_unformat_error, input); + +} + +VLIB_CLI_COMMAND (dhcpv6_proxy_vss_command, static) = { + .path = "set dhcpv6 vss", + .short_help = "set dhcpv6 vss table oui vpn-idx ", + .function = dhcpv6_vss_command_fn, +}; + +static clib_error_t * +dhcpv6_vss_show_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) + +{ + dhcp_vss_walk(FIB_PROTOCOL_IP6, dhcp_vss_show_walk, vm); + + return (NULL); +} + +VLIB_CLI_COMMAND (dhcpv6_proxy_vss_show_command, static) = { + .path = "show dhcpv6 vss", + .short_help = "show dhcpv6 VSS", + .function = dhcpv6_vss_show_command_fn, +}; + +static clib_error_t * +dhcpv6_link_address_show_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) + +{ + vnet_main_t *vnm = vnet_get_main(); + u32 sw_if_index0=0, sw_if_index; + vnet_sw_interface_t *swif; + ip6_address_t *ia0; + + while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) + { + + if (unformat(input, "%U", + unformat_vnet_sw_interface, vnm, &sw_if_index0)) + { + swif = vnet_get_sw_interface (vnm, sw_if_index0); + sw_if_index = (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) ? 
+ swif->unnumbered_sw_if_index : sw_if_index0; + ia0 = ip6_interface_first_address(&ip6_main, sw_if_index); + if (ia0) + { + vlib_cli_output (vm, "%=20s%=48s", "interface", "link-address"); + + vlib_cli_output (vm, "%=20U%=48U", + format_vnet_sw_if_index_name, vnm, sw_if_index0, + format_ip6_address, ia0); + } else + vlib_cli_output (vm, "%=34s%=20U", "No IPv6 address configured on", + format_vnet_sw_if_index_name, vnm, sw_if_index); + } else + break; + } + + return 0; +} + +VLIB_CLI_COMMAND (dhcpv6_proxy_address_show_command, static) = { + .path = "show dhcpv6 link-address interface", + .short_help = "show dhcpv6 link-address interface ", + .function = dhcpv6_link_address_show_command_fn, +}; diff --git a/src/vnet/dhcp/dhcp_api.c b/src/vnet/dhcp/dhcp_api.c index ce9039b7..bdf02cae 100644 --- a/src/vnet/dhcp/dhcp_api.c +++ b/src/vnet/dhcp/dhcp_api.c @@ -22,9 +22,8 @@ #include #include -#include +#include #include -#include #include @@ -51,52 +50,19 @@ _(DHCP_PROXY_DETAILS,dhcp_proxy_details) \ _(DHCP_PROXY_SET_VSS,dhcp_proxy_set_vss) \ _(DHCP_CLIENT_CONFIG, dhcp_client_config) -static void -dhcpv4_proxy_config (vl_api_dhcp_proxy_config_t * mp) -{ - vl_api_dhcp_proxy_config_reply_t *rmp; - int rv; - - rv = dhcp_proxy_set_server ((ip4_address_t *) (&mp->dhcp_server), - (ip4_address_t *) (&mp->dhcp_src_address), - (u32) ntohl (mp->rx_vrf_id), - (u32) ntohl (mp->server_vrf_id), - (int) (mp->is_add == 0)); - - REPLY_MACRO (VL_API_DHCP_PROXY_CONFIG_REPLY); -} - - -static void -dhcpv6_proxy_config (vl_api_dhcp_proxy_config_t * mp) -{ - vl_api_dhcp_proxy_config_reply_t *rmp; - int rv = -1; - - rv = dhcpv6_proxy_set_server ((ip6_address_t *) (&mp->dhcp_server), - (ip6_address_t *) (&mp->dhcp_src_address), - (u32) ntohl (mp->rx_vrf_id), - (u32) ntohl (mp->server_vrf_id), - (int) (mp->is_add == 0)); - - REPLY_MACRO (VL_API_DHCP_PROXY_CONFIG_REPLY); -} - static void vl_api_dhcp_proxy_set_vss_t_handler (vl_api_dhcp_proxy_set_vss_t * mp) { vl_api_dhcp_proxy_set_vss_reply_t *rmp; int rv; - if (!mp->is_ipv6) - rv = dhcp_proxy_set_option82_vss (ntohl (mp->tbl_id), - ntohl (mp->oui), - ntohl (mp->fib_id), - (int) mp->is_add == 0); - else - rv = dhcpv6_proxy_set_vss (ntohl (mp->tbl_id), - ntohl (mp->oui), - ntohl (mp->fib_id), (int) mp->is_add == 0); + + rv = dhcp_proxy_set_vss ((mp->is_ipv6 ? 
+ FIB_PROTOCOL_IP6 : + FIB_PROTOCOL_IP4), + ntohl (mp->tbl_id), + ntohl (mp->oui), + ntohl (mp->fib_id), (int) mp->is_add == 0); REPLY_MACRO (VL_API_DHCP_PROXY_SET_VSS_REPLY); } @@ -105,10 +71,38 @@ vl_api_dhcp_proxy_set_vss_t_handler (vl_api_dhcp_proxy_set_vss_t * mp) static void vl_api_dhcp_proxy_config_t_handler (vl_api_dhcp_proxy_config_t * mp) { - if (mp->is_ipv6 == 0) - dhcpv4_proxy_config (mp); + vl_api_dhcp_proxy_set_vss_reply_t *rmp; + ip46_address_t src, server; + int rv = -1; + + if (mp->is_ipv6) + { + clib_memcpy (&src.ip6, mp->dhcp_src_address, sizeof (src.ip6)); + clib_memcpy (&server.ip6, mp->dhcp_server, sizeof (server.ip6)); + + rv = dhcp6_proxy_set_server (&server, + &src, + (u32) ntohl (mp->rx_vrf_id), + (u32) ntohl (mp->server_vrf_id), + (int) (mp->is_add == 0)); + } else - dhcpv6_proxy_config (mp); + { + ip46_address_reset (&src); + ip46_address_reset (&server); + + clib_memcpy (&src.ip4, mp->dhcp_src_address, sizeof (src.ip4)); + clib_memcpy (&server.ip4, mp->dhcp_server, sizeof (server.ip4)); + + rv = dhcp4_proxy_set_server (&server, + &src, + (u32) ntohl (mp->rx_vrf_id), + (u32) ntohl (mp->server_vrf_id), + (int) (mp->is_add == 0)); + } + + + REPLY_MACRO (VL_API_DHCP_PROXY_CONFIG_REPLY); } static void @@ -120,14 +114,13 @@ vl_api_dhcp_proxy_dump_t_handler (vl_api_dhcp_proxy_dump_t * mp) if (q == 0) return; - if (mp->is_ip6 == 0) - dhcp_proxy_dump (q, mp->context); - else - dhcpv6_proxy_dump (q, mp->context); + dhcp_proxy_dump ((mp->is_ip6 == 0 ? + FIB_PROTOCOL_IP6 : FIB_PROTOCOL_IP4), q, mp->context); } void -dhcp_send_details (void *opaque, +dhcp_send_details (fib_protocol_t proto, + void *opaque, u32 context, const ip46_address_t * server, const ip46_address_t * src, @@ -149,7 +142,7 @@ dhcp_send_details (void *opaque, mp->vss_oui = htonl (vss_oui); mp->vss_fib_id = htonl (vss_fib_id); - mp->is_ipv6 = !ip46_address_is_ip4 (server); + mp->is_ipv6 = (proto == FIB_PROTOCOL_IP6); if (mp->is_ipv6) { diff --git a/src/vnet/dhcp/dhcp_proxy.c b/src/vnet/dhcp/dhcp_proxy.c new file mode 100644 index 00000000..da2deea6 --- /dev/null +++ b/src/vnet/dhcp/dhcp_proxy.c @@ -0,0 +1,275 @@ +/* + * proxy_node.c: common dhcp v4 and v6 proxy node processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +/** + * @brief Shard 4/6 instance of DHCP main + */ +dhcp_proxy_main_t dhcp_proxy_main; + +void +dhcp_proxy_walk (fib_protocol_t proto, + dhcp_proxy_walk_fn_t fn, + void *ctx) +{ + dhcp_proxy_main_t * dpm = &dhcp_proxy_main; + dhcp_server_t * server; + u32 server_index, i; + + vec_foreach_index (i, dpm->dhcp_server_index_by_rx_fib_index[proto]) + { + server_index = dpm->dhcp_server_index_by_rx_fib_index[proto][i]; + if (~0 == server_index) + continue; + + server = pool_elt_at_index (dpm->dhcp_servers[proto], server_index); + + if (!fn(server, ctx)) + break; + } +} + +void +dhcp_vss_walk (fib_protocol_t proto, + dhcp_vss_walk_fn_t fn, + void *ctx) +{ + dhcp_proxy_main_t * dpm = &dhcp_proxy_main; + dhcp_vss_t * vss; + u32 vss_index, i; + fib_table_t *fib; + + + vec_foreach_index (i, dpm->vss_index_by_rx_fib_index[proto]) + { + vss_index = dpm->vss_index_by_rx_fib_index[proto][i]; + if (~0 == vss_index) + continue; + + vss = pool_elt_at_index (dpm->vss[proto], vss_index); + + fib = fib_table_get(i, proto); + + if (!fn(vss, fib->ft_table_id, ctx)) + break; + } +} + +int +dhcp_proxy_server_del (fib_protocol_t proto, + u32 rx_fib_index) +{ + dhcp_proxy_main_t * dpm = &dhcp_proxy_main; + dhcp_server_t * server = 0; + int rc = 0; + + server = dhcp_get_server(dpm, rx_fib_index, proto); + + if (NULL == server) + { + rc = VNET_API_ERROR_NO_SUCH_ENTRY; + } + else + { + /* Use the default server again. */ + dpm->dhcp_server_index_by_rx_fib_index[proto][rx_fib_index] = ~0; + + fib_table_unlock (server->server_fib_index, proto); + + pool_put (dpm->dhcp_servers[proto], server); + } + + return (rc); +} + +int +dhcp_proxy_server_add (fib_protocol_t proto, + ip46_address_t *addr, + ip46_address_t *src_address, + u32 rx_fib_index, + u32 server_table_id) +{ + dhcp_proxy_main_t * dpm = &dhcp_proxy_main; + dhcp_server_t * server = 0; + int new = 0; + + server = dhcp_get_server(dpm, rx_fib_index, proto); + + if (NULL == server) + { + vec_validate_init_empty(dpm->dhcp_server_index_by_rx_fib_index[proto], + rx_fib_index, + ~0); + + pool_get (dpm->dhcp_servers[proto], server); + memset (server, 0, sizeof (*server)); + new = 1; + + dpm->dhcp_server_index_by_rx_fib_index[proto][rx_fib_index] = + server - dpm->dhcp_servers[proto]; + + server->rx_fib_index = rx_fib_index; + server->server_fib_index = + fib_table_find_or_create_and_lock(proto, server_table_id); + } + else + { + /* modify, may need to swap server FIBs */ + u32 tmp_index; + + tmp_index = fib_table_find(proto, server_table_id); + + if (tmp_index != server->server_fib_index) + { + tmp_index = server->server_fib_index; + + /* certainly swapping if the fib doesn't exist */ + server->server_fib_index = + fib_table_find_or_create_and_lock(proto, server_table_id); + fib_table_unlock (tmp_index, proto); + } + } + + server->dhcp_server = *addr; + server->dhcp_src_address = *src_address; + + return (new); +} + +typedef struct dhcp4_proxy_dump_walk_ctx_t_ +{ + fib_protocol_t proto; + void *opaque; + u32 context; +} dhcp_proxy_dump_walk_cxt_t; + +static int +dhcp_proxy_dump_walk (dhcp_server_t *server, + void *arg) +{ + dhcp_proxy_dump_walk_cxt_t *ctx = arg; + fib_table_t *s_fib, *r_fib; + dhcp_vss_t *v; + + v = dhcp_get_vss_info(&dhcp_proxy_main, + server->rx_fib_index, + ctx->proto); + + s_fib = fib_table_get(server->server_fib_index, ctx->proto); + r_fib = fib_table_get(server->rx_fib_index, ctx->proto); + + dhcp_send_details(ctx->proto, + ctx->opaque, + ctx->context, + &server->dhcp_server, + &server->dhcp_src_address, + 
s_fib->ft_table_id, + r_fib->ft_table_id, + (v ? v->fib_id : 0), + (v ? v->oui : 0)); + + return (1); +} + +void +dhcp_proxy_dump (fib_protocol_t proto, + void *opaque, + u32 context) +{ + dhcp_proxy_dump_walk_cxt_t ctx = { + .proto = proto, + .opaque = opaque, + .context = context, + }; + dhcp_proxy_walk(proto, dhcp_proxy_dump_walk, &ctx); +} + +int +dhcp_vss_show_walk (dhcp_vss_t *vss, + u32 rx_table_id, + void *ctx) +{ + vlib_main_t * vm = ctx; + + vlib_cli_output (vm, "%=6d%=6d%=12d", + rx_table_id, + vss->oui, + vss->fib_id); + + return (1); +} + +int dhcp_proxy_set_vss (fib_protocol_t proto, + u32 tbl_id, + u32 oui, + u32 fib_id, + int is_del) +{ + dhcp_proxy_main_t *dm = &dhcp_proxy_main; + dhcp_vss_t *v = NULL; + u32 rx_fib_index; + int rc = 0; + + rx_fib_index = fib_table_find_or_create_and_lock(proto, tbl_id); + v = dhcp_get_vss_info(dm, rx_fib_index, proto); + + if (NULL != v) + { + if (is_del) + { + /* release the lock held on the table when the VSS + * info was created */ + fib_table_unlock (rx_fib_index, proto); + + pool_put (dm->vss[proto], v); + dm->vss_index_by_rx_fib_index[proto][rx_fib_index] = ~0; + } + else + { + /* this is a modify */ + v->fib_id = fib_id; + v->oui = oui; + } + } + else + { + if (is_del) + rc = VNET_API_ERROR_NO_SUCH_ENTRY; + else + { + /* create a new entry */ + vec_validate_init_empty(dm->vss_index_by_rx_fib_index[proto], + rx_fib_index, ~0); + + /* hold a lock on the table whilst the VSS info exist */ + fib_table_lock (rx_fib_index, proto); + + pool_get (dm->vss[proto], v); + v->fib_id = fib_id; + v->oui = oui; + dm->vss_index_by_rx_fib_index[proto][rx_fib_index] = + v - dm->vss[proto]; + } + } + + /* Release the lock taken during the create_or_lock at the start */ + fib_table_unlock (rx_fib_index, proto); + + return (rc); +} diff --git a/src/vnet/dhcp/dhcp_proxy.h b/src/vnet/dhcp/dhcp_proxy.h new file mode 100644 index 00000000..c0d79c41 --- /dev/null +++ b/src/vnet/dhcp/dhcp_proxy.h @@ -0,0 +1,248 @@ +/* + * dhcp_proxy.h: DHCP v4 & v6 proxy common functions/types + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_dhcp_proxy_h +#define included_dhcp_proxy_h + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef enum { +#define dhcp_proxy_error(n,s) DHCP_PROXY_ERROR_##n, +#include +#undef dhcp_proxy_error + DHCP_PROXY_N_ERROR, +} dhcp_proxy_error_t; + +typedef enum { +#define dhcpv6_proxy_error(n,s) DHCPV6_PROXY_ERROR_##n, +#include +#undef dhcpv6_proxy_error + DHCPV6_PROXY_N_ERROR, +} dhcpv6_proxy_error_t; + + +/** + * @brief The Virtual Sub-net Selection information for a given RX FIB + */ +typedef struct dhcp_vss_t_ { + /** + * @brief ?? 
RFC doesn't say + */ + u32 oui; + /** + * @brief VPN-ID + */ + u32 fib_id; +} dhcp_vss_t; + +/** + * @brief A DHCP proxy server representation + */ +typedef struct dhcp_server_t_ { + /** + * @brief The address of the DHCP server to which to relay the client's + * messages + */ + ip46_address_t dhcp_server; + + /** + * @brief The source address to use in relayed messages + */ + ip46_address_t dhcp_src_address; + + /** + * @brief The FIB index (not the external Table-ID) in which the server + * is reachable. + */ + u32 server_fib_index; + + /** + * @brief The FIB index (not the external Table-ID) in which the client + * resides. + */ + u32 rx_fib_index; +} dhcp_server_t; + +#define DHCP_N_PROTOS (FIB_PROTOCOL_IP6 + 1) + +/** + * @brief Collection of global DHCP proxy data + */ +typedef struct { + /* Pool of DHCP servers */ + dhcp_server_t *dhcp_servers[DHCP_N_PROTOS]; + + /* Pool of selected DHCP server. Zero is the default server */ + u32 * dhcp_server_index_by_rx_fib_index[DHCP_N_PROTOS]; + + /* to drop pkts in server-to-client direction */ + u32 error_drop_node_index; + + dhcp_vss_t *vss[DHCP_N_PROTOS]; + + /* hash lookup specific vrf_id -> option 82 vss suboption */ + u32 *vss_index_by_rx_fib_index[DHCP_N_PROTOS]; + +} dhcp_proxy_main_t; + +extern dhcp_proxy_main_t dhcp_proxy_main; + +/** + * @brief Send the details of a proxy session to the API client during a dump + */ +void dhcp_send_details (fib_protocol_t proto, + void *opaque, + u32 context, + const ip46_address_t *server, + const ip46_address_t *src, + u32 server_fib_id, + u32 rx_fib_id, + u32 vss_fib_id, + u32 vss_oui); + +/** + * @brief Show (on CLI) a VSS config during a show walk + */ +int dhcp_vss_show_walk (dhcp_vss_t *vss, + u32 rx_table_id, + void *ctx); + +/** + * @brief Configure/set a new VSS info + */ +int dhcp_proxy_set_vss(fib_protocol_t proto, + u32 vrf_id, + u32 oui, + u32 fib_id, + int is_del); + +/** + * @brief Dump the proxy configs to the API + */ +void dhcp_proxy_dump(fib_protocol_t proto, + void *opaque, + u32 context); + +/** + * @brief Add a new DHCP proxy server configuration. + * @return 1 if the config is new, + * 0 otherwise (implying a modify of an existing) + */ +int dhcp_proxy_server_add(fib_protocol_t proto, + ip46_address_t *addr, + ip46_address_t *src_address, + u32 rx_fib_index, + u32 server_table_id); + +/** + * @brief Delete a DHCP proxy config + * @return 0 if deleted, otherwise an error code + */ +int dhcp_proxy_server_del(fib_protocol_t proto, + u32 rx_fib_index); + +/** + * @brief Callback function invoked for each DHCP proxy entry + * return 0 to break the walk, non-zero otherwise. 
+ */ +typedef int (*dhcp_vss_walk_fn_t)(dhcp_vss_t *server, + u32 rx_table_id, + void *ctx); + +/** + * @brief Walk/Visit each DHCP proxy VSS + */ +void dhcp_vss_walk(fib_protocol_t proto, + dhcp_vss_walk_fn_t fn, + void *ctx); + +/** + * @brief Get the VSS data for the FIB index + */ +static inline dhcp_vss_t * +dhcp_get_vss_info (dhcp_proxy_main_t *dm, + u32 rx_fib_index, + fib_protocol_t proto) +{ + dhcp_vss_t *v = NULL; + + if (vec_len(dm->vss_index_by_rx_fib_index[proto]) > rx_fib_index && + dm->vss_index_by_rx_fib_index[proto][rx_fib_index] != ~0) + { + v = pool_elt_at_index ( + dm->vss[proto], + dm->vss_index_by_rx_fib_index[proto][rx_fib_index]); + } + + return (v); +} + +/** + * @brief Get the DHCP proxy server data for the FIB index + */ +static inline dhcp_server_t * +dhcp_get_server (dhcp_proxy_main_t *dm, + u32 rx_fib_index, + fib_protocol_t proto) +{ + dhcp_server_t *s = NULL; + + if (vec_len(dm->dhcp_server_index_by_rx_fib_index[proto]) > rx_fib_index && + dm->dhcp_server_index_by_rx_fib_index[proto][rx_fib_index] != ~0) + { + s = pool_elt_at_index ( + dm->dhcp_servers[proto], + dm->dhcp_server_index_by_rx_fib_index[proto][rx_fib_index]); + } + + return (s); +} + +int dhcp6_proxy_set_server (ip46_address_t *addr, + ip46_address_t *src_addr, + u32 rx_table_id, + u32 server_table_id, + int is_del); +int dhcp4_proxy_set_server (ip46_address_t *addr, + ip46_address_t *src_addr, + u32 rx_table_id, + u32 server_table_id, + int is_del); + +#endif /* included_dhcp_proxy_h */ diff --git a/src/vnet/dhcp/packet.h b/src/vnet/dhcp/packet.h deleted file mode 100644 index 267a8eaf..00000000 --- a/src/vnet/dhcp/packet.h +++ /dev/null @@ -1,61 +0,0 @@ -#ifndef included_vnet_dhcp_packet_h -#define included_vnet_dhcp_packet_h - -/* - * DHCP packet format - * - * Copyright (c) 2013 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include - -typedef struct { - u8 opcode; /* 1 = request, 2 = reply */ - u8 hardware_type; /* 1 = ethernet */ - u8 hardware_address_length; - u8 hops; - u32 transaction_identifier; - u16 seconds; - u16 flags; -#define DHCP_FLAG_BROADCAST (1<<15) - ip4_address_t client_ip_address; - ip4_address_t your_ip_address; /* use this one */ - ip4_address_t server_ip_address; - ip4_address_t gateway_ip_address; /* use option 3, not this one */ - u8 client_hardware_address[16]; - u8 server_name[64]; - u8 boot_filename[128]; - ip4_address_t magic_cookie; - u8 options[0]; -} dhcp_header_t; - -typedef struct { - u8 option; - u8 length; - union { - u8 data[0]; - u32 data_as_u32[0]; - }; -} __attribute__((packed)) dhcp_option_t; - -typedef enum { - DHCP_PACKET_DISCOVER=1, - DHCP_PACKET_OFFER, - DHCP_PACKET_REQUEST, - DHCP_PACKET_ACK=5, -} dhcp_packet_type_t; - -/* charming antique: 99.130.83.99 is the dhcp magic cookie */ -#define DHCP_MAGIC (clib_host_to_net_u32(0x63825363)) - -#endif /* included_vnet_dhcp_packet_h */ diff --git a/src/vnet/dhcp/proxy.h b/src/vnet/dhcp/proxy.h deleted file mode 100644 index 4b115c74..00000000 --- a/src/vnet/dhcp/proxy.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * proxy.h: dhcp proxy - * - * Copyright (c) 2013 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef included_dhcp_proxy_h -#define included_dhcp_proxy_h - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -typedef enum { -#define dhcp_proxy_error(n,s) DHCP_PROXY_ERROR_##n, -#include -#undef dhcp_proxy_error - DHCP_PROXY_N_ERROR, -} dhcp_proxy_error_t; - -typedef struct { - u32 oui; - u32 fib_id; -} vss_id; - -typedef union { - u8 as_u8[8]; - vss_id vpn_id; -} vss_info; - -typedef struct { - ip4_address_t dhcp_server; - ip4_address_t dhcp_src_address; - u32 server_fib_index; -} dhcp_server_t; - -typedef struct { - /* Pool of DHCP servers */ - dhcp_server_t * dhcp_servers; - - /* Pool of selected DHCP server. 
Zero is the default server */ - u32 * dhcp_server_index_by_rx_fib_index; - - /* to drop pkts in server-to-client direction */ - u32 error_drop_node_index; - - vss_info *vss; - - /* hash lookup specific vrf_id -> option 82 vss suboption */ - u32 *vss_index_by_rx_fib_index; - - /* convenience */ - vlib_main_t * vlib_main; - vnet_main_t * vnet_main; -} dhcp_proxy_main_t; - -extern dhcp_proxy_main_t dhcp_proxy_main; - -void dhcp_send_details (void *opaque, - u32 context, - const ip46_address_t *server, - const ip46_address_t *src, - u32 server_fib_id, - u32 rx_fib_id, - u32 vss_fib_id, - u32 vss_oui); - -int dhcp_proxy_set_server (ip4_address_t *addr, - ip4_address_t *src_address, - u32 fib_id, - u32 server_fib_id, - int is_del); - -int dhcp_proxy_set_option82_vss(u32 vrf_id, - u32 oui, - u32 fib_id, - int is_del); - -void dhcp_proxy_dump(void *opaque, - u32 context); - -#endif /* included_dhcp_proxy_h */ diff --git a/src/vnet/dhcp/proxy_error.def b/src/vnet/dhcp/proxy_error.def deleted file mode 100644 index 6d790d73..00000000 --- a/src/vnet/dhcp/proxy_error.def +++ /dev/null @@ -1,31 +0,0 @@ -/* - * dhcp_proxy_error.def: dhcp proxy errors - * - * Copyright (c) 2013 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -dhcp_proxy_error (NONE, "no error") -dhcp_proxy_error (NO_SERVER, "no dhcp server configured") -dhcp_proxy_error (RELAY_TO_SERVER, "DHCP packets relayed to the server") -dhcp_proxy_error (RELAY_TO_CLIENT, "DHCP packets relayed to clients") -dhcp_proxy_error (OPTION_82_ERROR, "DHCP failed to insert option 82") -dhcp_proxy_error (NO_OPTION_82, "DHCP option 82 missing") -dhcp_proxy_error (BAD_OPTION_82_ITF, "Bad DHCP option 82 interface value") -dhcp_proxy_error (BAD_OPTION_82_ADDR, "Bad DHCP option 82 address value") -dhcp_proxy_error (BAD_FIB_ID, "DHCP option 82 fib-id to fib-index map failure") -dhcp_proxy_error (NO_INTERFACE_ADDRESS, "DHCP no interface address") -dhcp_proxy_error (OPTION_82_VSS_NOT_PROCESSED, "DHCP VSS not processed by DHCP server") -dhcp_proxy_error (BAD_YIADDR, "DHCP packets with bad your_ip_address fields") -dhcp_proxy_error (BAD_SVR_FIB_OR_ADDRESS, "DHCP packets not from DHCP server or server FIB.") -dhcp_proxy_error (PKT_TOO_BIG, "DHCP packets which are too big.") diff --git a/src/vnet/dhcp/proxy_node.c b/src/vnet/dhcp/proxy_node.c deleted file mode 100644 index ab6819fe..00000000 --- a/src/vnet/dhcp/proxy_node.c +++ /dev/null @@ -1,1192 +0,0 @@ -/* - * proxy_node.c: dhcp proxy node processing - * - * Copyright (c) 2013 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include - -static char * dhcp_proxy_error_strings[] = { -#define dhcp_proxy_error(n,s) s, -#include "proxy_error.def" -#undef dhcp_proxy_error -}; - -#define foreach_dhcp_proxy_to_server_input_next \ - _ (DROP, "error-drop") \ - _ (LOOKUP, "ip4-lookup") \ - _ (SEND_TO_CLIENT, "dhcp-proxy-to-client") - -typedef enum { -#define _(s,n) DHCP_PROXY_TO_SERVER_INPUT_NEXT_##s, - foreach_dhcp_proxy_to_server_input_next -#undef _ - DHCP_PROXY_TO_SERVER_INPUT_N_NEXT, -} dhcp_proxy_to_server_input_next_t; - -typedef struct { - /* 0 => to server, 1 => to client */ - int which; - ip4_address_t trace_ip4_address; - u32 error; - u32 sw_if_index; - u32 original_sw_if_index; -} dhcp_proxy_trace_t; - -#define VPP_DHCP_OPTION82_SUB1_SIZE 6 -#define VPP_DHCP_OPTION82_SUB5_SIZE 6 -#define VPP_DHCP_OPTION82_VSS_SIZE 12 -#define VPP_DHCP_OPTION82_SIZE (VPP_DHCP_OPTION82_SUB1_SIZE + \ - VPP_DHCP_OPTION82_SUB5_SIZE + \ - VPP_DHCP_OPTION82_VSS_SIZE +3) - -vlib_node_registration_t dhcp_proxy_to_server_node; -vlib_node_registration_t dhcp_proxy_to_client_node; - -dhcp_proxy_main_t dhcp_proxy_main; - -u8 * format_dhcp_proxy_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - dhcp_proxy_trace_t * t = va_arg (*args, dhcp_proxy_trace_t *); - - if (t->which == 0) - s = format (s, "DHCP proxy: sent to server %U\n", - format_ip4_address, &t->trace_ip4_address, t->error); - else - s = format (s, "DHCP proxy: broadcast to client from %U\n", - format_ip4_address, &t->trace_ip4_address); - - if (t->error != (u32)~0) - s = format (s, " error: %s\n", dhcp_proxy_error_strings[t->error]); - - s = format (s, " original_sw_if_index: %d, sw_if_index: %d\n", - t->original_sw_if_index, t->sw_if_index); - - return s; -} - -u8 * format_dhcp_proxy_header_with_length (u8 * s, va_list * args) -{ - dhcp_header_t * h = va_arg (*args, dhcp_header_t *); - u32 max_header_bytes = va_arg (*args, u32); - u32 header_bytes; - - header_bytes = sizeof (h[0]); - if (max_header_bytes != 0 && header_bytes > max_header_bytes) - return format (s, "dhcp header truncated"); - - s = format (s, "DHCP Proxy"); - - return s; -} - -static inline vss_info * -dhcp_get_vss_info (dhcp_proxy_main_t *dm, - u32 rx_fib_index) -{ - vss_info *v; - - if (vec_len(dm->vss_index_by_rx_fib_index) <= rx_fib_index || - dm->vss_index_by_rx_fib_index[rx_fib_index] == ~0) - { - v = NULL; - } - else - { - v = pool_elt_at_index (dm->vss, - dm->vss_index_by_rx_fib_index[rx_fib_index]); - } - - return (v); -} - -static inline dhcp_server_t * -dhcp_get_server (dhcp_proxy_main_t *dm, - u32 rx_fib_index) -{ - dhcp_server_t *s = NULL; - - if (vec_len(dm->dhcp_server_index_by_rx_fib_index) > rx_fib_index && - dm->dhcp_server_index_by_rx_fib_index[rx_fib_index] != ~0) - { - s = pool_elt_at_index (dm->dhcp_servers, - dm->dhcp_server_index_by_rx_fib_index[rx_fib_index]); - } - - return (s); -} - -static uword -dhcp_proxy_to_server_input (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * from_frame) -{ - u32 n_left_from, next_index, * from, * to_next; - dhcp_proxy_main_t * dpm = &dhcp_proxy_main; - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - u32 pkts_to_server=0, pkts_to_client=0, pkts_no_server=0; - u32 pkts_no_interface_address=0; - u32 pkts_too_big=0; - ip4_main_t * im = 
&ip4_main; - - next_index = node->cached_next_index; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, - to_next, n_left_to_next); - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0; - vlib_buffer_t * b0; - udp_header_t * u0; - dhcp_header_t * h0; - ip4_header_t * ip0; - u32 next0; - u32 old0, new0; - ip_csum_t sum0; - u32 error0 = (u32) ~0; - u32 sw_if_index = 0; - u32 original_sw_if_index = 0; - u8 *end = NULL; - u32 fib_index; - dhcp_server_t * server; - u32 rx_sw_if_index; - dhcp_option_t *o; - u32 len = 0; - vlib_buffer_free_list_t *fl; - - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - - h0 = vlib_buffer_get_current (b0); - - /* - * udp_local hands us the DHCP header, need udp hdr, - * ip hdr to relay to server - */ - vlib_buffer_advance (b0, -(sizeof(*u0))); - u0 = vlib_buffer_get_current (b0); - - /* This blows. Return traffic has src_port = 67, dst_port = 67 */ - if (u0->src_port == clib_net_to_host_u16(UDP_DST_PORT_dhcp_to_server)) - { - vlib_buffer_advance (b0, sizeof(*u0)); - next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_SEND_TO_CLIENT; - error0 = 0; - pkts_to_client++; - goto do_enqueue; - } - - rx_sw_if_index = vnet_buffer(b0)->sw_if_index[VLIB_RX]; - - fib_index = im->fib_index_by_sw_if_index [rx_sw_if_index]; - server = dhcp_get_server(dpm, fib_index); - - if (PREDICT_FALSE (NULL == server)) - { - error0 = DHCP_PROXY_ERROR_NO_SERVER; - next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_DROP; - pkts_no_server++; - goto do_trace; - } - - vlib_buffer_advance (b0, -(sizeof(*ip0))); - ip0 = vlib_buffer_get_current (b0); - - /* disable UDP checksum */ - u0->checksum = 0; - sum0 = ip0->checksum; - old0 = ip0->dst_address.as_u32; - new0 = server->dhcp_server.as_u32; - ip0->dst_address.as_u32 = server->dhcp_server.as_u32; - sum0 = ip_csum_update (sum0, old0, new0, - ip4_header_t /* structure */, - dst_address /* changed member */); - ip0->checksum = ip_csum_fold (sum0); - - sum0 = ip0->checksum; - old0 = ip0->src_address.as_u32; - new0 = server->dhcp_src_address.as_u32; - ip0->src_address.as_u32 = new0; - sum0 = ip_csum_update (sum0, old0, new0, - ip4_header_t /* structure */, - src_address /* changed member */); - ip0->checksum = ip_csum_fold (sum0); - - /* Send to DHCP server via the configured FIB */ - vnet_buffer(b0)->sw_if_index[VLIB_TX] = - server->server_fib_index; - - h0->gateway_ip_address.as_u32 = server->dhcp_src_address.as_u32; - pkts_to_server++; - - o = (dhcp_option_t *) h0->options; - - fib_index = im->fib_index_by_sw_if_index - [vnet_buffer(b0)->sw_if_index[VLIB_RX]]; - - end = b0->data + b0->current_data + b0->current_length; - /* TLVs are not performance-friendly... 
*/ - while (o->option != 0xFF /* end of options */ && (u8 *)o < end) - o = (dhcp_option_t *) (((uword) o) + (o->length + 2)); - - fl = vlib_buffer_get_free_list (vm, b0->free_list_index); - // start write at (option*)o, some packets have padding - if (((u8 *)o - (u8 *)b0->data + VPP_DHCP_OPTION82_SIZE) > fl->n_data_bytes) - { - next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_DROP; - pkts_too_big++; - goto do_trace; - } - - if ((o->option == 0xFF) && ((u8 *)o <= end)) - { - vnet_main_t *vnm = vnet_get_main(); - u16 old_l0, new_l0; - ip4_address_t _ia0, * ia0 = &_ia0; - vss_info *vss; - vnet_sw_interface_t *swif; - sw_if_index = 0; - original_sw_if_index = 0; - - original_sw_if_index = sw_if_index = - vnet_buffer(b0)->sw_if_index[VLIB_RX]; - swif = vnet_get_sw_interface (vnm, sw_if_index); - if (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) - sw_if_index = swif->unnumbered_sw_if_index; - - /* - * Get the first ip4 address on the [client-side] - * RX interface, if not unnumbered. otherwise use - * the loopback interface's ip address. - */ - ia0 = ip4_interface_first_address(&ip4_main, sw_if_index, 0); - - if (ia0 == 0) - { - error0 = DHCP_PROXY_ERROR_NO_INTERFACE_ADDRESS; - next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_DROP; - pkts_no_interface_address++; - goto do_trace; - } - - /* Add option 82 */ - o->option = 82; /* option 82 */ - o->length = 12; /* 12 octets to follow */ - o->data[0] = 1; /* suboption 1, circuit ID (=FIB id) */ - o->data[1] = 4; /* length of suboption */ - o->data[2] = (original_sw_if_index >> 24) & 0xFF; - o->data[3] = (original_sw_if_index >> 16) & 0xFF; - o->data[4] = (original_sw_if_index >> 8) & 0xFF; - o->data[5] = (original_sw_if_index >> 0) & 0xFF; - o->data[6] = 5; /* suboption 5 (client RX intfc address) */ - o->data[7] = 4; /* length 4 */ - o->data[8] = ia0->as_u8[0]; - o->data[9] = ia0->as_u8[1]; - o->data[10] = ia0->as_u8[2]; - o->data[11] = ia0->as_u8[3]; - o->data[12] = 0xFF; - - vss = dhcp_get_vss_info (dpm, fib_index); - if (NULL != vss) - { - u32 opt82_fib_id=0, opt82_oui=0; - - opt82_oui = vss->vpn_id.oui; - opt82_fib_id = vss->vpn_id.fib_id; - - o->data[12] = 151; /* vss suboption */ - if (255 == opt82_fib_id) { - o->data[13] = 1; /* length */ - o->data[14] = 255; /* vss option type */ - o->data[15] = 152; /* vss control suboption */ - o->data[16] = 0; /* length */ - /* and a new "end-of-options" option (0xff) */ - o->data[17] = 0xFF; - o->length += 5; - } else { - o->data[13] = 8; /* length */ - o->data[14] = 1; /* vss option type */ - o->data[15] = (opt82_oui >> 16) & 0xff; - o->data[16] = (opt82_oui >> 8) & 0xff; - o->data[17] = (opt82_oui ) & 0xff; - o->data[18] = (opt82_fib_id >> 24) & 0xff; - o->data[19] = (opt82_fib_id >> 16) & 0xff; - o->data[20] = (opt82_fib_id >> 8) & 0xff; - o->data[21] = (opt82_fib_id) & 0xff; - o->data[22] = 152; /* vss control suboption */ - o->data[23] = 0; /* length */ - - /* and a new "end-of-options" option (0xff) */ - o->data[24] = 0xFF; - o->length += 12; - } - } - - len = o->length + 3; - b0->current_length += len; - /* Fix IP header length and checksum */ - old_l0 = ip0->length; - new_l0 = clib_net_to_host_u16 (old_l0); - new_l0 += len; - new_l0 = clib_host_to_net_u16 (new_l0); - ip0->length = new_l0; - sum0 = ip0->checksum; - sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, - length /* changed member */); - ip0->checksum = ip_csum_fold (sum0); - - /* Fix UDP length */ - new_l0 = clib_net_to_host_u16 (u0->length); - new_l0 += len; - u0->length = clib_host_to_net_u16 (new_l0); - } else { - 
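(Illustrative aside.) The base option-82 payload built above is 12 octets plus the 2-byte option header and a trailing end-of-options marker: sub-option 1 (circuit ID) carries the receiving sw_if_index and sub-option 5 carries the receiving interface's IPv4 address. A standalone sketch of that same layout outside the vlib buffer machinery; build_option82 is a made-up helper, not VPP code:

    #include <stdint.h>
    #include <string.h>
    #include <stdio.h>

    /* Builds: option 82, length 12, sub-option 1 (circuit ID = sw_if_index),
     * sub-option 5 (receiving interface address), then 0xff end marker. */
    static size_t
    build_option82 (uint8_t *out, uint32_t sw_if_index, const uint8_t addr[4])
    {
      uint8_t *p = out;

      *p++ = 82;                        /* relay agent information option */
      *p++ = 12;                        /* 12 octets of sub-options follow */

      *p++ = 1;                         /* sub-option 1: circuit ID */
      *p++ = 4;
      *p++ = (sw_if_index >> 24) & 0xff;
      *p++ = (sw_if_index >> 16) & 0xff;
      *p++ = (sw_if_index >> 8) & 0xff;
      *p++ = sw_if_index & 0xff;

      *p++ = 5;                         /* sub-option 5: interface address */
      *p++ = 4;
      memcpy (p, addr, 4);
      p += 4;

      *p++ = 0xff;                      /* end of options */
      return (size_t) (p - out);        /* 15 bytes total */
    }

    int
    main (void)
    {
      uint8_t buf[32];
      const uint8_t gw[4] = { 192, 0, 2, 1 };
      size_t n = build_option82 (buf, 7, gw);

      for (size_t i = 0; i < n; i++)
        printf ("%02x ", (unsigned) buf[i]);
      printf ("\n");
      return 0;
    }

When a VSS entry exists for the RX FIB, the node above overwrites the final 0xff with sub-option 151 (and the 152 control sub-option) and grows o->length accordingly.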
vlib_node_increment_counter - (vm, dhcp_proxy_to_server_node.index, - DHCP_PROXY_ERROR_OPTION_82_ERROR, 1); - } - - next0 = DHCP_PROXY_TO_SERVER_INPUT_NEXT_LOOKUP; - - do_trace: - if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) - { - dhcp_proxy_trace_t *tr = vlib_add_trace (vm, node, - b0, sizeof (*tr)); - tr->which = 0; /* to server */ - tr->error = error0; - tr->original_sw_if_index = original_sw_if_index; - tr->sw_if_index = sw_if_index; - if (next0 == DHCP_PROXY_TO_SERVER_INPUT_NEXT_LOOKUP) - tr->trace_ip4_address.as_u32 = server->dhcp_server.as_u32; - } - - do_enqueue: - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - } - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - vlib_node_increment_counter (vm, dhcp_proxy_to_server_node.index, - DHCP_PROXY_ERROR_RELAY_TO_CLIENT, - pkts_to_client); - vlib_node_increment_counter (vm, dhcp_proxy_to_server_node.index, - DHCP_PROXY_ERROR_RELAY_TO_SERVER, - pkts_to_server); - vlib_node_increment_counter (vm, dhcp_proxy_to_server_node.index, - DHCP_PROXY_ERROR_NO_SERVER, - pkts_no_server); - vlib_node_increment_counter (vm, dhcp_proxy_to_server_node.index, - DHCP_PROXY_ERROR_NO_INTERFACE_ADDRESS, - pkts_no_interface_address); - vlib_node_increment_counter (vm, dhcp_proxy_to_server_node.index, - DHCP_PROXY_ERROR_PKT_TOO_BIG, - pkts_too_big); - return from_frame->n_vectors; -} - -VLIB_REGISTER_NODE (dhcp_proxy_to_server_node) = { - .function = dhcp_proxy_to_server_input, - .name = "dhcp-proxy-to-server", - /* Takes a vector of packets. */ - .vector_size = sizeof (u32), - - .n_errors = DHCP_PROXY_N_ERROR, - .error_strings = dhcp_proxy_error_strings, - - .n_next_nodes = DHCP_PROXY_TO_SERVER_INPUT_N_NEXT, - .next_nodes = { -#define _(s,n) [DHCP_PROXY_TO_SERVER_INPUT_NEXT_##s] = n, - foreach_dhcp_proxy_to_server_input_next -#undef _ - }, - - .format_buffer = format_dhcp_proxy_header_with_length, - .format_trace = format_dhcp_proxy_trace, -#if 0 - .unformat_buffer = unformat_dhcp_proxy_header, -#endif -}; - -static uword -dhcp_proxy_to_client_input (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * from_frame) -{ - u32 n_left_from, * from; - ethernet_main_t *em = ethernet_get_main (vm); - dhcp_proxy_main_t * dpm = &dhcp_proxy_main; - vnet_main_t * vnm = vnet_get_main(); - ip4_main_t * im = &ip4_main; - - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - - while (n_left_from > 0) - { - u32 bi0; - vlib_buffer_t * b0; - udp_header_t * u0; - dhcp_header_t * h0; - ip4_header_t * ip0 = 0; - ip4_address_t * ia0 = 0; - u32 old0, new0; - ip_csum_t sum0; - ethernet_interface_t *ei0; - ethernet_header_t *mac0; - vnet_hw_interface_t *hi0; - vlib_frame_t *f0; - u32 * to_next0; - u32 sw_if_index = ~0; - vnet_sw_interface_t *si0; - u32 error0 = (u32)~0; - vnet_sw_interface_t *swif; - u32 fib_index; - dhcp_server_t * server; - u32 original_sw_if_index = (u32) ~0; - ip4_address_t relay_addr = { - .as_u32 = 0, - }; - - bi0 = from[0]; - from += 1; - n_left_from -= 1; - - b0 = vlib_get_buffer (vm, bi0); - h0 = vlib_buffer_get_current (b0); - - /* - * udp_local hands us the DHCP header, need udp hdr, - * ip hdr to relay to client - */ - vlib_buffer_advance (b0, -(sizeof(*u0))); - u0 = vlib_buffer_get_current (b0); - - vlib_buffer_advance (b0, -(sizeof(*ip0))); - ip0 = vlib_buffer_get_current (b0); - - /* Consumed by dhcp client code? 
*/ - if (dhcp_client_for_us (bi0, b0, ip0, u0, h0)) - continue; - - if (1 /* dpm->insert_option_82 */) - { - dhcp_option_t *o = (dhcp_option_t *) h0->options; - dhcp_option_t *sub; - - /* Parse through TLVs looking for option 82. - The circuit-ID is the FIB number we need - to track down the client-facing interface */ - - while (o->option != 0xFF /* end of options */ && - (u8 *) o < (b0->data + b0->current_data + b0->current_length)) - { - if (o->option == 82) - { - u32 vss_exist = 0; - u32 vss_ctrl = 0; - sub = (dhcp_option_t *) &o->data[0]; - while (sub->option != 0xFF /* end of options */ && - (u8 *) sub < (u8 *)(o + o->length)) { - /* If this is one of ours, it will have - total length 12, circuit-id suboption type, - and the sw_if_index */ - if (sub->option == 1 && sub->length == 4) - { - sw_if_index = ((sub->data[0] << 24) | - (sub->data[1] << 16) | - (sub->data[2] << 8) | - (sub->data[3])); - } - else if (sub->option == 5 && sub->length == 4) - { - relay_addr.as_u8[0] = sub->data[0]; - relay_addr.as_u8[1] = sub->data[1]; - relay_addr.as_u8[2] = sub->data[2]; - relay_addr.as_u8[3] = sub->data[3]; - } - else if (sub->option == 151 && - sub->length == 7 && - sub->data[0] == 1) - vss_exist = 1; - else if (sub->option == 152 && sub->length == 0) - vss_ctrl = 1; - sub = (dhcp_option_t *) - (((uword) sub) + (sub->length + 2)); - } - if (vss_ctrl && vss_exist) - vlib_node_increment_counter - (vm, dhcp_proxy_to_client_node.index, - DHCP_PROXY_ERROR_OPTION_82_VSS_NOT_PROCESSED, 1); - - } - o = (dhcp_option_t *) (((uword) o) + (o->length + 2)); - } - } - - if (sw_if_index == (u32)~0) - { - error0 = DHCP_PROXY_ERROR_NO_OPTION_82; - - drop_packet: - vlib_node_increment_counter (vm, dhcp_proxy_to_client_node.index, - error0, 1); - f0 = vlib_get_frame_to_node (vm, dpm->error_drop_node_index); - to_next0 = vlib_frame_vector_args (f0); - to_next0[0] = bi0; - f0->n_vectors = 1; - vlib_put_frame_to_node (vm, dpm->error_drop_node_index, f0); - goto do_trace; - } - - if (relay_addr.as_u32 == 0) - { - error0 = DHCP_PROXY_ERROR_BAD_OPTION_82_ADDR; - goto drop_packet; - } - - if (sw_if_index >= vec_len (im->fib_index_by_sw_if_index)) - { - error0 = DHCP_PROXY_ERROR_BAD_OPTION_82_ITF; - goto drop_packet; - } - - fib_index = im->fib_index_by_sw_if_index [sw_if_index]; - server = dhcp_get_server(dpm, fib_index); - - if (PREDICT_FALSE (NULL == server)) - { - error0 = DHCP_PROXY_ERROR_NO_SERVER; - goto drop_packet; - } - - if (ip0->src_address.as_u32 != server->dhcp_server.as_u32) - { - error0 = DHCP_PROXY_ERROR_BAD_SVR_FIB_OR_ADDRESS; - goto drop_packet; - } - - vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index; - - swif = vnet_get_sw_interface (vnm, sw_if_index); - original_sw_if_index = sw_if_index; - if (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) - sw_if_index = swif->unnumbered_sw_if_index; - - ia0 = ip4_interface_first_address (&ip4_main, sw_if_index, 0); - if (ia0 == 0) - { - error0 = DHCP_PROXY_ERROR_NO_INTERFACE_ADDRESS; - goto drop_packet; - } - - if (relay_addr.as_u32 != ia0->as_u32) - { - error0 = DHCP_PROXY_ERROR_BAD_YIADDR; - goto drop_packet; - } - - u0->checksum = 0; - u0->dst_port = clib_net_to_host_u16 (UDP_DST_PORT_dhcp_to_client); - sum0 = ip0->checksum; - old0 = ip0->dst_address.as_u32; - new0 = 0xFFFFFFFF; - ip0->dst_address.as_u32 = new0; - sum0 = ip_csum_update (sum0, old0, new0, - ip4_header_t /* structure */, - dst_address /* offset of changed member */); - ip0->checksum = ip_csum_fold (sum0); - - sum0 = ip0->checksum; - old0 = ip0->src_address.as_u32; - new0 = ia0->as_u32; - 
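(Illustrative aside.) The reply-path walk above inverts that encoding: it scans the option-82 sub-options to recover the circuit ID (the client-facing sw_if_index) and the relay address that were inserted on the way to the server. A minimal standalone version of that scan; parse_option82 is a hypothetical name, not the node code itself:

    #include <stdint.h>
    #include <stddef.h>
    #include <stdio.h>

    static int
    parse_option82 (const uint8_t *data, size_t len,
                    uint32_t *sw_if_index, uint8_t addr[4])
    {
      size_t i = 0;
      int got_circuit = 0, got_addr = 0;

      while (i + 2 <= len && data[i] != 0xff)
        {
          uint8_t sub = data[i], sub_len = data[i + 1];
          if (i + 2 + sub_len > len)
            break;                              /* truncated sub-option */
          if (sub == 1 && sub_len == 4)         /* circuit ID */
            {
              *sw_if_index = ((uint32_t) data[i + 2] << 24) |
                             ((uint32_t) data[i + 3] << 16) |
                             ((uint32_t) data[i + 4] << 8) |
                              (uint32_t) data[i + 5];
              got_circuit = 1;
            }
          else if (sub == 5 && sub_len == 4)    /* relay address */
            {
              for (int j = 0; j < 4; j++)
                addr[j] = data[i + 2 + j];
              got_addr = 1;
            }
          i += 2 + sub_len;
        }
      return got_circuit && got_addr;
    }

    int
    main (void)
    {
      const uint8_t opt[] = { 1, 4, 0, 0, 0, 7,  5, 4, 192, 0, 2, 1,  0xff };
      uint32_t swif = 0;
      uint8_t a[4];

      if (parse_option82 (opt, sizeof (opt), &swif, a))
        printf ("sw_if_index %u, relay %u.%u.%u.%u\n",
                (unsigned) swif, a[0], a[1], a[2], a[3]);
      return 0;
    }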
ip0->src_address.as_u32 = new0; - sum0 = ip_csum_update (sum0, old0, new0, - ip4_header_t /* structure */, - src_address /* offset of changed member */); - ip0->checksum = ip_csum_fold (sum0); - - vlib_buffer_advance (b0, -(sizeof(ethernet_header_t))); - si0 = vnet_get_sw_interface (vnm, original_sw_if_index); - if (si0->type == VNET_SW_INTERFACE_TYPE_SUB) - vlib_buffer_advance (b0, -4 /* space for VLAN tag */); - - mac0 = vlib_buffer_get_current (b0); - - hi0 = vnet_get_sup_hw_interface (vnm, original_sw_if_index); - ei0 = pool_elt_at_index (em->interfaces, hi0->hw_instance); - clib_memcpy (mac0->src_address, ei0->address, sizeof (ei0->address)); - memset (mac0->dst_address, 0xff, sizeof (mac0->dst_address)); - mac0->type = (si0->type == VNET_SW_INTERFACE_TYPE_SUB) ? - clib_net_to_host_u16(0x8100) : clib_net_to_host_u16 (0x0800); - - if (si0->type == VNET_SW_INTERFACE_TYPE_SUB) - { - u32 * vlan_tag = (u32 *)(mac0+1); - u32 tmp; - tmp = (si0->sub.id << 16) | 0x0800; - *vlan_tag = clib_host_to_net_u32 (tmp); - } - - /* $$$ This needs to be rewritten, for sure */ - f0 = vlib_get_frame_to_node (vm, hi0->output_node_index); - to_next0 = vlib_frame_vector_args (f0); - to_next0[0] = bi0; - f0->n_vectors = 1; - vlib_put_frame_to_node (vm, hi0->output_node_index, f0); - - do_trace: - if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) - { - dhcp_proxy_trace_t *tr = vlib_add_trace (vm, node, - b0, sizeof (*tr)); - tr->which = 1; /* to client */ - tr->trace_ip4_address.as_u32 = ia0 ? ia0->as_u32 : 0; - tr->error = error0; - tr->original_sw_if_index = original_sw_if_index; - tr->sw_if_index = sw_if_index; - } - } - return from_frame->n_vectors; -} - -VLIB_REGISTER_NODE (dhcp_proxy_to_client_node) = { - .function = dhcp_proxy_to_client_input, - .name = "dhcp-proxy-to-client", - /* Takes a vector of packets. 
*/ - .vector_size = sizeof (u32), - - .n_errors = DHCP_PROXY_N_ERROR, - .error_strings = dhcp_proxy_error_strings, - .format_buffer = format_dhcp_proxy_header_with_length, - .format_trace = format_dhcp_proxy_trace, -#if 0 - .unformat_buffer = unformat_dhcp_proxy_header, -#endif -}; - -clib_error_t * dhcp_proxy_init (vlib_main_t * vm) -{ - dhcp_proxy_main_t * dm = &dhcp_proxy_main; - vlib_node_t * error_drop_node; - dhcp_server_t * server; - - dm->vlib_main = vm; - dm->vnet_main = vnet_get_main(); - error_drop_node = vlib_get_node_by_name (vm, (u8 *) "error-drop"); - dm->error_drop_node_index = error_drop_node->index; - - dm->vss_index_by_rx_fib_index = NULL; - - udp_register_dst_port (vm, UDP_DST_PORT_dhcp_to_client, - dhcp_proxy_to_client_node.index, 1 /* is_ip4 */); - - udp_register_dst_port (vm, UDP_DST_PORT_dhcp_to_server, - dhcp_proxy_to_server_node.index, 1 /* is_ip4 */); - - /* Create the default server, don't mark it valid */ - pool_get (dm->dhcp_servers, server); - memset (server, 0, sizeof (*server)); - - return 0; -} - -VLIB_INIT_FUNCTION (dhcp_proxy_init); - -int dhcp_proxy_set_server (ip4_address_t *addr, - ip4_address_t *src_address, - u32 rx_fib_id, - u32 server_fib_id, - int is_del) -{ - dhcp_proxy_main_t * dpm = &dhcp_proxy_main; - dhcp_server_t * server = 0; - u32 server_index = 0; - u32 rx_fib_index = 0; - - const fib_prefix_t all_1s = - { - .fp_len = 32, - .fp_addr.ip4.as_u32 = 0xffffffff, - .fp_proto = FIB_PROTOCOL_IP4, - }; - - if (addr->as_u32 == 0) - return VNET_API_ERROR_INVALID_DST_ADDRESS; - - if (src_address->as_u32 == 0) - return VNET_API_ERROR_INVALID_SRC_ADDRESS; - - rx_fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, - rx_fib_id); - - if (is_del) - { - if (rx_fib_index >= vec_len(dpm->dhcp_server_index_by_rx_fib_index)) - return VNET_API_ERROR_NO_SUCH_ENTRY; - - server_index = dpm->dhcp_server_index_by_rx_fib_index[rx_fib_index]; - - if (server_index == ~0) - return VNET_API_ERROR_NO_SUCH_ENTRY; - - /* Use the default server again. 
*/ - dpm->dhcp_server_index_by_rx_fib_index[rx_fib_index] = ~0; - server = pool_elt_at_index (dpm->dhcp_servers, server_index); - - fib_table_entry_special_remove(rx_fib_index, - &all_1s, - FIB_SOURCE_DHCP); - fib_table_unlock (rx_fib_index, - FIB_PROTOCOL_IP4); - fib_table_unlock (server->server_fib_index, - FIB_PROTOCOL_IP4); - - memset (server, 0, sizeof (*server)); - pool_put (dpm->dhcp_servers, server); - return 0; - } - else - { - vec_validate_init_empty(dpm->dhcp_server_index_by_rx_fib_index, - rx_fib_index, - ~0); - - pool_get (dpm->dhcp_servers, server); - - server->dhcp_server.as_u32 = addr->as_u32; - server->dhcp_src_address.as_u32 = src_address->as_u32; - - fib_table_entry_special_add(rx_fib_index, - &all_1s, - FIB_SOURCE_DHCP, - FIB_ENTRY_FLAG_LOCAL, - ADJ_INDEX_INVALID); - fib_table_lock (rx_fib_index, - FIB_PROTOCOL_IP4); - - server->server_fib_index = - fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, - server_fib_id); - - vec_validate_init_empty (dpm->dhcp_server_index_by_rx_fib_index, - rx_fib_index, - ~0); - dpm->dhcp_server_index_by_rx_fib_index[rx_fib_index] = - server - dpm->dhcp_servers; - } - - fib_table_unlock (rx_fib_index, - FIB_PROTOCOL_IP4); - - return 0; -} - -static clib_error_t * -dhcp_proxy_set_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - ip4_address_t server_addr, src_addr; - u32 server_fib_id = 0, rx_fib_id = 0; - int is_del = 0; - int set_src = 0, set_server = 0; - - while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "server %U", - unformat_ip4_address, &server_addr)) - set_server = 1; - else if (unformat (input, "server-fib-id %d", &server_fib_id)) - ; - else if (unformat (input, "rx-fib-id %d", &rx_fib_id)) - ; - else if (unformat(input, "src-address %U", - unformat_ip4_address, &src_addr)) - set_src = 1; - else if (unformat (input, "delete") || - unformat (input, "del")) - is_del = 1; - else - break; - } - - if (is_del || (set_server && set_src)) - { - int rv; - - rv = dhcp_proxy_set_server (&server_addr, &src_addr, rx_fib_id, - server_fib_id, is_del); - switch (rv) - { - case 0: - return 0; - - case VNET_API_ERROR_INVALID_DST_ADDRESS: - return clib_error_return (0, "Invalid server address"); - - case VNET_API_ERROR_INVALID_SRC_ADDRESS: - return clib_error_return (0, "Invalid src address"); - - case VNET_API_ERROR_NO_SUCH_INNER_FIB: - return clib_error_return (0, "No such rx fib id %d", rx_fib_id); - - case VNET_API_ERROR_NO_SUCH_FIB: - return clib_error_return (0, "No such server fib id %d", - server_fib_id); - - case VNET_API_ERROR_NO_SUCH_ENTRY: - return clib_error_return - (0, "Fib id %d: no per-fib DHCP server configured", rx_fib_id); - - default: - return clib_error_return (0, "BUG: rv %d", rv); - } - } - else - return clib_error_return (0, "parse error`%U'", - format_unformat_error, input); -} - -VLIB_CLI_COMMAND (dhcp_proxy_set_command, static) = { - .path = "set dhcp proxy", - .short_help = "set dhcp proxy [del] server src-address [server-fib-id ] [rx-fib-id ]", - .function = dhcp_proxy_set_command_fn, -}; - -u8 * format_dhcp_proxy_server (u8 * s, va_list * args) -{ - dhcp_proxy_main_t * dm = va_arg (*args, dhcp_proxy_main_t *); - dhcp_server_t * server = va_arg (*args, dhcp_server_t *); - u32 rx_fib_index = va_arg (*args, u32); - ip4_fib_t * rx_fib, * server_fib; - u32 server_fib_id = ~0, rx_fib_id = ~0; - - if (dm == 0) - { - s = format (s, "%=16s%=16s%=14s%=14s", "Server", "Src Address", - "Server FIB", "RX FIB"); - return s; - } - - server_fib = 
ip4_fib_get(server->server_fib_index); - - if (server_fib) - server_fib_id = server_fib->table_id; - - rx_fib = ip4_fib_get(rx_fib_index); - - if (rx_fib) - rx_fib_id = rx_fib->table_id; - - s = format (s, "%=16U%=16U%=14u%=14u", - format_ip4_address, &server->dhcp_server, - format_ip4_address, &server->dhcp_src_address, - server_fib_id, rx_fib_id); - return s; -} - -static clib_error_t * -dhcp_proxy_show_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - dhcp_proxy_main_t * dpm = &dhcp_proxy_main; - dhcp_server_t * server; - u32 server_index, i; - - vlib_cli_output (vm, "%U", format_dhcp_proxy_server, 0 /* header line */, - 0, 0); - - vec_foreach_index (i, dpm->dhcp_server_index_by_rx_fib_index) - { - server_index = dpm->dhcp_server_index_by_rx_fib_index[i]; - if (~0 == server_index) - continue; - - server = pool_elt_at_index (dpm->dhcp_servers, server_index); - - vlib_cli_output (vm, "%U", format_dhcp_proxy_server, dpm, - server, i); - } - - return 0; -} - -VLIB_CLI_COMMAND (dhcp_proxy_show_command, static) = { - .path = "show dhcp proxy", - .short_help = "Display dhcp proxy server info", - .function = dhcp_proxy_show_command_fn, -}; - -void -dhcp_proxy_dump (void *opaque, - u32 context) -{ - dhcp_proxy_main_t * dpm = &dhcp_proxy_main; - ip4_fib_t *s_fib, *r_fib; - dhcp_server_t * server; - u32 server_index, i; - vss_info *v; - - vec_foreach_index (i, dpm->dhcp_server_index_by_rx_fib_index) - { - server_index = dpm->dhcp_server_index_by_rx_fib_index[i]; - if (~0 == server_index) - continue; - - server = pool_elt_at_index (dpm->dhcp_servers, server_index); - v = dhcp_get_vss_info(dpm, i); - - ip46_address_t src_addr = { - .ip4 = server->dhcp_src_address, - }; - ip46_address_t server_addr = { - .ip4 = server->dhcp_server, - }; - - s_fib = ip4_fib_get(server->server_fib_index); - r_fib = ip4_fib_get(i); - - dhcp_send_details(opaque, - context, - &server_addr, - &src_addr, - s_fib->table_id, - r_fib->table_id, - (v ? v->vpn_id.fib_id : 0), - (v ? 
v->vpn_id.oui : 0)); - } -} - -int dhcp_proxy_set_option82_vss(u32 tbl_id, - u32 oui, - u32 fib_id, - int is_del) -{ - dhcp_proxy_main_t *dm = &dhcp_proxy_main; - vss_info *v = NULL; - u32 rx_fib_index; - int rc = 0; - - rx_fib_index = ip4_fib_table_find_or_create_and_lock(tbl_id); - v = dhcp_get_vss_info(dm, rx_fib_index); - - if (NULL != v) - { - if (is_del) - { - /* release the lock held on the table when the VSS - * info was created */ - fib_table_unlock (rx_fib_index, - FIB_PROTOCOL_IP4); - - pool_put (dm->vss, v); - dm->vss_index_by_rx_fib_index[rx_fib_index] = ~0; - } - else - { - /* this is a modify */ - v->vpn_id.fib_id = fib_id; - v->vpn_id.oui = oui; - } - } - else - { - if (is_del) - rc = VNET_API_ERROR_NO_SUCH_ENTRY; - else - { - /* create a new entry */ - vec_validate_init_empty(dm->vss_index_by_rx_fib_index, - rx_fib_index, ~0); - - /* hold a lock on the table whilst the VSS info exist */ - fib_table_lock (rx_fib_index, - FIB_PROTOCOL_IP4); - - pool_get (dm->vss, v); - v->vpn_id.fib_id = fib_id; - v->vpn_id.oui = oui; - dm->vss_index_by_rx_fib_index[rx_fib_index] = v - dm->vss; - } - } - - /* Release the lock taken during the create_or_lock at the start */ - fib_table_unlock (rx_fib_index, - FIB_PROTOCOL_IP4); - - return (rc); -} - -static clib_error_t * -dhcp_option_82_vss_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - int is_del = 0, got_new_vpn_id=0; - u32 oui=0, fib_id=0, tbl_id=~0; - - - while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) - { - - if (unformat(input, "delete") || unformat(input, "del")) - is_del = 1; - else if (unformat (input, "oui %d", &oui)) - got_new_vpn_id = 1; - else if (unformat (input, "vpn-id %d", &fib_id)) - got_new_vpn_id = 1; - else if (unformat (input, "table %d", &tbl_id)) - got_new_vpn_id = 1; - else - break; - } - if (tbl_id == ~0) - return clib_error_return (0, "no table ID specified."); - - if (is_del || got_new_vpn_id) - { - int rv; - rv = dhcp_proxy_set_option82_vss(tbl_id, oui, fib_id, is_del); - switch (rv) - { - case 0: - return 0; - - case VNET_API_ERROR_NO_SUCH_FIB: - return clib_error_return (0, "option 82 vss(oui:%d, vpn-id:%d) not found in table %d", - oui, fib_id, tbl_id); - - case VNET_API_ERROR_NO_SUCH_ENTRY: - return clib_error_return (0, "option 82 vss for table %d not found in in pool.", - tbl_id); - default: - return clib_error_return (0, "BUG: rv %d", rv); - } - } - else - return clib_error_return (0, "parse error`%U'", - format_unformat_error, input); -} - -VLIB_CLI_COMMAND (dhcp_proxy_vss_command,static) = { - .path = "set dhcp option-82 vss", - .short_help = "set dhcp option-82 vss [del] table
oui vpn-id ", - .function = dhcp_option_82_vss_fn, -}; - - -static clib_error_t * -dhcp_vss_show_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) - -{ - dhcp_proxy_main_t * dm = &dhcp_proxy_main; - ip4_fib_t *fib; - u32 *fib_index; - vss_info *v; - - vlib_cli_output (vm, "%=9s%=11s%=12s","Table", "OUI", "VPN-ID"); - pool_foreach (fib_index, dm->vss_index_by_rx_fib_index, - ({ - fib = ip4_fib_get (*fib_index); - v = pool_elt_at_index (dm->vss, *fib_index); - - vlib_cli_output (vm, "%=6d%=6d%=12d", - fib->table_id, - v->vpn_id.oui, - v->vpn_id.fib_id); - })); - - return 0; -} - -VLIB_CLI_COMMAND (dhcp_proxy_vss_show_command, static) = { - .path = "show dhcp vss", - .short_help = "show dhcp VSS", - .function = dhcp_vss_show_command_fn, -}; - -static clib_error_t * -dhcp_option_82_address_show_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) - -{ - dhcp_proxy_main_t *dm = &dhcp_proxy_main; - vnet_main_t *vnm = vnet_get_main(); - u32 sw_if_index0=0, sw_if_index; - ip4_address_t *ia0; - vnet_sw_interface_t *swif; - - while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) - { - - if (unformat(input, "%U", - unformat_vnet_sw_interface, dm->vnet_main, &sw_if_index0)) - { - swif = vnet_get_sw_interface (vnm, sw_if_index0); - sw_if_index = (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) ? - swif->unnumbered_sw_if_index : sw_if_index0; - ia0 = ip4_interface_first_address(&ip4_main, sw_if_index, 0); - if (ia0) - { - vlib_cli_output (vm, "%=20s%=20s", "interface", - "source IP address"); - - vlib_cli_output (vm, "%=20U%=20U", - format_vnet_sw_if_index_name, - dm->vnet_main, sw_if_index0, - format_ip4_address, ia0); - } - else - vlib_cli_output (vm, "%=34s %=20U", - "No IPv4 address configured on", - format_vnet_sw_if_index_name, - dm->vnet_main, sw_if_index); - } - else - break; - } - - return 0; -} - -VLIB_CLI_COMMAND (dhcp_proxy_address_show_command,static) = { - .path = "show dhcp option-82-address interface", - .short_help = "show dhcp option-82-address interface ", - .function = dhcp_option_82_address_show_command_fn, -}; diff --git a/src/vnet/dhcpv6/packet.h b/src/vnet/dhcpv6/packet.h deleted file mode 100644 index 8634b5d8..00000000 --- a/src/vnet/dhcpv6/packet.h +++ /dev/null @@ -1,183 +0,0 @@ -#ifndef included_vnet_dhcp_packet_h -#define included_vnet_dhcp_packet_h - -/* - * DHCP packet format - * - * Copyright (c) 2013 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include - -// #define DHCP_VRF_NAME_MAX_LEN L3VM_MAX_NAME_STR_LEN -// #define DHCPV6_MAX_VRF_NAME_LEN L3VM_MAX_NAME_STR_LEN -#define DHCP_MAX_RELAY_ADDR 16 -#define PROTO_UDP 17 -#define DHCPV6_CLIENT_PORT 546 -#define DHCPV6_SERVER_PORT 547 -#define HOP_COUNT_LIMIT 32 -#define DHCPV6_CISCO_ENT_NUM 9 - -/* - * DHCPv6 message types - */ -typedef enum dhcpv6_msg_type_{ - DHCPV6_MSG_SOLICIT = 1, - DHCPV6_MSG_ADVERTISE = 2, - DHCPV6_MSG_REQUEST = 3, - DHCPV6_MSG_CONFIRM = 4, - DHCPV6_MSG_RENEW = 5, - DHCPV6_MSG_REBIND = 6, - DHCPV6_MSG_REPLY = 7, - DHCPV6_MSG_RELEASE = 8, - DHCPV6_MSG_DECLINE = 9, - DHCPV6_MSG_RECONFIGURE = 10, - DHCPV6_MSG_INFORMATION_REQUEST = 11, - DHCPV6_MSG_RELAY_FORW = 12, - DHCPV6_MSG_RELAY_REPL = 13, -} dhcpv6_msg_type_t; - -/* - * DHCPv6 options types - */ -enum { - DHCPV6_OPTION_CLIENTID = 1, - DHCPV6_OPTION_SERVERID = 2, - DHCPV6_OPTION_IA_NA = 3, - DHCPV6_OPTION_IA_TA = 4, - DHCPV6_OPTION_IAADDR = 5, - DHCPV6_OPTION_ORO = 6, - DHCPV6_OPTION_PREFERENCE = 7, - DHCPV6_OPTION_ELAPSED_TIME = 8, - DHCPV6_OPTION_RELAY_MSG = 9, - DHCPV6_OPTION_AUTH = 11, - DHCPV6_OPTION_UNICAST = 12, - DHCPV6_OPTION_STATUS_CODE = 13, - DHCPV6_OPTION_RAPID_COMMIT = 14, - DHCPV6_OPTION_USER_CLASS = 15, - DHCPV6_OPTION_VENDOR_CLASS = 16, - DHCPV6_OPTION_VENDOR_OPTS = 17, - DHCPV6_OPTION_INTERFACE_ID = 18, // relay agent fills this - DHCPV6_OPTION_RECONF_MSG = 19, - DHCPV6_OPTION_RECONF_ACCEPT = 20, - DHCPV6_OPTION_REMOTEID = 37, // relay agent fills this - DHCPV6_OPTION_VSS = 68, // relay agent fills this - DHCPV6_OPTION_CLIENT_LINK_LAYER_ADDRESS = 79, - DHCPV6_OPTION_MAX -}; - -/* -* DHCPv6 status codes - */ -enum { - DHCPV6_STATUS_SUCCESS = 0, - DHCPV6_STATUS_UNSPEC_FAIL = 1, - DHCPV6_STATUS_NOADDRS_AVAIL = 2, - DHCPV6_STATUS_NO_BINDING = 3, - DHCPV6_STATUS_NOT_ONLINK = 4, - DHCPV6_STATUS_USE_MULTICAST = 5, -}; - -/* - * DHCPv6 DUID types - */ -enum { - DHCPV6_DUID_LLT = 1, /* DUID Based on Link-layer Address Plus Time */ - DHCPV6_DUID_EN = 2, /* DUID Based on Enterprise Number */ - DHCPV6_DUID_LL = 3, /* DUID Based on Link-layer Address */ -}; - -//Structure for DHCPv6 payload from client -typedef struct dhcpv6_hdr_ { - union { - u8 msg_type; //DHCP msg type - u32 xid; // transaction id - }u; - u8 data[0]; -} dhcpv6_header_t; - - - -typedef CLIB_PACKED (struct dhcpv6_relay_ctx_ { - dhcpv6_header_t *pkt; - u32 pkt_len; - u32 dhcpv6_len; //DHCPv6 payload load -// if_ordinal iod; - u32 if_index; - u32 ctx_id; - char ctx_name[32+1]; - u8 dhcp_msg_type; -}) dhcpv6_relay_ctx_t; - -//Structure for DHCPv6 RELAY-FORWARD and DHCPv6 RELAY-REPLY pkts -typedef CLIB_PACKED (struct dhcpv6_relay_hdr_ { - u8 msg_type; - u8 hop_count; - ip6_address_t link_addr; - ip6_address_t peer_addr; - u8 data[0]; -}) dhcpv6_relay_hdr_t; - -typedef enum dhcp_stats_action_type_ { - DHCP_STATS_ACTION_FORWARDED=1, - DHCP_STATS_ACTION_RECEIVED, - DHCP_STATS_ACTION_DROPPED -} dhcp_stats_action_type_t; -//Generic counters for a packet -typedef struct dhcp_stats_counters_ { - u64 rx_pkts; //counter for received pkts - u64 tx_pkts; //counter for forwarded pkts - u64 drops; //counter for dropped pkts -} dhcp_stats_counters_t; - - -typedef enum dhcpv6_stats_drop_reason_ { - DHCPV6_RELAY_PKT_DROP_RELAYDISABLE = 1, - DHCPV6_RELAY_PKT_DROP_MAX_HOPS, - DHCPV6_RELAY_PKT_DROP_VALIDATION_FAIL, - DHCPV6_RELAY_PKT_DROP_UNKNOWN_OP_INTF, - DHCPV6_RELAY_PKT_DROP_BAD_CONTEXT, - DHCPV6_RELAY_PKT_DROP_OPT_INSERT_FAIL, - DHCPV6_RELAY_PKT_DROP_REPLY_FROM_CLIENT, -} dhcpv6_stats_drop_reason_t; - -typedef CLIB_PACKED (struct { - u16 option; 
- u16 length; - u8 data[0]; -}) dhcpv6_option_t; - -typedef CLIB_PACKED (struct { - dhcpv6_option_t opt; - u32 int_idx; -}) dhcpv6_int_id_t; - -typedef CLIB_PACKED (struct { - dhcpv6_option_t opt; - u8 data[8]; // data[0]:type, data[1..7]: VPN ID -}) dhcpv6_vss_t; - -typedef CLIB_PACKED (struct { - dhcpv6_option_t opt; - u32 ent_num; - u32 rmt_id; -}) dhcpv6_rmt_id_t; - -typedef CLIB_PACKED (struct { - dhcpv6_option_t opt; - u16 link_type; - u8 data[6]; // data[0]:data[5]: MAC address -}) dhcpv6_client_mac_t; - - -#endif /* included_vnet_dhcp_packet_h */ diff --git a/src/vnet/dhcpv6/proxy.h b/src/vnet/dhcpv6/proxy.h deleted file mode 100644 index 77ced361..00000000 --- a/src/vnet/dhcpv6/proxy.h +++ /dev/null @@ -1,94 +0,0 @@ -/* - * proxy.h: dhcp proxy - * - * Copyright (c) 2013 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef included_dhcpv6_proxy_h -#define included_dhcpv6_proxy_h - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -typedef enum { -#define dhcpv6_proxy_error(n,s) DHCPV6_PROXY_ERROR_##n, -#include -#undef dhcpv6_proxy_error - DHCPV6_PROXY_N_ERROR, -} dhcpv6_proxy_error_t; - -typedef struct { - u32 oui; - u32 fib_id; -} dhcpv6_vss_id; - -typedef union { - u8 as_u8[8]; - dhcpv6_vss_id vpn_id; -} dhcpv6_vss_info; - -typedef struct { - ip6_address_t dhcp6_server; - ip6_address_t dhcp6_src_address; - u32 server_fib6_index; -} dhcpv6_server_t; - -typedef struct { - /* Pool of DHCP servers */ - dhcpv6_server_t * dhcp6_servers; - - /* Pool of selected DHCP server. Zero is the default server */ - u32 * dhcp6_server_index_by_rx_fib_index; - - /* all DHCP servers address */ - ip6_address_t all_dhcpv6_server_address; - ip6_address_t all_dhcpv6_server_relay_agent_address; - - /* to drop pkts in server-to-client direction */ - u32 error_drop_node_index; - - dhcpv6_vss_info *vss; - - /* hash lookup specific vrf_id -> VSS vector index*/ - u32 *vss_index_by_rx_fib_index; - - /* convenience */ - vlib_main_t * vlib_main; - vnet_main_t * vnet_main; -} dhcpv6_proxy_main_t; - -dhcpv6_proxy_main_t dhcpv6_proxy_main; - -int dhcpv6_proxy_set_vss(u32 tbl_id, - u32 oui, - u32 fib_id, - int is_del); - -int dhcpv6_proxy_set_server(ip6_address_t *addr, - ip6_address_t *src_address, - u32 rx_fib_id, - u32 server_fib_id, - int is_del); - -void dhcpv6_proxy_dump(void *opaque, - u32 context); - -#endif /* included_dhcpv6_proxy_h */ diff --git a/src/vnet/dhcpv6/proxy_error.def b/src/vnet/dhcpv6/proxy_error.def deleted file mode 100644 index 55fa7317..00000000 --- a/src/vnet/dhcpv6/proxy_error.def +++ /dev/null @@ -1,29 +0,0 @@ -/* - * dhcp_proxy_error.def: dhcp proxy errors - * - * Copyright (c) 2013 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
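(Illustrative aside.) As the struct comment notes, the dhcpv6_vss_t payload is eight bytes: byte 0 is the type and the remaining seven carry the VPN ID, encoded as a 3-byte OUI followed by a 4-byte FIB/VPN id. A standalone sketch of that packing, mirroring what the relay encoder writes later in the to-server node; pack_vss_payload is an illustrative name:

    #include <stdint.h>
    #include <stdio.h>

    /* data[0] = type (1), data[1..3] = OUI, data[4..7] = VPN/FIB id */
    static void
    pack_vss_payload (uint8_t data[8], uint32_t oui, uint32_t fib_id)
    {
      data[0] = 1;
      data[1] = (oui >> 16) & 0xff;
      data[2] = (oui >> 8) & 0xff;
      data[3] = oui & 0xff;
      data[4] = (fib_id >> 24) & 0xff;
      data[5] = (fib_id >> 16) & 0xff;
      data[6] = (fib_id >> 8) & 0xff;
      data[7] = fib_id & 0xff;
    }

    int
    main (void)
    {
      uint8_t d[8];
      pack_vss_payload (d, 9 /* example OUI */, 13 /* example VPN id */);
      for (int i = 0; i < 8; i++)
        printf ("%02x ", (unsigned) d[i]);
      printf ("\n");
      return 0;
    }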
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -dhcpv6_proxy_error (NONE, "no error") -dhcpv6_proxy_error (NO_SERVER, "no dhcpv6 server configured") -dhcpv6_proxy_error (RELAY_TO_SERVER, "DHCPV6 packets relayed to the server") -dhcpv6_proxy_error (RELAY_TO_CLIENT, "DHCPV6 packets relayed to clients") -dhcpv6_proxy_error (NO_INTERFACE_ADDRESS, "DHCPV6 no interface address") -dhcpv6_proxy_error (WRONG_MESSAGE_TYPE, "DHCPV6 wrong message type.") -dhcpv6_proxy_error (NO_SRC_ADDRESS, "DHCPV6 no srouce IPv6 address configured.") -dhcpv6_proxy_error (NO_CIRCUIT_ID_OPTION, "DHCPv6 reply packets without circuit ID option") -dhcpv6_proxy_error (NO_RELAY_MESSAGE_OPTION, "DHCPv6 reply packets without relay message option") -dhcpv6_proxy_error (BAD_SVR_FIB_OR_ADDRESS, "DHCPv6 packets not from DHCPv6 server or server FIB.") -dhcpv6_proxy_error (PKT_TOO_BIG, "DHCPv6 packets which are too big.") -dhcpv6_proxy_error (WRONG_INTERFACE_ID_OPTION, "DHCPv6 reply to invalid interface.") diff --git a/src/vnet/dhcpv6/proxy_node.c b/src/vnet/dhcpv6/proxy_node.c deleted file mode 100644 index f40798e6..00000000 --- a/src/vnet/dhcpv6/proxy_node.c +++ /dev/null @@ -1,1280 +0,0 @@ -/* - * proxy_node.c: dhcpv6 proxy node processing - * - * Copyright (c) 2013 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include -#include -#include - -static char * dhcpv6_proxy_error_strings[] = { -#define dhcpv6_proxy_error(n,s) s, -#include "proxy_error.def" -#undef dhcpv6_proxy_error -}; - -#define foreach_dhcpv6_proxy_to_server_input_next \ - _ (DROP, "error-drop") \ - _ (LOOKUP, "ip6-lookup") \ - _ (SEND_TO_CLIENT, "dhcpv6-proxy-to-client") - - -typedef enum { -#define _(s,n) DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_##s, - foreach_dhcpv6_proxy_to_server_input_next -#undef _ - DHCPV6_PROXY_TO_SERVER_INPUT_N_NEXT, -} dhcpv6_proxy_to_server_input_next_t; - -typedef struct { - /* 0 => to server, 1 => to client */ - int which; - u8 packet_data[64]; - u32 error; - u32 sw_if_index; - u32 original_sw_if_index; -} dhcpv6_proxy_trace_t; - -vlib_node_registration_t dhcpv6_proxy_to_server_node; -vlib_node_registration_t dhcpv6_proxy_to_client_node; - - -u8 * format_dhcpv6_proxy_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - dhcpv6_proxy_trace_t * t = va_arg (*args, dhcpv6_proxy_trace_t *); - - if (t->which == 0) - s = format (s, "DHCPV6 proxy: sent to server %U", - format_ip6_address, &t->packet_data, sizeof (ip6_address_t)); - else - s = format (s, "DHCPV6 proxy: sent to client from %U", - format_ip6_address, &t->packet_data, sizeof (ip6_address_t)); - if (t->error != (u32)~0) - s = format (s, " error: %s\n", dhcpv6_proxy_error_strings[t->error]); - - s = format (s, " original_sw_if_index: %d, sw_if_index: %d\n", - t->original_sw_if_index, t->sw_if_index); - - return s; -} - -u8 * format_dhcpv6_proxy_header_with_length (u8 * s, va_list * args) -{ - dhcpv6_header_t * h = va_arg (*args, dhcpv6_header_t *); - u32 max_header_bytes = va_arg (*args, u32); - u32 header_bytes; - - header_bytes = sizeof (h[0]); - if (max_header_bytes != 0 && header_bytes > max_header_bytes) - return format (s, "dhcpv6 header truncated"); - - s = format (s, "DHCPV6 Proxy"); - - return s; -} -/* get first interface address */ -static ip6_address_t * -ip6_interface_first_global_or_site_address (ip6_main_t * im, u32 sw_if_index) -{ - ip_lookup_main_t * lm = &im->lookup_main; - ip_interface_address_t * ia = 0; - ip6_address_t * result = 0; - - foreach_ip_interface_address (lm, ia, sw_if_index, - 1 /* honor unnumbered */, - ({ - ip6_address_t * a = ip_interface_address_get_address (lm, ia); - if ((a->as_u8[0] & 0xe0) == 0x20 || - (a->as_u8[0] & 0xfe) == 0xfc) { - result = a; - break; - } - })); - return result; -} - -static inline void copy_ip6_address (ip6_address_t *dst, ip6_address_t *src) -{ - - dst->as_u64[0] = src->as_u64[0]; - dst->as_u64[1] = src->as_u64[1]; -} - -static inline dhcpv6_vss_info * -dhcpv6_get_vss_info (dhcpv6_proxy_main_t *dm, - u32 rx_fib_index) -{ - dhcpv6_vss_info *v; - - if (vec_len(dm->vss_index_by_rx_fib_index) <= rx_fib_index || - dm->vss_index_by_rx_fib_index[rx_fib_index] == ~0) - { - v = NULL; - } - else - { - v = pool_elt_at_index (dm->vss, - dm->vss_index_by_rx_fib_index[rx_fib_index]); - } - - return (v); -} - -static inline dhcpv6_server_t * -dhcpv6_get_server (dhcpv6_proxy_main_t *dm, - u32 rx_fib_index) -{ - dhcpv6_server_t *s = NULL; - - if (vec_len(dm->dhcp6_server_index_by_rx_fib_index) > rx_fib_index && - dm->dhcp6_server_index_by_rx_fib_index[rx_fib_index] != ~0) - { - s = pool_elt_at_index (dm->dhcp6_servers, - dm->dhcp6_server_index_by_rx_fib_index[rx_fib_index]); - } - - return (s); -} - -static uword -dhcpv6_proxy_to_server_input 
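(Illustrative aside.) ip6_interface_first_global_or_site_address() above accepts an address only when its first byte falls in 2000::/3 (global unicast) or fc00::/7 (unique local); the two bit-masks do exactly that. A tiny standalone check of the predicate; is_global_or_site is a made-up name:

    #include <stdint.h>
    #include <stdio.h>

    static int
    is_global_or_site (uint8_t first_byte)
    {
      return ((first_byte & 0xe0) == 0x20) ||   /* 0x20..0x3f -> 2000::/3 */
             ((first_byte & 0xfe) == 0xfc);     /* 0xfc, 0xfd  -> fc00::/7 */
    }

    int
    main (void)
    {
      /* 2001:db8::1 starts with 0x20, fd00::1 with 0xfd, fe80::1 with 0xfe */
      printf ("2001:db8::1 -> %d\n", is_global_or_site (0x20));  /* 1 */
      printf ("fd00::1     -> %d\n", is_global_or_site (0xfd));  /* 1 */
      printf ("fe80::1     -> %d\n", is_global_or_site (0xfe));  /* 0 */
      return 0;
    }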
(vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * from_frame) -{ - u32 n_left_from, next_index, * from, * to_next; - dhcpv6_proxy_main_t * dpm = &dhcpv6_proxy_main; - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - u32 pkts_to_server=0, pkts_to_client=0, pkts_no_server=0; - u32 pkts_no_interface_address=0, pkts_no_exceeding_max_hop=0; - u32 pkts_no_src_address=0; - u32 pkts_wrong_msg_type=0; - u32 pkts_too_big=0; - ip6_main_t * im = &ip6_main; - ip6_address_t * src; - int bogus_length; - dhcpv6_server_t * server; - u32 rx_fib_idx = 0, server_fib_idx = 0; - - next_index = node->cached_next_index; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, - to_next, n_left_to_next); - - while (n_left_from > 0 && n_left_to_next > 0) - { - vnet_main_t *vnm = vnet_get_main(); - u32 sw_if_index = 0; - u32 rx_sw_if_index = 0; - vnet_sw_interface_t *swif; - u32 bi0; - vlib_buffer_t * b0; - udp_header_t * u0, *u1; - dhcpv6_header_t * h0; // client msg hdr - ip6_header_t * ip0, *ip1; - ip6_address_t _ia0, *ia0=&_ia0; - u32 next0; - u32 error0 = (u32) ~0; - dhcpv6_option_t *fwd_opt; - dhcpv6_relay_hdr_t *r1; - u16 len; - dhcpv6_int_id_t *id1; - dhcpv6_vss_t *vss1; - dhcpv6_client_mac_t *cmac; // client mac - ethernet_header_t * e_h0; - u8 client_src_mac[6]; - vlib_buffer_free_list_t *fl; - dhcpv6_vss_info *vss; - - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - - h0 = vlib_buffer_get_current (b0); - - /* - * udp_local hands us the DHCPV6 header. - */ - u0 = (void *)h0 -(sizeof(*u0)); - ip0 = (void *)u0 -(sizeof(*ip0)); - e_h0 = (void *)ip0 - ethernet_buffer_header_size(b0); - - clib_memcpy(client_src_mac, e_h0->src_address, 6); - - switch (h0->u.msg_type) { - case DHCPV6_MSG_SOLICIT: - case DHCPV6_MSG_REQUEST: - case DHCPV6_MSG_CONFIRM: - case DHCPV6_MSG_RENEW: - case DHCPV6_MSG_REBIND: - case DHCPV6_MSG_RELEASE: - case DHCPV6_MSG_DECLINE: - case DHCPV6_MSG_INFORMATION_REQUEST: - case DHCPV6_MSG_RELAY_FORW: - /* send to server */ - break; - case DHCPV6_MSG_RELAY_REPL: - /* send to client */ - next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_SEND_TO_CLIENT; - error0 = 0; - pkts_to_client++; - goto do_enqueue; - default: - /* drop the packet */ - pkts_wrong_msg_type++; - error0 = DHCPV6_PROXY_ERROR_WRONG_MESSAGE_TYPE; - next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP; - goto do_trace; - - } - - /* Send to DHCPV6 server via the configured FIB */ - rx_sw_if_index = sw_if_index = vnet_buffer(b0)->sw_if_index[VLIB_RX]; - rx_fib_idx = im->fib_index_by_sw_if_index [rx_sw_if_index]; - server = dhcpv6_get_server(dpm, rx_fib_idx); - - if (PREDICT_FALSE (NULL == server)) - { - error0 = DHCPV6_PROXY_ERROR_NO_SERVER; - next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP; - pkts_no_server++; - goto do_trace; - } - - server_fib_idx = server->server_fib6_index; - vnet_buffer(b0)->sw_if_index[VLIB_TX] = server_fib_idx; - - - /* relay-option header pointer */ - vlib_buffer_advance(b0, -(sizeof(*fwd_opt))); - fwd_opt = vlib_buffer_get_current(b0); - /* relay message header pointer */ - vlib_buffer_advance(b0, -(sizeof(*r1))); - r1 = vlib_buffer_get_current(b0); - - vlib_buffer_advance(b0, -(sizeof(*u1))); - u1 = vlib_buffer_get_current(b0); - - vlib_buffer_advance(b0, -(sizeof(*ip1))); - ip1 = vlib_buffer_get_current(b0); - - /* fill in all that rubbish... 
*/ - len = clib_net_to_host_u16(u0->length) - sizeof(udp_header_t); - copy_ip6_address(&r1->peer_addr, &ip0->src_address); - - r1->msg_type = DHCPV6_MSG_RELAY_FORW; - fwd_opt->length = clib_host_to_net_u16(len); - fwd_opt->option = clib_host_to_net_u16(DHCPV6_OPTION_RELAY_MSG); - - r1->hop_count++; - r1->hop_count = (h0->u.msg_type != DHCPV6_MSG_RELAY_FORW) ? 0 : r1->hop_count; - - if (PREDICT_FALSE(r1->hop_count >= HOP_COUNT_LIMIT)) - { - error0 = DHCPV6_RELAY_PKT_DROP_MAX_HOPS; - next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP; - pkts_no_exceeding_max_hop++; - goto do_trace; - } - - - /* If relay-fwd and src address is site or global unicast address */ - if (h0->u.msg_type == DHCPV6_MSG_RELAY_FORW && - ((ip0->src_address.as_u8[0] & 0xe0) == 0x20 || - (ip0->src_address.as_u8[0] & 0xfe) == 0xfc)) - { - /* Set link address to zero */ - r1->link_addr.as_u64[0] = 0; - r1->link_addr.as_u64[1] = 0; - goto link_address_set; - } - - /* if receiving interface is unnumbered, use receiving interface - * IP address as link address, otherwise use the loopback interface - * IP address as link address. - */ - - swif = vnet_get_sw_interface (vnm, rx_sw_if_index); - if (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) - sw_if_index = swif->unnumbered_sw_if_index; - - ia0 = ip6_interface_first_global_or_site_address(&ip6_main, sw_if_index); - if (ia0 == 0) - { - error0 = DHCPV6_PROXY_ERROR_NO_INTERFACE_ADDRESS; - next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP; - pkts_no_interface_address++; - goto do_trace; - } - - copy_ip6_address(&r1->link_addr, ia0); - - link_address_set: - fl = vlib_buffer_get_free_list (vm, b0->free_list_index); - - if ((b0->current_length+sizeof(*id1)+sizeof(*vss1)+sizeof(*cmac)) - > fl->n_data_bytes) - { - error0 = DHCPV6_PROXY_ERROR_PKT_TOO_BIG; - next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP; - pkts_too_big++; - goto do_trace; - } - - id1 = (dhcpv6_int_id_t *) (((uword) ip1) + b0->current_length); - b0->current_length += (sizeof (*id1)); - - id1->opt.option = clib_host_to_net_u16(DHCPV6_OPTION_INTERFACE_ID); - id1->opt.length = clib_host_to_net_u16(sizeof(rx_sw_if_index)); - id1->int_idx = clib_host_to_net_u32(rx_sw_if_index); - - u1->length =0; - if (h0->u.msg_type != DHCPV6_MSG_RELAY_FORW) - { - cmac = (dhcpv6_client_mac_t *) (((uword) ip1) + b0->current_length); - b0->current_length += (sizeof (*cmac)); - cmac->opt.length =clib_host_to_net_u16(sizeof(*cmac) - - sizeof(cmac->opt)); - cmac->opt.option = clib_host_to_net_u16(DHCPV6_OPTION_CLIENT_LINK_LAYER_ADDRESS); - cmac->link_type = clib_host_to_net_u16(1); // ethernet - clib_memcpy(cmac->data, client_src_mac, 6); - u1->length += sizeof(*cmac); - } - - //TODO: Revisit if hash makes sense here - vss = dhcpv6_get_vss_info(dpm, rx_fib_idx); - - if (NULL != vss) { - vss1 = (dhcpv6_vss_t *) (((uword) ip1) + b0->current_length); - b0->current_length += (sizeof (*vss1)); - vss1->opt.length =clib_host_to_net_u16(sizeof(*vss1) - - sizeof(vss1->opt)); - vss1->opt.option = clib_host_to_net_u16(DHCPV6_OPTION_VSS); - vss1->data[0] = 1; // type - vss1->data[1] = vss->vpn_id.oui >>16 & 0xff; - vss1->data[2] = vss->vpn_id.oui >>8 & 0xff; - vss1->data[3] = vss->vpn_id.oui & 0xff; - vss1->data[4] = vss->vpn_id.fib_id >> 24 & 0xff; - vss1->data[5] = vss->vpn_id.fib_id >> 16 & 0xff; - vss1->data[6] = vss->vpn_id.fib_id >> 8 & 0xff; - vss1->data[7] = vss->vpn_id.fib_id & 0xff; - u1->length += sizeof(*vss1); - } - - pkts_to_server++; - u1->checksum = 0; - u1->src_port = clib_host_to_net_u16(UDP_DST_PORT_dhcpv6_to_client); - u1->dst_port = 
clib_host_to_net_u16(UDP_DST_PORT_dhcpv6_to_server); - - u1->length = - clib_host_to_net_u16( clib_net_to_host_u16(fwd_opt->length) + - sizeof(*r1) + sizeof(*fwd_opt) + - sizeof(*u1) + sizeof(*id1) + u1->length); - - memset(ip1, 0, sizeof(*ip1)); - ip1->ip_version_traffic_class_and_flow_label = 0x60; - ip1->payload_length = u1->length; - ip1->protocol = PROTO_UDP; - ip1->hop_limit = HOP_COUNT_LIMIT; - src = (server->dhcp6_server.as_u64[0] || server->dhcp6_server.as_u64[1]) ? - &server->dhcp6_server : &dpm->all_dhcpv6_server_address; - copy_ip6_address(&ip1->dst_address, src); - - - ia0 = ip6_interface_first_global_or_site_address - (&ip6_main, vnet_buffer(b0)->sw_if_index[VLIB_RX]); - - src = (server->dhcp6_src_address.as_u64[0] || server->dhcp6_src_address.as_u64[1]) ? - &server->dhcp6_src_address : ia0; - if (ia0 == 0) - { - error0 = DHCPV6_PROXY_ERROR_NO_SRC_ADDRESS; - next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP; - pkts_no_src_address++; - goto do_trace; - } - - copy_ip6_address (&ip1->src_address, src); - - - u1->checksum = ip6_tcp_udp_icmp_compute_checksum(vm, b0, ip1, - &bogus_length); - ASSERT(bogus_length == 0); - - next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_LOOKUP; - - do_trace: - if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) - { - dhcpv6_proxy_trace_t *tr = vlib_add_trace (vm, node, - b0, sizeof (*tr)); - tr->which = 0; /* to server */ - tr->error = error0; - tr->original_sw_if_index = rx_sw_if_index; - tr->sw_if_index = sw_if_index; - if (DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_LOOKUP == next0) - copy_ip6_address((ip6_address_t *)&tr->packet_data[0], &server->dhcp6_server); - } - - do_enqueue: - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - } - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - vlib_node_increment_counter (vm, dhcpv6_proxy_to_server_node.index, - DHCPV6_PROXY_ERROR_RELAY_TO_CLIENT, - pkts_to_client); - vlib_node_increment_counter (vm, dhcpv6_proxy_to_server_node.index, - DHCPV6_PROXY_ERROR_RELAY_TO_SERVER, - pkts_to_server); - vlib_node_increment_counter (vm, dhcpv6_proxy_to_server_node.index, - DHCPV6_PROXY_ERROR_NO_INTERFACE_ADDRESS, - pkts_no_interface_address); - vlib_node_increment_counter (vm, dhcpv6_proxy_to_server_node.index, - DHCPV6_PROXY_ERROR_WRONG_MESSAGE_TYPE, - pkts_wrong_msg_type); - vlib_node_increment_counter (vm, dhcpv6_proxy_to_server_node.index, - DHCPV6_PROXY_ERROR_NO_SRC_ADDRESS, - pkts_no_src_address); - vlib_node_increment_counter (vm, dhcpv6_proxy_to_server_node.index, - DHCPV6_PROXY_ERROR_PKT_TOO_BIG, - pkts_too_big); - return from_frame->n_vectors; -} - -VLIB_REGISTER_NODE (dhcpv6_proxy_to_server_node) = { - .function = dhcpv6_proxy_to_server_input, - .name = "dhcpv6-proxy-to-server", - /* Takes a vector of packets. 
*/ - .vector_size = sizeof (u32), - - .n_errors = DHCPV6_PROXY_N_ERROR, - .error_strings = dhcpv6_proxy_error_strings, - - .n_next_nodes = DHCPV6_PROXY_TO_SERVER_INPUT_N_NEXT, - .next_nodes = { -#define _(s,n) [DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_##s] = n, - foreach_dhcpv6_proxy_to_server_input_next -#undef _ - }, - - .format_buffer = format_dhcpv6_proxy_header_with_length, - .format_trace = format_dhcpv6_proxy_trace, -#if 0 - .unformat_buffer = unformat_dhcpv6_proxy_header, -#endif -}; - -static uword -dhcpv6_proxy_to_client_input (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * from_frame) -{ - - u32 n_left_from, * from; - ethernet_main_t *em = ethernet_get_main (vm); - dhcpv6_proxy_main_t * dm = &dhcpv6_proxy_main; - dhcpv6_server_t * server; - vnet_main_t * vnm = vnet_get_main(); - int bogus_length; - - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - - while (n_left_from > 0) - { - u32 bi0; - vlib_buffer_t * b0; - udp_header_t * u0, *u1=0; - dhcpv6_relay_hdr_t * h0; - ip6_header_t * ip1 = 0, *ip0; - ip6_address_t _ia0, * ia0 = &_ia0; - ip6_address_t client_address; - ethernet_interface_t *ei0; - ethernet_header_t *mac0; - vnet_hw_interface_t *hi0; - vlib_frame_t *f0; - u32 * to_next0; - u32 sw_if_index = ~0; - u32 original_sw_if_index = ~0; - vnet_sw_interface_t *si0; - u32 error0 = (u32)~0; - vnet_sw_interface_t *swif; - dhcpv6_option_t *r0 = 0, *o; - u16 len = 0; - u8 interface_opt_flag = 0; - u8 relay_msg_opt_flag = 0; - ip6_main_t * im = &ip6_main; - u32 server_fib_idx, client_fib_idx; - - bi0 = from[0]; - from += 1; - n_left_from -= 1; - - b0 = vlib_get_buffer (vm, bi0); - h0 = vlib_buffer_get_current (b0); - - if (DHCPV6_MSG_RELAY_REPL != h0->msg_type) - { - error0 = DHCPV6_PROXY_ERROR_WRONG_MESSAGE_TYPE; - - drop_packet: - vlib_node_increment_counter (vm, dhcpv6_proxy_to_client_node.index, - error0, 1); - - f0 = vlib_get_frame_to_node (vm, dm->error_drop_node_index); - to_next0 = vlib_frame_vector_args (f0); - to_next0[0] = bi0; - f0->n_vectors = 1; - vlib_put_frame_to_node (vm, dm->error_drop_node_index, f0); - goto do_trace; - } - /* hop count seems not need to be checked */ - if (HOP_COUNT_LIMIT < h0->hop_count) - { - error0 = DHCPV6_RELAY_PKT_DROP_MAX_HOPS; - goto drop_packet; - } - u0 = (void *)h0 -(sizeof(*u0)); - ip0 = (void *)u0 -(sizeof(*ip0)); - - vlib_buffer_advance (b0, sizeof(*h0)); - o = vlib_buffer_get_current (b0); - - /* Parse through TLVs looking for option 18 (DHCPV6_OPTION_INTERFACE_ID) - _and_ option 9 (DHCPV6_OPTION_RELAY_MSG) option which must be there. 
- Currently assuming no other options need to be processed - The interface-ID is the FIB number we need - to track down the client-facing interface */ - - while ((u8 *) o < (b0->data + b0->current_data + b0->current_length)) - { - if (DHCPV6_OPTION_INTERFACE_ID == clib_net_to_host_u16(o->option)) - { - interface_opt_flag = 1; - if (clib_net_to_host_u16(o->length) == sizeof(sw_if_index)) - sw_if_index = clib_net_to_host_u32(((dhcpv6_int_id_t*)o)->int_idx); - if (sw_if_index >= vec_len (im->fib_index_by_sw_if_index)) - { - error0 = DHCPV6_PROXY_ERROR_WRONG_INTERFACE_ID_OPTION; - goto drop_packet; - } - } - if (DHCPV6_OPTION_RELAY_MSG == clib_net_to_host_u16(o->option)) - { - relay_msg_opt_flag = 1; - r0 = vlib_buffer_get_current (b0); - } - if ((relay_msg_opt_flag == 1) && (interface_opt_flag == 1)) - break; - vlib_buffer_advance (b0, sizeof(*o) + clib_net_to_host_u16(o->length)); - o = (dhcpv6_option_t *) (((uword) o) + clib_net_to_host_u16(o->length) + sizeof(*o)); - } - - if ((relay_msg_opt_flag == 0) || (r0 == 0)) - { - error0 = DHCPV6_PROXY_ERROR_NO_RELAY_MESSAGE_OPTION; - goto drop_packet; - } - - if ((u32)~0 == sw_if_index) - { - error0 = DHCPV6_PROXY_ERROR_NO_CIRCUIT_ID_OPTION; - goto drop_packet; - } - - //Advance buffer to start of encapsulated DHCPv6 message - vlib_buffer_advance (b0, sizeof(*r0)); - - client_fib_idx = im->fib_index_by_sw_if_index[sw_if_index]; - server = dhcpv6_get_server(dm, client_fib_idx); - - if (NULL == server) - { - error0 = DHCPV6_PROXY_ERROR_NO_SERVER; - goto drop_packet; - } - - server_fib_idx = im->fib_index_by_sw_if_index - [vnet_buffer(b0)->sw_if_index[VLIB_RX]]; - - if (server_fib_idx != server->server_fib6_index || - ip0->src_address.as_u64[0] != server->dhcp6_server.as_u64[0] || - ip0->src_address.as_u64[1] != server->dhcp6_server.as_u64[1]) - { - //drop packet if not from server with configured address or FIB - error0 = DHCPV6_PROXY_ERROR_BAD_SVR_FIB_OR_ADDRESS; - goto drop_packet; - } - - vnet_buffer (b0)->sw_if_index[VLIB_TX] = original_sw_if_index - = sw_if_index; - - swif = vnet_get_sw_interface (vnm, original_sw_if_index); - if (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) - sw_if_index = swif->unnumbered_sw_if_index; - - - /* - * udp_local hands us the DHCPV6 header, need udp hdr, - * ip hdr to relay to client - */ - vlib_buffer_advance (b0, -(sizeof(*u1))); - u1 = vlib_buffer_get_current (b0); - - vlib_buffer_advance (b0, -(sizeof(*ip1))); - ip1 = vlib_buffer_get_current (b0); - - copy_ip6_address(&client_address, &h0->peer_addr); - - ia0 = ip6_interface_first_address (&ip6_main, sw_if_index); - if (ia0 == 0) - { - error0 = DHCPV6_PROXY_ERROR_NO_INTERFACE_ADDRESS; - goto drop_packet; - } - - len = clib_net_to_host_u16(r0->length); - memset(ip1, 0, sizeof(*ip1)); - copy_ip6_address(&ip1->dst_address, &client_address); - u1->checksum = 0; - u1->src_port = clib_net_to_host_u16 (UDP_DST_PORT_dhcpv6_to_server); - u1->dst_port = clib_net_to_host_u16 (UDP_DST_PORT_dhcpv6_to_client); - u1->length = clib_host_to_net_u16 (len + sizeof(udp_header_t)); - - ip1->ip_version_traffic_class_and_flow_label = - ip0->ip_version_traffic_class_and_flow_label & - 0x00000fff; - ip1->payload_length = u1->length; - ip1->protocol = PROTO_UDP; - ip1->hop_limit = HOP_COUNT_LIMIT; - copy_ip6_address(&ip1->src_address, ia0); - - u1->checksum = ip6_tcp_udp_icmp_compute_checksum(vm, b0, ip1, - &bogus_length); - ASSERT(bogus_length == 0); - - vlib_buffer_advance (b0, -(sizeof(ethernet_header_t))); - si0 = vnet_get_sw_interface (vnm, original_sw_if_index); - if 
(si0->type == VNET_SW_INTERFACE_TYPE_SUB) - vlib_buffer_advance (b0, -4 /* space for VLAN tag */); - - mac0 = vlib_buffer_get_current (b0); - - hi0 = vnet_get_sup_hw_interface (vnm, original_sw_if_index); - ei0 = pool_elt_at_index (em->interfaces, hi0->hw_instance); - clib_memcpy (mac0->src_address, ei0->address, sizeof (ei0->address)); - memset (&mac0->dst_address, 0xff, sizeof (mac0->dst_address)); - mac0->type = (si0->type == VNET_SW_INTERFACE_TYPE_SUB) ? - clib_net_to_host_u16(0x8100) : clib_net_to_host_u16 (0x86dd); - - if (si0->type == VNET_SW_INTERFACE_TYPE_SUB) - { - u32 * vlan_tag = (u32 *)(mac0+1); - u32 tmp; - tmp = (si0->sub.id << 16) | 0x0800; - *vlan_tag = clib_host_to_net_u32 (tmp); - } - - /* $$$ consider adding a dynamic next to the graph node, for performance */ - f0 = vlib_get_frame_to_node (vm, hi0->output_node_index); - to_next0 = vlib_frame_vector_args (f0); - to_next0[0] = bi0; - f0->n_vectors = 1; - vlib_put_frame_to_node (vm, hi0->output_node_index, f0); - - do_trace: - if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) - { - dhcpv6_proxy_trace_t *tr = vlib_add_trace (vm, node, - b0, sizeof (*tr)); - tr->which = 1; /* to client */ - if (ia0) - copy_ip6_address((ip6_address_t*)tr->packet_data, ia0); - tr->error = error0; - tr->original_sw_if_index = original_sw_if_index; - tr->sw_if_index = sw_if_index; - } - } - return from_frame->n_vectors; - -} - -VLIB_REGISTER_NODE (dhcpv6_proxy_to_client_node) = { - .function = dhcpv6_proxy_to_client_input, - .name = "dhcpv6-proxy-to-client", - /* Takes a vector of packets. */ - .vector_size = sizeof (u32), - - .n_errors = DHCPV6_PROXY_N_ERROR, - .error_strings = dhcpv6_proxy_error_strings, - .format_buffer = format_dhcpv6_proxy_header_with_length, - .format_trace = format_dhcpv6_proxy_trace, -#if 0 - .unformat_buffer = unformat_dhcpv6_proxy_header, -#endif -}; - -clib_error_t * dhcpv6_proxy_init (vlib_main_t * vm) -{ - dhcpv6_proxy_main_t * dm = &dhcpv6_proxy_main; - vlib_node_t * error_drop_node; - dhcpv6_server_t * server; - - dm->vlib_main = vm; - dm->vnet_main = vnet_get_main(); - error_drop_node = vlib_get_node_by_name (vm, (u8 *) "error-drop"); - dm->error_drop_node_index = error_drop_node->index; - - dm->vss_index_by_rx_fib_index = NULL; - - /* RFC says this is the dhcpv6 server address */ - dm->all_dhcpv6_server_address.as_u64[0] = clib_host_to_net_u64 (0xFF05000000000000); - dm->all_dhcpv6_server_address.as_u64[1] = clib_host_to_net_u64 (0x00010003); - - /* RFC says this is the server and agent address */ - dm->all_dhcpv6_server_relay_agent_address.as_u64[0] = clib_host_to_net_u64 (0xFF02000000000000); - dm->all_dhcpv6_server_relay_agent_address.as_u64[1] = clib_host_to_net_u64 (0x00010002); - - udp_register_dst_port (vm, UDP_DST_PORT_dhcpv6_to_client, - dhcpv6_proxy_to_client_node.index, 0 /* is_ip6 */); - - udp_register_dst_port (vm, UDP_DST_PORT_dhcpv6_to_server, - dhcpv6_proxy_to_server_node.index, 0 /* is_ip6 */); - - /* Create the default server, don't mark it valid */ - pool_get (dm->dhcp6_servers, server); - memset (server, 0, sizeof (*server)); - - return 0; -} - -VLIB_INIT_FUNCTION (dhcpv6_proxy_init); - -int dhcpv6_proxy_set_server (ip6_address_t *addr, - ip6_address_t *src_address, - u32 rx_fib_id, - u32 server_fib_id, - int is_del) -{ - dhcpv6_proxy_main_t * dm = &dhcpv6_proxy_main; - dhcpv6_server_t * server = 0; - u32 rx_fib_index = 0; - int rc = 0; - - rx_fib_index = ip6_mfib_table_find_or_create_and_lock(rx_fib_id); - - const mfib_prefix_t all_dhcp_servers = { - .fp_len = 128, - .fp_proto = 
FIB_PROTOCOL_IP6, - .fp_grp_addr = { - .ip6 = dm->all_dhcpv6_server_relay_agent_address, - } - }; - - if (is_del) - { - server = dhcpv6_get_server(dm, rx_fib_index); - - if (NULL == server) - { - rc = VNET_API_ERROR_NO_SUCH_ENTRY; - goto out; - } - - /* - * release the locks held on the server fib and rx mfib - */ - mfib_table_entry_delete(rx_fib_index, - &all_dhcp_servers, - MFIB_SOURCE_DHCP); - mfib_table_unlock(rx_fib_index, FIB_PROTOCOL_IP6); - fib_table_unlock(server->server_fib6_index, FIB_PROTOCOL_IP6); - - dm->dhcp6_server_index_by_rx_fib_index[rx_fib_index] = ~0; - - memset (server, 0, sizeof (*server)); - pool_put (dm->dhcp6_servers, server); - } - else - { - if (addr->as_u64[0] == 0 && - addr->as_u64[1] == 0 ) - { - rc = VNET_API_ERROR_INVALID_DST_ADDRESS; - goto out; - } - if (src_address->as_u64[0] == 0 && - src_address->as_u64[1] == 0) - { - rc = VNET_API_ERROR_INVALID_SRC_ADDRESS; - goto out; - } - - server = dhcpv6_get_server(dm, rx_fib_index); - - if (NULL != server) - { - /* modify of an existing entry */ - ip6_fib_t *fib; - - fib = ip6_fib_get(server->server_fib6_index); - - if (fib->table_id != server_fib_id) - { - /* swap tables */ - fib_table_unlock(server->server_fib6_index, FIB_PROTOCOL_IP6); - server->server_fib6_index = - ip6_fib_table_find_or_create_and_lock(server_fib_id); - } - } - else - { - /* Allocate a new server */ - pool_get (dm->dhcp6_servers, server); - - vec_validate_init_empty (dm->dhcp6_server_index_by_rx_fib_index, - rx_fib_index, ~0); - dm->dhcp6_server_index_by_rx_fib_index[rx_fib_index] = - server - dm->dhcp6_servers; - - server->server_fib6_index = - ip6_fib_table_find_or_create_and_lock(server_fib_id); - mfib_table_lock(rx_fib_index, FIB_PROTOCOL_IP6); - - const mfib_prefix_t all_dhcp_servers = { - .fp_len = 128, - .fp_proto = FIB_PROTOCOL_IP6, - .fp_grp_addr = { - .ip6 = dm->all_dhcpv6_server_relay_agent_address, - } - }; - const fib_route_path_t path_for_us = { - .frp_proto = FIB_PROTOCOL_IP6, - .frp_addr = zero_addr, - .frp_sw_if_index = 0xffffffff, - .frp_fib_index = ~0, - .frp_weight = 0, - .frp_flags = FIB_ROUTE_PATH_LOCAL, - }; - mfib_table_entry_path_update(rx_fib_index, - &all_dhcp_servers, - MFIB_SOURCE_DHCP, - &path_for_us, - MFIB_ITF_FLAG_FORWARD); - /* - * Each interface that is enabled in this table, needs to be added - * as an accepting interface, but this is not easily doable in VPP. - * So we cheat. Add a flag to the entry that indicates accept form - * any interface. - * We will still only accept on v6 enabled interfaces, since the - * input feature ensures this. 
- */ - mfib_table_entry_update(rx_fib_index, - &all_dhcp_servers, - MFIB_SOURCE_DHCP, - MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF); - } - copy_ip6_address(&server->dhcp6_server, addr); - copy_ip6_address(&server->dhcp6_src_address, src_address); - } - -out: - mfib_table_unlock(rx_fib_index, FIB_PROTOCOL_IP6); - - return (rc); -} - -static clib_error_t * -dhcpv6_proxy_set_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - ip6_address_t addr, src_addr; - int set_server = 0, set_src_address = 0; - u32 rx_fib_id = 0, server_fib_id = 0; - int is_del = 0; - - while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "server %U", - unformat_ip6_address, &addr)) - set_server = 1; - else if (unformat(input, "src-address %U", - unformat_ip6_address, &src_addr)) - set_src_address =1; - else if (unformat (input, "server-fib-id %d", &server_fib_id)) - ; - else if (unformat (input, "rx-fib-id %d", &rx_fib_id)) - ; - else if (unformat (input, "delete") || - unformat (input, "del")) - is_del = 1; - else - break; - } - - if (is_del || (set_server && set_src_address)) - { - int rv; - - rv = dhcpv6_proxy_set_server (&addr, &src_addr, rx_fib_id, - server_fib_id, is_del); - - //TODO: Complete the errors - switch (rv) - { - case 0: - return 0; - - case -1: - return clib_error_return (0, "FIB id %d does not exist", server_fib_id); - - default: - return clib_error_return (0, "BUG: rv %d", rv); - } - } - else - return clib_error_return (0, "parse error`%U'", - format_unformat_error, input); -} - -VLIB_CLI_COMMAND (dhcpv6_proxy_set_command, static) = { - .path = "set dhcpv6 proxy", - .short_help = "set dhcpv6 proxy [del] server src-address " - "[server-fib-id ] [rx-fib-id ] ", - .function = dhcpv6_proxy_set_command_fn, -}; - -u8 * format_dhcpv6_proxy_server (u8 * s, va_list * args) -{ - dhcpv6_proxy_main_t * dm = va_arg (*args, dhcpv6_proxy_main_t *); - dhcpv6_server_t * server = va_arg (*args, dhcpv6_server_t *); - u32 rx_fib_index = va_arg (*args, u32); - ip6_fib_t * rx_fib, * server_fib; - u32 server_fib_id = (u32)~0, rx_fib_id = ~0; - - if (dm == 0) - { - s = format (s, "%=40s%=40s%=14s%=14s", "Server Address", "Source Address", - "Server FIB", "RX FIB"); - return s; - } - - server_fib = ip6_fib_get(server->server_fib6_index); - if (server_fib) - server_fib_id= server_fib->table_id; - - rx_fib= ip6_fib_get(rx_fib_index); - - if (rx_fib) - rx_fib_id = rx_fib->table_id; - - s = format (s, "%=40U%=40U%=14u%=14u", - format_ip6_address, &server->dhcp6_server, - format_ip6_address, &server->dhcp6_src_address, - server_fib_id, rx_fib_id); - return s; -} - -static clib_error_t * -dhcpv6_proxy_show_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - dhcpv6_proxy_main_t * dpm = &dhcpv6_proxy_main; - int i; - u32 server_index; - dhcpv6_server_t * server; - - vlib_cli_output (vm, "%U", format_dhcpv6_proxy_server, 0 /* header line */, - 0, 0); - vec_foreach_index (i, dpm->dhcp6_server_index_by_rx_fib_index) - { - server_index = dpm->dhcp6_server_index_by_rx_fib_index[i]; - if (~0 == server_index) - continue; - - server = pool_elt_at_index (dpm->dhcp6_servers, server_index); - - vlib_cli_output (vm, "%U", format_dhcpv6_proxy_server, dpm, - server, i); - } - - return 0; -} - -VLIB_CLI_COMMAND (dhcpv6_proxy_show_command, static) = { - .path = "show dhcpv6 proxy", - .short_help = "Display dhcpv6 proxy info", - .function = dhcpv6_proxy_show_command_fn, -}; - -void -dhcpv6_proxy_dump (void *opaque, - u32 context) -{ - 
dhcpv6_proxy_main_t * dpm = &dhcpv6_proxy_main; - ip6_fib_t *s_fib, *r_fib; - dhcpv6_server_t * server; - u32 server_index, i; - dhcpv6_vss_info *v; - - vec_foreach_index (i, dpm->dhcp6_server_index_by_rx_fib_index) - { - server_index = dpm->dhcp6_server_index_by_rx_fib_index[i]; - if (~0 == server_index) - continue; - - server = pool_elt_at_index (dpm->dhcp6_servers, server_index); - v = dhcpv6_get_vss_info(dpm, i); - - ip46_address_t src_addr = { - .ip6 = server->dhcp6_src_address, - }; - ip46_address_t server_addr = { - .ip6 = server->dhcp6_server, - }; - - s_fib = ip6_fib_get(server->server_fib6_index); - r_fib = ip6_fib_get(i); - - dhcp_send_details(opaque, - context, - &server_addr, - &src_addr, - s_fib->table_id, - r_fib->table_id, - (v ? v->vpn_id.fib_id : 0), - (v ? v->vpn_id.oui : 0)); - } -} - -int dhcpv6_proxy_set_vss(u32 tbl_id, - u32 oui, - u32 fib_id, - int is_del) -{ - dhcpv6_proxy_main_t *dm = &dhcpv6_proxy_main; - dhcpv6_vss_info *v = NULL; - u32 rx_fib_index; - int rc = 0; - - rx_fib_index = ip6_fib_table_find_or_create_and_lock(tbl_id); - v = dhcpv6_get_vss_info(dm, rx_fib_index); - - if (NULL != v) - { - if (is_del) - { - /* release the lock held on the table when the VSS - * info was created */ - fib_table_unlock (rx_fib_index, - FIB_PROTOCOL_IP6); - - pool_put (dm->vss, v); - dm->vss_index_by_rx_fib_index[rx_fib_index] = ~0; - } - else - { - /* this is a modify */ - v->vpn_id.fib_id = fib_id; - v->vpn_id.oui = oui; - } - } - else - { - if (is_del) - rc = VNET_API_ERROR_NO_SUCH_ENTRY; - else - { - /* create a new entry */ - vec_validate_init_empty(dm->vss_index_by_rx_fib_index, - rx_fib_index, ~0); - - /* hold a lock on the table whilst the VSS info exist */ - fib_table_lock (rx_fib_index, - FIB_PROTOCOL_IP6); - - pool_get (dm->vss, v); - v->vpn_id.fib_id = fib_id; - v->vpn_id.oui = oui; - dm->vss_index_by_rx_fib_index[rx_fib_index] = v - dm->vss; - } - } - - /* Release the lock taken during the create_or_lock at the start */ - fib_table_unlock (rx_fib_index, - FIB_PROTOCOL_IP6); - - return (rc); -} - - -static clib_error_t * -dhcpv6_vss_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - int is_del = 0, got_new_vss=0; - u32 oui=0; - u32 fib_id=0, tbl_id=~0; - - while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "oui %d", &oui)) - got_new_vss = 1; - else if (unformat (input, "vpn-id %d", &fib_id)) - got_new_vss = 1; - else if (unformat (input, "table %d", &tbl_id)) - got_new_vss = 1; - else if (unformat(input, "delete") || unformat(input, "del")) - is_del = 1; - else - break; - } - - if (tbl_id ==~0) - return clib_error_return (0, "no table ID specified."); - - if (is_del || got_new_vss) - { - int rv; - - rv = dhcpv6_proxy_set_vss(tbl_id, oui, fib_id, is_del); - switch (rv) - { - case 0: - return 0; - - case VNET_API_ERROR_NO_SUCH_FIB: - return clib_error_return (0, "vss info (oui:%d, vpn-id:%d) not found in table %d.", - oui, fib_id, tbl_id); - - case VNET_API_ERROR_NO_SUCH_ENTRY: - return clib_error_return (0, "vss for table %d not found in pool.", - tbl_id); - - default: - return clib_error_return (0, "BUG: rv %d", rv); - } - } - else - return clib_error_return (0, "parse error`%U'", - format_unformat_error, input); - -} - -VLIB_CLI_COMMAND (dhcpv6_proxy_vss_command, static) = { - .path = "set dhcpv6 vss", - .short_help = "set dhcpv6 vss table oui vpn-idx ", - .function = dhcpv6_vss_command_fn, -}; - -static clib_error_t * -dhcpv6_vss_show_command_fn (vlib_main_t * vm, - unformat_input_t * 
input, - vlib_cli_command_t * cmd) - -{ - dhcpv6_proxy_main_t * dm = &dhcpv6_proxy_main; - dhcpv6_vss_info *v; - ip6_fib_t *fib; - u32 *fib_index; - - vlib_cli_output (vm, "%=6s%=6s%=12s","Table", "OUI", "VPN ID"); - pool_foreach (fib_index, dm->vss_index_by_rx_fib_index, - ({ - fib = ip6_fib_get (*fib_index); - v = pool_elt_at_index (dm->vss, *fib_index); - - vlib_cli_output (vm, "%=6d%=6d%=12d", - fib->table_id, - v->vpn_id.oui, - v->vpn_id.fib_id); - })); - - return 0; -} - -VLIB_CLI_COMMAND (dhcpv6_proxy_vss_show_command, static) = { - .path = "show dhcpv6 vss", - .short_help = "show dhcpv6 VSS", - .function = dhcpv6_vss_show_command_fn, -}; - -static clib_error_t * -dhcpv6_link_address_show_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) - -{ - dhcpv6_proxy_main_t *dm = &dhcpv6_proxy_main; - vnet_main_t *vnm = vnet_get_main(); - u32 sw_if_index0=0, sw_if_index; - ip6_address_t *ia0; - vnet_sw_interface_t *swif; - - while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) - { - - if (unformat(input, "%U", - unformat_vnet_sw_interface, dm->vnet_main, &sw_if_index0)) - { - swif = vnet_get_sw_interface (vnm, sw_if_index0); - sw_if_index = (swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) ? - swif->unnumbered_sw_if_index : sw_if_index0; - ia0 = ip6_interface_first_address(&ip6_main, sw_if_index); - if (ia0) - { - vlib_cli_output (vm, "%=20s%=48s", "interface", "link-address"); - - vlib_cli_output (vm, "%=20U%=48U", - format_vnet_sw_if_index_name, dm->vnet_main, sw_if_index0, - format_ip6_address, ia0); - } else - vlib_cli_output (vm, "%=34s%=20U", "No IPv6 address configured on", - format_vnet_sw_if_index_name, dm->vnet_main, sw_if_index); - } else - break; - } - - return 0; -} - -VLIB_CLI_COMMAND (dhcpv6_proxy_address_show_command, static) = { - .path = "show dhcpv6 link-address interface", - .short_help = "show dhcpv6 link-address interface ", - .function = dhcpv6_link_address_show_command_fn, -}; diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c index 70b4e4c9..4cc6aa73 100644 --- a/src/vpp/api/custom_dump.c +++ b/src/vpp/api/custom_dump.c @@ -21,8 +21,7 @@ #include #include #include -#include -#include +#include #include #include #include diff --git a/test/test_dhcp.py b/test/test_dhcp.py index fbfb8a0c..6299975b 100644 --- a/test/test_dhcp.py +++ b/test/test_dhcp.py @@ -293,7 +293,7 @@ class TestDHCP(VppTestCase): # # Inject a response from the server # dropped, because there is no IP addrees on the - # clinet interfce to fill in the option. + # client interfce to fill in the option. 
# p = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) / IP(src=self.pg0.remote_ip4, dst=self.pg0.local_ip4) / -- cgit 1.2.3-korg From def19da0c92bfffc23ad898e459271c7c9209110 Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Wed, 22 Feb 2017 17:29:20 -0500 Subject: Clean up "binary-api" help string, arg parse bugs Change-Id: I12311be8ebd376b8aeac25364d010d70a85c7874 Signed-off-by: Dave Barach --- src/vat/api_format.c | 2 ++ src/vpp/api/api_main.c | 8 ++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'src/vpp/api') diff --git a/src/vat/api_format.c b/src/vat/api_format.c index b0df35fa..039fca06 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -17790,7 +17790,9 @@ api_sw_interface_set_mtu (vat_main_t * vam) static int q_or_quit (vat_main_t * vam) { +#if VPP_API_TEST_BUILTIN == 0 longjmp (vam->jump_buf, 1); +#endif return 0; /* not so much */ } diff --git a/src/vpp/api/api_main.c b/src/vpp/api/api_main.c index 396db25b..97b501e0 100644 --- a/src/vpp/api/api_main.c +++ b/src/vpp/api/api_main.c @@ -88,6 +88,7 @@ api_command_fn (vlib_main_t * vm, /* Split input into cmd + args */ this_cmd = cmdp = vam->inbuf; + /* Skip leading whitespace */ while (cmdp < (this_cmd + vec_len (this_cmd))) { if (*cmdp == ' ' || *cmdp == '\t' || *cmdp == '\n') @@ -99,15 +100,18 @@ api_command_fn (vlib_main_t * vm, } argsp = cmdp; + + /* Advance past the command */ while (argsp < (this_cmd + vec_len (this_cmd))) { - if (*argsp != ' ' && *argsp != '\t' && *argsp != '\n') + if (*argsp != ' ' && *argsp != '\t' && *argsp != '\n' && argsp != 0) { argsp++; } else break; } + /* NULL terminate the command */ *argsp++ = 0; while (argsp < (this_cmd + vec_len (this_cmd))) @@ -158,7 +162,7 @@ api_command_fn (vlib_main_t * vm, VLIB_CLI_COMMAND (api_command, static) = { .path = "binary-api", - .short_help = "binary-api []", + .short_help = "binary-api [help] []", .function = api_command_fn, }; /* *INDENT-ON* */ -- cgit 1.2.3-korg From b3bb10101ceffec1df0624c785acbd40858870ec Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Tue, 28 Feb 2017 21:55:28 +0100 Subject: devices: vnet_get_aggregate_rx_packets should not be dpdk specific Change-Id: I1152db4b7d1602653d7d8b2c6cb28cf5c526c4ca Signed-off-by: Damjan Marion --- src/vnet/devices/af_packet/node.c | 1 + src/vnet/devices/devices.c | 14 ++++++++++++++ src/vnet/devices/devices.h | 36 ++++++++++++++++++++++++++++++++++++ src/vnet/devices/dpdk/dpdk.h | 34 ---------------------------------- src/vnet/devices/dpdk/init.c | 8 -------- src/vnet/devices/dpdk/node.c | 3 +-- src/vnet/devices/netmap/node.c | 2 ++ src/vnet/devices/ssvm/node.c | 2 ++ src/vnet/devices/virtio/vhost-user.c | 2 ++ src/vpp/api/gmon.c | 10 +--------- 10 files changed, 59 insertions(+), 53 deletions(-) (limited to 'src/vpp/api') diff --git a/src/vnet/devices/af_packet/node.c b/src/vnet/devices/af_packet/node.c index 476ccca9..69fc11c9 100644 --- a/src/vnet/devices/af_packet/node.c +++ b/src/vnet/devices/af_packet/node.c @@ -239,6 +239,7 @@ af_packet_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + VNET_INTERFACE_COUNTER_RX, os_get_cpu_number (), apif->hw_if_index, n_rx_packets, n_rx_bytes); + vnet_device_increment_rx_packets (cpu_index, n_rx_packets); return n_rx_packets; } diff --git a/src/vnet/devices/devices.c b/src/vnet/devices/devices.c index cd4386eb..c81043c6 100644 --- a/src/vnet/devices/devices.c +++ b/src/vnet/devices/devices.c @@ -19,6 +19,8 @@ #include #include +vnet_device_main_t vnet_device_main; + static uword device_input_fn (vlib_main_t * vm, 
vlib_node_runtime_t * node, vlib_frame_t * frame) @@ -82,6 +84,18 @@ VNET_FEATURE_INIT (ethernet_input, static) = { }; /* *INDENT-ON* */ +static clib_error_t * +vnet_device_init (vlib_main_t * vm) +{ + vnet_device_main_t *vdm = &vnet_device_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + + vec_validate_aligned (vdm->workers, tm->n_vlib_mains - 1, + CLIB_CACHE_LINE_BYTES); + return 0; +} + +VLIB_INIT_FUNCTION (vnet_device_init); /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/devices/devices.h b/src/vnet/devices/devices.h index c46dab90..a5cbc35e 100644 --- a/src/vnet/devices/devices.h +++ b/src/vnet/devices/devices.h @@ -39,9 +39,45 @@ typedef enum [VNET_DEVICE_INPUT_NEXT_MPLS_INPUT] = "mpls-input", \ } +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + + /* total input packet counter */ + u64 aggregate_rx_packets; +} vnet_device_per_worker_data_t; + +typedef struct +{ + vnet_device_per_worker_data_t *workers; +} vnet_device_main_t; + +extern vnet_device_main_t vnet_device_main; extern vlib_node_registration_t device_input_node; extern const u32 device_input_next_node_advance[]; +static inline u64 +vnet_get_aggregate_rx_packets (void) +{ + vnet_device_main_t *vdm = &vnet_device_main; + u64 sum = 0; + vnet_device_per_worker_data_t *pwd; + + vec_foreach (pwd, vdm->workers) sum += pwd->aggregate_rx_packets; + + return sum; +} + +static inline void +vnet_device_increment_rx_packets (u32 cpu_index, u64 count) +{ + vnet_device_main_t *vdm = &vnet_device_main; + vnet_device_per_worker_data_t *pwd; + + pwd = vec_elt_at_index (vdm->workers, cpu_index); + pwd->aggregate_rx_packets += count; +} + #endif /* included_vnet_vnet_device_h */ /* diff --git a/src/vnet/devices/dpdk/dpdk.h b/src/vnet/devices/dpdk/dpdk.h index 79c694f7..bf9f2768 100644 --- a/src/vnet/devices/dpdk/dpdk.h +++ b/src/vnet/devices/dpdk/dpdk.h @@ -223,22 +223,6 @@ typedef struct #define DPDK_LINK_POLL_INTERVAL (3.0) #define DPDK_MIN_LINK_POLL_INTERVAL (0.001) /* 1msec */ -typedef struct -{ - CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - - /* total input packet counter */ - u64 aggregate_rx_packets; -} dpdk_worker_t; - -typedef struct -{ - CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - - /* total input packet counter */ - u64 aggregate_rx_packets; -} dpdk_hqos_thread_t; - typedef struct { u32 device; @@ -360,12 +344,6 @@ typedef struct /* vlib buffer free list, must be same size as an rte_mbuf */ u32 vlib_buffer_free_list_index; - /* dpdk worker "threads" */ - dpdk_worker_t *workers; - - /* dpdk HQoS "threads" */ - dpdk_hqos_thread_t *hqos_threads; - /* Ethernet input node index */ u32 ethernet_input_node_index; @@ -475,18 +453,6 @@ void dpdk_update_link_state (dpdk_device_t * xd, f64 now); void dpdk_device_lock_init (dpdk_device_t * xd); void dpdk_device_lock_free (dpdk_device_t * xd); -static inline u64 -vnet_get_aggregate_rx_packets (void) -{ - dpdk_main_t *dm = &dpdk_main; - u64 sum = 0; - dpdk_worker_t *dw; - - vec_foreach (dw, dm->workers) sum += dw->aggregate_rx_packets; - - return sum; -} - void dpdk_rx_trace (dpdk_main_t * dm, vlib_node_runtime_t * node, dpdk_device_t * xd, diff --git a/src/vnet/devices/dpdk/init.c b/src/vnet/devices/dpdk/init.c index f4700133..29423e15 100755 --- a/src/vnet/devices/dpdk/init.c +++ b/src/vnet/devices/dpdk/init.c @@ -277,9 +277,6 @@ dpdk_lib_init (dpdk_main_t * dm) vec_validate_aligned (dm->devices_by_cpu, tm->n_vlib_mains - 1, CLIB_CACHE_LINE_BYTES); - vec_validate_aligned (dm->workers, tm->n_vlib_mains - 1, - CLIB_CACHE_LINE_BYTES); - 
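The vnet_device_main_t counter added to devices.h above is meant to be fed by every device input node and read back by monitoring code such as gmon.c. A minimal usage sketch follows (illustrative only, not part of this patch), assuming a hypothetical driver node; vnet_device_increment_rx_packets, vnet_get_aggregate_rx_packets and os_get_cpu_number are the calls introduced or used in this diff:

#include <vlib/vlib.h>
#include <vnet/devices/devices.h>

static uword
example_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
                         vlib_frame_t * frame)
{
  u32 cpu_index = os_get_cpu_number (); /* index of the worker running us */
  u32 n_rx_packets = 0;

  /* ... drain the device rx ring, enqueue buffers, count packets ... */

  /* credit this worker's per-thread aggregate RX counter */
  vnet_device_increment_rx_packets (cpu_index, n_rx_packets);
  return n_rx_packets;
}

static u64
example_monitor_poll (void)
{
  /* sums aggregate_rx_packets across all workers */
  return vnet_get_aggregate_rx_packets ();
}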
dm->hqos_cpu_first_index = 0; dm->hqos_cpu_count = 0; @@ -296,9 +293,6 @@ dpdk_lib_init (dpdk_main_t * dm) vec_validate_aligned (dm->devices_by_hqos_cpu, tm->n_vlib_mains - 1, CLIB_CACHE_LINE_BYTES); - vec_validate_aligned (dm->hqos_threads, tm->n_vlib_mains - 1, - CLIB_CACHE_LINE_BYTES); - nports = rte_eth_dev_count (); if (nports < 1) { @@ -1756,8 +1750,6 @@ dpdk_init (vlib_main_t * vm) STATIC_ASSERT (offsetof (dpdk_device_t, cacheline1) == CLIB_CACHE_LINE_BYTES, "Data in cache line 0 is bigger than cache line size"); - STATIC_ASSERT (offsetof (dpdk_worker_t, cacheline0) == 0, - "Cache line marker must be 1st element in dpdk_worker_t"); STATIC_ASSERT (offsetof (frame_queue_trace_t, cacheline0) == 0, "Cache line marker must be 1st element in frame_queue_trace_t"); diff --git a/src/vnet/devices/dpdk/node.c b/src/vnet/devices/dpdk/node.c index bde9dfae..0d64ae08 100644 --- a/src/vnet/devices/dpdk/node.c +++ b/src/vnet/devices/dpdk/node.c @@ -556,8 +556,7 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, + VNET_INTERFACE_COUNTER_RX, cpu_index, xd->vlib_sw_if_index, mb_index, n_rx_bytes); - dpdk_worker_t *dw = vec_elt_at_index (dm->workers, cpu_index); - dw->aggregate_rx_packets += mb_index; + vnet_device_increment_rx_packets (cpu_index, mb_index); return mb_index; } diff --git a/src/vnet/devices/netmap/node.c b/src/vnet/devices/netmap/node.c index 19895e47..835209a3 100644 --- a/src/vnet/devices/netmap/node.c +++ b/src/vnet/devices/netmap/node.c @@ -249,6 +249,8 @@ netmap_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + VNET_INTERFACE_COUNTER_RX, os_get_cpu_number (), nif->hw_if_index, n_rx_packets, n_rx_bytes); + vnet_device_increment_rx_packets (cpu_index, n_rx_packets); + return n_rx_packets; } diff --git a/src/vnet/devices/ssvm/node.c b/src/vnet/devices/ssvm/node.c index 3a695b1d..a6c9dfd7 100644 --- a/src/vnet/devices/ssvm/node.c +++ b/src/vnet/devices/ssvm/node.c @@ -287,6 +287,8 @@ out: + VNET_INTERFACE_COUNTER_RX, cpu_index, intfc->vlib_hw_if_index, rx_queue_index, n_rx_bytes); + vnet_device_increment_rx_packets (cpu_index, rx_queue_index); + return rx_queue_index; } diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index c43f6e67..f490f0c1 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -1819,6 +1819,8 @@ vhost_user_if_input (vlib_main_t * vm, + VNET_INTERFACE_COUNTER_RX, os_get_cpu_number (), vui->sw_if_index, n_rx_packets, n_rx_bytes); + vnet_device_increment_rx_packets (cpu_index, n_rx_packets); + return n_rx_packets; } diff --git a/src/vpp/api/gmon.c b/src/vpp/api/gmon.c index 20deb6a2..b28608f0 100644 --- a/src/vpp/api/gmon.c +++ b/src/vpp/api/gmon.c @@ -59,17 +59,9 @@ typedef struct } gmon_main_t; -#if DPDK == 0 -static inline u64 -vnet_get_aggregate_rx_packets (void) -{ - return 0; -} -#else #include #include -#include -#endif +#include gmon_main_t gmon_main; -- cgit 1.2.3-korg From 68b0fb0c620c7451ef1a6380c43c39de6614db51 Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Tue, 28 Feb 2017 15:15:56 -0500 Subject: VPP-598: tcp stack initial commit Change-Id: I49e5ce0aae6e4ff634024387ceaf7dbc432a0351 Signed-off-by: Dave Barach Signed-off-by: Florin Coras --- src/Makefile.am | 1 + src/plugins/ioam/export-common/ioam_export.h | 2 +- src/plugins/ioam/ipfixcollector/ipfixcollector.c | 2 +- src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c | 2 +- src/plugins/snat/in2out.c | 26 +- src/plugins/snat/out2in.c | 24 +- src/scripts/vnet/tcp | 18 +- src/scripts/vnet/udp | 19 + 
src/scripts/vnet/uri/tcp-setup.sh | 39 + src/scripts/vnet/uri/tcp_server | 4 + src/scripts/vnet/uri/udp | 19 + src/svm.am | 10 +- src/svm/ssvm.c | 16 + src/svm/ssvm.h | 18 +- src/svm/svm_fifo.c | 568 ++++++ src/svm/svm_fifo.h | 157 ++ src/svm/svm_fifo_segment.c | 193 ++ src/svm/svm_fifo_segment.h | 89 + src/svm/test_svm_fifo1.c | 361 ++++ src/uri.am | 22 + src/uri/uri_tcp_test.c | 916 +++++++++ src/uri/uri_udp_test.c | 553 ++++++ src/uri/uri_udp_test2.c | 954 +++++++++ src/uri/uritest.c | 484 +++++ src/vlib/buffer.c | 2 +- src/vlib/buffer.h | 68 + src/vlibmemory/unix_shared_memory_queue.c | 12 +- src/vlibmemory/unix_shared_memory_queue.h | 2 +- src/vnet.am | 66 +- src/vnet/api_errno.h | 21 +- src/vnet/bfd/bfd_udp.c | 4 +- src/vnet/buffer.h | 10 + src/vnet/classify/vnet_classify.c | 4 +- src/vnet/dhcp/dhcp_proxy.h | 2 +- src/vnet/flow/flow_report.h | 2 +- src/vnet/ip/ip.h | 4 +- src/vnet/ip/ip4.h | 42 +- src/vnet/ip/ip4_forward.c | 173 +- src/vnet/ip/ip4_packet.h | 26 +- src/vnet/ip/ip6.h | 44 +- src/vnet/ip/ip6_packet.h | 26 +- src/vnet/ip/punt.c | 2 +- src/vnet/ip/tcp_packet.h | 141 -- src/vnet/ip/udp.h | 315 --- src/vnet/ip/udp_error.def | 21 - src/vnet/ip/udp_format.c | 91 - src/vnet/ip/udp_init.c | 71 - src/vnet/ip/udp_local.c | 645 ------ src/vnet/ip/udp_packet.h | 65 - src/vnet/ip/udp_pg.c | 237 --- src/vnet/ipsec/ikev2.c | 2 +- src/vnet/ipsec/ikev2_cli.c | 2 +- src/vnet/ipsec/ikev2_crypto.c | 2 +- src/vnet/lisp-cp/packets.c | 65 +- src/vnet/lisp-cp/packets.h | 45 - src/vnet/lisp-gpe/interface.c | 2 +- src/vnet/lisp-gpe/lisp_gpe.h | 4 +- src/vnet/lisp-gpe/lisp_gpe_adjacency.c | 2 + src/vnet/session/application.c | 343 ++++ src/vnet/session/application.h | 120 ++ src/vnet/session/application_interface.c | 459 +++++ src/vnet/session/application_interface.h | 136 ++ src/vnet/session/hashes.c | 28 + src/vnet/session/node.c | 435 ++++ src/vnet/session/session.api | 429 ++++ src/vnet/session/session.c | 1286 ++++++++++++ src/vnet/session/session.h | 380 ++++ src/vnet/session/session_api.c | 821 ++++++++ src/vnet/session/session_cli.c | 189 ++ src/vnet/session/transport.c | 64 + src/vnet/session/transport.h | 250 +++ src/vnet/tcp/tcp.c | 708 +++++++ src/vnet/tcp/tcp.h | 624 ++++++ src/vnet/tcp/tcp_error.def | 35 + src/vnet/tcp/tcp_format.c | 136 ++ src/vnet/tcp/tcp_input.c | 2316 ++++++++++++++++++++++ src/vnet/tcp/tcp_newreno.c | 93 + src/vnet/tcp/tcp_output.c | 1412 +++++++++++++ src/vnet/tcp/tcp_packet.h | 184 ++ src/vnet/tcp/tcp_pg.c | 236 +++ src/vnet/tcp/tcp_syn_filter4.c | 542 +++++ src/vnet/tcp/tcp_timer.h | 29 + src/vnet/udp/builtin_server.c | 239 +++ src/vnet/udp/udp.c | 342 ++++ src/vnet/udp/udp.h | 362 ++++ src/vnet/udp/udp_error.def | 21 + src/vnet/udp/udp_format.c | 91 + src/vnet/udp/udp_input.c | 314 +++ src/vnet/udp/udp_local.c | 666 +++++++ src/vnet/udp/udp_packet.h | 65 + src/vnet/udp/udp_pg.c | 237 +++ src/vnet/vnet_all_api_h.h | 1 + src/vnet/vxlan-gpe/vxlan_gpe.h | 2 +- src/vnet/vxlan/vxlan.h | 2 +- src/vpp/api/vpe.api | 1 + src/vppinfra.am | 5 + src/vppinfra/bihash_16_8.h | 103 + src/vppinfra/bihash_48_8.h | 116 ++ src/vppinfra/tw_timer_16t_1w_2048sl.c | 26 + src/vppinfra/tw_timer_16t_1w_2048sl.h | 46 + 100 files changed, 18737 insertions(+), 1874 deletions(-) create mode 100644 src/scripts/vnet/udp create mode 100755 src/scripts/vnet/uri/tcp-setup.sh create mode 100644 src/scripts/vnet/uri/tcp_server create mode 100644 src/scripts/vnet/uri/udp create mode 100644 src/svm/svm_fifo.c create mode 100644 src/svm/svm_fifo.h create mode 100644 src/svm/svm_fifo_segment.c 
create mode 100644 src/svm/svm_fifo_segment.h create mode 100644 src/svm/test_svm_fifo1.c create mode 100644 src/uri.am create mode 100644 src/uri/uri_tcp_test.c create mode 100644 src/uri/uri_udp_test.c create mode 100644 src/uri/uri_udp_test2.c create mode 100644 src/uri/uritest.c delete mode 100644 src/vnet/ip/tcp_packet.h delete mode 100644 src/vnet/ip/udp.h delete mode 100644 src/vnet/ip/udp_error.def delete mode 100644 src/vnet/ip/udp_format.c delete mode 100644 src/vnet/ip/udp_init.c delete mode 100644 src/vnet/ip/udp_local.c delete mode 100644 src/vnet/ip/udp_packet.h delete mode 100644 src/vnet/ip/udp_pg.c create mode 100644 src/vnet/session/application.c create mode 100644 src/vnet/session/application.h create mode 100644 src/vnet/session/application_interface.c create mode 100644 src/vnet/session/application_interface.h create mode 100644 src/vnet/session/hashes.c create mode 100644 src/vnet/session/node.c create mode 100644 src/vnet/session/session.api create mode 100644 src/vnet/session/session.c create mode 100644 src/vnet/session/session.h create mode 100644 src/vnet/session/session_api.c create mode 100644 src/vnet/session/session_cli.c create mode 100644 src/vnet/session/transport.c create mode 100644 src/vnet/session/transport.h create mode 100644 src/vnet/tcp/tcp.c create mode 100644 src/vnet/tcp/tcp.h create mode 100644 src/vnet/tcp/tcp_error.def create mode 100644 src/vnet/tcp/tcp_format.c create mode 100644 src/vnet/tcp/tcp_input.c create mode 100644 src/vnet/tcp/tcp_newreno.c create mode 100644 src/vnet/tcp/tcp_output.c create mode 100644 src/vnet/tcp/tcp_packet.h create mode 100644 src/vnet/tcp/tcp_pg.c create mode 100644 src/vnet/tcp/tcp_syn_filter4.c create mode 100644 src/vnet/tcp/tcp_timer.h create mode 100644 src/vnet/udp/builtin_server.c create mode 100644 src/vnet/udp/udp.c create mode 100644 src/vnet/udp/udp.h create mode 100644 src/vnet/udp/udp_error.def create mode 100644 src/vnet/udp/udp_format.c create mode 100644 src/vnet/udp/udp_input.c create mode 100644 src/vnet/udp/udp_local.c create mode 100644 src/vnet/udp/udp_packet.h create mode 100644 src/vnet/udp/udp_pg.c create mode 100644 src/vppinfra/bihash_16_8.h create mode 100644 src/vppinfra/bihash_48_8.h create mode 100644 src/vppinfra/tw_timer_16t_1w_2048sl.c create mode 100644 src/vppinfra/tw_timer_16t_1w_2048sl.h (limited to 'src/vpp/api') diff --git a/src/Makefile.am b/src/Makefile.am index 08feb29a..641707ed 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -88,6 +88,7 @@ include vlib-api.am include vnet.am include vpp.am include vpp-api-test.am +include uri.am SUBDIRS += plugins diff --git a/src/plugins/ioam/export-common/ioam_export.h b/src/plugins/ioam/export-common/ioam_export.h index e84dab0b..dd48a93b 100644 --- a/src/plugins/ioam/export-common/ioam_export.h +++ b/src/plugins/ioam/export-common/ioam_export.h @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/plugins/ioam/ipfixcollector/ipfixcollector.c b/src/plugins/ioam/ipfixcollector/ipfixcollector.c index 4ae47edc..71b934ec 100644 --- a/src/plugins/ioam/ipfixcollector/ipfixcollector.c +++ b/src/plugins/ioam/ipfixcollector/ipfixcollector.c @@ -15,7 +15,7 @@ #include #include -#include +#include #include ipfix_collector_main_t ipfix_collector_main; diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c b/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c index b42c357c..f334c983 100644 --- a/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c +++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c 
@@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/plugins/snat/in2out.c b/src/plugins/snat/in2out.c index e30c913c..b4b7793d 100644 --- a/src/plugins/snat/in2out.c +++ b/src/plugins/snat/in2out.c @@ -689,12 +689,12 @@ snat_hairpinning (snat_main_t *sm, ip4_header_t, dst_address); ip0->checksum = ip_csum_fold (sum0); - old_dst_port0 = tcp0->ports.dst; + old_dst_port0 = tcp0->dst; if (PREDICT_TRUE(new_dst_port0 != old_dst_port0)) { if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { - tcp0->ports.dst = new_dst_port0; + tcp0->dst = new_dst_port0; sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0, ip4_header_t, dst_address); @@ -872,9 +872,9 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { - old_port0 = tcp0->ports.src; - tcp0->ports.src = s0->out2in.port; - new_port0 = tcp0->ports.src; + old_port0 = tcp0->src_port; + tcp0->src_port = s0->out2in.port; + new_port0 = tcp0->src_port; sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, @@ -1012,9 +1012,9 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP)) { - old_port1 = tcp1->ports.src; - tcp1->ports.src = s1->out2in.port; - new_port1 = tcp1->ports.src; + old_port1 = tcp1->src_port; + tcp1->src_port = s1->out2in.port; + new_port1 = tcp1->src_port; sum1 = tcp1->checksum; sum1 = ip_csum_update (sum1, old_addr1, new_addr1, @@ -1188,9 +1188,9 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { - old_port0 = tcp0->ports.src; - tcp0->ports.src = s0->out2in.port; - new_port0 = tcp0->ports.src; + old_port0 = tcp0->src_port; + tcp0->src_port = s0->out2in.port; + new_port0 = tcp0->src_port; sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, @@ -1667,8 +1667,8 @@ snat_in2out_fast_static_map_fn (vlib_main_t * vm, { if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { - old_port0 = tcp0->ports.src; - tcp0->ports.src = new_port0; + old_port0 = tcp0->src_port; + tcp0->src_port = new_port0; sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, diff --git a/src/plugins/snat/out2in.c b/src/plugins/snat/out2in.c index 328f5ba4..3bfc0aa3 100644 --- a/src/plugins/snat/out2in.c +++ b/src/plugins/snat/out2in.c @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include #include @@ -602,9 +602,9 @@ snat_out2in_node_fn (vlib_main_t * vm, if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { - old_port0 = tcp0->ports.dst; - tcp0->ports.dst = s0->in2out.port; - new_port0 = tcp0->ports.dst; + old_port0 = tcp0->dst_port; + tcp0->dst_port = s0->in2out.port; + new_port0 = tcp0->dst_port; sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, @@ -737,9 +737,9 @@ snat_out2in_node_fn (vlib_main_t * vm, if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP)) { - old_port1 = tcp1->ports.dst; - tcp1->ports.dst = s1->in2out.port; - new_port1 = tcp1->ports.dst; + old_port1 = tcp1->dst_port; + tcp1->dst_port = s1->in2out.port; + new_port1 = tcp1->dst_port; sum1 = tcp1->checksum; sum1 = ip_csum_update (sum1, old_addr1, new_addr1, @@ -907,9 +907,9 @@ snat_out2in_node_fn (vlib_main_t * vm, if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { - old_port0 = tcp0->ports.dst; - tcp0->ports.dst = s0->in2out.port; - new_port0 = tcp0->ports.dst; + old_port0 = tcp0->dst_port; + tcp0->dst_port = s0->in2out.port; + new_port0 = tcp0->dst_port; sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, 
old_addr0, new_addr0, @@ -1369,8 +1369,8 @@ snat_out2in_fast_node_fn (vlib_main_t * vm, { if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { - old_port0 = tcp0->ports.dst; - tcp0->ports.dst = new_port0; + old_port0 = tcp0->dst_port; + tcp0->dst_port = new_port0; sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, diff --git a/src/scripts/vnet/tcp b/src/scripts/vnet/tcp index a2ee8b2d..b9c23c3a 100644 --- a/src/scripts/vnet/tcp +++ b/src/scripts/vnet/tcp @@ -1,16 +1,18 @@ +loop create +set int ip address loop0 192.168.1.1/8 +set int state loop0 up + packet-generator new { name x - limit 1 + limit 2048 node ip4-input - size 64-64 + size 100-100 + interface loop0 no-recycle data { - TCP: 1.2.3.4 -> 5.6.7.8 - TCP: 1234 -> 5678 + TCP: 192.168.1.2 -> 192.168.1.1 + TCP: 32415 -> 80 + SYN incrementing 100 } } - -tr add pg-input 100 -ip route 5.6.7.8/32 via local -ip route 1.2.3.4/32 via local diff --git a/src/scripts/vnet/udp b/src/scripts/vnet/udp new file mode 100644 index 00000000..7dda1eec --- /dev/null +++ b/src/scripts/vnet/udp @@ -0,0 +1,19 @@ +loop create +set int ip address loop0 192.168.1.1/8 +set int state loop0 up + +packet-generator new { + name udp + limit 512 + rate 1e4 + node ip4-input + size 100-100 + interface loop0 + no-recycle + data { + UDP: 192.168.1.2 - 192.168.2.255 -> 192.168.1.1 + UDP: 4321 -> 1234 + length 72 + incrementing 100 + } +} diff --git a/src/scripts/vnet/uri/tcp-setup.sh b/src/scripts/vnet/uri/tcp-setup.sh new file mode 100755 index 00000000..e0b01588 --- /dev/null +++ b/src/scripts/vnet/uri/tcp-setup.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash + +function topo_setup +{ + ip netns add vppns1 + ip link add veth_vpp1 type veth peer name vpp1 + ip link set dev vpp1 up + ip link set dev veth_vpp1 up netns vppns1 + + ip netns exec vppns1 \ + bash -c " + ip link set dev lo up + ip addr add 6.0.1.2/24 dev veth_vpp1 + " + + ethtool --offload vpp1 rx off tx off + ip netns exec vppns1 ethtool --offload veth_vpp1 rx off tx off + +} + +function topo_clean +{ + ip link del dev veth_vpp1 &> /dev/null + ip netns del vppns1 &> /dev/null +} + +if [ "$1" == "clean" ] ; then + topo_clean + exit 0 +else + topo_setup +fi + +# to test connectivity do: +# sudo ip netns exec vppns1 telnet 6.0.1.1 1234 +# to push traffic to the server +# dd if=/dev/zero bs=1024K count=512 | nc 6.0.1.1 +# to listen for incoming connection from vpp +# nc -l 1234 diff --git a/src/scripts/vnet/uri/tcp_server b/src/scripts/vnet/uri/tcp_server new file mode 100644 index 00000000..7f5a86de --- /dev/null +++ b/src/scripts/vnet/uri/tcp_server @@ -0,0 +1,4 @@ +create host-interface name vpp1 +set int state host-vpp1 up +set int ip address host-vpp1 6.0.1.1/24 +trace add af-packet-input 10 diff --git a/src/scripts/vnet/uri/udp b/src/scripts/vnet/uri/udp new file mode 100644 index 00000000..ca13b83c --- /dev/null +++ b/src/scripts/vnet/uri/udp @@ -0,0 +1,19 @@ +loop create +set int ip address loop0 10.0.0.1/32 +set int state loop0 up + +packet-generator new { + name udp + limit 512 + rate 1e4 + node ip4-input + size 100-100 + interface loop0 + no-recycle + data { + UDP: 192.168.1.2 - 192.168.2.255 -> 192.168.1.1 + UDP: 4321 -> 1234 + length 72 + incrementing 100 + } +} diff --git a/src/svm.am b/src/svm.am index 2cd385bd..442eba8e 100644 --- a/src/svm.am +++ b/src/svm.am @@ -13,13 +13,14 @@ bin_PROGRAMS += svmtool svmdbtool -nobase_include_HEADERS += svm/svm.h svm/ssvm.h svm/svmdb.h +nobase_include_HEADERS += svm/svm.h svm/ssvm.h svm/svmdb.h \ + svm/svm_fifo.h svm/svm_fifo_segment.h lib_LTLIBRARIES 
+= libsvm.la libsvmdb.la +libsvm_la_SOURCES = svm/svm.c svm/ssvm.c svm/svm_fifo.c svm/svm_fifo_segment.c libsvm_la_LIBADD = libvppinfra.la -lrt -lpthread libsvm_la_DEPENDENCIES = libvppinfra.la -libsvm_la_SOURCES = svm/svm.c svm/ssvm.c svmtool_SOURCES = svm/svmtool.c svmtool_LDADD = libsvm.la libvppinfra.la -lpthread -lrt @@ -31,4 +32,9 @@ libsvmdb_la_SOURCES = svm/svmdb.c svmdbtool_SOURCES = svm/svmdbtool.c svmdbtool_LDADD = libsvmdb.la libsvm.la libvppinfra.la -lpthread -lrt +noinst_PROGRAMS += test_svm_fifo1 +test_svm_fifo1_SOURCES = svm/test_svm_fifo1.c +test_svm_fifo1_LDADD = libsvm.la libvppinfra.la -lpthread -lrt +test_svm_fifo1_LDFLAGS = -static + # vi:syntax=automake diff --git a/src/svm/ssvm.c b/src/svm/ssvm.c index 6f409eb6..6cda1f27 100644 --- a/src/svm/ssvm.c +++ b/src/svm/ssvm.c @@ -169,6 +169,22 @@ re_map_it: return 0; } +void +ssvm_delete (ssvm_private_t * ssvm) +{ + u8 *fn; + + fn = format (0, "/dev/shm/%s%c", ssvm->name, 0); + + /* Throw away the backing file */ + if (unlink ((char *) fn) < 0) + clib_unix_warning ("unlink segment '%s'", ssvm->name); + + munmap ((void *) ssvm->requested_va, ssvm->ssvm_size); + vec_free (fn); +} + + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/svm/ssvm.h b/src/svm/ssvm.h index 9e61b9a0..bccfc164 100644 --- a/src/svm/ssvm.h +++ b/src/svm/ssvm.h @@ -38,7 +38,10 @@ #include #include -#define MMAP_PAGESIZE (4<<10) +#ifndef MMAP_PAGESIZE +#define MMAP_PAGESIZE (clib_mem_get_page_size()) +#endif + #define SSVM_N_OPAQUE 7 typedef struct @@ -125,12 +128,12 @@ ssvm_pop_heap (void *oldheap) } #define foreach_ssvm_api_error \ -_(NO_NAME, "No shared segment name", -10) \ -_(NO_SIZE, "Size not set (master)", -11) \ -_(CREATE_FAILURE, "Create failed", -12) \ -_(SET_SIZE, "Set size failed", -13) \ -_(MMAP, "mmap failed", -14) \ -_(SLAVE_TIMEOUT, "Slave map timeout", -15) +_(NO_NAME, "No shared segment name", -100) \ +_(NO_SIZE, "Size not set (master)", -101) \ +_(CREATE_FAILURE, "Create failed", -102) \ +_(SET_SIZE, "Set size failed", -103) \ +_(MMAP, "mmap failed", -104) \ +_(SLAVE_TIMEOUT, "Slave map timeout", -105) typedef enum { @@ -143,6 +146,7 @@ typedef enum int ssvm_master_init (ssvm_private_t * ssvm, u32 master_index); int ssvm_slave_init (ssvm_private_t * ssvm, int timeout_in_seconds); +void ssvm_delete (ssvm_private_t * ssvm); #endif /* __included_ssvm_h__ */ diff --git a/src/svm/svm_fifo.c b/src/svm/svm_fifo.c new file mode 100644 index 00000000..11f90193 --- /dev/null +++ b/src/svm/svm_fifo.c @@ -0,0 +1,568 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "svm_fifo.h" + +/** create an svm fifo, in the current heap. 
Fails vs blow up the process */ +svm_fifo_t * +svm_fifo_create (u32 data_size_in_bytes) +{ + svm_fifo_t *f; + pthread_mutexattr_t attr; + pthread_condattr_t cattr; + + f = clib_mem_alloc_aligned_or_null (sizeof (*f) + data_size_in_bytes, + CLIB_CACHE_LINE_BYTES); + if (f == 0) + return 0; + + memset (f, 0, sizeof (*f) + data_size_in_bytes); + f->nitems = data_size_in_bytes; + f->ooos_list_head = OOO_SEGMENT_INVALID_INDEX; + + memset (&attr, 0, sizeof (attr)); + memset (&cattr, 0, sizeof (cattr)); + + if (pthread_mutexattr_init (&attr)) + clib_unix_warning ("mutexattr_init"); + if (pthread_mutexattr_setpshared (&attr, PTHREAD_PROCESS_SHARED)) + clib_unix_warning ("pthread_mutexattr_setpshared"); + if (pthread_mutex_init (&f->mutex, &attr)) + clib_unix_warning ("mutex_init"); + if (pthread_mutexattr_destroy (&attr)) + clib_unix_warning ("mutexattr_destroy"); + if (pthread_condattr_init (&cattr)) + clib_unix_warning ("condattr_init"); + if (pthread_condattr_setpshared (&cattr, PTHREAD_PROCESS_SHARED)) + clib_unix_warning ("condattr_setpshared"); + if (pthread_cond_init (&f->condvar, &cattr)) + clib_unix_warning ("cond_init1"); + if (pthread_condattr_destroy (&cattr)) + clib_unix_warning ("cond_init2"); + + return (f); +} + +always_inline ooo_segment_t * +ooo_segment_new (svm_fifo_t * f, u32 start, u32 length) +{ + ooo_segment_t *s; + + pool_get (f->ooo_segments, s); + + s->fifo_position = start; + s->length = length; + + s->prev = s->next = OOO_SEGMENT_INVALID_INDEX; + + return s; +} + +always_inline void +ooo_segment_del (svm_fifo_t * f, u32 index) +{ + ooo_segment_t *cur, *prev = 0, *next = 0; + cur = pool_elt_at_index (f->ooo_segments, index); + + if (cur->next != OOO_SEGMENT_INVALID_INDEX) + { + next = pool_elt_at_index (f->ooo_segments, cur->next); + next->prev = cur->prev; + } + + if (cur->prev != OOO_SEGMENT_INVALID_INDEX) + { + prev = pool_elt_at_index (f->ooo_segments, cur->prev); + prev->next = cur->next; + } + else + { + f->ooos_list_head = cur->next; + } + + pool_put (f->ooo_segments, cur); +} + +/** + * Add segment to fifo's out-of-order segment list. Takes care of merging + * adjacent segments and removing overlapping ones. 
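 *
 * (Editorial illustration, not part of the original patch: offsets here are
 * relative to f->tail. With tail = 100, ooo_segment_add (f, 30, 10) records
 * fifo bytes [130, 140) as received out of order; once in-order enqueues
 * advance the tail to 130, ooo_segment_try_collect () below consumes the
 * segment and moves the tail on to 140.)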
+ */ +static void +ooo_segment_add (svm_fifo_t * f, u32 offset, u32 length) +{ + ooo_segment_t *s, *new_s, *prev, *next, *it; + u32 new_index, position, end_offset, s_sof, s_eof, s_index; + + position = (f->tail + offset) % f->nitems; + end_offset = offset + length; + + if (f->ooos_list_head == OOO_SEGMENT_INVALID_INDEX) + { + s = ooo_segment_new (f, position, length); + f->ooos_list_head = s - f->ooo_segments; + f->ooos_newest = f->ooos_list_head; + return; + } + + /* Find first segment that starts after new segment */ + s = pool_elt_at_index (f->ooo_segments, f->ooos_list_head); + while (s->next != OOO_SEGMENT_INVALID_INDEX + && ooo_segment_offset (f, s) <= offset) + s = pool_elt_at_index (f->ooo_segments, s->next); + + s_index = s - f->ooo_segments; + s_sof = ooo_segment_offset (f, s); + s_eof = ooo_segment_end_offset (f, s); + + /* No overlap, add before current segment */ + if (end_offset < s_sof) + { + new_s = ooo_segment_new (f, position, length); + new_index = new_s - f->ooo_segments; + + /* Pool might've moved, get segment again */ + s = pool_elt_at_index (f->ooo_segments, s_index); + + if (s->prev != OOO_SEGMENT_INVALID_INDEX) + { + new_s->prev = s->prev; + + prev = pool_elt_at_index (f->ooo_segments, new_s->prev); + prev->next = new_index; + } + else + { + /* New head */ + f->ooos_list_head = new_index; + } + + new_s->next = s - f->ooo_segments; + s->prev = new_index; + f->ooos_newest = new_index; + return; + } + /* No overlap, add after current segment */ + else if (s_eof < offset) + { + new_s = ooo_segment_new (f, position, length); + new_index = new_s - f->ooo_segments; + + /* Pool might've moved, get segment again */ + s = pool_elt_at_index (f->ooo_segments, s_index); + + if (s->next != OOO_SEGMENT_INVALID_INDEX) + { + new_s->next = s->next; + + next = pool_elt_at_index (f->ooo_segments, new_s->next); + next->prev = new_index; + } + + new_s->prev = s - f->ooo_segments; + s->next = new_index; + f->ooos_newest = new_index; + + return; + } + + /* + * Merge needed + */ + + /* Merge at head */ + if (offset <= s_sof) + { + /* If we have a previous, check if we overlap */ + if (s->prev != OOO_SEGMENT_INVALID_INDEX) + { + prev = pool_elt_at_index (f->ooo_segments, s->prev); + + /* New segment merges prev and current. Remove previous and + * update position of current. */ + if (ooo_segment_end_offset (f, prev) >= offset) + { + s->fifo_position = prev->fifo_position; + s->length = s_eof - ooo_segment_offset (f, prev); + ooo_segment_del (f, s->prev); + } + } + else + { + s->fifo_position = position; + s->length = s_eof - ooo_segment_offset (f, s); + } + + /* The new segment's tail may cover multiple smaller ones */ + if (s_eof < end_offset) + { + /* Remove segments completely covered */ + it = (s->next != OOO_SEGMENT_INVALID_INDEX) ? + pool_elt_at_index (f->ooo_segments, s->next) : 0; + while (it && ooo_segment_end_offset (f, it) < end_offset) + { + next = (it->next != OOO_SEGMENT_INVALID_INDEX) ? + pool_elt_at_index (f->ooo_segments, it->next) : 0; + ooo_segment_del (f, it - f->ooo_segments); + it = next; + } + + /* Update length. Segment's start might have changed. 
*/ + s->length = end_offset - ooo_segment_offset (f, s); + + /* If partial overlap with last, merge */ + if (it && ooo_segment_offset (f, it) < end_offset) + { + s->length += + it->length - (ooo_segment_offset (f, it) - end_offset); + ooo_segment_del (f, it - f->ooo_segments); + } + } + } + /* Last but overlapping previous */ + else if (s_eof <= end_offset) + { + s->length = end_offset - ooo_segment_offset (f, s); + } + /* New segment completely covered by current one */ + else + { + /* Do Nothing */ + } + + /* Most recently updated segment */ + f->ooos_newest = s - f->ooo_segments; +} + +/** + * Removes segments that can now be enqueued because the fifo's tail has + * advanced. Returns the number of bytes added to tail. + */ +static int +ooo_segment_try_collect (svm_fifo_t * f, u32 n_bytes_enqueued) +{ + ooo_segment_t *s; + u32 index, bytes = 0, diff; + + s = pool_elt_at_index (f->ooo_segments, f->ooos_list_head); + + /* If last tail update overlaps one/multiple ooo segments, remove them */ + diff = (f->nitems + f->tail - s->fifo_position) % f->nitems; + while (0 < diff && diff < n_bytes_enqueued) + { + /* Segment end is beyond the tail. Advance tail and be done */ + if (diff < s->length) + { + f->tail += s->length - diff; + f->tail %= f->nitems; + break; + } + /* If we have next go on */ + else if (s->next != OOO_SEGMENT_INVALID_INDEX) + { + index = s - f->ooo_segments; + s = pool_elt_at_index (f->ooo_segments, s->next); + diff = (f->nitems + f->tail - s->fifo_position) % f->nitems; + ooo_segment_del (f, index); + } + /* End of search */ + else + { + break; + } + } + + /* If tail is adjacent to an ooo segment, 'consume' it */ + if (diff == 0) + { + bytes = ((f->nitems - f->cursize) >= s->length) ? s->length : + f->nitems - f->cursize; + + f->tail += bytes; + f->tail %= f->nitems; + + ooo_segment_del (f, s - f->ooo_segments); + } + + return bytes; +} + +static int +svm_fifo_enqueue_internal (svm_fifo_t * f, + int pid, u32 max_bytes, u8 * copy_from_here) +{ + u32 total_copy_bytes, first_copy_bytes, second_copy_bytes; + u32 cursize, nitems; + + if (PREDICT_FALSE (f->cursize == f->nitems)) + return -2; /* fifo stuffed */ + + /* read cursize, which can only decrease while we're working */ + cursize = f->cursize; + nitems = f->nitems; + + /* Number of bytes we're going to copy */ + total_copy_bytes = (nitems - cursize) < max_bytes ? + (nitems - cursize) : max_bytes; + + if (PREDICT_TRUE (copy_from_here != 0)) + { + /* Number of bytes in first copy segment */ + first_copy_bytes = ((nitems - f->tail) < total_copy_bytes) + ? (nitems - f->tail) : total_copy_bytes; + + clib_memcpy (&f->data[f->tail], copy_from_here, first_copy_bytes); + f->tail += first_copy_bytes; + f->tail = (f->tail == nitems) ? 0 : f->tail; + + /* Number of bytes in second copy segment, if any */ + second_copy_bytes = total_copy_bytes - first_copy_bytes; + if (second_copy_bytes) + { + clib_memcpy (&f->data[f->tail], copy_from_here + first_copy_bytes, + second_copy_bytes); + f->tail += second_copy_bytes; + f->tail = (f->tail == nitems) ? 0 : f->tail; + } + } + else + { + /* Account for a zero-copy enqueue done elsewhere */ + ASSERT (max_bytes <= (nitems - cursize)); + f->tail += max_bytes; + f->tail = f->tail % nitems; + total_copy_bytes = max_bytes; + } + + /* Any out-of-order segments to collect? 
*/ + if (PREDICT_FALSE (f->ooos_list_head != OOO_SEGMENT_INVALID_INDEX)) + total_copy_bytes += ooo_segment_try_collect (f, total_copy_bytes); + + /* Atomically increase the queue length */ + __sync_fetch_and_add (&f->cursize, total_copy_bytes); + + return (total_copy_bytes); +} + +int +svm_fifo_enqueue_nowait (svm_fifo_t * f, + int pid, u32 max_bytes, u8 * copy_from_here) +{ + return svm_fifo_enqueue_internal (f, pid, max_bytes, copy_from_here); +} + +/** Enqueue a future segment. + * Two choices: either copies the entire segment, or copies nothing + * Returns 0 of the entire segment was copied + * Returns -1 if none of the segment was copied due to lack of space + */ + +static int +svm_fifo_enqueue_with_offset_internal2 (svm_fifo_t * f, + int pid, + u32 offset, + u32 required_bytes, + u8 * copy_from_here) +{ + u32 total_copy_bytes, first_copy_bytes, second_copy_bytes; + u32 cursize, nitems; + u32 tail_plus_offset; + + ASSERT (offset > 0); + + /* read cursize, which can only decrease while we're working */ + cursize = f->cursize; + nitems = f->nitems; + + /* Will this request fit? */ + if ((required_bytes + offset) > (nitems - cursize)) + return -1; + + ooo_segment_add (f, offset, required_bytes); + + /* Number of bytes we're going to copy */ + total_copy_bytes = required_bytes; + tail_plus_offset = (f->tail + offset) % nitems; + + /* Number of bytes in first copy segment */ + first_copy_bytes = ((nitems - tail_plus_offset) < total_copy_bytes) + ? (nitems - tail_plus_offset) : total_copy_bytes; + + clib_memcpy (&f->data[tail_plus_offset], copy_from_here, first_copy_bytes); + + /* Number of bytes in second copy segment, if any */ + second_copy_bytes = total_copy_bytes - first_copy_bytes; + if (second_copy_bytes) + { + tail_plus_offset += first_copy_bytes; + tail_plus_offset %= nitems; + + ASSERT (tail_plus_offset == 0); + + clib_memcpy (&f->data[tail_plus_offset], + copy_from_here + first_copy_bytes, second_copy_bytes); + } + + return (0); +} + + +int +svm_fifo_enqueue_with_offset (svm_fifo_t * f, + int pid, + u32 offset, + u32 required_bytes, u8 * copy_from_here) +{ + return svm_fifo_enqueue_with_offset_internal2 + (f, pid, offset, required_bytes, copy_from_here); +} + + +static int +svm_fifo_dequeue_internal2 (svm_fifo_t * f, + int pid, u32 max_bytes, u8 * copy_here) +{ + u32 total_copy_bytes, first_copy_bytes, second_copy_bytes; + u32 cursize, nitems; + + if (PREDICT_FALSE (f->cursize == 0)) + return -2; /* nothing in the fifo */ + + /* read cursize, which can only increase while we're working */ + cursize = f->cursize; + nitems = f->nitems; + + /* Number of bytes we're going to copy */ + total_copy_bytes = (cursize < max_bytes) ? cursize : max_bytes; + + if (PREDICT_TRUE (copy_here != 0)) + { + /* Number of bytes in first copy segment */ + first_copy_bytes = ((nitems - f->head) < total_copy_bytes) + ? (nitems - f->head) : total_copy_bytes; + clib_memcpy (copy_here, &f->data[f->head], first_copy_bytes); + f->head += first_copy_bytes; + f->head = (f->head == nitems) ? 0 : f->head; + + /* Number of bytes in second copy segment, if any */ + second_copy_bytes = total_copy_bytes - first_copy_bytes; + if (second_copy_bytes) + { + clib_memcpy (copy_here + first_copy_bytes, + &f->data[f->head], second_copy_bytes); + f->head += second_copy_bytes; + f->head = (f->head == nitems) ? 
0 : f->head; + } + } + else + { + /* Account for a zero-copy dequeue done elsewhere */ + ASSERT (max_bytes <= cursize); + f->head += max_bytes; + f->head = f->head % nitems; + cursize -= max_bytes; + total_copy_bytes = max_bytes; + } + + __sync_fetch_and_sub (&f->cursize, total_copy_bytes); + + return (total_copy_bytes); +} + +int +svm_fifo_dequeue_nowait (svm_fifo_t * f, + int pid, u32 max_bytes, u8 * copy_here) +{ + return svm_fifo_dequeue_internal2 (f, pid, max_bytes, copy_here); +} + +int +svm_fifo_peek (svm_fifo_t * f, int pid, u32 offset, u32 max_bytes, + u8 * copy_here) +{ + u32 total_copy_bytes, first_copy_bytes, second_copy_bytes; + u32 cursize, nitems; + + if (PREDICT_FALSE (f->cursize == 0)) + return -2; /* nothing in the fifo */ + + /* read cursize, which can only increase while we're working */ + cursize = f->cursize; + nitems = f->nitems; + + /* Number of bytes we're going to copy */ + total_copy_bytes = (cursize < max_bytes) ? cursize : max_bytes; + + if (PREDICT_TRUE (copy_here != 0)) + { + /* Number of bytes in first copy segment */ + first_copy_bytes = + ((nitems - f->head) < total_copy_bytes) ? + (nitems - f->head) : total_copy_bytes; + clib_memcpy (copy_here, &f->data[f->head], first_copy_bytes); + + /* Number of bytes in second copy segment, if any */ + second_copy_bytes = total_copy_bytes - first_copy_bytes; + if (second_copy_bytes) + { + clib_memcpy (copy_here + first_copy_bytes, &f->data[0], + second_copy_bytes); + } + } + return total_copy_bytes; +} + +int +svm_fifo_dequeue_drop (svm_fifo_t * f, int pid, u32 max_bytes) +{ + u32 total_drop_bytes, first_drop_bytes, second_drop_bytes; + u32 cursize, nitems; + + if (PREDICT_FALSE (f->cursize == 0)) + return -2; /* nothing in the fifo */ + + /* read cursize, which can only increase while we're working */ + cursize = f->cursize; + nitems = f->nitems; + + /* Number of bytes we're going to drop */ + total_drop_bytes = (cursize < max_bytes) ? cursize : max_bytes; + + /* Number of bytes in first copy segment */ + first_drop_bytes = + ((nitems - f->head) < total_drop_bytes) ? + (nitems - f->head) : total_drop_bytes; + f->head += first_drop_bytes; + f->head = (f->head == nitems) ? 0 : f->head; + + /* Number of bytes in second drop segment, if any */ + second_drop_bytes = total_drop_bytes - first_drop_bytes; + if (second_drop_bytes) + { + f->head += second_drop_bytes; + f->head = (f->head == nitems) ? 0 : f->head; + } + + __sync_fetch_and_sub (&f->cursize, total_drop_bytes); + + return total_drop_bytes; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/svm/svm_fifo.h b/src/svm/svm_fifo.h new file mode 100644 index 00000000..70624b74 --- /dev/null +++ b/src/svm/svm_fifo.h @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
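 */

/*
 * Layout note (illustrative, based on the definitions that follow): the
 * shared control fields come first, the consumer-owned head sits behind
 * the end_shared cache-line mark and the producer-owned tail behind
 * end_consumer, so a single producer and a single consumer contend only
 * on the volatile cursize counter, which the enqueue and dequeue paths
 * update with __sync_fetch_and_add / __sync_fetch_and_sub.  For example,
 * with nitems = 4096 and cursize = 1000, svm_fifo_max_dequeue () reports
 * 1000 bytes readable and svm_fifo_max_enqueue () 3096 bytes writable.
 */

/*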
+ */ +#ifndef __included_ssvm_fifo_h__ +#define __included_ssvm_fifo_h__ + +#include +#include +#include +#include +#include +#include +#include + +typedef enum +{ + SVM_FIFO_TAG_NOT_HELD = 0, + SVM_FIFO_TAG_DEQUEUE, + SVM_FIFO_TAG_ENQUEUE, +} svm_lock_tag_t; + +/** Out-of-order segment */ +typedef struct +{ + u32 next; /**< Next linked-list element pool index */ + u32 prev; /**< Previous linked-list element pool index */ + + u32 fifo_position; /**< Start of segment, normalized*/ + u32 length; /**< Length of segment */ +} ooo_segment_t; + +#define OOO_SEGMENT_INVALID_INDEX ((u32)~0) + +typedef struct +{ + pthread_mutex_t mutex; /* 8 bytes */ + pthread_cond_t condvar; /* 8 bytes */ + u32 owner_pid; + svm_lock_tag_t tag; + volatile u32 cursize; + u32 nitems; + + /* Backpointers */ + u32 server_session_index; + u32 client_session_index; + u8 server_thread_index; + u8 client_thread_index; + CLIB_CACHE_LINE_ALIGN_MARK (end_shared); + u32 head; + CLIB_CACHE_LINE_ALIGN_MARK (end_consumer); + + /* producer */ + u32 tail; + + ooo_segment_t *ooo_segments; /**< Pool of ooo segments */ + u32 ooos_list_head; /**< Head of out-of-order linked-list */ + u32 ooos_newest; /**< Last segment to have been updated */ + + CLIB_CACHE_LINE_ALIGN_MARK (data); +} svm_fifo_t; + +static inline int +svm_fifo_lock (svm_fifo_t * f, u32 pid, u32 tag, int nowait) +{ + if (PREDICT_TRUE (nowait == 0)) + pthread_mutex_lock (&f->mutex); + else + { + if (pthread_mutex_trylock (&f->mutex)) + return -1; + } + f->owner_pid = pid; + f->tag = tag; + return 0; +} + +static inline void +svm_fifo_unlock (svm_fifo_t * f) +{ + f->owner_pid = 0; + f->tag = 0; + CLIB_MEMORY_BARRIER (); + pthread_mutex_unlock (&f->mutex); +} + +static inline u32 +svm_fifo_max_dequeue (svm_fifo_t * f) +{ + return f->cursize; +} + +static inline u32 +svm_fifo_max_enqueue (svm_fifo_t * f) +{ + return f->nitems - f->cursize; +} + +static inline u8 +svm_fifo_has_ooo_data (svm_fifo_t * f) +{ + return f->ooos_list_head != OOO_SEGMENT_INVALID_INDEX; +} + +svm_fifo_t *svm_fifo_create (u32 data_size_in_bytes); + +int svm_fifo_enqueue_nowait (svm_fifo_t * f, int pid, u32 max_bytes, + u8 * copy_from_here); + +int svm_fifo_enqueue_with_offset (svm_fifo_t * f, int pid, + u32 offset, u32 required_bytes, + u8 * copy_from_here); + +int svm_fifo_dequeue_nowait (svm_fifo_t * f, int pid, u32 max_bytes, + u8 * copy_here); + +int svm_fifo_peek (svm_fifo_t * f, int pid, u32 offset, u32 max_bytes, + u8 * copy_here); +int svm_fifo_dequeue_drop (svm_fifo_t * f, int pid, u32 max_bytes); + +always_inline ooo_segment_t * +svm_fifo_newest_ooo_segment (svm_fifo_t * f) +{ + return f->ooo_segments + f->ooos_newest; +} + +always_inline u32 +ooo_segment_offset (svm_fifo_t * f, ooo_segment_t * s) +{ + return ((f->nitems + s->fifo_position - f->tail) % f->nitems); +} + +always_inline u32 +ooo_segment_end_offset (svm_fifo_t * f, ooo_segment_t * s) +{ + return ((f->nitems + s->fifo_position + s->length - f->tail) % f->nitems); +} + +#endif /* __included_ssvm_fifo_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/svm/svm_fifo_segment.c b/src/svm/svm_fifo_segment.c new file mode 100644 index 00000000..acabb3bd --- /dev/null +++ b/src/svm/svm_fifo_segment.c @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +svm_fifo_segment_main_t svm_fifo_segment_main; + +/** (master) create an svm fifo segment */ +int +svm_fifo_segment_create (svm_fifo_segment_create_args_t * a) +{ + int rv; + svm_fifo_segment_private_t *s; + svm_fifo_segment_main_t *sm = &svm_fifo_segment_main; + ssvm_shared_header_t *sh; + svm_fifo_segment_header_t *fsh; + void *oldheap; + + /* Allocate a fresh segment */ + pool_get (sm->segments, s); + memset (s, 0, sizeof (*s)); + + s->ssvm.ssvm_size = a->segment_size; + s->ssvm.i_am_master = 1; + s->ssvm.my_pid = getpid (); + s->ssvm.name = (u8 *) a->segment_name; + s->ssvm.requested_va = sm->next_baseva; + + rv = ssvm_master_init (&s->ssvm, s - sm->segments); + + if (rv) + { + _vec_len (s) = vec_len (s) - 1; + return (rv); + } + + /* Note; requested_va updated due to seg base addr randomization */ + sm->next_baseva = s->ssvm.requested_va + a->segment_size; + + sh = s->ssvm.sh; + oldheap = ssvm_push_heap (sh); + + /* Set up svm_fifo_segment shared header */ + fsh = clib_mem_alloc (sizeof (*fsh)); + memset (fsh, 0, sizeof (*fsh)); + sh->opaque[0] = fsh; + s->h = fsh; + fsh->segment_name = format (0, "%s%c", a->segment_name, 0); + + /* Avoid vec_add1(...) failure when adding a fifo, etc. */ + vec_validate (fsh->fifos, 64); + _vec_len (fsh->fifos) = 0; + + ssvm_pop_heap (oldheap); + + sh->ready = 1; + a->new_segment_index = s - sm->segments; + return (0); +} + +/** (slave) attach to an svm fifo segment */ +int +svm_fifo_segment_attach (svm_fifo_segment_create_args_t * a) +{ + int rv; + svm_fifo_segment_private_t *s; + svm_fifo_segment_main_t *sm = &svm_fifo_segment_main; + ssvm_shared_header_t *sh; + svm_fifo_segment_header_t *fsh; + + /* Allocate a fresh segment */ + pool_get (sm->segments, s); + + memset (s, 0, sizeof (*s)); + + s->ssvm.ssvm_size = a->segment_size; + s->ssvm.my_pid = getpid (); + s->ssvm.name = (u8 *) a->segment_name; + s->ssvm.requested_va = sm->next_baseva; + + rv = ssvm_slave_init (&s->ssvm, sm->timeout_in_seconds); + + if (rv) + { + _vec_len (s) = vec_len (s) - 1; + return (rv); + } + + /* Fish the segment header */ + sh = s->ssvm.sh; + fsh = (svm_fifo_segment_header_t *) sh->opaque[0]; + s->h = fsh; + + a->new_segment_index = s - sm->segments; + return (0); +} + +void +svm_fifo_segment_delete (svm_fifo_segment_private_t * s) +{ + svm_fifo_segment_main_t *sm = &svm_fifo_segment_main; + ssvm_delete (&s->ssvm); + pool_put (sm->segments, s); +} + +svm_fifo_t * +svm_fifo_segment_alloc_fifo (svm_fifo_segment_private_t * s, + u32 data_size_in_bytes) +{ + ssvm_shared_header_t *sh; + svm_fifo_segment_header_t *fsh; + svm_fifo_t *f; + void *oldheap; + + sh = s->ssvm.sh; + fsh = (svm_fifo_segment_header_t *) sh->opaque[0]; + oldheap = ssvm_push_heap (sh); + + /* Note: this can fail, in which case: create another segment */ + f = svm_fifo_create (data_size_in_bytes); + if (f == 0) + { + ssvm_pop_heap (oldheap); + return (0); + } + + vec_add1 (fsh->fifos, f); + + ssvm_pop_heap (oldheap); + return (f); +} + +void +svm_fifo_segment_free_fifo (svm_fifo_segment_private_t * s, svm_fifo_t * f) +{ + ssvm_shared_header_t *sh; + 
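  /*
   * Free side of the alloc pattern above: push the segment's shared heap,
   * drop the fifo from the vector kept behind sh->opaque[0], then pop the
   * heap.  That opaque[0] slot is the hand-off point: the master stores
   * the svm_fifo_segment_header_t there in svm_fifo_segment_create () and
   * the slave fishes it back out in svm_fifo_segment_attach (), so both
   * processes see the same fifo vector.
   */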
svm_fifo_segment_header_t *fsh; + void *oldheap; + int i; + + sh = s->ssvm.sh; + fsh = (svm_fifo_segment_header_t *) sh->opaque[0]; + oldheap = ssvm_push_heap (sh); + + for (i = 0; i < vec_len (fsh->fifos); i++) + { + if (fsh->fifos[i] == f) + { + vec_delete (fsh->fifos, 1, i); + goto found; + } + } + clib_warning ("fifo 0x%llx not found in fifo table...", f); + +found: + clib_mem_free (f); + ssvm_pop_heap (oldheap); +} + +void +svm_fifo_segment_init (u64 baseva, u32 timeout_in_seconds) +{ + svm_fifo_segment_main_t *sm = &svm_fifo_segment_main; + + sm->next_baseva = baseva; + sm->timeout_in_seconds = timeout_in_seconds; +} + +u32 +svm_fifo_segment_index (svm_fifo_segment_private_t * s) +{ + return s - svm_fifo_segment_main.segments; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/svm/svm_fifo_segment.h b/src/svm/svm_fifo_segment.h new file mode 100644 index 00000000..793fa7c8 --- /dev/null +++ b/src/svm/svm_fifo_segment.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_ssvm_fifo_segment_h__ +#define __included_ssvm_fifo_segment_h__ + +#include "svm_fifo.h" +#include "ssvm.h" + +typedef struct +{ + volatile svm_fifo_t **fifos; + u8 *segment_name; +} svm_fifo_segment_header_t; + +typedef struct +{ + ssvm_private_t ssvm; + svm_fifo_segment_header_t *h; +} svm_fifo_segment_private_t; + +typedef struct +{ + /** pool of segments */ + svm_fifo_segment_private_t *segments; + /* Where to put the next one */ + u64 next_baseva; + u32 timeout_in_seconds; +} svm_fifo_segment_main_t; + +extern svm_fifo_segment_main_t svm_fifo_segment_main; + +typedef struct +{ + char *segment_name; + u32 segment_size; + u32 new_segment_index; +} svm_fifo_segment_create_args_t; + +static inline svm_fifo_segment_private_t * +svm_fifo_get_segment (u32 segment_index) +{ + svm_fifo_segment_main_t *ssm = &svm_fifo_segment_main; + return vec_elt_at_index (ssm->segments, segment_index); +} + +#define foreach_ssvm_fifo_segment_api_error \ +_(OUT_OF_SPACE, "Out of space in segment", -200) + +typedef enum +{ +#define _(n,s,c) SSVM_FIFO_SEGMENT_API_ERROR_##n = c, + foreach_ssvm_fifo_segment_api_error +#undef _ +} ssvm_fifo_segment_api_error_enum_t; + +int svm_fifo_segment_create (svm_fifo_segment_create_args_t * a); +int svm_fifo_segment_attach (svm_fifo_segment_create_args_t * a); +void svm_fifo_segment_delete (svm_fifo_segment_private_t * s); + +svm_fifo_t *svm_fifo_segment_alloc_fifo (svm_fifo_segment_private_t * s, + u32 data_size_in_bytes); +void svm_fifo_segment_free_fifo (svm_fifo_segment_private_t * s, + svm_fifo_t * f); + +void svm_fifo_segment_init (u64 baseva, u32 timeout_in_seconds); + +u32 svm_fifo_segment_index (svm_fifo_segment_private_t * s); + +#endif /* __included_ssvm_fifo_segment_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git 
a/src/svm/test_svm_fifo1.c b/src/svm/test_svm_fifo1.c new file mode 100644 index 00000000..355653df --- /dev/null +++ b/src/svm/test_svm_fifo1.c @@ -0,0 +1,361 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "svm_fifo_segment.h" + +clib_error_t * +hello_world (int verbose) +{ + svm_fifo_segment_create_args_t _a, *a = &_a; + svm_fifo_segment_private_t *sp; + svm_fifo_t *f; + int rv; + u8 *test_data; + u8 *retrieved_data = 0; + clib_error_t *error = 0; + int pid = getpid (); + + memset (a, 0, sizeof (*a)); + + a->segment_name = "fifo-test1"; + a->segment_size = 256 << 10; + + rv = svm_fifo_segment_create (a); + + if (rv) + return clib_error_return (0, "svm_fifo_segment_create returned %d", rv); + + sp = svm_fifo_get_segment (a->new_segment_index); + + f = svm_fifo_segment_alloc_fifo (sp, 4096); + + if (f == 0) + return clib_error_return (0, "svm_fifo_segment_alloc_fifo failed"); + + test_data = format (0, "Hello world%c", 0); + vec_validate (retrieved_data, vec_len (test_data) - 1); + + while (svm_fifo_max_enqueue (f) >= vec_len (test_data)) + svm_fifo_enqueue_nowait (f, pid, vec_len (test_data), test_data); + + while (svm_fifo_max_dequeue (f) >= vec_len (test_data)) + svm_fifo_dequeue_nowait (f, pid, vec_len (retrieved_data), + retrieved_data); + + while (svm_fifo_max_enqueue (f) >= vec_len (test_data)) + svm_fifo_enqueue_nowait (f, pid, vec_len (test_data), test_data); + + while (svm_fifo_max_dequeue (f) >= vec_len (test_data)) + svm_fifo_dequeue_nowait (f, pid, vec_len (retrieved_data), + retrieved_data); + + if (!memcmp (retrieved_data, test_data, vec_len (test_data))) + error = clib_error_return (0, "data test OK, got '%s'", retrieved_data); + else + error = clib_error_return (0, "data test FAIL!"); + + svm_fifo_segment_free_fifo (sp, f); + + return error; +} + +clib_error_t * +master (int verbose) +{ + svm_fifo_segment_create_args_t _a, *a = &_a; + svm_fifo_segment_private_t *sp; + svm_fifo_t *f; + int rv; + u8 *test_data; + u8 *retrieved_data = 0; + int i; + int pid = getpid (); + + memset (a, 0, sizeof (*a)); + + a->segment_name = "fifo-test1"; + a->segment_size = 256 << 10; + + rv = svm_fifo_segment_create (a); + + if (rv) + return clib_error_return (0, "svm_fifo_segment_create returned %d", rv); + + sp = svm_fifo_get_segment (a->new_segment_index); + + f = svm_fifo_segment_alloc_fifo (sp, 4096); + + if (f == 0) + return clib_error_return (0, "svm_fifo_segment_alloc_fifo failed"); + + test_data = format (0, "Hello world%c", 0); + vec_validate (retrieved_data, vec_len (test_data) - 1); + + for (i = 0; i < 1000; i++) + svm_fifo_enqueue_nowait (f, pid, vec_len (test_data), test_data); + + return clib_error_return (0, "master (enqueue) done"); +} + +clib_error_t * +mempig (int verbose) +{ + svm_fifo_segment_create_args_t _a, *a = &_a; + svm_fifo_segment_private_t *sp; + svm_fifo_t *f; + svm_fifo_t **flist = 0; + int rv; + int i; + + memset (a, 0, sizeof (*a)); + + a->segment_name = "fifo-test1"; + 
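  /*
   * mempig () allocates 4 KB fifos until the 256 KB segment heap runs
   * out, frees them all and repeats; the two "created %d fifos" counts
   * printed below should match if svm_fifo_segment_free_fifo () really
   * returns memory to the segment heap.
   */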
a->segment_size = 256 << 10; + + rv = svm_fifo_segment_create (a); + + if (rv) + return clib_error_return (0, "svm_fifo_segment_create returned %d", rv); + + sp = svm_fifo_get_segment (a->new_segment_index); + + for (i = 0; i < 1000; i++) + { + f = svm_fifo_segment_alloc_fifo (sp, 4096); + if (f == 0) + break; + vec_add1 (flist, f); + } + + fformat (stdout, "Try #1: created %d fifos...\n", vec_len (flist)); + for (i = 0; i < vec_len (flist); i++) + { + f = flist[i]; + svm_fifo_segment_free_fifo (sp, f); + } + + _vec_len (flist) = 0; + + for (i = 0; i < 1000; i++) + { + f = svm_fifo_segment_alloc_fifo (sp, 4096); + if (f == 0) + break; + vec_add1 (flist, f); + } + + fformat (stdout, "Try #2: created %d fifos...\n", vec_len (flist)); + for (i = 0; i < vec_len (flist); i++) + { + f = flist[i]; + svm_fifo_segment_free_fifo (sp, f); + } + + return 0; +} + +clib_error_t * +offset (int verbose) +{ + svm_fifo_segment_create_args_t _a, *a = &_a; + svm_fifo_segment_private_t *sp; + svm_fifo_t *f; + int rv; + u32 *test_data = 0; + u32 *recovered_data = 0; + int i; + int pid = getpid (); + + memset (a, 0, sizeof (*a)); + + a->segment_name = "fifo-test1"; + a->segment_size = 256 << 10; + + rv = svm_fifo_segment_create (a); + + if (rv) + return clib_error_return (0, "svm_fifo_segment_create returned %d", rv); + + sp = svm_fifo_get_segment (a->new_segment_index); + + f = svm_fifo_segment_alloc_fifo (sp, 200 << 10); + + if (f == 0) + return clib_error_return (0, "svm_fifo_segment_alloc_fifo failed"); + + for (i = 0; i < (3 * 1024); i++) + vec_add1 (test_data, i); + + /* Enqueue the first 1024 u32's */ + svm_fifo_enqueue_nowait (f, pid, 4096 /* bytes to enqueue */ , + (u8 *) test_data); + + /* Enqueue the third 1024 u32's 2048 ahead of the current tail */ + svm_fifo_enqueue_with_offset (f, pid, 4096, 4096, (u8 *) & test_data[2048]); + + /* Enqueue the second 1024 u32's at the current tail */ + svm_fifo_enqueue_nowait (f, pid, 4096 /* bytes to enqueue */ , + (u8 *) & test_data[1024]); + + vec_validate (recovered_data, (3 * 1024) - 1); + + svm_fifo_dequeue_nowait (f, pid, 3 * 4096, (u8 *) recovered_data); + + for (i = 0; i < (3 * 1024); i++) + { + if (recovered_data[i] != test_data[i]) + { + clib_warning ("[%d] expected %d recovered %d", i, + test_data[i], recovered_data[i]); + return clib_error_return (0, "offset test FAILED"); + } + } + + return clib_error_return (0, "offset test OK"); +} + +clib_error_t * +slave (int verbose) +{ + svm_fifo_segment_create_args_t _a, *a = &_a; + svm_fifo_segment_private_t *sp; + svm_fifo_segment_header_t *fsh; + svm_fifo_t *f; + ssvm_shared_header_t *sh; + int rv; + u8 *test_data; + u8 *retrieved_data = 0; + int pid = getpid (); + int i; + + memset (a, 0, sizeof (*a)); + + a->segment_name = "fifo-test1"; + + rv = svm_fifo_segment_attach (a); + + if (rv) + return clib_error_return (0, "svm_fifo_segment_attach returned %d", rv); + + sp = svm_fifo_get_segment (a->new_segment_index); + sh = sp->ssvm.sh; + fsh = (svm_fifo_segment_header_t *) sh->opaque[0]; + + /* might wanna wait.. 
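for the master to create the fifo at this point: attach only
   * guarantees the segment exists, so a more careful client would poll
   * until vec_len (fsh->fifos) is non-zero before dereferencing
   * fsh->fifos[0]; this test simply assumes the master ran first.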
*/ + f = (svm_fifo_t *) fsh->fifos[0]; + + /* Lazy bastards united */ + test_data = format (0, "Hello world%c", 0); + vec_validate (retrieved_data, vec_len (test_data) - 1); + + for (i = 0; i < 1000; i++) + { + svm_fifo_dequeue_nowait (f, pid, vec_len (retrieved_data), + retrieved_data); + if (memcmp (retrieved_data, test_data, vec_len (retrieved_data))) + return clib_error_return (0, "retrieved data incorrect, '%s'", + retrieved_data); + } + + return clib_error_return (0, "slave (dequeue) done"); +} + + +int +test_ssvm_fifo1 (unformat_input_t * input) +{ + clib_error_t *error = 0; + int verbose = 0; + int test_id = 0; + + svm_fifo_segment_init (0x200000000ULL, 20); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "verbose %d", &verbose)) + ; + else if (unformat (input, "verbose")) + verbose = 1; + else if (unformat (input, "master")) + test_id = 1; + else if (unformat (input, "slave")) + test_id = 2; + else if (unformat (input, "mempig")) + test_id = 3; + else if (unformat (input, "offset")) + test_id = 4; + else + { + error = clib_error_create ("unknown input `%U'\n", + format_unformat_error, input); + goto out; + } + } + + switch (test_id) + { + case 0: + error = hello_world (verbose); + break; + + case 1: + error = master (verbose); + break; + + case 2: + error = slave (verbose); + break; + + case 3: + error = mempig (verbose); + break; + + case 4: + error = offset (verbose); + break; + + default: + error = clib_error_return (0, "test id %d unknown", test_id); + break; + } + +out: + if (error) + clib_error_report (error); + + return 0; +} + + + +int +main (int argc, char *argv[]) +{ + unformat_input_t i; + int r; + + unformat_init_command_line (&i, argv); + r = test_ssvm_fifo1 (&i); + unformat_free (&i); + return r; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/uri.am b/src/uri.am new file mode 100644 index 00000000..8cdd77c6 --- /dev/null +++ b/src/uri.am @@ -0,0 +1,22 @@ +# Copyright (c) 2016 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +noinst_PROGRAMS += uri_udp_test2 uri_tcp_test + +uri_udp_test2_SOURCES = uri/uri_udp_test2.c +uri_udp_test2_LDADD = libvlibmemoryclient.la libvlibapi.la libsvm.la \ + libvppinfra.la -lpthread -lm -lrt + +uri_tcp_test_SOURCES = uri/uri_tcp_test.c +uri_tcp_test_LDADD = libvlibmemoryclient.la libvlibapi.la libsvm.la \ + libvppinfra.la -lpthread -lm -lrt diff --git a/src/uri/uri_tcp_test.c b/src/uri/uri_tcp_test.c new file mode 100644 index 00000000..ed5a37d8 --- /dev/null +++ b/src/uri/uri_tcp_test.c @@ -0,0 +1,916 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "../vnet/session/application_interface.h" + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +/* declare message handlers for each api */ + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) +#define vl_printfun +#include +#undef vl_printfun + +/* Satisfy external references when not linking with -lvlib */ +vlib_main_t vlib_global_main; +vlib_main_t **vlib_mains; + +typedef struct +{ + svm_fifo_t * server_rx_fifo; + svm_fifo_t * server_tx_fifo; + + u32 vpp_session_index; + u32 vpp_session_thread; +} session_t; + +typedef enum +{ + STATE_START, + STATE_READY, + STATE_DISCONNECTING, + STATE_FAILED +} connection_state_t; + +typedef struct +{ + /* vpe input queue */ + unix_shared_memory_queue_t *vl_input_queue; + + /* API client handle */ + u32 my_client_index; + + /* The URI we're playing with */ + u8 * uri; + + /* Session pool */ + session_t * sessions; + + /* Hash table for disconnect processing */ + uword * session_index_by_vpp_handles; + + /* intermediate rx buffer */ + u8 * rx_buf; + + /* URI for slave's connect */ + u8 * connect_uri; + + u32 connected_session_index; + + int i_am_master; + + /* drop all packets */ + int drop_packets; + + /* Our event queue */ + unix_shared_memory_queue_t * our_event_queue; + + /* $$$ single thread only for the moment */ + unix_shared_memory_queue_t * vpp_event_queue; + + pid_t my_pid; + + /* For deadman timers */ + clib_time_t clib_time; + + /* State of the connection, shared between msg RX thread and main thread */ + volatile connection_state_t state; + + /* Signal variables */ + volatile int time_to_stop; + volatile int time_to_print_stats; + + u32 configured_segment_size; + + /* VNET_API_ERROR_FOO -> "Foo" hash table */ + uword * error_string_by_error_number; + + /* convenience */ + svm_fifo_segment_main_t * segment_main; + + u8 *connect_test_data; +} uri_tcp_test_main_t; + +uri_tcp_test_main_t uri_tcp_test_main; + +#if CLIB_DEBUG > 0 +#define NITER 10000 +#else +#define NITER 4000000 +#endif + +int +wait_for_state_change (uri_tcp_test_main_t * utm, connection_state_t state) +{ +#if CLIB_DEBUG > 0 +#define TIMEOUT 600.0 +#else +#define TIMEOUT 600.0 +#endif + + f64 timeout = clib_time_now (&utm->clib_time) + TIMEOUT; + + while (clib_time_now (&utm->clib_time) < timeout) + { + if (utm->state == state) + return 0; + if (utm->state == STATE_FAILED) + return -1; + } + clib_warning ("timeout waiting for STATE_READY"); + return -1; +} + +static void +init_error_string_table (uri_tcp_test_main_t * utm) +{ + utm->error_string_by_error_number = hash_create (0, sizeof (uword)); + +#define _(n,v,s) hash_set (utm->error_string_by_error_number, -v, s); + foreach_vnet_api_error; +#undef _ + + hash_set (utm->error_string_by_error_number, 99, "Misc"); +} + +static void +stop_signal (int signum) +{ + uri_tcp_test_main_t *um = &uri_tcp_test_main; + + um->time_to_stop = 1; +} + +static void +stats_signal 
(int signum) +{ + uri_tcp_test_main_t *um = &uri_tcp_test_main; + + um->time_to_print_stats = 1; +} + +static clib_error_t * +setup_signal_handlers (void) +{ + signal (SIGINT, stats_signal); + signal (SIGQUIT, stop_signal); + signal (SIGTERM, stop_signal); + + return 0; +} + +void +vlib_cli_output (struct vlib_main_t *vm, char *fmt, ...) +{ + clib_warning ("BUG"); +} + +int +connect_to_vpp (char *name) +{ + uri_tcp_test_main_t *utm = &uri_tcp_test_main; + api_main_t *am = &api_main; + + if (vl_client_connect_to_vlib ("/vpe-api", name, 32) < 0) + return -1; + + utm->vl_input_queue = am->shmem_hdr->vl_input_queue; + utm->my_client_index = am->my_client_index; + + return 0; +} + +static void +vl_api_map_another_segment_t_handler (vl_api_map_another_segment_t *mp) +{ + svm_fifo_segment_create_args_t _a, *a = &_a; + int rv; + + a->segment_name = (char *) mp->segment_name; + a->segment_size = mp->segment_size; + /* Attach to the segment vpp created */ + rv = svm_fifo_segment_attach (a); + if (rv) + { + clib_warning ("svm_fifo_segment_attach ('%s') failed", + mp->segment_name); + return; + } + clib_warning ("Mapped new segment '%s' size %d", mp->segment_name, + mp->segment_size); +} + +static void +vl_api_disconnect_session_t_handler (vl_api_disconnect_session_t * mp) +{ + uri_tcp_test_main_t *utm = &uri_tcp_test_main; + session_t * session; + vl_api_disconnect_session_reply_t * rmp; + uword * p; + int rv = 0; + u64 key; + + key = (((u64)mp->session_thread_index) << 32) | (u64)mp->session_index; + + p = hash_get (utm->session_index_by_vpp_handles, key); + + if (p) + { + session = pool_elt_at_index (utm->sessions, p[0]); + hash_unset (utm->session_index_by_vpp_handles, key); + pool_put (utm->sessions, session); + } + else + { + clib_warning ("couldn't find session key %llx", key); + rv = -11; + } + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + + rmp->_vl_msg_id = ntohs (VL_API_DISCONNECT_SESSION_REPLY); + rmp->retval = rv; + rmp->session_index = mp->session_index; + rmp->session_thread_index = mp->session_thread_index; + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *)&rmp); +} + +static void +vl_api_reset_session_t_handler (vl_api_reset_session_t * mp) +{ + uri_tcp_test_main_t *utm = &uri_tcp_test_main; + session_t * session; + vl_api_reset_session_reply_t * rmp; + uword * p; + int rv = 0; + u64 key; + + key = (((u64)mp->session_thread_index) << 32) | (u64)mp->session_index; + + p = hash_get(utm->session_index_by_vpp_handles, key); + + if (p) + { + session = pool_elt_at_index(utm->sessions, p[0]); + hash_unset(utm->session_index_by_vpp_handles, key); + pool_put(utm->sessions, session); + } + else + { + clib_warning("couldn't find session key %llx", key); + rv = -11; + } + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_DISCONNECT_SESSION_REPLY); + rmp->retval = rv; + rmp->session_index = mp->session_index; + rmp->session_thread_index = mp->session_thread_index; + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *)&rmp); +} + +void +handle_fifo_event_connect_rx (uri_tcp_test_main_t *utm, session_fifo_event_t * e) +{ + svm_fifo_t * rx_fifo; + int n_read, bytes; + + rx_fifo = e->fifo; + + bytes = e->enqueue_length; + do + { + n_read = svm_fifo_dequeue_nowait (rx_fifo, 0, vec_len(utm->rx_buf), + utm->rx_buf); + if (n_read > 0) + bytes -= n_read; + } + while (n_read < 0 || bytes > 0); + + // bytes_to_read = svm_fifo_max_dequeue (rx_fifo); + // + // bytes_to_read = vec_len(utm->rx_buf) > bytes_to_read ? 
+ // bytes_to_read : vec_len(utm->rx_buf); + // + // buffer_offset = 0; + // while (bytes_to_read > 0) + // { + // rv = svm_fifo_dequeue_nowait2 (rx_fifo, mypid, + // bytes_to_read, + // utm->rx_buf + buffer_offset); + // if (rv > 0) + // { + // bytes_to_read -= rv; + // buffer_offset += rv; + // bytes_received += rv; + // } + // } + + + // while (bytes_received < bytes_sent) + // { + // rv = svm_fifo_dequeue_nowait2 (rx_fifo, mypid, + // vec_len (utm->rx_buf), + // utm->rx_buf); + // if (rv > 0) + // { + //#if CLIB_DEBUG > 0 + // int j; + // for (j = 0; j < rv; j++) + // { + // if (utm->rx_buf[j] != ((bytes_received + j) & 0xff)) + // { + // clib_warning ("error at byte %lld, 0x%x not 0x%x", + // bytes_received + j, + // utm->rx_buf[j], + // ((bytes_received + j )&0xff)); + // } + // } + //#endif + // bytes_received += (u64) rv; + // } + // } +} + +void +handle_connect_event_queue (uri_tcp_test_main_t * utm) +{ + session_fifo_event_t _e, *e = &_e;; + + unix_shared_memory_queue_sub (utm->our_event_queue, (u8 *) e, 0 /* nowait */); + switch (e->event_type) + { + case FIFO_EVENT_SERVER_RX: + handle_fifo_event_connect_rx (utm, e); + break; + + case FIFO_EVENT_SERVER_EXIT: + return; + + default: + clib_warning("unknown event type %d", e->event_type); + break; + } +} + +void +uri_tcp_connect_send (uri_tcp_test_main_t *utm) +{ + u8 *test_data = utm->connect_test_data; + u64 bytes_sent = 0; + int rv; + int mypid = getpid(); + session_t * session; + svm_fifo_t *tx_fifo; + int buffer_offset, bytes_to_send = 0; + session_fifo_event_t evt; + static int serial_number = 0; + int i; + u32 max_chunk = 64 << 10, write; + + session = pool_elt_at_index (utm->sessions, utm->connected_session_index); + tx_fifo = session->server_tx_fifo; + + vec_validate (utm->rx_buf, vec_len (test_data) - 1); + + for (i = 0; i < 10; i++) + { + bytes_to_send = vec_len (test_data); + buffer_offset = 0; + while (bytes_to_send > 0) + { + write = bytes_to_send > max_chunk ? 
max_chunk : bytes_to_send; + rv = svm_fifo_enqueue_nowait (tx_fifo, mypid, write, + test_data + buffer_offset); + + if (rv > 0) + { + bytes_to_send -= rv; + buffer_offset += rv; + bytes_sent += rv; + + /* Fabricate TX event, send to vpp */ + evt.fifo = tx_fifo; + evt.event_type = FIFO_EVENT_SERVER_TX; + /* $$$$ for event logging */ + evt.enqueue_length = rv; + evt.event_id = serial_number++; + + unix_shared_memory_queue_add (utm->vpp_event_queue, (u8 *) &evt, + 0 /* do wait for mutex */); + } + } + } +} + +static void +uri_tcp_client_test (uri_tcp_test_main_t * utm) +{ + vl_api_connect_uri_t * cmp; + vl_api_disconnect_session_t *dmp; + session_t *connected_session; + int i; + + cmp = vl_msg_api_alloc (sizeof (*cmp)); + memset (cmp, 0, sizeof (*cmp)); + + cmp->_vl_msg_id = ntohs (VL_API_CONNECT_URI); + cmp->client_index = utm->my_client_index; + cmp->context = ntohl(0xfeedface); + memcpy (cmp->uri, utm->connect_uri, vec_len (utm->connect_uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *)&cmp); + + if (wait_for_state_change (utm, STATE_READY)) + { + return; + } + + /* Init test data */ + vec_validate (utm->connect_test_data, 64 * 1024 - 1); + for (i = 0; i < vec_len (utm->connect_test_data); i++) + utm->connect_test_data[i] = i & 0xff; + + /* Start reader thread */ + /* handle_connect_event_queue (utm); */ + + /* Start send */ + uri_tcp_connect_send (utm); + + /* Disconnect */ + connected_session = pool_elt_at_index(utm->sessions, + utm->connected_session_index); + dmp = vl_msg_api_alloc (sizeof (*dmp)); + memset (dmp, 0, sizeof (*dmp)); + dmp->_vl_msg_id = ntohs (VL_API_DISCONNECT_SESSION); + dmp->client_index = utm->my_client_index; + dmp->session_index = connected_session->vpp_session_index; + dmp->session_thread_index = connected_session->vpp_session_thread; + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *)&dmp); +} + +void +handle_fifo_event_server_rx (uri_tcp_test_main_t *utm, session_fifo_event_t * e) +{ + svm_fifo_t * rx_fifo, * tx_fifo; + int n_read; + + session_fifo_event_t evt; + unix_shared_memory_queue_t *q; + int rv, bytes; + + rx_fifo = e->fifo; + tx_fifo = utm->sessions[rx_fifo->client_session_index].server_tx_fifo; + + bytes = e->enqueue_length; + do + { + n_read = svm_fifo_dequeue_nowait (rx_fifo, 0, vec_len(utm->rx_buf), + utm->rx_buf); + + /* Reflect if a non-drop session */ + if (!utm->drop_packets && n_read > 0) + { + do + { + rv = svm_fifo_enqueue_nowait (tx_fifo, 0, n_read, utm->rx_buf); + } + while (rv == -2); + + /* Fabricate TX event, send to vpp */ + evt.fifo = tx_fifo; + evt.event_type = FIFO_EVENT_SERVER_TX; + /* $$$$ for event logging */ + evt.enqueue_length = n_read; + evt.event_id = e->event_id; + q = utm->vpp_event_queue; + unix_shared_memory_queue_add (q, (u8 *) &evt, 0 /* do wait for mutex */); + } + + if (n_read > 0) + bytes -= n_read; + } + while (n_read < 0 || bytes > 0); +} + +void +handle_event_queue (uri_tcp_test_main_t * utm) +{ + session_fifo_event_t _e, *e = &_e;; + + while (1) + { + unix_shared_memory_queue_sub (utm->our_event_queue, (u8 *)e, + 0 /* nowait */); + switch (e->event_type) + { + case FIFO_EVENT_SERVER_RX: + handle_fifo_event_server_rx (utm, e); + break; + + case FIFO_EVENT_SERVER_EXIT: + return; + + default: + clib_warning ("unknown event type %d", e->event_type); + break; + } + if (PREDICT_FALSE(utm->time_to_stop == 1)) + break; + if (PREDICT_FALSE(utm->time_to_print_stats == 1)) + { + utm->time_to_print_stats = 0; + fformat(stdout, "%d connections\n", pool_elts (utm->sessions)); + } + } +} + +static void 
+vl_api_bind_uri_reply_t_handler (vl_api_bind_uri_reply_t * mp) +{ + uri_tcp_test_main_t *utm = &uri_tcp_test_main; + svm_fifo_segment_create_args_t _a, *a = &_a; + int rv; + + if (mp->retval) + { + clib_warning("bind failed: %d", mp->retval); + return; + } + + if (mp->segment_name_length == 0) + { + clib_warning("segment_name_length zero"); + return; + } + + a->segment_name = (char *) mp->segment_name; + a->segment_size = mp->segment_size; + + ASSERT(mp->server_event_queue_address); + + /* Attach to the segment vpp created */ + rv = svm_fifo_segment_attach (a); + if (rv) + { + clib_warning("svm_fifo_segment_attach ('%s') failed", mp->segment_name); + return; + } + + utm->our_event_queue = + (unix_shared_memory_queue_t *) mp->server_event_queue_address; + + utm->state = STATE_READY; +} + +static void +vl_api_connect_uri_reply_t_handler (vl_api_connect_uri_reply_t * mp) +{ + uri_tcp_test_main_t *utm = &uri_tcp_test_main; + svm_fifo_segment_create_args_t _a, *a = &_a; + session_t *session; + u32 session_index; + svm_fifo_t *rx_fifo, *tx_fifo; + int rv; + + if (mp->retval) + { + clib_warning ("connection failed with code: %d", mp->retval); + utm->state = STATE_FAILED; + return; + } + /* + * Attatch to segment + */ + + if (mp->segment_name_length == 0) + { + clib_warning ("segment_name_length zero"); + utm->state = STATE_FAILED; + return; + } + + a->segment_name = (char *) mp->segment_name; + a->segment_size = mp->segment_size; + + ASSERT(mp->client_event_queue_address); + + /* Attach to the segment vpp created */ + rv = svm_fifo_segment_attach (a); + if (rv) + { + clib_warning ("svm_fifo_segment_attach ('%s') failed", + mp->segment_name); + return; + } + + /* + * Save the queues + */ + + utm->our_event_queue = (unix_shared_memory_queue_t *) + mp->client_event_queue_address; + + utm->vpp_event_queue = (unix_shared_memory_queue_t *) + mp->vpp_event_queue_address; + + /* + * Setup session + */ + + pool_get (utm->sessions, session); + session_index = session - utm->sessions; + + rx_fifo = (svm_fifo_t *)mp->server_rx_fifo; + rx_fifo->client_session_index = session_index; + tx_fifo = (svm_fifo_t *)mp->server_tx_fifo; + tx_fifo->client_session_index = session_index; + + session->server_rx_fifo = rx_fifo; + session->server_tx_fifo = tx_fifo; + session->vpp_session_index = mp->session_index; + session->vpp_session_thread = mp->session_thread_index; + + /* Save handle */ + utm->connected_session_index = session_index; + + utm->state = STATE_READY; +} + +void +uri_tcp_bind (uri_tcp_test_main_t *utm) +{ + vl_api_bind_uri_t * bmp; + u32 fifo_size = 3 << 20; + bmp = vl_msg_api_alloc (sizeof (*bmp)); + memset (bmp, 0, sizeof (*bmp)); + + bmp->_vl_msg_id = ntohs (VL_API_BIND_URI); + bmp->client_index = utm->my_client_index; + bmp->context = ntohl(0xfeedface); + bmp->initial_segment_size = 256<<20; /* size of initial segment */ + bmp->options[SESSION_OPTIONS_FLAGS] = + SESSION_OPTIONS_FLAGS_USE_FIFO | SESSION_OPTIONS_FLAGS_ADD_SEGMENT; + bmp->options[SESSION_OPTIONS_RX_FIFO_SIZE] = fifo_size; + bmp->options[SESSION_OPTIONS_TX_FIFO_SIZE] = fifo_size; + bmp->options[SESSION_OPTIONS_ADD_SEGMENT_SIZE] = 128<<20; + memcpy (bmp->uri, utm->uri, vec_len (utm->uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *)&bmp); +} + +static void +vl_api_unbind_uri_reply_t_handler (vl_api_unbind_uri_reply_t *mp) +{ + uri_tcp_test_main_t *utm = &uri_tcp_test_main; + + if (mp->retval != 0) + clib_warning ("returned %d", ntohl(mp->retval)); + + utm->state = STATE_START; +} + +void +uri_tcp_unbind (uri_tcp_test_main_t *utm) 
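/*
 * Tears down the binding requested by uri_tcp_bind () above.  For scale,
 * that bind asks vpp for a 256 MB initial segment (256 << 20), 3 MB rx
 * and tx fifos (3 << 20) and 128 MB add-on segments (128 << 20); the
 * reply is handled by vl_api_bind_uri_reply_t_handler (), which attaches
 * this process to the segment vpp created and saves the server event
 * queue address.
 */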
+{ + vl_api_unbind_uri_t * ump; + + ump = vl_msg_api_alloc (sizeof (*ump)); + memset (ump, 0, sizeof (*ump)); + + ump->_vl_msg_id = ntohs (VL_API_UNBIND_URI); + ump->client_index = utm->my_client_index; + memcpy (ump->uri, utm->uri, vec_len (utm->uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *)&ump); +} + +static void +vl_api_accept_session_t_handler (vl_api_accept_session_t * mp) +{ + uri_tcp_test_main_t *utm = &uri_tcp_test_main; + vl_api_accept_session_reply_t *rmp; + svm_fifo_t * rx_fifo, * tx_fifo; + session_t * session; + static f64 start_time; + u64 key; + u32 session_index; + + if (start_time == 0.0) + start_time = clib_time_now (&utm->clib_time); + + utm->vpp_event_queue = (unix_shared_memory_queue_t *) + mp->vpp_event_queue_address; + + /* Allocate local session and set it up */ + pool_get (utm->sessions, session); + session_index = session - utm->sessions; + + rx_fifo = (svm_fifo_t *)mp->server_rx_fifo; + rx_fifo->client_session_index = session_index; + tx_fifo = (svm_fifo_t *)mp->server_tx_fifo; + tx_fifo->client_session_index = session_index; + + session->server_rx_fifo = rx_fifo; + session->server_tx_fifo = tx_fifo; + + /* Add it to lookup table */ + key = (((u64)mp->session_thread_index) << 32) | (u64)mp->session_index; + hash_set (utm->session_index_by_vpp_handles, key, session_index); + + utm->state = STATE_READY; + + /* Stats printing */ + if (pool_elts (utm->sessions) && (pool_elts(utm->sessions) % 20000) == 0) + { + f64 now = clib_time_now (&utm->clib_time); + fformat (stdout, "%d active sessions in %.2f seconds, %.2f/sec...\n", + pool_elts(utm->sessions), now - start_time, + (f64)pool_elts(utm->sessions) / (now - start_time)); + } + + /* Send accept reply to vpp */ + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_ACCEPT_SESSION_REPLY); + rmp->session_type = mp->session_type; + rmp->session_index = mp->session_index; + rmp->session_thread_index = mp->session_thread_index; + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *)&rmp); +} + +void +uri_tcp_server_test (uri_tcp_test_main_t * utm) +{ + + /* Bind to uri */ + uri_tcp_bind (utm); + + if (wait_for_state_change (utm, STATE_READY)) + { + clib_warning ("timeout waiting for STATE_READY"); + return; + } + + /* Enter handle event loop */ + handle_event_queue (utm); + + /* Cleanup */ + uri_tcp_unbind (utm); + + if (wait_for_state_change (utm, STATE_START)) + { + clib_warning ("timeout waiting for STATE_START"); + return; + } + + fformat (stdout, "Test complete...\n"); +} + +#define foreach_uri_msg \ +_(BIND_URI_REPLY, bind_uri_reply) \ +_(UNBIND_URI_REPLY, unbind_uri_reply) \ +_(ACCEPT_SESSION, accept_session) \ +_(CONNECT_URI_REPLY, connect_uri_reply) \ +_(DISCONNECT_SESSION, disconnect_session) \ +_(RESET_SESSION, reset_session) \ +_(MAP_ANOTHER_SEGMENT, map_another_segment) + +void +uri_api_hookup (uri_tcp_test_main_t * utm) +{ +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_uri_msg; +#undef _ +} + +int +main (int argc, char **argv) +{ + uri_tcp_test_main_t *utm = &uri_tcp_test_main; + unformat_input_t _argv, *a = &_argv; + u8 *chroot_prefix; + u8 *heap; + u8 * bind_name = (u8 *) "tcp://0.0.0.0/1234"; + u32 tmp; + mheap_t *h; + session_t * session; + int i; + int i_am_master = 1, drop_packets = 0; + + clib_mem_init (0, 256 << 20); + + heap = clib_mem_get_per_cpu_heap (); + h = mheap_header 
(heap); + + /* make the main heap thread-safe */ + h->flags |= MHEAP_FLAG_THREAD_SAFE; + + vec_validate (utm->rx_buf, 65536); + + utm->session_index_by_vpp_handles = + hash_create (0, sizeof(uword)); + + utm->my_pid = getpid(); + utm->configured_segment_size = 1<<20; + + clib_time_init (&utm->clib_time); + init_error_string_table (utm); + svm_fifo_segment_init(0x200000000ULL, 20); + unformat_init_command_line (a, argv); + + while (unformat_check_input (a) != UNFORMAT_END_OF_INPUT) + { + if (unformat (a, "chroot prefix %s", &chroot_prefix)) + { + vl_set_memory_root_path ((char *) chroot_prefix); + } + else if (unformat (a, "uri %s", &bind_name)) + ; + else if (unformat (a, "segment-size %dM", &tmp)) + utm->configured_segment_size = tmp<<20; + else if (unformat (a, "segment-size %dG", &tmp)) + utm->configured_segment_size = tmp<<30; + else if (unformat (a, "master")) + i_am_master = 1; + else if (unformat (a, "slave")) + i_am_master = 0; + else if (unformat (a, "drop")) + drop_packets = 1; + else + { + fformat (stderr, "%s: usage [master|slave]\n"); + exit (1); + } + } + + utm->uri = format (0, "%s%c", bind_name, 0); + utm->i_am_master = i_am_master; + utm->segment_main = &svm_fifo_segment_main; + utm->drop_packets = drop_packets; + + utm->connect_uri = format (0, "tcp://6.0.1.2/1234%c", 0); + + setup_signal_handlers(); + uri_api_hookup (utm); + + if (connect_to_vpp (i_am_master? "uri_tcp_server":"uri_tcp_client") < 0) + { + svm_region_exit (); + fformat (stderr, "Couldn't connect to vpe, exiting...\n"); + exit (1); + } + + if (i_am_master == 0) + { + uri_tcp_client_test (utm); + exit (0); + } + + /* $$$$ hack preallocation */ + for (i = 0; i < 200000; i++) + { + pool_get (utm->sessions, session); + memset (session, 0, sizeof (*session)); + } + for (i = 0; i < 200000; i++) + pool_put_index (utm->sessions, i); + + uri_tcp_server_test (utm); + + vl_client_disconnect_from_vlib (); + exit (0); +} diff --git a/src/uri/uri_udp_test.c b/src/uri/uri_udp_test.c new file mode 100644 index 00000000..6f5284c9 --- /dev/null +++ b/src/uri/uri_udp_test.c @@ -0,0 +1,553 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +/* declare message handlers for each api */ + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) 
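/*
 * The repeated includes above are VPP's X-macro convention: the same
 * generated API header is pulled in several times with vl_typedefs,
 * vl_endianfun and vl_printfun defined in turn, so a single message list
 * yields the structs, the endian converters and the print helpers.  A
 * minimal, self-contained sketch of the same technique, using a made-up
 * message list that is not part of the VPP API:
 */
#define foreach_example_msg   \
_(EXAMPLE_BIND, example_bind) \
_(EXAMPLE_UNBIND, example_unbind)

typedef enum
{
#define _(N,n) N,
  foreach_example_msg
#undef _
  EXAMPLE_N_MSGS,
} example_msg_id_t;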
+#define vl_printfun +#include +#undef vl_printfun + +/* Satisfy external references when not linking with -lvlib */ +vlib_main_t vlib_global_main; +vlib_main_t **vlib_mains; + +typedef enum +{ + STATE_START, + STATE_READY, + STATE_DISCONNECTING, +} connection_state_t; + +typedef struct +{ + svm_fifo_t *server_rx_fifo; + svm_fifo_t *server_tx_fifo; +} session_t; + +typedef struct +{ + /* vpe input queue */ + unix_shared_memory_queue_t *vl_input_queue; + + /* API client handle */ + u32 my_client_index; + + /* The URI we're playing with */ + u8 *uri; + + /* Session pool */ + session_t *sessions; + + /* Hash table for disconnect processing */ + uword *session_index_by_vpp_handles; + + /* fifo segment */ + svm_fifo_segment_private_t *seg; + + /* intermediate rx buffer */ + u8 *rx_buf; + + /* Our event queue */ + unix_shared_memory_queue_t *our_event_queue; + + /* $$$ single thread only for the moment */ + unix_shared_memory_queue_t *vpp_event_queue; + + /* For deadman timers */ + clib_time_t clib_time; + + /* State of the connection, shared between msg RX thread and main thread */ + volatile connection_state_t state; + + volatile int time_to_stop; + volatile int time_to_print_stats; + + /* VNET_API_ERROR_FOO -> "Foo" hash table */ + uword *error_string_by_error_number; +} uri_udp_test_main_t; + +#if CLIB_DEBUG > 0 +#define NITER 1000 +#else +#define NITER 1000000 +#endif + +uri_udp_test_main_t uri_udp_test_main; + +static void +stop_signal (int signum) +{ + uri_udp_test_main_t *um = &uri_udp_test_main; + + um->time_to_stop = 1; +} + +static void +stats_signal (int signum) +{ + uri_udp_test_main_t *um = &uri_udp_test_main; + + um->time_to_print_stats = 1; +} + +static clib_error_t * +setup_signal_handlers (void) +{ + signal (SIGINT, stats_signal); + signal (SIGQUIT, stop_signal); + signal (SIGTERM, stop_signal); + + return 0; +} + +u8 * +format_api_error (u8 * s, va_list * args) +{ + uri_udp_test_main_t *utm = va_arg (*args, uri_udp_test_main_t *); + i32 error = va_arg (*args, u32); + uword *p; + + p = hash_get (utm->error_string_by_error_number, -error); + + if (p) + s = format (s, "%s", p[0]); + else + s = format (s, "%d", error); + return s; +} + +int +wait_for_state_change (uri_udp_test_main_t * utm, connection_state_t state) +{ + f64 timeout = clib_time_now (&utm->clib_time) + 5.0; + + while (clib_time_now (&utm->clib_time) < timeout) + { + if (utm->state == state) + return 0; + } + return -1; +} + +static void +vl_api_bind_uri_reply_t_handler (vl_api_bind_uri_reply_t * mp) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + svm_fifo_segment_create_args_t _a, *a = &_a; + int rv; + + if (mp->segment_name_length == 0) + { + clib_warning ("segment_name_length zero"); + return; + } + + a->segment_name = (char *) mp->segment_name; + + /* Attach to the segment vpp created */ + rv = svm_fifo_segment_attach (a); + if (rv) + { + clib_warning ("sm_fifo_segment_create ('%s') failed", mp->segment_name); + return; + } + + utm->our_event_queue = (unix_shared_memory_queue_t *) + mp->server_event_queue_address; + + utm->state = STATE_READY; +} + +static void +vl_api_unbind_uri_reply_t_handler (vl_api_unbind_uri_reply_t * mp) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + + if (mp->retval != 0) + clib_warning ("returned %d", ntohl (mp->retval)); + + utm->state = STATE_START; +} + +static void +vl_api_accept_session_t_handler (vl_api_accept_session_t * mp) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + vl_api_accept_session_reply_t *rmp; + svm_fifo_t *rx_fifo, *tx_fifo; + session_t *session; 
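  /*
   * Sessions are tracked under a 64-bit key that packs the vpp thread
   * index into the upper 32 bits and the session index into the lower
   * 32, e.g. thread 1, session 7 hashes under key 0x100000007; the
   * disconnect handler below looks the session up with the same key.
   */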
+ static f64 start_time; + u64 key; + + if (start_time == 0.0) + start_time = clib_time_now (&utm->clib_time); + + utm->vpp_event_queue = (unix_shared_memory_queue_t *) + mp->vpp_event_queue_address; + + pool_get (utm->sessions, session); + + rx_fifo = (svm_fifo_t *) mp->server_rx_fifo; + rx_fifo->client_session_index = session - utm->sessions; + tx_fifo = (svm_fifo_t *) mp->server_tx_fifo; + tx_fifo->client_session_index = session - utm->sessions; + + session->server_rx_fifo = rx_fifo; + session->server_tx_fifo = tx_fifo; + + key = (((u64) mp->session_thread_index) << 32) | (u64) mp->session_index; + + hash_set (utm->session_index_by_vpp_handles, key, session - utm->sessions); + + utm->state = STATE_READY; + + if (pool_elts (utm->sessions) && (pool_elts (utm->sessions) % 20000) == 0) + { + f64 now = clib_time_now (&utm->clib_time); + fformat (stdout, "%d active sessions in %.2f seconds, %.2f/sec...\n", + pool_elts (utm->sessions), now - start_time, + (f64) pool_elts (utm->sessions) / (now - start_time)); + } + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_ACCEPT_SESSION_REPLY); + rmp->session_type = mp->session_type; + rmp->session_index = mp->session_index; + rmp->session_thread_index = mp->session_thread_index; + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & rmp); +} + +static void +vl_api_disconnect_session_t_handler (vl_api_disconnect_session_t * mp) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + session_t *session; + vl_api_disconnect_session_reply_t *rmp; + uword *p; + int rv = 0; + u64 key; + + key = (((u64) mp->session_thread_index) << 32) | (u64) mp->session_index; + + p = hash_get (utm->session_index_by_vpp_handles, key); + + if (p) + { + session = pool_elt_at_index (utm->sessions, p[0]); + hash_unset (utm->session_index_by_vpp_handles, key); + pool_put (utm->sessions, session); + } + else + { + clib_warning ("couldn't find session key %llx", key); + rv = -11; + } + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_DISCONNECT_SESSION_REPLY); + rmp->retval = rv; + rmp->session_index = mp->session_index; + rmp->session_thread_index = mp->session_thread_index; + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & rmp); +} + +#define foreach_uri_msg \ +_(BIND_URI_REPLY, bind_uri_reply) \ +_(UNBIND_URI_REPLY, unbind_uri_reply) \ +_(ACCEPT_SESSION, accept_session) \ +_(DISCONNECT_SESSION, disconnect_session) + +void +uri_api_hookup (uri_udp_test_main_t * utm) +{ +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_uri_msg; +#undef _ + +} + + +int +connect_to_vpp (char *name) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + api_main_t *am = &api_main; + + if (vl_client_connect_to_vlib ("/vpe-api", name, 32) < 0) + return -1; + + utm->vl_input_queue = am->shmem_hdr->vl_input_queue; + utm->my_client_index = am->my_client_index; + + return 0; +} + +void +vlib_cli_output (struct vlib_main_t *vm, char *fmt, ...) 
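/*
 * Link-time stub: the test client is not linked against vlib (see the
 * "Satisfy external references" note above), so this definition only
 * exists to resolve stray references and is never expected to run,
 * hence the BUG warning.
 */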
+{ + clib_warning ("BUG"); +} + +static void +init_error_string_table (uri_udp_test_main_t * utm) +{ + utm->error_string_by_error_number = hash_create (0, sizeof (uword)); + +#define _(n,v,s) hash_set (utm->error_string_by_error_number, -v, s); + foreach_vnet_api_error; +#undef _ + + hash_set (utm->error_string_by_error_number, 99, "Misc"); +} + +void +handle_fifo_event_server_rx (uri_udp_test_main_t * utm, + session_fifo_event_t * e) +{ + svm_fifo_t *rx_fifo, *tx_fifo; + int nbytes; + + session_fifo_event_t evt; + unix_shared_memory_queue_t *q; + int rv; + + rx_fifo = e->fifo; + tx_fifo = utm->sessions[rx_fifo->client_session_index].server_tx_fifo; + + do + { + nbytes = svm_fifo_dequeue_nowait (rx_fifo, 0, + vec_len (utm->rx_buf), utm->rx_buf); + } + while (nbytes <= 0); + do + { + rv = svm_fifo_enqueue_nowait (tx_fifo, 0, nbytes, utm->rx_buf); + } + while (rv == -2); + + /* Fabricate TX event, send to vpp */ + evt.fifo = tx_fifo; + evt.event_type = FIFO_EVENT_SERVER_TX; + /* $$$$ for event logging */ + evt.enqueue_length = nbytes; + evt.event_id = e->event_id; + q = utm->vpp_event_queue; + unix_shared_memory_queue_add (q, (u8 *) & evt, 0 /* do wait for mutex */ ); +} + +void +handle_event_queue (uri_udp_test_main_t * utm) +{ + session_fifo_event_t _e, *e = &_e;; + + while (1) + { + unix_shared_memory_queue_sub (utm->our_event_queue, (u8 *) e, + 0 /* nowait */ ); + switch (e->event_type) + { + case FIFO_EVENT_SERVER_RX: + handle_fifo_event_server_rx (utm, e); + break; + + case FIFO_EVENT_SERVER_EXIT: + return; + + default: + clib_warning ("unknown event type %d", e->event_type); + break; + } + if (PREDICT_FALSE (utm->time_to_stop == 1)) + break; + if (PREDICT_FALSE (utm->time_to_print_stats == 1)) + { + utm->time_to_print_stats = 0; + fformat (stdout, "%d connections\n", pool_elts (utm->sessions)); + } + } +} + +void +uri_udp_test (uri_udp_test_main_t * utm) +{ + vl_api_bind_uri_t *bmp; + vl_api_unbind_uri_t *ump; + + bmp = vl_msg_api_alloc (sizeof (*bmp)); + memset (bmp, 0, sizeof (*bmp)); + + bmp->_vl_msg_id = ntohs (VL_API_BIND_URI); + bmp->client_index = utm->my_client_index; + bmp->context = ntohl (0xfeedface); + bmp->segment_size = 2 << 30; + memcpy (bmp->uri, utm->uri, vec_len (utm->uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & bmp); + + if (wait_for_state_change (utm, STATE_READY)) + { + clib_warning ("timeout waiting for STATE_READY"); + return; + } + + handle_event_queue (utm); + + ump = vl_msg_api_alloc (sizeof (*ump)); + memset (ump, 0, sizeof (*ump)); + + ump->_vl_msg_id = ntohs (VL_API_UNBIND_URI); + ump->client_index = utm->my_client_index; + memcpy (ump->uri, utm->uri, vec_len (utm->uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & ump); + + if (wait_for_state_change (utm, STATE_START)) + { + clib_warning ("timeout waiting for STATE_START"); + return; + } + + fformat (stdout, "Test complete...\n"); +} + +int +main (int argc, char **argv) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + unformat_input_t _argv, *a = &_argv; + u8 *chroot_prefix; + u8 *heap; + u8 *bind_name = (u8 *) "udp4:1234"; + mheap_t *h; + session_t *session; + int i; + + clib_mem_init (0, 256 << 20); + + heap = clib_mem_get_per_cpu_heap (); + h = mheap_header (heap); + + /* make the main heap thread-safe */ + h->flags |= MHEAP_FLAG_THREAD_SAFE; + + vec_validate (utm->rx_buf, 8192); + + utm->session_index_by_vpp_handles = hash_create (0, sizeof (uword)); + + clib_time_init (&utm->clib_time); + init_error_string_table (utm); + svm_fifo_segment_init (0x200000000ULL, 20); + 
unformat_init_command_line (a, argv); + + while (unformat_check_input (a) != UNFORMAT_END_OF_INPUT) + { + if (unformat (a, "chroot prefix %s", &chroot_prefix)) + { + vl_set_memory_root_path ((char *) chroot_prefix); + } + else if (unformat (a, "uri %s", &bind_name)) + ; + else + { + fformat (stderr, "%s: usage [master|slave]\n"); + exit (1); + } + } + + utm->uri = format (0, "%s%c", bind_name, 0); + + setup_signal_handlers (); + + uri_api_hookup (utm); + + if (connect_to_vpp ("uri_udp_test") < 0) + { + svm_region_exit (); + fformat (stderr, "Couldn't connect to vpe, exiting...\n"); + exit (1); + } + + /* $$$$ hack preallocation */ + for (i = 0; i < 200000; i++) + { + pool_get (utm->sessions, session); + memset (session, 0, sizeof (*session)); + } + for (i = 0; i < 200000; i++) + pool_put_index (utm->sessions, i); + + uri_udp_test (utm); + + vl_client_disconnect_from_vlib (); + exit (0); +} + +#undef vl_api_version +#define vl_api_version(n,v) static u32 vpe_api_version = v; +#include +#undef vl_api_version + +void +vl_client_add_api_signatures (vl_api_memclnt_create_t * mp) +{ + /* + * Send the main API signature in slot 0. This bit of code must + * match the checks in ../vpe/api/api.c: vl_msg_api_version_check(). + */ + mp->api_versions[0] = clib_host_to_net_u32 (vpe_api_version); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/uri/uri_udp_test2.c b/src/uri/uri_udp_test2.c new file mode 100644 index 00000000..ddfffaa6 --- /dev/null +++ b/src/uri/uri_udp_test2.c @@ -0,0 +1,954 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../vnet/session/application_interface.h" + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +/* declare message handlers for each api */ + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) 
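/* The repeated includes of the generated API header pull it in three passes:
 * message typedefs, endian (byte-order) functions, and print functions.
 * uri_api_hookup () further down registers a handler for every message in
 * foreach_uri_msg; an illustrative expansion of its _(N,n) macro for one
 * entry looks like:
 *
 *   vl_msg_api_set_handlers (VL_API_BIND_URI_REPLY, "bind_uri_reply",
 *                            vl_api_bind_uri_reply_t_handler,
 *                            vl_noop_handler,
 *                            vl_api_bind_uri_reply_t_endian,
 *                            vl_api_bind_uri_reply_t_print,
 *                            sizeof (vl_api_bind_uri_reply_t), 1);
 */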
+#define vl_printfun +#include +#undef vl_printfun + +/* Satisfy external references when not linking with -lvlib */ +vlib_main_t vlib_global_main; +vlib_main_t **vlib_mains; + +typedef enum +{ + STATE_START, + STATE_READY, + STATE_DISCONNECTING, +} connection_state_t; + +typedef struct +{ + svm_fifo_t *server_rx_fifo; + svm_fifo_t *server_tx_fifo; +} session_t; + +typedef struct +{ + /* vpe input queue */ + unix_shared_memory_queue_t *vl_input_queue; + + /* API client handle */ + u32 my_client_index; + + /* The URI we're playing with */ + u8 *uri; + + /* Session pool */ + session_t *sessions; + + /* Hash table for disconnect processing */ + uword *session_index_by_vpp_handles; + + /* fifo segment */ + svm_fifo_segment_private_t *seg; + + /* intermediate rx buffer */ + u8 *rx_buf; + + /* URI for connect */ + u8 *connect_uri; + + int i_am_master; + + /* Our event queue */ + unix_shared_memory_queue_t *our_event_queue; + + /* $$$ single thread only for the moment */ + unix_shared_memory_queue_t *vpp_event_queue; + + /* $$$$ hack: cut-through session index */ + volatile u32 cut_through_session_index; + + /* unique segment name counter */ + u32 unique_segment_index; + + pid_t my_pid; + + /* pthread handle */ + pthread_t cut_through_thread_handle; + + /* For deadman timers */ + clib_time_t clib_time; + + /* State of the connection, shared between msg RX thread and main thread */ + volatile connection_state_t state; + + volatile int time_to_stop; + volatile int time_to_print_stats; + + u32 configured_segment_size; + + /* VNET_API_ERROR_FOO -> "Foo" hash table */ + uword *error_string_by_error_number; + + /* convenience */ + svm_fifo_segment_main_t *segment_main; + +} uri_udp_test_main_t; + +#if CLIB_DEBUG > 0 +#define NITER 10000 +#else +#define NITER 4000000 +#endif + +uri_udp_test_main_t uri_udp_test_main; + +static void +stop_signal (int signum) +{ + uri_udp_test_main_t *um = &uri_udp_test_main; + + um->time_to_stop = 1; +} + +static void +stats_signal (int signum) +{ + uri_udp_test_main_t *um = &uri_udp_test_main; + + um->time_to_print_stats = 1; +} + +static clib_error_t * +setup_signal_handlers (void) +{ + signal (SIGINT, stats_signal); + signal (SIGQUIT, stop_signal); + signal (SIGTERM, stop_signal); + + return 0; +} + +u8 * +format_api_error (u8 * s, va_list * args) +{ + uri_udp_test_main_t *utm = va_arg (*args, uri_udp_test_main_t *); + i32 error = va_arg (*args, u32); + uword *p; + + p = hash_get (utm->error_string_by_error_number, -error); + + if (p) + s = format (s, "%s", p[0]); + else + s = format (s, "%d", error); + return s; +} + +int +wait_for_state_change (uri_udp_test_main_t * utm, connection_state_t state) +{ +#if CLIB_DEBUG > 0 +#define TIMEOUT 600.0 +#else +#define TIMEOUT 600.0 +#endif + + f64 timeout = clib_time_now (&utm->clib_time) + TIMEOUT; + + while (clib_time_now (&utm->clib_time) < timeout) + { + if (utm->state == state) + return 0; + } + return -1; +} + +u64 server_bytes_received, server_bytes_sent; + +static void * +cut_through_thread_fn (void *arg) +{ + session_t *s; + svm_fifo_t *rx_fifo; + svm_fifo_t *tx_fifo; + u8 *my_copy_buffer = 0; + uri_udp_test_main_t *utm = &uri_udp_test_main; + i32 actual_transfer; + int rv; + u32 buffer_offset; + + while (utm->cut_through_session_index == ~0) + ; + + s = pool_elt_at_index (utm->sessions, utm->cut_through_session_index); + + rx_fifo = s->server_rx_fifo; + tx_fifo = s->server_tx_fifo; + + vec_validate (my_copy_buffer, 64 * 1024 - 1); + + while (true) + { + /* We read from the tx fifo and write to the rx fifo */ + do + { 
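/* The _nowait fifo calls return the number of bytes actually moved, or a
   non-positive value when nothing could be dequeued or enqueued, so the
   loops below spin until data moves and advance a buffer offset on partial
   transfers. */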
+ actual_transfer = svm_fifo_dequeue_nowait (tx_fifo, 0, + vec_len (my_copy_buffer), + my_copy_buffer); + } + while (actual_transfer <= 0); + + server_bytes_received += actual_transfer; + + buffer_offset = 0; + while (actual_transfer > 0) + { + rv = svm_fifo_enqueue_nowait (rx_fifo, 0, actual_transfer, + my_copy_buffer + buffer_offset); + if (rv > 0) + { + actual_transfer -= rv; + buffer_offset += rv; + server_bytes_sent += rv; + } + + } + if (PREDICT_FALSE (utm->time_to_stop)) + break; + } + + pthread_exit (0); +} + +static void +uri_udp_slave_test (uri_udp_test_main_t * utm) +{ + vl_api_connect_uri_t *cmp; + int i; + u8 *test_data = 0; + u64 bytes_received = 0, bytes_sent = 0; + i32 bytes_to_read; + int rv; + int mypid = getpid (); + f64 before, after, delta, bytes_per_second; + session_t *session; + svm_fifo_t *rx_fifo, *tx_fifo; + int buffer_offset, bytes_to_send = 0; + + vec_validate (test_data, 64 * 1024 - 1); + for (i = 0; i < vec_len (test_data); i++) + test_data[i] = i & 0xff; + + cmp = vl_msg_api_alloc (sizeof (*cmp)); + memset (cmp, 0, sizeof (*cmp)); + + cmp->_vl_msg_id = ntohs (VL_API_CONNECT_URI); + cmp->client_index = utm->my_client_index; + cmp->context = ntohl (0xfeedface); + memcpy (cmp->uri, utm->connect_uri, vec_len (utm->connect_uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & cmp); + + if (wait_for_state_change (utm, STATE_READY)) + { + clib_warning ("timeout waiting for STATE_READY"); + return; + } + + session = pool_elt_at_index (utm->sessions, utm->cut_through_session_index); + rx_fifo = session->server_rx_fifo; + tx_fifo = session->server_tx_fifo; + + before = clib_time_now (&utm->clib_time); + + vec_validate (utm->rx_buf, vec_len (test_data) - 1); + + for (i = 0; i < NITER; i++) + { + bytes_to_send = vec_len (test_data); + buffer_offset = 0; + while (bytes_to_send > 0) + { + rv = svm_fifo_enqueue_nowait (tx_fifo, mypid, + bytes_to_send, + test_data + buffer_offset); + + if (rv > 0) + { + bytes_to_send -= rv; + buffer_offset += rv; + bytes_sent += rv; + } + } + + bytes_to_read = svm_fifo_max_dequeue (rx_fifo); + + bytes_to_read = vec_len (utm->rx_buf) > bytes_to_read ? 
+ bytes_to_read : vec_len (utm->rx_buf); + + buffer_offset = 0; + while (bytes_to_read > 0) + { + rv = svm_fifo_dequeue_nowait (rx_fifo, mypid, + bytes_to_read, + utm->rx_buf + buffer_offset); + if (rv > 0) + { + bytes_to_read -= rv; + buffer_offset += rv; + bytes_received += rv; + } + } + } + while (bytes_received < bytes_sent) + { + rv = svm_fifo_dequeue_nowait (rx_fifo, mypid, + vec_len (utm->rx_buf), utm->rx_buf); + if (rv > 0) + { +#if CLIB_DEBUG > 0 + int j; + for (j = 0; j < rv; j++) + { + if (utm->rx_buf[j] != ((bytes_received + j) & 0xff)) + { + clib_warning ("error at byte %lld, 0x%x not 0x%x", + bytes_received + j, + utm->rx_buf[j], + ((bytes_received + j) & 0xff)); + } + } +#endif + bytes_received += (u64) rv; + } + } + + after = clib_time_now (&utm->clib_time); + delta = after - before; + bytes_per_second = 0.0; + + if (delta > 0.0) + bytes_per_second = (f64) bytes_received / delta; + + fformat (stdout, + "Done: %lld recv bytes in %.2f seconds, %.2f bytes/sec...\n\n", + bytes_received, delta, bytes_per_second); + fformat (stdout, + "Done: %lld sent bytes in %.2f seconds, %.2f bytes/sec...\n\n", + bytes_sent, delta, bytes_per_second); + fformat (stdout, + "client -> server -> client round trip: %.2f Gbit/sec \n\n", + (bytes_per_second * 8.0) / 1e9); +} + +static void +vl_api_bind_uri_reply_t_handler (vl_api_bind_uri_reply_t * mp) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + svm_fifo_segment_create_args_t _a, *a = &_a; + int rv; + + if (mp->segment_name_length == 0) + { + clib_warning ("segment_name_length zero"); + return; + } + + a->segment_name = (char *) mp->segment_name; + a->segment_size = mp->segment_size; + + ASSERT (mp->server_event_queue_address); + + /* Attach to the segment vpp created */ + rv = svm_fifo_segment_attach (a); + if (rv) + { + clib_warning ("svm_fifo_segment_attach ('%s') failed", + mp->segment_name); + return; + } + + utm->our_event_queue = (unix_shared_memory_queue_t *) + mp->server_event_queue_address; + + utm->state = STATE_READY; +} + +static void +vl_api_map_another_segment_t_handler (vl_api_map_another_segment_t * mp) +{ + svm_fifo_segment_create_args_t _a, *a = &_a; + int rv; + + a->segment_name = (char *) mp->segment_name; + a->segment_size = mp->segment_size; + /* Attach to the segment vpp created */ + rv = svm_fifo_segment_attach (a); + if (rv) + { + clib_warning ("svm_fifo_segment_attach ('%s') failed", + mp->segment_name); + return; + } + clib_warning ("Mapped new segment '%s' size %d", mp->segment_name, + mp->segment_size); +} + +static void +vl_api_connect_uri_t_handler (vl_api_connect_uri_t * mp) +{ + u32 segment_index; + uri_udp_test_main_t *utm = &uri_udp_test_main; + svm_fifo_segment_main_t *sm = &svm_fifo_segment_main; + svm_fifo_segment_create_args_t _a, *a = &_a; + svm_fifo_segment_private_t *seg; + unix_shared_memory_queue_t *client_q; + vl_api_connect_uri_reply_t *rmp; + session_t *session; + int rv = 0; + + /* Create the segment */ + a->segment_name = (char *) format (0, "%d:segment%d%c", utm->my_pid, + utm->unique_segment_index++, 0); + a->segment_size = utm->configured_segment_size; + + rv = svm_fifo_segment_create (a); + if (rv) + { + clib_warning ("sm_fifo_segment_create ('%s') failed", a->segment_name); + rv = VNET_API_ERROR_URI_FIFO_CREATE_FAILED; + goto send_reply; + } + + vec_add2 (utm->seg, seg, 1); + + segment_index = vec_len (sm->segments) - 1; + + memcpy (seg, sm->segments + segment_index, sizeof (utm->seg[0])); + + pool_get (utm->sessions, session); + + /* + * By construction the master's idea of the rx fifo 
ends up in + * fsh->fifos[0], and the master's idea of the tx fifo ends up in + * fsh->fifos[1]. + */ + session->server_rx_fifo = svm_fifo_segment_alloc_fifo (utm->seg, + 128 * 1024); + ASSERT (session->server_rx_fifo); + + session->server_tx_fifo = svm_fifo_segment_alloc_fifo (utm->seg, + 128 * 1024); + ASSERT (session->server_tx_fifo); + + session->server_rx_fifo->server_session_index = session - utm->sessions; + session->server_tx_fifo->server_session_index = session - utm->sessions; + utm->cut_through_session_index = session - utm->sessions; + + rv = pthread_create (&utm->cut_through_thread_handle, + NULL /*attr */ , cut_through_thread_fn, 0); + if (rv) + { + clib_warning ("pthread_create returned %d", rv); + rv = VNET_API_ERROR_SYSCALL_ERROR_1; + } + +send_reply: + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + + rmp->_vl_msg_id = ntohs (VL_API_CONNECT_URI_REPLY); + rmp->context = mp->context; + rmp->retval = ntohl (rv); + rmp->segment_name_length = vec_len (a->segment_name); + memcpy (rmp->segment_name, a->segment_name, vec_len (a->segment_name)); + + vec_free (a->segment_name); + + client_q = (unix_shared_memory_queue_t *) mp->client_queue_address; + vl_msg_api_send_shmem (client_q, (u8 *) & rmp); +} + +static void +vl_api_unbind_uri_reply_t_handler (vl_api_unbind_uri_reply_t * mp) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + + if (mp->retval != 0) + clib_warning ("returned %d", ntohl (mp->retval)); + + utm->state = STATE_START; +} + +static void +vl_api_accept_session_t_handler (vl_api_accept_session_t * mp) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + vl_api_accept_session_reply_t *rmp; + svm_fifo_t *rx_fifo, *tx_fifo; + session_t *session; + static f64 start_time; + u64 key; + + if (start_time == 0.0) + start_time = clib_time_now (&utm->clib_time); + + utm->vpp_event_queue = (unix_shared_memory_queue_t *) + mp->vpp_event_queue_address; + + pool_get (utm->sessions, session); + + rx_fifo = (svm_fifo_t *) mp->server_rx_fifo; + rx_fifo->client_session_index = session - utm->sessions; + tx_fifo = (svm_fifo_t *) mp->server_tx_fifo; + tx_fifo->client_session_index = session - utm->sessions; + + session->server_rx_fifo = rx_fifo; + session->server_tx_fifo = tx_fifo; + + key = (((u64) mp->session_thread_index) << 32) | (u64) mp->session_index; + + hash_set (utm->session_index_by_vpp_handles, key, session - utm->sessions); + + utm->state = STATE_READY; + + if (pool_elts (utm->sessions) && (pool_elts (utm->sessions) % 20000) == 0) + { + f64 now = clib_time_now (&utm->clib_time); + fformat (stdout, "%d active sessions in %.2f seconds, %.2f/sec...\n", + pool_elts (utm->sessions), now - start_time, + (f64) pool_elts (utm->sessions) / (now - start_time)); + } + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_ACCEPT_SESSION_REPLY); + rmp->session_type = mp->session_type; + rmp->session_index = mp->session_index; + rmp->session_thread_index = mp->session_thread_index; + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & rmp); +} + +static void +vl_api_disconnect_session_t_handler (vl_api_disconnect_session_t * mp) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + session_t *session; + vl_api_disconnect_session_reply_t *rmp; + uword *p; + int rv = 0; + u64 key; + + key = (((u64) mp->session_thread_index) << 32) | (u64) mp->session_index; + + p = hash_get (utm->session_index_by_vpp_handles, key); + + if (p) + { + session = pool_elt_at_index (utm->sessions, p[0]); + hash_unset 
(utm->session_index_by_vpp_handles, key); + pool_put (utm->sessions, session); + } + else + { + clib_warning ("couldn't find session key %llx", key); + rv = -11; + } + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_DISCONNECT_SESSION_REPLY); + rmp->retval = rv; + rmp->session_index = mp->session_index; + rmp->session_thread_index = mp->session_thread_index; + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & rmp); +} + +static void +vl_api_connect_uri_reply_t_handler (vl_api_connect_uri_reply_t * mp) +{ + svm_fifo_segment_main_t *sm = &svm_fifo_segment_main; + uri_udp_test_main_t *utm = &uri_udp_test_main; + svm_fifo_segment_create_args_t _a, *a = &_a; + ssvm_shared_header_t *sh; + svm_fifo_segment_private_t *seg; + svm_fifo_segment_header_t *fsh; + session_t *session; + u32 segment_index; + int rv; + + ASSERT (utm->i_am_master == 0); + + if (mp->segment_name_length == 0) + { + clib_warning ("segment_name_length zero"); + return; + } + + memset (a, 0, sizeof (*a)); + + a->segment_name = (char *) mp->segment_name; + + sleep (1); + + rv = svm_fifo_segment_attach (a); + if (rv) + { + clib_warning ("sm_fifo_segment_create ('%v') failed", mp->segment_name); + return; + } + + segment_index = vec_len (sm->segments) - 1; + + vec_add2 (utm->seg, seg, 1); + + memcpy (seg, sm->segments + segment_index, sizeof (*seg)); + sh = seg->ssvm.sh; + fsh = (svm_fifo_segment_header_t *) sh->opaque[0]; + + while (vec_len (fsh->fifos) < 2) + sleep (1); + + pool_get (utm->sessions, session); + utm->cut_through_session_index = session - utm->sessions; + + session->server_rx_fifo = (svm_fifo_t *) fsh->fifos[0]; + ASSERT (session->server_rx_fifo); + session->server_tx_fifo = (svm_fifo_t *) fsh->fifos[1]; + ASSERT (session->server_tx_fifo); + + /* security: could unlink /dev/shm/segment_name> here, maybe */ + + utm->state = STATE_READY; +} + +#define foreach_uri_msg \ +_(BIND_URI_REPLY, bind_uri_reply) \ +_(CONNECT_URI, connect_uri) \ +_(CONNECT_URI_REPLY, connect_uri_reply) \ +_(UNBIND_URI_REPLY, unbind_uri_reply) \ +_(ACCEPT_SESSION, accept_session) \ +_(DISCONNECT_SESSION, disconnect_session) \ +_(MAP_ANOTHER_SEGMENT, map_another_segment) + +void +uri_api_hookup (uri_udp_test_main_t * utm) +{ +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_uri_msg; +#undef _ + +} + + +int +connect_to_vpp (char *name) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + api_main_t *am = &api_main; + + if (vl_client_connect_to_vlib ("/vpe-api", name, 32) < 0) + return -1; + + utm->vl_input_queue = am->shmem_hdr->vl_input_queue; + utm->my_client_index = am->my_client_index; + + return 0; +} + +void +vlib_cli_output (struct vlib_main_t *vm, char *fmt, ...) 
+{ + clib_warning ("BUG"); +} + +static void +init_error_string_table (uri_udp_test_main_t * utm) +{ + utm->error_string_by_error_number = hash_create (0, sizeof (uword)); + +#define _(n,v,s) hash_set (utm->error_string_by_error_number, -v, s); + foreach_vnet_api_error; +#undef _ + + hash_set (utm->error_string_by_error_number, 99, "Misc"); +} + +void +handle_fifo_event_server_rx (uri_udp_test_main_t * utm, + session_fifo_event_t * e) +{ + svm_fifo_t *rx_fifo, *tx_fifo; + int nbytes; + + session_fifo_event_t evt; + unix_shared_memory_queue_t *q; + int rv; + + rx_fifo = e->fifo; + tx_fifo = utm->sessions[rx_fifo->client_session_index].server_tx_fifo; + + do + { + nbytes = svm_fifo_dequeue_nowait (rx_fifo, 0, + vec_len (utm->rx_buf), utm->rx_buf); + } + while (nbytes <= 0); + do + { + rv = svm_fifo_enqueue_nowait (tx_fifo, 0, nbytes, utm->rx_buf); + } + while (rv == -2); + + /* Fabricate TX event, send to vpp */ + evt.fifo = tx_fifo; + evt.event_type = FIFO_EVENT_SERVER_TX; + /* $$$$ for event logging */ + evt.enqueue_length = nbytes; + evt.event_id = e->event_id; + q = utm->vpp_event_queue; + unix_shared_memory_queue_add (q, (u8 *) & evt, 0 /* do wait for mutex */ ); +} + +void +handle_event_queue (uri_udp_test_main_t * utm) +{ + session_fifo_event_t _e, *e = &_e;; + + while (1) + { + unix_shared_memory_queue_sub (utm->our_event_queue, (u8 *) e, + 0 /* nowait */ ); + switch (e->event_type) + { + case FIFO_EVENT_SERVER_RX: + handle_fifo_event_server_rx (utm, e); + break; + + case FIFO_EVENT_SERVER_EXIT: + return; + + default: + clib_warning ("unknown event type %d", e->event_type); + break; + } + if (PREDICT_FALSE (utm->time_to_stop == 1)) + break; + if (PREDICT_FALSE (utm->time_to_print_stats == 1)) + { + utm->time_to_print_stats = 0; + fformat (stdout, "%d connections\n", pool_elts (utm->sessions)); + } + } +} + +void +uri_udp_test (uri_udp_test_main_t * utm) +{ + vl_api_bind_uri_t *bmp; + vl_api_unbind_uri_t *ump; + + bmp = vl_msg_api_alloc (sizeof (*bmp)); + memset (bmp, 0, sizeof (*bmp)); + + bmp->_vl_msg_id = ntohs (VL_API_BIND_URI); + bmp->client_index = utm->my_client_index; + bmp->context = ntohl (0xfeedface); + bmp->initial_segment_size = 256 << 20; /* size of initial segment */ + bmp->options[SESSION_OPTIONS_FLAGS] = + SESSION_OPTIONS_FLAGS_USE_FIFO | SESSION_OPTIONS_FLAGS_ADD_SEGMENT; + bmp->options[SESSION_OPTIONS_RX_FIFO_SIZE] = 16 << 10; + bmp->options[SESSION_OPTIONS_TX_FIFO_SIZE] = 16 << 10; + bmp->options[SESSION_OPTIONS_ADD_SEGMENT_SIZE] = 128 << 20; + memcpy (bmp->uri, utm->uri, vec_len (utm->uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & bmp); + + if (wait_for_state_change (utm, STATE_READY)) + { + clib_warning ("timeout waiting for STATE_READY"); + return; + } + + handle_event_queue (utm); + + ump = vl_msg_api_alloc (sizeof (*ump)); + memset (ump, 0, sizeof (*ump)); + + ump->_vl_msg_id = ntohs (VL_API_UNBIND_URI); + ump->client_index = utm->my_client_index; + memcpy (ump->uri, utm->uri, vec_len (utm->uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & ump); + + if (wait_for_state_change (utm, STATE_START)) + { + clib_warning ("timeout waiting for STATE_START"); + return; + } + + fformat (stdout, "Test complete...\n"); +} + +int +main (int argc, char **argv) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + unformat_input_t _argv, *a = &_argv; + u8 *chroot_prefix; + u8 *heap; + u8 *bind_name = (u8 *) "udp://0.0.0.0/1234"; + u32 tmp; + mheap_t *h; + session_t *session; + int i; + int i_am_master = 1; + + clib_mem_init (0, 256 << 20); + + heap 
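/* Sizes here are plain shifts: 16 << 10 is 16 KB (the fifo sizes above),
   1 << 20 is 1 MB (the default segment size below), 128 << 20 is 128 MB and
   256 << 20 is 256 MB; the "segment-size %dM" / "%dG" options scale the same
   way via tmp << 20 and tmp << 30. */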
= clib_mem_get_per_cpu_heap (); + h = mheap_header (heap); + + /* make the main heap thread-safe */ + h->flags |= MHEAP_FLAG_THREAD_SAFE; + + vec_validate (utm->rx_buf, 8192); + + utm->session_index_by_vpp_handles = hash_create (0, sizeof (uword)); + + utm->my_pid = getpid (); + utm->configured_segment_size = 1 << 20; + + clib_time_init (&utm->clib_time); + init_error_string_table (utm); + svm_fifo_segment_init (0x200000000ULL, 20); + unformat_init_command_line (a, argv); + + while (unformat_check_input (a) != UNFORMAT_END_OF_INPUT) + { + if (unformat (a, "chroot prefix %s", &chroot_prefix)) + { + vl_set_memory_root_path ((char *) chroot_prefix); + } + else if (unformat (a, "uri %s", &bind_name)) + ; + else if (unformat (a, "segment-size %dM", &tmp)) + utm->configured_segment_size = tmp << 20; + else if (unformat (a, "segment-size %dG", &tmp)) + utm->configured_segment_size = tmp << 30; + else if (unformat (a, "master")) + i_am_master = 1; + else if (unformat (a, "slave")) + i_am_master = 0; + else + { + fformat (stderr, "%s: usage [master|slave]\n"); + exit (1); + } + } + + utm->cut_through_session_index = ~0; + utm->uri = format (0, "%s%c", bind_name, 0); + utm->i_am_master = i_am_master; + utm->segment_main = &svm_fifo_segment_main; + + utm->connect_uri = format (0, "udp://10.0.0.1/1234%c", 0); + + setup_signal_handlers (); + + uri_api_hookup (utm); + + if (connect_to_vpp (i_am_master ? "uri_udp_master" : "uri_udp_slave") < 0) + { + svm_region_exit (); + fformat (stderr, "Couldn't connect to vpe, exiting...\n"); + exit (1); + } + + if (i_am_master == 0) + { + uri_udp_slave_test (utm); + exit (0); + } + + /* $$$$ hack preallocation */ + for (i = 0; i < 200000; i++) + { + pool_get (utm->sessions, session); + memset (session, 0, sizeof (*session)); + } + for (i = 0; i < 200000; i++) + pool_put_index (utm->sessions, i); + + uri_udp_test (utm); + + vl_client_disconnect_from_vlib (); + exit (0); +} + +#undef vl_api_version +#define vl_api_version(n,v) static u32 vpe_api_version = v; +#include +#undef vl_api_version + +void +vl_client_add_api_signatures (vl_api_memclnt_create_t * mp) +{ + /* + * Send the main API signature in slot 0. This bit of code must + * match the checks in ../vpe/api/api.c: vl_msg_api_version_check(). + */ + mp->api_versions[0] = clib_host_to_net_u32 (vpe_api_version); +} + +u32 +vl (void *p) +{ + return vec_len (p); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/uri/uritest.c b/src/uri/uritest.c new file mode 100644 index 00000000..edcdb3ad --- /dev/null +++ b/src/uri/uritest.c @@ -0,0 +1,484 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +/* declare message handlers for each api */ + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) +#define vl_printfun +#include +#undef vl_printfun + +typedef enum +{ + STATE_START, + STATE_READY, + STATE_DISCONNECTING, +} connection_state_t; + +typedef struct +{ + /* vpe input queue */ + unix_shared_memory_queue_t *vl_input_queue; + + /* API client handle */ + u32 my_client_index; + + /* role */ + int i_am_master; + + /* The URI we're playing with */ + u8 *uri; + + /* fifo segment */ + svm_fifo_segment_private_t *seg; + + svm_fifo_t *rx_fifo; + svm_fifo_t *tx_fifo; + + /* For deadman timers */ + clib_time_t clib_time; + + /* State of the connection, shared between msg RX thread and main thread */ + volatile connection_state_t state; + + /* VNET_API_ERROR_FOO -> "Foo" hash table */ + uword *error_string_by_error_number; +} uritest_main_t; + +#if CLIB_DEBUG > 0 +#define NITER 1000 +#else +#define NITER 1000000 +#endif + +uritest_main_t uritest_main; + +u8 * +format_api_error (u8 * s, va_list * args) +{ + uritest_main_t *utm = va_arg (*args, uritest_main_t *); + i32 error = va_arg (*args, u32); + uword *p; + + p = hash_get (utm->error_string_by_error_number, -error); + + if (p) + s = format (s, "%s", p[0]); + else + s = format (s, "%d", error); + return s; +} + +int +wait_for_state_change (uritest_main_t * utm, connection_state_t state) +{ + f64 timeout = clib_time_now (&utm->clib_time) + 1.0; + + while (clib_time_now (&utm->clib_time) < timeout) + { + if (utm->state == state) + return 0; + } + return -1; +} + +static void +vl_api_bind_uri_reply_t_handler (vl_api_bind_uri_reply_t * mp) +{ + uritest_main_t *utm = &uritest_main; + svm_fifo_segment_create_args_t _a, *a = &_a; + int rv; + + ASSERT (utm->i_am_master); + + if (mp->segment_name_length == 0) + { + clib_warning ("segment_name_length zero"); + return; + } + + a->segment_name = (char *) mp->segment_name; + a->segment_size = mp->segment_size; + + /* Create the segment */ + rv = svm_fifo_segment_create (a); + if (rv) + { + clib_warning ("sm_fifo_segment_create ('%s') failed", mp->segment_name); + return; + } + + vec_validate (utm->seg, 0); + + memcpy (utm->seg, a->rv, sizeof (*utm->seg)); + + /* + * By construction the master's idea of the rx fifo ends up in + * fsh->fifos[0], and the master's idea of the tx fifo ends up in + * fsh->fifos[1]. 
+ */ + utm->rx_fifo = svm_fifo_segment_alloc_fifo (utm->seg, 10240); + ASSERT (utm->rx_fifo); + + utm->tx_fifo = svm_fifo_segment_alloc_fifo (utm->seg, 10240); + ASSERT (utm->tx_fifo); + + utm->state = STATE_READY; +} + +static void +vl_api_connect_uri_reply_t_handler (vl_api_connect_uri_reply_t * mp) +{ + uritest_main_t *utm = &uritest_main; + svm_fifo_segment_create_args_t _a, *a = &_a; + ssvm_shared_header_t *sh; + svm_fifo_segment_header_t *fsh; + int rv; + + ASSERT (utm->i_am_master == 0); + + if (mp->segment_name_length == 0) + { + clib_warning ("segment_name_length zero"); + return; + } + + memset (a, 0, sizeof (*a)); + + a->segment_name = (char *) mp->segment_name; + + rv = svm_fifo_segment_attach (a); + if (rv) + { + clib_warning ("sm_fifo_segment_create ('%s') failed", mp->segment_name); + return; + } + + vec_validate (utm->seg, 0); + + memcpy (utm->seg, a->rv, sizeof (*utm->seg)); + sh = utm->seg->ssvm.sh; + fsh = (svm_fifo_segment_header_t *) sh->opaque[0]; + + while (vec_len (fsh->fifos) < 2) + sleep (1); + + utm->rx_fifo = (svm_fifo_t *) fsh->fifos[1]; + ASSERT (utm->rx_fifo); + utm->tx_fifo = (svm_fifo_t *) fsh->fifos[0]; + ASSERT (utm->tx_fifo); + + /* security: could unlink /dev/shm/segment_name> here, maybe */ + + utm->state = STATE_READY; +} + +static void +vl_api_unbind_uri_reply_t_handler (vl_api_unbind_uri_reply_t * mp) +{ + uritest_main_t *utm = &uritest_main; + + if (mp->retval != 0) + clib_warning ("returned %d", ntohl (mp->retval)); + + utm->state = STATE_START; +} + +#define foreach_uri_msg \ +_(BIND_URI_REPLY, bind_uri_reply) \ +_(CONNECT_URI_REPLY, connect_uri_reply) \ +_(UNBIND_URI_REPLY, unbind_uri_reply) + +void +uri_api_hookup (uritest_main_t * utm) +{ +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_uri_msg; +#undef _ + +} + + +int +connect_to_vpp (char *name) +{ + uritest_main_t *utm = &uritest_main; + api_main_t *am = &api_main; + + if (vl_client_connect_to_vlib ("/vpe-api", name, 32) < 0) + return -1; + + utm->vl_input_queue = am->shmem_hdr->vl_input_queue; + utm->my_client_index = am->my_client_index; + + return 0; +} + +void +vlib_cli_output (struct vlib_main_t *vm, char *fmt, ...) 
+{ + clib_warning ("BUG"); +} + +static void +init_error_string_table (uritest_main_t * utm) +{ + utm->error_string_by_error_number = hash_create (0, sizeof (uword)); + +#define _(n,v,s) hash_set (utm->error_string_by_error_number, -v, s); + foreach_vnet_api_error; +#undef _ + + hash_set (utm->error_string_by_error_number, 99, "Misc"); +} + +void +uritest_master (uritest_main_t * utm) +{ + vl_api_bind_uri_t *bmp; + vl_api_unbind_uri_t *ump; + int i; + u8 *test_data = 0; + u8 *reply = 0; + u32 reply_len; + int mypid = getpid (); + + for (i = 0; i < 2048; i++) + vec_add1 (test_data, 'a' + (i % 32)); + + bmp = vl_msg_api_alloc (sizeof (*bmp)); + memset (bmp, 0, sizeof (*bmp)); + + bmp->_vl_msg_id = ntohs (VL_API_BIND_URI); + bmp->client_index = utm->my_client_index; + bmp->context = ntohl (0xfeedface); + bmp->segment_size = 256 << 10; + memcpy (bmp->uri, utm->uri, vec_len (utm->uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & bmp); + + if (wait_for_state_change (utm, STATE_READY)) + { + clib_warning ("timeout waiting for STATE_READY"); + return; + } + + for (i = 0; i < NITER; i++) + svm_fifo_enqueue (utm->tx_fifo, mypid, vec_len (test_data), test_data); + + vec_validate (reply, 0); + + reply_len = svm_fifo_dequeue (utm->rx_fifo, mypid, vec_len (reply), reply); + + if (reply_len != 1) + clib_warning ("reply length %d", reply_len); + + if (reply[0] == 1) + fformat (stdout, "Test OK..."); + + ump = vl_msg_api_alloc (sizeof (*ump)); + memset (ump, 0, sizeof (*ump)); + + ump->_vl_msg_id = ntohs (VL_API_UNBIND_URI); + ump->client_index = utm->my_client_index; + memcpy (ump->uri, utm->uri, vec_len (utm->uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & ump); + + if (wait_for_state_change (utm, STATE_START)) + { + clib_warning ("timeout waiting for STATE_READY"); + return; + } + + fformat (stdout, "Master done...\n"); +} + +void +uritest_slave (uritest_main_t * utm) +{ + vl_api_connect_uri_t *cmp; + int i, j; + u8 *test_data = 0; + u8 *reply = 0; + u32 bytes_received = 0; + u32 actual_bytes; + int mypid = getpid (); + u8 ok; + f64 before, after, delta, bytes_per_second; + + vec_validate (test_data, 4095); + + cmp = vl_msg_api_alloc (sizeof (*cmp)); + memset (cmp, 0, sizeof (*cmp)); + + cmp->_vl_msg_id = ntohs (VL_API_CONNECT_URI); + cmp->client_index = utm->my_client_index; + cmp->context = ntohl (0xfeedface); + memcpy (cmp->uri, utm->uri, vec_len (utm->uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & cmp); + + if (wait_for_state_change (utm, STATE_READY)) + { + clib_warning ("timeout waiting for STATE_READY"); + return; + } + + ok = 1; + before = clib_time_now (&utm->clib_time); + for (i = 0; i < NITER; i++) + { + actual_bytes = svm_fifo_dequeue (utm->rx_fifo, mypid, + vec_len (test_data), test_data); + j = 0; + while (j < actual_bytes) + { + if (test_data[j] != ('a' + (bytes_received % 32))) + ok = 0; + bytes_received++; + j++; + } + if (bytes_received == NITER * 2048) + break; + } + + vec_add1 (reply, ok); + + svm_fifo_enqueue (utm->tx_fifo, mypid, vec_len (reply), reply); + after = clib_time_now (&utm->clib_time); + delta = after - before; + bytes_per_second = 0.0; + + if (delta > 0.0) + bytes_per_second = (f64) bytes_received / delta; + + fformat (stdout, + "Slave done, %d bytes in %.2f seconds, %.2f bytes/sec...\n", + bytes_received, delta, bytes_per_second); +} + +int +main (int argc, char **argv) +{ + uritest_main_t *utm = &uritest_main; + unformat_input_t _argv, *a = &_argv; + u8 *chroot_prefix; + u8 *heap; + char *bind_name = "fifo:uritest"; + mheap_t 
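/* Rough volume check for the functions above: the master enqueues NITER
   blocks of 2048 bytes, i.e. about 2 MB with CLIB_DEBUG (NITER = 1000) and
   about 2 GB otherwise (NITER = 1000000); the slave verifies the
   'a' + (i % 32) pattern and answers with a single status byte. */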
*h; + int i_am_master = 0; + + clib_mem_init (0, 128 << 20); + + heap = clib_mem_get_per_cpu_heap (); + h = mheap_header (heap); + + /* make the main heap thread-safe */ + h->flags |= MHEAP_FLAG_THREAD_SAFE; + + clib_time_init (&utm->clib_time); + init_error_string_table (utm); + svm_fifo_segment_init (0x200000000ULL, 20); + unformat_init_command_line (a, argv); + + utm->uri = format (0, "%s%c", bind_name, 0); + + while (unformat_check_input (a) != UNFORMAT_END_OF_INPUT) + { + if (unformat (a, "master")) + i_am_master = 1; + else if (unformat (a, "slave")) + i_am_master = 0; + else if (unformat (a, "chroot prefix %s", &chroot_prefix)) + { + vl_set_memory_root_path ((char *) chroot_prefix); + } + else + { + fformat (stderr, "%s: usage [master|slave]\n"); + exit (1); + } + } + + uri_api_hookup (utm); + + if (connect_to_vpp (i_am_master ? "uritest_master" : "uritest_slave") < 0) + { + svm_region_exit (); + fformat (stderr, "Couldn't connect to vpe, exiting...\n"); + exit (1); + } + + utm->i_am_master = i_am_master; + + if (i_am_master) + uritest_master (utm); + else + uritest_slave (utm); + + vl_client_disconnect_from_vlib (); + exit (0); +} + +#undef vl_api_version +#define vl_api_version(n,v) static u32 vpe_api_version = v; +#include +#undef vl_api_version + +void +vl_client_add_api_signatures (vl_api_memclnt_create_t * mp) +{ + /* + * Send the main API signature in slot 0. This bit of code must + * match the checks in ../vpe/api/api.c: vl_msg_api_version_check(). + */ + mp->api_versions[0] = clib_host_to_net_u32 (vpe_api_version); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vlib/buffer.c b/src/vlib/buffer.c index 4f5eb09d..9f26bec7 100644 --- a/src/vlib/buffer.c +++ b/src/vlib/buffer.c @@ -360,7 +360,7 @@ vlib_buffer_create_free_list_helper (vlib_main_t * vm, memset (f, 0, sizeof (f[0])); f->index = f - bm->buffer_free_list_pool; f->n_data_bytes = vlib_buffer_round_size (n_data_bytes); - f->min_n_buffers_each_physmem_alloc = 16; + f->min_n_buffers_each_physmem_alloc = VLIB_FRAME_SIZE; f->name = clib_mem_is_heap_object (name) ? name : format (0, "%s", name); /* Setup free buffer template. 
*/ diff --git a/src/vlib/buffer.h b/src/vlib/buffer.h index 1f723f3b..69c8c7cc 100644 --- a/src/vlib/buffer.h +++ b/src/vlib/buffer.h @@ -240,6 +240,74 @@ vlib_get_buffer_opaque2 (vlib_buffer_t * b) return (void *) b->opaque2; } +/** \brief Get pointer to the end of buffer's data + * @param b pointer to the buffer + * @return pointer to tail of packet's data + */ +always_inline u8 * +vlib_buffer_get_tail (vlib_buffer_t * b) +{ + return b->data + b->current_data + b->current_length; +} + +/** \brief Append uninitialized data to buffer + * @param b pointer to the buffer + * @param size number of uninitialized bytes + * @return pointer to beginning of uninitialized data + */ +always_inline void * +vlib_buffer_put_uninit (vlib_buffer_t * b, u8 size) +{ + void *p = vlib_buffer_get_tail (b); + /* XXX make sure there's enough space */ + b->current_length += size; + return p; +} + +/** \brief Prepend uninitialized data to buffer + * @param b pointer to the buffer + * @param size number of uninitialized bytes + * @return pointer to beginning of uninitialized data + */ +always_inline void * +vlib_buffer_push_uninit (vlib_buffer_t * b, u8 size) +{ + ASSERT (b->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= size); + b->current_data -= size; + b->current_length += size; + + return vlib_buffer_get_current (b); +} + +/** \brief Make head room, typically for packet headers + * @param b pointer to the buffer + * @param size number of head room bytes + * @return pointer to start of buffer (current data) + */ +always_inline void * +vlib_buffer_make_headroom (vlib_buffer_t * b, u8 size) +{ + ASSERT (b->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= size); + b->current_data += size; + return vlib_buffer_get_current (b); +} + +/** \brief Retrieve bytes from buffer head + * @param b pointer to the buffer + * @param size number of bytes to pull + * @return pointer to start of buffer (current data) + */ +always_inline void * +vlib_buffer_pull (vlib_buffer_t * b, u8 size) +{ + if (b->current_length + VLIB_BUFFER_PRE_DATA_SIZE < size) + return 0; + + void *data = vlib_buffer_get_current (b); + vlib_buffer_advance (b, size); + return data; +} + /* Forward declaration. */ struct vlib_main_t; diff --git a/src/vlibmemory/unix_shared_memory_queue.c b/src/vlibmemory/unix_shared_memory_queue.c index 25d28910..e86edec3 100644 --- a/src/vlibmemory/unix_shared_memory_queue.c +++ b/src/vlibmemory/unix_shared_memory_queue.c @@ -33,18 +33,13 @@ * nels = number of elements on the queue * elsize = element size, presumably 4 and cacheline-size will * be popular choices. - * coid = consumer coid, from ChannelCreate * pid = consumer pid - * pulse_code = pulse code consumer expects - * pulse_value = pulse value consumer expects - * consumer_prio = consumer's priority, so pulses won't change - * the consumer's priority. * * The idea is to call this function in the queue consumer, * and e-mail the queue pointer to the producer(s). * - * The spp process / main thread allocates one of these - * at startup; its main input queue. The spp main input queue + * The vpp process / main thread allocates one of these + * at startup; its main input queue. The vpp main input queue * has a pointer to it in the shared memory segment header. 
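 *
 * A minimal sketch of that usage (illustrative; the element type, queue
 * pointer and zero flags simply mirror the calls made by the uri test
 * clients elsewhere in this patch):
 *
 *    my_msg_t m;
 *    unix_shared_memory_queue_add (q, (u8 *) &m, 0);    (producer side)
 *    unix_shared_memory_queue_sub (q, (u8 *) &m, 0);    (consumer side)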
* * You probably want to be on an svm data heap before calling this @@ -70,7 +65,7 @@ unix_shared_memory_queue_init (int nels, q->signal_when_queue_non_empty = signal_when_queue_non_empty; memset (&attr, 0, sizeof (attr)); - memset (&cattr, 0, sizeof (attr)); + memset (&cattr, 0, sizeof (cattr)); if (pthread_mutexattr_init (&attr)) clib_unix_warning ("mutexattr_init"); @@ -277,6 +272,7 @@ unix_shared_memory_queue_sub (unix_shared_memory_queue_t * q, clib_memcpy (elem, headp, q->elsize); q->head++; + /* $$$$ JFC shouldn't this be == 0? */ if (q->cursize == q->maxsize) need_broadcast = 1; diff --git a/src/vlibmemory/unix_shared_memory_queue.h b/src/vlibmemory/unix_shared_memory_queue.h index f758f17c..13800065 100644 --- a/src/vlibmemory/unix_shared_memory_queue.h +++ b/src/vlibmemory/unix_shared_memory_queue.h @@ -29,7 +29,7 @@ typedef struct _unix_shared_memory_queue pthread_cond_t condvar; /* 8 bytes */ int head; int tail; - int cursize; + volatile int cursize; int maxsize; int elsize; int consumer_pid; diff --git a/src/vnet.am b/src/vnet.am index 64484e18..923f61d8 100644 --- a/src/vnet.am +++ b/src/vnet.am @@ -324,11 +324,7 @@ libvnet_la_SOURCES += \ vnet/ip/ip_input_acl.c \ vnet/ip/lookup.c \ vnet/ip/ping.c \ - vnet/ip/punt.c \ - vnet/ip/udp_format.c \ - vnet/ip/udp_init.c \ - vnet/ip/udp_local.c \ - vnet/ip/udp_pg.c + vnet/ip/punt.c nobase_include_HEADERS += \ vnet/ip/format.h \ @@ -354,11 +350,7 @@ nobase_include_HEADERS += \ vnet/ip/ports.def \ vnet/ip/protocols.def \ vnet/ip/punt_error.def \ - vnet/ip/punt.h \ - vnet/ip/tcp_packet.h \ - vnet/ip/udp_error.def \ - vnet/ip/udp.h \ - vnet/ip/udp_packet.h + vnet/ip/punt.h API_FILES += vnet/ip/ip.api @@ -473,6 +465,38 @@ test_map_LDADD = libvnet.la libvppinfra.la libvlib.la \ test_map_LDFLAGS = -static endif +######################################## +# Layer 4 protocol: tcp +######################################## +libvnet_la_SOURCES += \ + vnet/tcp/tcp_format.c \ + vnet/tcp/tcp_pg.c \ + vnet/tcp/tcp_syn_filter4.c \ + vnet/tcp/tcp_output.c \ + vnet/tcp/tcp_input.c \ + vnet/tcp/tcp_newreno.c \ + vnet/tcp/tcp.c + +nobase_include_HEADERS += \ + vnet/tcp/tcp_packet.h \ + vnet/tcp/tcp_timer.h \ + vnet/tcp/tcp.h + +######################################## +# Layer 4 protocol: udp +######################################## +libvnet_la_SOURCES += \ + vnet/udp/udp.c \ + vnet/udp/udp_input.c \ + vnet/udp/builtin_server.c \ + vnet/udp/udp_format.c \ + vnet/udp/udp_local.c \ + vnet/udp/udp_pg.c + +nobase_include_HEADERS += \ + vnet/udp/udp_error.def \ + vnet/udp/udp.h \ + vnet/udp/udp_packet.h ######################################## # Tunnel protocol: gre @@ -833,6 +857,28 @@ libvnet_la_SOURCES += \ nobase_include_HEADERS += \ vnet/devices/ssvm/ssvm_eth.h +######################################## +# session managmeent +######################################## + +libvnet_la_SOURCES += \ + vnet/session/session.c \ + vnet/session/node.c \ + vnet/session/transport.c \ + vnet/session/application.c \ + vnet/session/session_cli.c \ + vnet/session/hashes.c \ + vnet/session/application_interface.c \ + vnet/session/session_api.c + +nobase_include_HEADERS += \ + vnet/session/session.h \ + vnet/session/application.h \ + vnet/session/transport.h \ + vnet/session/application_interface.h + +API_FILES += vnet/session/session.api + ######################################## # Linux packet interface ######################################## diff --git a/src/vnet/api_errno.h b/src/vnet/api_errno.h index 8680ef7c..861a5767 100644 --- a/src/vnet/api_errno.h +++ 
b/src/vnet/api_errno.h @@ -91,14 +91,19 @@ _(INVALID_ADDRESS_FAMILY, -97, "Invalid address family") \ _(INVALID_SUB_SW_IF_INDEX, -98, "Invalid sub-interface sw_if_index") \ _(TABLE_TOO_BIG, -99, "Table too big") \ _(CANNOT_ENABLE_DISABLE_FEATURE, -100, "Cannot enable/disable feature") \ -_(BFD_EEXIST, -101, "Duplicate BFD object") \ -_(BFD_ENOENT, -102, "No such BFD object") \ -_(BFD_EINUSE, -103, "BFD object in use") \ -_(BFD_NOTSUPP, -104, "BFD feature not supported") \ -_(LISP_RLOC_LOCAL, -105, "RLOC address is local") \ -_(BFD_EAGAIN, -106, "BFD object cannot be manipulated at this time") \ -_(INVALID_GPE_MODE, -107, "Invalid GPE mode") \ -_(LISP_GPE_ENTRIES_PRESENT, -108, "LISP GPE entries are present") +_(BFD_EEXIST, -101, "Duplicate BFD object") \ +_(BFD_ENOENT, -102, "No such BFD object") \ +_(BFD_EINUSE, -103, "BFD object in use") \ +_(BFD_NOTSUPP, -104, "BFD feature not supported") \ +_(ADDRESS_IN_USE, -105, "Address in use") \ +_(ADDRESS_NOT_IN_USE, -106, "Address not in use") \ +_(QUEUE_FULL, -107, "Queue full") \ +_(UNKNOWN_URI_TYPE, -108, "Unknown URI type") \ +_(URI_FIFO_CREATE_FAILED, -109, "URI FIFO segment create failed") \ +_(LISP_RLOC_LOCAL, -110, "RLOC address is local") \ +_(BFD_EAGAIN, -111, "BFD object cannot be manipulated at this time") \ +_(INVALID_GPE_MODE, -112, "Invalid GPE mode") \ +_(LISP_GPE_ENTRIES_PRESENT, -113, "LISP GPE entries are present") typedef enum { diff --git a/src/vnet/bfd/bfd_udp.c b/src/vnet/bfd/bfd_udp.c index 146faad6..cf05089b 100644 --- a/src/vnet/bfd/bfd_udp.c +++ b/src/vnet/bfd/bfd_udp.c @@ -18,12 +18,12 @@ #include #include #include -#include +#include +#include #include #include #include #include -#include #include #include #include diff --git a/src/vnet/buffer.h b/src/vnet/buffer.h index f1cc6371..3de01f2a 100644 --- a/src/vnet/buffer.h +++ b/src/vnet/buffer.h @@ -277,6 +277,16 @@ typedef struct u16 buffer_advance; } device_input_feat; + /* TCP */ + struct + { + u32 connection_index; + u32 seq_number; + u32 seq_end; + u32 ack_number; + u8 flags; + } tcp; + u32 unused[6]; }; } vnet_buffer_opaque_t; diff --git a/src/vnet/classify/vnet_classify.c b/src/vnet/classify/vnet_classify.c index 6093e2ac..b651a1f1 100644 --- a/src/vnet/classify/vnet_classify.c +++ b/src/vnet/classify/vnet_classify.c @@ -695,8 +695,8 @@ int vnet_classify_add_del_table (vnet_classify_main_t * cm, } #define foreach_tcp_proto_field \ -_(src_port) \ -_(dst_port) +_(src) \ +_(dst) #define foreach_udp_proto_field \ _(src_port) \ diff --git a/src/vnet/dhcp/dhcp_proxy.h b/src/vnet/dhcp/dhcp_proxy.h index c0d79c41..4586d883 100644 --- a/src/vnet/dhcp/dhcp_proxy.h +++ b/src/vnet/dhcp/dhcp_proxy.h @@ -26,7 +26,7 @@ #include #include #include -#include +#include typedef enum { #define dhcp_proxy_error(n,s) DHCP_PROXY_ERROR_##n, diff --git a/src/vnet/flow/flow_report.h b/src/vnet/flow/flow_report.h index 4e764377..e8ed3818 100644 --- a/src/vnet/flow/flow_report.h +++ b/src/vnet/flow/flow_report.h @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/vnet/ip/ip.h b/src/vnet/ip/ip.h index 02a1a963..70b4ccd8 100644 --- a/src/vnet/ip/ip.h +++ b/src/vnet/ip/ip.h @@ -50,8 +50,8 @@ #include #include -#include -#include +#include +#include #include #include diff --git a/src/vnet/ip/ip4.h b/src/vnet/ip/ip4.h index b184fbae..4e075d0f 100644 --- a/src/vnet/ip/ip4.h +++ b/src/vnet/ip/ip4.h @@ -309,8 +309,8 @@ ip4_compute_flow_hash (const ip4_header_t * ip, b = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ? 
t1 : t2; b ^= (flow_hash_config & IP_FLOW_HASH_PROTO) ? ip->protocol : 0; - t1 = is_tcp_udp ? tcp->ports.src : 0; - t2 = is_tcp_udp ? tcp->ports.dst : 0; + t1 = is_tcp_udp ? tcp->src : 0; + t2 = is_tcp_udp ? tcp->dst : 0; t1 = (flow_hash_config & IP_FLOW_HASH_SRC_PORT) ? t1 : 0; t2 = (flow_hash_config & IP_FLOW_HASH_DST_PORT) ? t2 : 0; @@ -334,6 +334,44 @@ u8 *format_ip4_forward_next_trace (u8 * s, va_list * args); u32 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0); +#define IP_DF 0x4000 /* don't fragment */ + +/** + * Push IPv4 header to buffer + * + * This does not support fragmentation. + * + * @param vm - vlib_main + * @param b - buffer to write the header to + * @param src - source IP + * @param dst - destination IP + * @param prot - payload proto + * + * @return - pointer to start of IP header + */ +always_inline void * +vlib_buffer_push_ip4 (vlib_main_t * vm, vlib_buffer_t * b, + ip4_address_t * src, ip4_address_t * dst, int proto) +{ + ip4_header_t *ih; + + /* make some room */ + ih = vlib_buffer_push_uninit (b, sizeof (ip4_header_t)); + + ih->ip_version_and_header_length = 0x45; + ih->tos = 0; + ih->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b)); + + /* No fragments */ + ih->flags_and_fragment_offset = clib_host_to_net_u16 (IP_DF); + ih->ttl = 255; + ih->protocol = proto; + ih->src_address.as_u32 = src->as_u32; + ih->dst_address.as_u32 = dst->as_u32; + + ih->checksum = ip4_header_checksum (ih); + return ih; +} #endif /* included_ip_ip4_h */ /* diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index 8081b34b..66d91ab6 100644 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -1478,8 +1478,18 @@ ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0) return p0->flags; } -static uword -ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) +/* *INDENT-OFF* */ +VNET_FEATURE_ARC_INIT (ip4_local) = +{ + .arc_name = "ip4-local", + .start_nodes = VNET_FEATURES ("ip4-local"), +}; +/* *INDENT-ON* */ + +static inline uword +ip4_local_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, int head_of_feature_arc) { ip4_main_t *im = &ip4_main; ip_lookup_main_t *lm = &im->lookup_main; @@ -1487,6 +1497,7 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) u32 *from, *to_next, n_left_from, n_left_to_next; vlib_node_runtime_t *error_node = vlib_node_get_runtime (vm, ip4_input_node.index); + u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index; from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -1513,7 +1524,7 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) i32 len_diff0, len_diff1; u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0; u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1; - u8 enqueue_code; + u32 sw_if_index0, sw_if_index1; pi0 = to_next[0] = from[0]; pi1 = to_next[1] = from[1]; @@ -1522,6 +1533,8 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) to_next += 2; n_left_to_next -= 2; + next0 = next1 = IP_LOCAL_NEXT_DROP; + p0 = vlib_get_buffer (vm, pi0); p1 = vlib_get_buffer (vm, pi1); @@ -1531,14 +1544,18 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data; vnet_buffer (p1)->ip.start_of_ip_header = p1->current_data; - fib_index0 = vec_elt (im->fib_index_by_sw_if_index, - vnet_buffer (p0)->sw_if_index[VLIB_RX]); + sw_if_index0 = 
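/* Illustrative use of the vlib_buffer_push_ip4 () helper added above (a
   sketch; vm, b and the address variables are assumed to be in scope):

     ip4_header_t *ih;
     ih = vlib_buffer_push_ip4 (vm, b, &src_address, &dst_address,
                                IP_PROTOCOL_UDP);

   The helper grows headroom with vlib_buffer_push_uninit () and writes a
   fixed 20-byte header with DF set, so the payload must already be in the
   buffer for the length field to come out right. */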
vnet_buffer (p0)->sw_if_index[VLIB_RX]; + sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX]; + + fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0); + fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1); + + fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0); fib_index0 = (vnet_buffer (p0)->sw_if_index[VLIB_TX] == (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX]; - fib_index1 = vec_elt (im->fib_index_by_sw_if_index, - vnet_buffer (p1)->sw_if_index[VLIB_RX]); + fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1); fib_index1 = (vnet_buffer (p1)->sw_if_index[VLIB_TX] == (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX]; @@ -1557,6 +1574,13 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) until support of IP frag reassembly is implemented */ proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol; proto1 = ip4_is_fragment (ip1) ? 0xfe : ip1->protocol; + + if (head_of_feature_arc == 0) + { + error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL; + goto skip_checks; + } + is_udp0 = proto0 == IP_PROTOCOL_UDP; is_udp1 = proto1 == IP_PROTOCOL_UDP; is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP; @@ -1686,6 +1710,7 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) next0 = lm->local_next_by_ip_protocol[proto0]; next1 = lm->local_next_by_ip_protocol[proto1]; + skip_checks: next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0; next1 = @@ -1694,44 +1719,17 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) p0->error = error0 ? error_node->errors[error0] : 0; p1->error = error1 ? error_node->errors[error1] : 0; - enqueue_code = (next0 != next_index) + 2 * (next1 != next_index); - - if (PREDICT_FALSE (enqueue_code != 0)) + if (head_of_feature_arc) { - switch (enqueue_code) - { - case 1: - /* A B A */ - to_next[-2] = pi1; - to_next -= 1; - n_left_to_next += 1; - vlib_set_next_frame_buffer (vm, node, next0, pi0); - break; - - case 2: - /* A A B */ - to_next -= 1; - n_left_to_next += 1; - vlib_set_next_frame_buffer (vm, node, next1, pi1); - break; - - case 3: - /* A B B or A B C */ - to_next -= 2; - n_left_to_next += 2; - vlib_set_next_frame_buffer (vm, node, next0, pi0); - vlib_set_next_frame_buffer (vm, node, next1, pi1); - if (next0 == next1) - { - vlib_put_next_frame (vm, node, next_index, - n_left_to_next); - next_index = next1; - vlib_get_next_frame (vm, node, next_index, to_next, - n_left_to_next); - } - break; - } + if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL)) + vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0); + if (PREDICT_TRUE (error1 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL)) + vnet_feature_arc_start (arc_index, sw_if_index1, &next1, p1); } + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, + n_left_to_next, pi0, pi1, + next0, next1); } while (n_left_from > 0 && n_left_to_next > 0) @@ -1746,6 +1744,7 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0; load_balance_t *lb0; const dpo_id_t *dpo0; + u32 sw_if_index0; pi0 = to_next[0] = from[0]; from += 1; @@ -1753,14 +1752,18 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) to_next += 1; n_left_to_next -= 1; + next0 = IP_LOCAL_NEXT_DROP; + p0 = vlib_get_buffer (vm, pi0); ip0 = vlib_buffer_get_current (p0); vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data; - fib_index0 = vec_elt 
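/* ip4-local is now the head of a feature arc: once the checksum / length /
   source checks pass, vnet_feature_arc_start () steers the packet through
   any features registered on the "ip4-local" arc before protocol dispatch
   resumes in ip4-local-end-of-arc.  A sketch of such a registration
   (hypothetical node name, same construct as the registration added below):

     VNET_FEATURE_INIT (my_ip4_local_feature, static) = {
       .arc_name = "ip4-local",
       .node_name = "my-ip4-local-feature",
       .runs_before = VNET_FEATURES ("ip4-local-end-of-arc"),
     };
 */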
(im->fib_index_by_sw_if_index, - vnet_buffer (p0)->sw_if_index[VLIB_RX]); + sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; + + fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0); + fib_index0 = (vnet_buffer (p0)->sw_if_index[VLIB_TX] == (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX]; @@ -1775,6 +1778,13 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) /* Treat IP frag packets as "experimental" protocol for now until support of IP frag reassembly is implemented */ proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol; + + if (head_of_feature_arc == 0) + { + error0 = IP4_ERROR_UNKNOWN_PROTOCOL; + goto skip_check; + } + is_udp0 = proto0 == IP_PROTOCOL_UDP; is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP; @@ -1847,6 +1857,8 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) ip0->dst_address.as_u32 != 0xFFFFFFFF) ? IP4_ERROR_SRC_LOOKUP_MISS : error0); + skip_check: + next0 = lm->local_next_by_ip_protocol[proto0]; next0 = @@ -1854,18 +1866,15 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) p0->error = error0 ? error_node->errors[error0] : 0; - if (PREDICT_FALSE (next0 != next_index)) + if (head_of_feature_arc) { - n_left_to_next += 1; - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - - next_index = next0; - vlib_get_next_frame (vm, node, next_index, to_next, - n_left_to_next); - to_next[0] = pi0; - to_next += 1; - n_left_to_next -= 1; + if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL)) + vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0); } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, pi0, next0); + } vlib_put_next_frame (vm, node, next_index, n_left_to_next); @@ -1874,21 +1883,57 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) return frame->n_vectors; } +static uword +ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ ); +} + +/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_local_node) = { - .function = ip4_local,.name = "ip4-local",.vector_size = - sizeof (u32),.format_trace = - format_ip4_forward_next_trace,.n_next_nodes = - IP_LOCAL_N_NEXT,.next_nodes = + .function = ip4_local, + .name = "ip4-local", + .vector_size = sizeof (u32), + .format_trace = format_ip4_forward_next_trace, + .n_next_nodes = IP_LOCAL_N_NEXT, + .next_nodes = { - [IP_LOCAL_NEXT_DROP] = "error-drop", - [IP_LOCAL_NEXT_PUNT] = "error-punt", - [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup", - [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",} -,}; + [IP_LOCAL_NEXT_DROP] = "error-drop", + [IP_LOCAL_NEXT_PUNT] = "error-punt", + [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup", + [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",}, +}; +/* *INDENT-ON* */ VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local); +static uword +ip4_local_end_of_arc (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ ); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip4_local_end_of_arc_node,static) = { + .function = ip4_local_end_of_arc, + .name = "ip4-local-end-of-arc", + .vector_size = sizeof (u32), + + .format_trace = format_ip4_forward_next_trace, + .sibling_of = "ip4-local", +}; + +VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_end_of_arc_node, ip4_local_end_of_arc) + +VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = { + .arc_name = 
"ip4-local", + .node_name = "ip4-local-end-of-arc", + .runs_before = 0, /* not before any other features */ +}; +/* *INDENT-ON* */ + void ip4_register_protocol (u32 protocol, u32 node_index) { diff --git a/src/vnet/ip/ip4_packet.h b/src/vnet/ip/ip4_packet.h index 8da788b4..b2c1fcd4 100644 --- a/src/vnet/ip/ip4_packet.h +++ b/src/vnet/ip/ip4_packet.h @@ -41,7 +41,7 @@ #define included_ip4_packet_h #include /* for ip_csum_t */ -#include /* for tcp_header_t */ +#include /* for tcp_header_t */ #include /* for clib_net_to_host_u16 */ /* IP4 address which can be accessed either as 4 bytes @@ -342,10 +342,10 @@ ip4_tcp_reply_x1 (ip4_header_t * ip0, tcp_header_t * tcp0) ip0->src_address.data_u32 = dst0; ip0->dst_address.data_u32 = src0; - src0 = tcp0->ports.src; - dst0 = tcp0->ports.dst; - tcp0->ports.src = dst0; - tcp0->ports.dst = src0; + src0 = tcp0->src; + dst0 = tcp0->dst; + tcp0->src = dst0; + tcp0->dst = src0; } always_inline void @@ -363,14 +363,14 @@ ip4_tcp_reply_x2 (ip4_header_t * ip0, ip4_header_t * ip1, ip0->dst_address.data_u32 = src0; ip1->dst_address.data_u32 = src1; - src0 = tcp0->ports.src; - src1 = tcp1->ports.src; - dst0 = tcp0->ports.dst; - dst1 = tcp1->ports.dst; - tcp0->ports.src = dst0; - tcp1->ports.src = dst1; - tcp0->ports.dst = src0; - tcp1->ports.dst = src1; + src0 = tcp0->src; + src1 = tcp1->src; + dst0 = tcp0->dst; + dst1 = tcp1->dst; + tcp0->src = dst0; + tcp1->src = dst1; + tcp0->dst = src0; + tcp1->dst = src1; } #endif /* included_ip4_packet_h */ diff --git a/src/vnet/ip/ip6.h b/src/vnet/ip/ip6.h index 5456f0f2..2615fbfa 100644 --- a/src/vnet/ip/ip6.h +++ b/src/vnet/ip/ip6.h @@ -461,8 +461,8 @@ ip6_compute_flow_hash (const ip6_header_t * ip, b = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ? t1 : t2; b ^= (flow_hash_config & IP_FLOW_HASH_PROTO) ? ip->protocol : 0; - t1 = is_tcp_udp ? tcp->ports.src : 0; - t2 = is_tcp_udp ? tcp->ports.dst : 0; + t1 = is_tcp_udp ? tcp->src : 0; + t2 = is_tcp_udp ? tcp->dst : 0; t1 = (flow_hash_config & IP_FLOW_HASH_SRC_PORT) ? t1 : 0; t2 = (flow_hash_config & IP_FLOW_HASH_DST_PORT) ? 
t2 : 0; @@ -497,6 +497,46 @@ int ip6_hbh_register_option (u8 option, int ip6_hbh_unregister_option (u8 option); void ip6_hbh_set_next_override (uword next); +/** + * Push IPv6 header to buffer + * + * @param vm - vlib_main + * @param b - buffer to write the header to + * @param src - source IP + * @param dst - destination IP + * @param prot - payload proto + * + * @return - pointer to start of IP header + */ +always_inline void * +vlib_buffer_push_ip6 (vlib_main_t * vm, vlib_buffer_t * b, + ip6_address_t * src, ip6_address_t * dst, int proto) +{ + ip6_header_t *ip6h; + u16 payload_length; + + /* make some room */ + ip6h = vlib_buffer_push_uninit (b, sizeof (ip6_header_t)); + + ip6h->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 (0x6 << 28); + + /* calculate ip6 payload length */ + payload_length = vlib_buffer_length_in_chain (vm, b); + payload_length -= sizeof (*ip6h); + + ip6h->payload_length = clib_host_to_net_u16 (payload_length); + + ip6h->hop_limit = 0xff; + ip6h->protocol = proto; + clib_memcpy (ip6h->src_address.as_u8, src->as_u8, + sizeof (ip6h->src_address)); + clib_memcpy (ip6h->dst_address.as_u8, dst->as_u8, + sizeof (ip6h->src_address)); + + return ip6h; +} + #endif /* included_ip_ip6_h */ /* diff --git a/src/vnet/ip/ip6_packet.h b/src/vnet/ip/ip6_packet.h index 1e551c8b..4fd14b96 100644 --- a/src/vnet/ip/ip6_packet.h +++ b/src/vnet/ip/ip6_packet.h @@ -40,7 +40,7 @@ #ifndef included_ip6_packet_h #define included_ip6_packet_h -#include +#include #include typedef union @@ -373,10 +373,10 @@ ip6_tcp_reply_x1 (ip6_header_t * ip0, tcp_header_t * tcp0) { u16 src0, dst0; - src0 = tcp0->ports.src; - dst0 = tcp0->ports.dst; - tcp0->ports.src = dst0; - tcp0->ports.dst = src0; + src0 = tcp0->src; + dst0 = tcp0->dst; + tcp0->src = dst0; + tcp0->dst = src0; } } @@ -400,14 +400,14 @@ ip6_tcp_reply_x2 (ip6_header_t * ip0, ip6_header_t * ip1, { u16 src0, dst0, src1, dst1; - src0 = tcp0->ports.src; - src1 = tcp1->ports.src; - dst0 = tcp0->ports.dst; - dst1 = tcp1->ports.dst; - tcp0->ports.src = dst0; - tcp1->ports.src = dst1; - tcp0->ports.dst = src0; - tcp1->ports.dst = src1; + src0 = tcp0->src; + src1 = tcp1->src; + dst0 = tcp0->dst; + dst1 = tcp1->dst; + tcp0->src = dst0; + tcp1->src = dst1; + tcp0->dst = src0; + tcp1->dst = src1; } } diff --git a/src/vnet/ip/punt.c b/src/vnet/ip/punt.c index 9c735128..48558401 100644 --- a/src/vnet/ip/punt.c +++ b/src/vnet/ip/punt.c @@ -23,7 +23,7 @@ */ #include #include -#include +#include #include #define foreach_punt_next \ diff --git a/src/vnet/ip/tcp_packet.h b/src/vnet/ip/tcp_packet.h deleted file mode 100644 index 93f73e01..00000000 --- a/src/vnet/ip/tcp_packet.h +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
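/*
 * Usage sketch for the vlib_buffer_push_ip4()/vlib_buffer_push_ip6()
 * helpers added to ip4.h and ip6.h above (they replace the LISP-local
 * pkt_push_ipv4()/pkt_push_ipv6(); see the lisp-cp hunks further down).
 * Assumes the payload is already in the buffer and enough headroom has
 * been reserved; the ip46_address_t layout comes from vnet/ip.
 */
#include <vnet/ip/ip.h>

static void *
push_outer_ip_header (vlib_main_t * vm, vlib_buffer_t * b,
		      ip46_address_t * src, ip46_address_t * dst, u8 is_ip4)
{
  if (is_ip4)
    /* prepends 20 bytes, sets DF and TTL 255, computes the checksum */
    return vlib_buffer_push_ip4 (vm, b, &src->ip4, &dst->ip4,
				 IP_PROTOCOL_UDP);
  else
    /* prepends 40 bytes and derives payload_length from the chain length */
    return vlib_buffer_push_ip6 (vm, b, &src->ip6, &dst->ip6,
				 IP_PROTOCOL_UDP);
}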
- */ -/* - * ip4/tcp_packet.h: TCP packet format (see RFC 793) - * - * Copyright (c) 2008 Eliot Dresselhaus - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef included_tcp_packet_h -#define included_tcp_packet_h - -/* TCP flags bit 0 first. */ -#define foreach_tcp_flag \ - _ (FIN) \ - _ (SYN) \ - _ (RST) \ - _ (PSH) \ - _ (ACK) \ - _ (URG) \ - _ (ECE) \ - _ (CWR) - -enum -{ -#define _(f) TCP_FLAG_BIT_##f, - foreach_tcp_flag -#undef _ - TCP_N_FLAG_BITS, - -#define _(f) TCP_FLAG_##f = 1 << TCP_FLAG_BIT_##f, - foreach_tcp_flag -#undef _ -}; - -typedef struct -{ - /* Source and destination port. */ - union - { - union - { - struct - { - u16 src, dst; - }; - u32 src_and_dst; - } ports; - struct - { - u16 src_port, dst_port; - }; - }; - - /* Sequence and acknowledgment number. */ - u32 seq_number, ack_number; - - /* Size of TCP header in 32-bit units plus 4 reserved bits. */ - u8 tcp_header_u32s_and_reserved; - - /* see foreach_tcp_flag for enumation of tcp flags. */ - u8 flags; - - /* Current window advertised by sender. - This is the number of bytes sender is willing to receive - right now. */ - u16 window; - - /* Checksum of TCP pseudo header and data. */ - u16 checksum; - - u16 urgent_pointer; -} tcp_header_t; - -always_inline int -tcp_header_bytes (tcp_header_t * t) -{ - return (t->tcp_header_u32s_and_reserved >> 4) * sizeof (u32); -} - -/* TCP options. */ -typedef enum tcp_option_type -{ - TCP_OPTION_END = 0, - TCP_OPTION_NOP = 1, - TCP_OPTION_MSS = 2, - TCP_OPTION_WINDOW_SCALE = 3, - TCP_OPTION_SACK_PERMITTED = 4, - TCP_OPTION_SACK_BLOCK = 5, - TCP_OPTION_TIME_STAMP = 8, -} tcp_option_type_t; - -/* All except NOP and END have 1 byte length field. */ -typedef struct -{ - tcp_option_type_t type:8; - - /* Length of this option in bytes. */ - u8 length; -} tcp_option_with_length_t; - -#endif /* included_tcp_packet_h */ - - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/ip/udp.h b/src/vnet/ip/udp.h deleted file mode 100644 index bad58b5d..00000000 --- a/src/vnet/ip/udp.h +++ /dev/null @@ -1,315 +0,0 @@ -/* - * ip/udp.h: udp protocol - * - * Copyright (c) 2013 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
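/*
 * Sketch: hooking a feature onto the new "ip4-local" arc created in the
 * ip4_forward.c hunk above. Packets that reach ip4-local with no error now
 * start the arc via vnet_feature_arc_start(); enabled features run in
 * between and "ip4-local-end-of-arc" completes protocol dispatch.
 * "my-ip4-local-filter" is a hypothetical node name used only for
 * illustration.
 */
#include <vnet/feature/feature.h>

/* *INDENT-OFF* */
VNET_FEATURE_INIT (my_ip4_local_filter, static) =
{
  .arc_name = "ip4-local",
  .node_name = "my-ip4-local-filter",
  .runs_before = VNET_FEATURES ("ip4-local-end-of-arc"),
};
/* *INDENT-ON* */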
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef included_udp_h -#define included_udp_h - -#include -#include -#include -#include -#include -#include -#include - -typedef enum -{ -#define udp_error(n,s) UDP_ERROR_##n, -#include -#undef udp_error - UDP_N_ERROR, -} udp_error_t; - -#define foreach_udp4_dst_port \ -_ (67, dhcp_to_server) \ -_ (68, dhcp_to_client) \ -_ (500, ikev2) \ -_ (3784, bfd4) \ -_ (3785, bfd_echo4) \ -_ (4341, lisp_gpe) \ -_ (4342, lisp_cp) \ -_ (4739, ipfix) \ -_ (4789, vxlan) \ -_ (4789, vxlan6) \ -_ (4790, vxlan_gpe) \ -_ (6633, vpath_3) - - -#define foreach_udp6_dst_port \ -_ (547, dhcpv6_to_server) \ -_ (546, dhcpv6_to_client) \ -_ (3784, bfd6) \ -_ (3785, bfd_echo6) \ -_ (4341, lisp_gpe6) \ -_ (4342, lisp_cp6) \ -_ (4790, vxlan6_gpe) \ -_ (6633, vpath6_3) - -typedef enum -{ -#define _(n,f) UDP_DST_PORT_##f = n, - foreach_udp4_dst_port foreach_udp6_dst_port -#undef _ -} udp_dst_port_t; - -typedef enum -{ -#define _(n,f) UDP6_DST_PORT_##f = n, - foreach_udp6_dst_port -#undef _ -} udp6_dst_port_t; - -typedef struct -{ - /* Name (a c string). */ - char *name; - - /* GRE protocol type in host byte order. */ - udp_dst_port_t dst_port; - - /* Node which handles this type. */ - u32 node_index; - - /* Next index for this type. */ - u32 next_index; -} udp_dst_port_info_t; - -typedef enum -{ - UDP_IP6 = 0, - UDP_IP4, /* the code is full of is_ip4... */ - N_UDP_AF, -} udp_af_t; - -typedef struct -{ - udp_dst_port_info_t *dst_port_infos[N_UDP_AF]; - - /* Hash tables mapping name/protocol to protocol info index. */ - uword *dst_port_info_by_name[N_UDP_AF]; - uword *dst_port_info_by_dst_port[N_UDP_AF]; - - /* convenience */ - vlib_main_t *vlib_main; -} udp_main_t; - -always_inline udp_dst_port_info_t * -udp_get_dst_port_info (udp_main_t * um, udp_dst_port_t dst_port, u8 is_ip4) -{ - uword *p = hash_get (um->dst_port_info_by_dst_port[is_ip4], dst_port); - return p ? 
vec_elt_at_index (um->dst_port_infos[is_ip4], p[0]) : 0; -} - -format_function_t format_udp_header; -format_function_t format_udp_rx_trace; - -unformat_function_t unformat_udp_header; - -void udp_register_dst_port (vlib_main_t * vm, - udp_dst_port_t dst_port, - u32 node_index, u8 is_ip4); - -void udp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add); - -always_inline void -ip_udp_fixup_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 is_ip4) -{ - u16 new_l0; - udp_header_t *udp0; - - if (is_ip4) - { - ip4_header_t *ip0; - ip_csum_t sum0; - u16 old_l0 = 0; - - ip0 = vlib_buffer_get_current (b0); - - /* fix the ing outer-IP checksum */ - sum0 = ip0->checksum; - /* old_l0 always 0, see the rewrite setup */ - new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); - - sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, - length /* changed member */ ); - ip0->checksum = ip_csum_fold (sum0); - ip0->length = new_l0; - - /* Fix UDP length */ - udp0 = (udp_header_t *) (ip0 + 1); - new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - - sizeof (*ip0)); - udp0->length = new_l0; - } - else - { - ip6_header_t *ip0; - int bogus0; - - ip0 = vlib_buffer_get_current (b0); - - new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - - sizeof (*ip0)); - ip0->payload_length = new_l0; - - /* Fix UDP length */ - udp0 = (udp_header_t *) (ip0 + 1); - udp0->length = new_l0; - - udp0->checksum = - ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip0, &bogus0); - ASSERT (bogus0 == 0); - - if (udp0->checksum == 0) - udp0->checksum = 0xffff; - } -} - -always_inline void -ip_udp_encap_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 * ec0, word ec_len, - u8 is_ip4) -{ - vlib_buffer_advance (b0, -ec_len); - - if (is_ip4) - { - ip4_header_t *ip0; - - ip0 = vlib_buffer_get_current (b0); - - /* Apply the encap string. */ - clib_memcpy (ip0, ec0, ec_len); - ip_udp_fixup_one (vm, b0, 1); - } - else - { - ip6_header_t *ip0; - - ip0 = vlib_buffer_get_current (b0); - - /* Apply the encap string. 
*/ - clib_memcpy (ip0, ec0, ec_len); - ip_udp_fixup_one (vm, b0, 0); - } -} - -always_inline void -ip_udp_encap_two (vlib_main_t * vm, vlib_buffer_t * b0, vlib_buffer_t * b1, - u8 * ec0, u8 * ec1, word ec_len, u8 is_v4) -{ - u16 new_l0, new_l1; - udp_header_t *udp0, *udp1; - - ASSERT (_vec_len (ec0) == _vec_len (ec1)); - - vlib_buffer_advance (b0, -ec_len); - vlib_buffer_advance (b1, -ec_len); - - if (is_v4) - { - ip4_header_t *ip0, *ip1; - ip_csum_t sum0, sum1; - u16 old_l0 = 0, old_l1 = 0; - - ip0 = vlib_buffer_get_current (b0); - ip1 = vlib_buffer_get_current (b1); - - /* Apply the encap string */ - clib_memcpy (ip0, ec0, ec_len); - clib_memcpy (ip1, ec1, ec_len); - - /* fix the ing outer-IP checksum */ - sum0 = ip0->checksum; - sum1 = ip1->checksum; - - /* old_l0 always 0, see the rewrite setup */ - new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); - new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1)); - - sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, - length /* changed member */ ); - sum1 = ip_csum_update (sum1, old_l1, new_l1, ip4_header_t, - length /* changed member */ ); - - ip0->checksum = ip_csum_fold (sum0); - ip1->checksum = ip_csum_fold (sum1); - - ip0->length = new_l0; - ip1->length = new_l1; - - /* Fix UDP length */ - udp0 = (udp_header_t *) (ip0 + 1); - udp1 = (udp_header_t *) (ip1 + 1); - - new_l0 = - clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - - sizeof (*ip0)); - new_l1 = - clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1) - - sizeof (*ip1)); - udp0->length = new_l0; - udp1->length = new_l1; - } - else - { - ip6_header_t *ip0, *ip1; - int bogus0, bogus1; - - ip0 = vlib_buffer_get_current (b0); - ip1 = vlib_buffer_get_current (b1); - - /* Apply the encap string. */ - clib_memcpy (ip0, ec0, ec_len); - clib_memcpy (ip1, ec1, ec_len); - - new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - - sizeof (*ip0)); - new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1) - - sizeof (*ip1)); - ip0->payload_length = new_l0; - ip1->payload_length = new_l1; - - /* Fix UDP length */ - udp0 = (udp_header_t *) (ip0 + 1); - udp1 = (udp_header_t *) (ip1 + 1); - - udp0->length = new_l0; - udp1->length = new_l1; - - udp0->checksum = - ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip0, &bogus0); - udp1->checksum = - ip6_tcp_udp_icmp_compute_checksum (vm, b1, ip1, &bogus1); - ASSERT (bogus0 == 0); - ASSERT (bogus1 == 0); - - if (udp0->checksum == 0) - udp0->checksum = 0xffff; - if (udp1->checksum == 0) - udp1->checksum = 0xffff; - } -} - -#endif /* included_udp_h */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/ip/udp_error.def b/src/vnet/ip/udp_error.def deleted file mode 100644 index bfdae0ac..00000000 --- a/src/vnet/ip/udp_error.def +++ /dev/null @@ -1,21 +0,0 @@ -/* - * udp_error.def: udp errors - * - * Copyright (c) 2013-2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
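/*
 * Sketch: building the "encap string" that ip_udp_encap_one()/_two() above
 * copy in front of each packet. The length fields are deliberately left at
 * zero; ip_udp_fixup_one() fills them in (and updates the IPv4 checksum
 * incrementally) per packet. Ports and the TTL value are placeholders.
 */
static u8 *
make_ip4_udp_rewrite (ip4_address_t * src, ip4_address_t * dst,
		      u16 src_port, u16 dst_port)
{
  u8 *rewrite = 0;
  ip4_header_t *ip;
  udp_header_t *udp;

  /* one vector holding a back-to-back IPv4 + UDP header template */
  vec_validate (rewrite, sizeof (*ip) + sizeof (*udp) - 1);

  ip = (ip4_header_t *) rewrite;
  ip->ip_version_and_header_length = 0x45;
  ip->ttl = 254;
  ip->protocol = IP_PROTOCOL_UDP;
  ip->src_address = *src;
  ip->dst_address = *dst;
  /* checksum over the zero-length template; the fixup adjusts it for the
     real length later */
  ip->checksum = ip4_header_checksum (ip);

  udp = (udp_header_t *) (ip + 1);
  udp->src_port = clib_host_to_net_u16 (src_port);
  udp->dst_port = clib_host_to_net_u16 (dst_port);
  /* udp->length and udp->checksum stay 0, see ip_udp_fixup_one() */

  return rewrite;
}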
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -udp_error (NONE, "no error") -udp_error (NO_LISTENER, "no listener for dst port") -udp_error (LENGTH_ERROR, "UDP packets with length errors") -udp_error (PUNT, "no listener punt") diff --git a/src/vnet/ip/udp_format.c b/src/vnet/ip/udp_format.c deleted file mode 100644 index abdf561e..00000000 --- a/src/vnet/ip/udp_format.c +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - * ip/udp_format.c: udp formatting - * - * Copyright (c) 2008 Eliot Dresselhaus - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include - -/* Format UDP header. */ -u8 * -format_udp_header (u8 * s, va_list * args) -{ - udp_header_t *udp = va_arg (*args, udp_header_t *); - u32 max_header_bytes = va_arg (*args, u32); - uword indent; - u32 header_bytes = sizeof (udp[0]); - - /* Nothing to do. */ - if (max_header_bytes < sizeof (udp[0])) - return format (s, "UDP header truncated"); - - indent = format_get_indent (s); - indent += 2; - - s = format (s, "UDP: %d -> %d", - clib_net_to_host_u16 (udp->src_port), - clib_net_to_host_u16 (udp->dst_port)); - - s = format (s, "\n%Ulength %d, checksum 0x%04x", - format_white_space, indent, - clib_net_to_host_u16 (udp->length), - clib_net_to_host_u16 (udp->checksum)); - - /* Recurse into next protocol layer. 
*/ - if (max_header_bytes != 0 && header_bytes < max_header_bytes) - { - ip_main_t *im = &ip_main; - tcp_udp_port_info_t *pi; - - pi = ip_get_tcp_udp_port_info (im, udp->dst_port); - - if (pi && pi->format_header) - s = format (s, "\n%U%U", - format_white_space, indent - 2, pi->format_header, - /* next protocol header */ (udp + 1), - max_header_bytes - sizeof (udp[0])); - } - - return s; -} - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/ip/udp_init.c b/src/vnet/ip/udp_init.c deleted file mode 100644 index 1241ca4a..00000000 --- a/src/vnet/ip/udp_init.c +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - * ip/udp_init.c: udp initialization - * - * Copyright (c) 2008 Eliot Dresselhaus - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include - -clib_error_t * -udp_init (vlib_main_t * vm) -{ - ip_main_t *im = &ip_main; - ip_protocol_info_t *pi; - clib_error_t *error; - - error = vlib_call_init_function (vm, ip_main_init); - - if (!error) - { - pi = ip_get_protocol_info (im, IP_PROTOCOL_UDP); - if (pi == 0) - return clib_error_return (0, "UDP protocol info AWOL"); - pi->format_header = format_udp_header; - pi->unformat_pg_edit = unformat_pg_udp_header; - } - - return 0; -} - -VLIB_INIT_FUNCTION (udp_init); - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/ip/udp_local.c b/src/vnet/ip/udp_local.c deleted file mode 100644 index 13ab6e4f..00000000 --- a/src/vnet/ip/udp_local.c +++ /dev/null @@ -1,645 +0,0 @@ -/* - * node.c: udp packet processing - * - * Copyright (c) 2013 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
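/*
 * Sketch: invoking the formatter above through format()'s %U mechanism,
 * e.g. from a CLI or trace handler; the second formatter argument is the
 * number of header bytes available.
 */
static void
print_udp_header (vlib_main_t * vm, udp_header_t * udp)
{
  vlib_cli_output (vm, "%U", format_udp_header, udp,
		   (u32) sizeof (udp[0]));
}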
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include - -udp_main_t udp_main; - -#define foreach_udp_input_next \ - _ (PUNT, "error-punt") \ - _ (DROP, "error-drop") \ - _ (ICMP4_ERROR, "ip4-icmp-error") \ - _ (ICMP6_ERROR, "ip6-icmp-error") - -typedef enum -{ -#define _(s,n) UDP_INPUT_NEXT_##s, - foreach_udp_input_next -#undef _ - UDP_INPUT_N_NEXT, -} udp_input_next_t; - -typedef struct -{ - u16 src_port; - u16 dst_port; - u8 bound; -} udp_rx_trace_t; - -u8 * -format_udp_rx_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - udp_rx_trace_t *t = va_arg (*args, udp_rx_trace_t *); - - s = format (s, "UDP: src-port %d dst-port %d%s", - clib_net_to_host_u16 (t->src_port), - clib_net_to_host_u16 (t->dst_port), - t->bound ? "" : " (no listener)"); - return s; -} - -typedef struct -{ - /* Sparse vector mapping udp dst_port in network byte order - to next index. */ - u16 *next_by_dst_port; - u8 punt_unknown; -} udp_input_runtime_t; - -vlib_node_registration_t udp4_input_node; -vlib_node_registration_t udp6_input_node; - -always_inline uword -udp46_input_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * from_frame, int is_ip4) -{ - udp_input_runtime_t *rt = is_ip4 ? - (void *) vlib_node_get_runtime_data (vm, udp4_input_node.index) - : (void *) vlib_node_get_runtime_data (vm, udp6_input_node.index); - __attribute__ ((unused)) u32 n_left_from, next_index, *from, *to_next; - word n_no_listener = 0; - u8 punt_unknown = rt->punt_unknown; - - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - - next_index = node->cached_next_index; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from >= 4 && n_left_to_next >= 2) - { - u32 bi0, bi1; - vlib_buffer_t *b0, *b1; - udp_header_t *h0 = 0, *h1 = 0; - u32 i0, i1, dst_port0, dst_port1; - u32 advance0, advance1; - u32 error0, next0, error1, next1; - - /* Prefetch next iteration. 
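/*
 * (Dual-loop pattern: the block below prefetches the headers of buffers 2
 * and 3 while buffers 0 and 1 are processed, so their cache lines are warm
 * on the next pass; the "n_left_from >= 4" loop condition above guarantees
 * they exist.)
 */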
*/ - { - vlib_buffer_t *p2, *p3; - - p2 = vlib_get_buffer (vm, from[2]); - p3 = vlib_get_buffer (vm, from[3]); - - vlib_prefetch_buffer_header (p2, LOAD); - vlib_prefetch_buffer_header (p3, LOAD); - - CLIB_PREFETCH (p2->data, sizeof (h0[0]), LOAD); - CLIB_PREFETCH (p3->data, sizeof (h1[0]), LOAD); - } - - bi0 = from[0]; - bi1 = from[1]; - to_next[0] = bi0; - to_next[1] = bi1; - from += 2; - to_next += 2; - n_left_to_next -= 2; - n_left_from -= 2; - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - - /* ip4/6_local hands us the ip header, not the udp header */ - if (is_ip4) - { - advance0 = sizeof (ip4_header_t); - advance1 = sizeof (ip4_header_t); - } - else - { - advance0 = sizeof (ip6_header_t); - advance1 = sizeof (ip6_header_t); - } - - if (PREDICT_FALSE (b0->current_length < advance0 + sizeof (*h0))) - { - error0 = UDP_ERROR_LENGTH_ERROR; - next0 = UDP_INPUT_NEXT_DROP; - } - else - { - vlib_buffer_advance (b0, advance0); - h0 = vlib_buffer_get_current (b0); - error0 = next0 = 0; - if (PREDICT_FALSE (clib_net_to_host_u16 (h0->length) > - vlib_buffer_length_in_chain (vm, b0))) - { - error0 = UDP_ERROR_LENGTH_ERROR; - next0 = UDP_INPUT_NEXT_DROP; - } - } - - if (PREDICT_FALSE (b1->current_length < advance1 + sizeof (*h1))) - { - error1 = UDP_ERROR_LENGTH_ERROR; - next1 = UDP_INPUT_NEXT_DROP; - } - else - { - vlib_buffer_advance (b1, advance1); - h1 = vlib_buffer_get_current (b1); - error1 = next1 = 0; - if (PREDICT_FALSE (clib_net_to_host_u16 (h1->length) > - vlib_buffer_length_in_chain (vm, b1))) - { - error1 = UDP_ERROR_LENGTH_ERROR; - next1 = UDP_INPUT_NEXT_DROP; - } - } - - /* Index sparse array with network byte order. */ - dst_port0 = (error0 == 0) ? h0->dst_port : 0; - dst_port1 = (error1 == 0) ? h1->dst_port : 0; - sparse_vec_index2 (rt->next_by_dst_port, dst_port0, dst_port1, - &i0, &i1); - next0 = (error0 == 0) ? vec_elt (rt->next_by_dst_port, i0) : next0; - next1 = (error1 == 0) ? 
vec_elt (rt->next_by_dst_port, i1) : next1; - - if (PREDICT_FALSE (i0 == SPARSE_VEC_INVALID_INDEX)) - { - // move the pointer back so icmp-error can find the - // ip packet header - vlib_buffer_advance (b0, -(word) advance0); - - if (PREDICT_FALSE (punt_unknown)) - { - b0->error = node->errors[UDP_ERROR_PUNT]; - next0 = UDP_INPUT_NEXT_PUNT; - } - else if (is_ip4) - { - icmp4_error_set_vnet_buffer (b0, - ICMP4_destination_unreachable, - ICMP4_destination_unreachable_port_unreachable, - 0); - next0 = UDP_INPUT_NEXT_ICMP4_ERROR; - n_no_listener++; - } - else - { - icmp6_error_set_vnet_buffer (b0, - ICMP6_destination_unreachable, - ICMP6_destination_unreachable_port_unreachable, - 0); - next0 = UDP_INPUT_NEXT_ICMP6_ERROR; - n_no_listener++; - } - } - else - { - b0->error = node->errors[UDP_ERROR_NONE]; - // advance to the payload - vlib_buffer_advance (b0, sizeof (*h0)); - } - - if (PREDICT_FALSE (i1 == SPARSE_VEC_INVALID_INDEX)) - { - // move the pointer back so icmp-error can find the - // ip packet header - vlib_buffer_advance (b1, -(word) advance1); - - if (PREDICT_FALSE (punt_unknown)) - { - b1->error = node->errors[UDP_ERROR_PUNT]; - next1 = UDP_INPUT_NEXT_PUNT; - } - else if (is_ip4) - { - icmp4_error_set_vnet_buffer (b1, - ICMP4_destination_unreachable, - ICMP4_destination_unreachable_port_unreachable, - 0); - next1 = UDP_INPUT_NEXT_ICMP4_ERROR; - n_no_listener++; - } - else - { - icmp6_error_set_vnet_buffer (b1, - ICMP6_destination_unreachable, - ICMP6_destination_unreachable_port_unreachable, - 0); - next1 = UDP_INPUT_NEXT_ICMP6_ERROR; - n_no_listener++; - } - } - else - { - b1->error = node->errors[UDP_ERROR_NONE]; - // advance to the payload - vlib_buffer_advance (b1, sizeof (*h1)); - } - - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - udp_rx_trace_t *tr = vlib_add_trace (vm, node, - b0, sizeof (*tr)); - if (b0->error != node->errors[UDP_ERROR_LENGTH_ERROR]) - { - tr->src_port = h0 ? h0->src_port : 0; - tr->dst_port = h0 ? h0->dst_port : 0; - tr->bound = (next0 != UDP_INPUT_NEXT_ICMP4_ERROR && - next0 != UDP_INPUT_NEXT_ICMP6_ERROR); - } - } - if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) - { - udp_rx_trace_t *tr = vlib_add_trace (vm, node, - b1, sizeof (*tr)); - if (b1->error != node->errors[UDP_ERROR_LENGTH_ERROR]) - { - tr->src_port = h1 ? h1->src_port : 0; - tr->dst_port = h1 ? 
h1->dst_port : 0; - tr->bound = (next1 != UDP_INPUT_NEXT_ICMP4_ERROR && - next1 != UDP_INPUT_NEXT_ICMP6_ERROR); - } - } - - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, - to_next, n_left_to_next, - bi0, bi1, next0, next1); - } - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0; - vlib_buffer_t *b0; - udp_header_t *h0 = 0; - u32 i0, next0; - u32 advance0; - - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - - /* ip4/6_local hands us the ip header, not the udp header */ - if (is_ip4) - advance0 = sizeof (ip4_header_t); - else - advance0 = sizeof (ip6_header_t); - - if (PREDICT_FALSE (b0->current_length < advance0 + sizeof (*h0))) - { - b0->error = node->errors[UDP_ERROR_LENGTH_ERROR]; - next0 = UDP_INPUT_NEXT_DROP; - goto trace_x1; - } - - vlib_buffer_advance (b0, advance0); - - h0 = vlib_buffer_get_current (b0); - - if (PREDICT_TRUE (clib_net_to_host_u16 (h0->length) <= - vlib_buffer_length_in_chain (vm, b0))) - { - i0 = sparse_vec_index (rt->next_by_dst_port, h0->dst_port); - next0 = vec_elt (rt->next_by_dst_port, i0); - - if (PREDICT_FALSE (i0 == SPARSE_VEC_INVALID_INDEX)) - { - // move the pointer back so icmp-error can find the - // ip packet header - vlib_buffer_advance (b0, -(word) advance0); - - if (PREDICT_FALSE (punt_unknown)) - { - b0->error = node->errors[UDP_ERROR_PUNT]; - next0 = UDP_INPUT_NEXT_PUNT; - } - else if (is_ip4) - { - icmp4_error_set_vnet_buffer (b0, - ICMP4_destination_unreachable, - ICMP4_destination_unreachable_port_unreachable, - 0); - next0 = UDP_INPUT_NEXT_ICMP4_ERROR; - n_no_listener++; - } - else - { - icmp6_error_set_vnet_buffer (b0, - ICMP6_destination_unreachable, - ICMP6_destination_unreachable_port_unreachable, - 0); - next0 = UDP_INPUT_NEXT_ICMP6_ERROR; - n_no_listener++; - } - } - else - { - b0->error = node->errors[UDP_ERROR_NONE]; - // advance to the payload - vlib_buffer_advance (b0, sizeof (*h0)); - } - } - else - { - b0->error = node->errors[UDP_ERROR_LENGTH_ERROR]; - next0 = UDP_INPUT_NEXT_DROP; - } - - trace_x1: - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - udp_rx_trace_t *tr = vlib_add_trace (vm, node, - b0, sizeof (*tr)); - if (b0->error != node->errors[UDP_ERROR_LENGTH_ERROR]) - { - tr->src_port = h0->src_port; - tr->dst_port = h0->dst_port; - tr->bound = (next0 != UDP_INPUT_NEXT_ICMP4_ERROR && - next0 != UDP_INPUT_NEXT_ICMP6_ERROR); - } - } - - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - } - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - vlib_error_count (vm, node->node_index, UDP_ERROR_NO_LISTENER, - n_no_listener); - return from_frame->n_vectors; -} - -static char *udp_error_strings[] = { -#define udp_error(n,s) s, -#include "udp_error.def" -#undef udp_error -}; - -static uword -udp4_input (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * from_frame) -{ - return udp46_input_inline (vm, node, from_frame, 1 /* is_ip4 */ ); -} - -static uword -udp6_input (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * from_frame) -{ - return udp46_input_inline (vm, node, from_frame, 0 /* is_ip4 */ ); -} - - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (udp4_input_node) = { - .function = udp4_input, - .name = "ip4-udp-lookup", - /* Takes a vector of packets. 
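/*
 * Sketch: the dispatch step used by both loops above, reduced to its core.
 * 'rt' is the per-node runtime holding the sparse vector built by
 * udp_local_init()/udp_register_dst_port() below; the port is passed in
 * network byte order, exactly as it sits in the header.
 */
static u32
udp_next_for_dst_port (udp_input_runtime_t * rt, u16 dst_port_net)
{
  u32 i = sparse_vec_index (rt->next_by_dst_port, dst_port_net);

  if (i == SPARSE_VEC_INVALID_INDEX)
    return UDP_INPUT_NEXT_DROP;	/* the real node punts or sends ICMP instead */

  return vec_elt (rt->next_by_dst_port, i);
}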
*/ - .vector_size = sizeof (u32), - - .runtime_data_bytes = sizeof (udp_input_runtime_t), - - .n_errors = UDP_N_ERROR, - .error_strings = udp_error_strings, - - .n_next_nodes = UDP_INPUT_N_NEXT, - .next_nodes = { -#define _(s,n) [UDP_INPUT_NEXT_##s] = n, - foreach_udp_input_next -#undef _ - }, - - .format_buffer = format_udp_header, - .format_trace = format_udp_rx_trace, - .unformat_buffer = unformat_udp_header, -}; -/* *INDENT-ON* */ - -VLIB_NODE_FUNCTION_MULTIARCH (udp4_input_node, udp4_input); - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (udp6_input_node) = { - .function = udp6_input, - .name = "ip6-udp-lookup", - /* Takes a vector of packets. */ - .vector_size = sizeof (u32), - - .runtime_data_bytes = sizeof (udp_input_runtime_t), - - .n_errors = UDP_N_ERROR, - .error_strings = udp_error_strings, - - .n_next_nodes = UDP_INPUT_N_NEXT, - .next_nodes = { -#define _(s,n) [UDP_INPUT_NEXT_##s] = n, - foreach_udp_input_next -#undef _ - }, - - .format_buffer = format_udp_header, - .format_trace = format_udp_rx_trace, - .unformat_buffer = unformat_udp_header, -}; -/* *INDENT-ON* */ - -VLIB_NODE_FUNCTION_MULTIARCH (udp6_input_node, udp6_input); - -static void -add_dst_port (udp_main_t * um, - udp_dst_port_t dst_port, char *dst_port_name, u8 is_ip4) -{ - udp_dst_port_info_t *pi; - u32 i; - - vec_add2 (um->dst_port_infos[is_ip4], pi, 1); - i = pi - um->dst_port_infos[is_ip4]; - - pi->name = dst_port_name; - pi->dst_port = dst_port; - pi->next_index = pi->node_index = ~0; - - hash_set (um->dst_port_info_by_dst_port[is_ip4], dst_port, i); - - if (pi->name) - hash_set_mem (um->dst_port_info_by_name[is_ip4], pi->name, i); -} - -void -udp_register_dst_port (vlib_main_t * vm, - udp_dst_port_t dst_port, u32 node_index, u8 is_ip4) -{ - udp_main_t *um = &udp_main; - udp_dst_port_info_t *pi; - udp_input_runtime_t *rt; - u16 *n; - - { - clib_error_t *error = vlib_call_init_function (vm, udp_local_init); - if (error) - clib_error_report (error); - } - - pi = udp_get_dst_port_info (um, dst_port, is_ip4); - if (!pi) - { - add_dst_port (um, dst_port, 0, is_ip4); - pi = udp_get_dst_port_info (um, dst_port, is_ip4); - ASSERT (pi); - } - - pi->node_index = node_index; - pi->next_index = vlib_node_add_next (vm, - is_ip4 ? udp4_input_node.index - : udp6_input_node.index, node_index); - - /* Setup udp protocol -> next index sparse vector mapping. */ - rt = vlib_node_get_runtime_data - (vm, is_ip4 ? udp4_input_node.index : udp6_input_node.index); - n = sparse_vec_validate (rt->next_by_dst_port, - clib_host_to_net_u16 (dst_port)); - n[0] = pi->next_index; -} - -void -udp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add) -{ - udp_input_runtime_t *rt; - - { - clib_error_t *error = vlib_call_init_function (vm, udp_local_init); - if (error) - clib_error_report (error); - } - - rt = vlib_node_get_runtime_data - (vm, is_ip4 ? udp4_input_node.index : udp6_input_node.index); - - rt->punt_unknown = is_add; -} - -/* Parse a UDP header. */ -uword -unformat_udp_header (unformat_input_t * input, va_list * args) -{ - u8 **result = va_arg (*args, u8 **); - udp_header_t *udp; - __attribute__ ((unused)) int old_length; - u16 src_port, dst_port; - - /* Allocate space for IP header. 
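/*
 * Sketch: how a consumer attaches its graph node to a UDP destination port
 * with udp_register_dst_port() above (vxlan, lisp, bfd etc. follow this
 * shape from their init functions). "my_proto_input_node" and port 54321
 * are placeholders; the port is given in host byte order and keyed on its
 * network-order value internally.
 */
extern vlib_node_registration_t my_proto_input_node;	/* hypothetical */

static clib_error_t *
my_proto_init (vlib_main_t * vm)
{
  udp_register_dst_port (vm, 54321 /* dst_port, host order */ ,
			 my_proto_input_node.index, 1 /* is_ip4 */ );
  return 0;
}

VLIB_INIT_FUNCTION (my_proto_init);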
*/ - { - void *p; - - old_length = vec_len (*result); - vec_add2 (*result, p, sizeof (ip4_header_t)); - udp = p; - } - - memset (udp, 0, sizeof (udp[0])); - if (unformat (input, "src-port %d dst-port %d", &src_port, &dst_port)) - { - udp->src_port = clib_host_to_net_u16 (src_port); - udp->dst_port = clib_host_to_net_u16 (dst_port); - return 1; - } - return 0; -} - -static void -udp_setup_node (vlib_main_t * vm, u32 node_index) -{ - vlib_node_t *n = vlib_get_node (vm, node_index); - pg_node_t *pn = pg_get_node (node_index); - - n->format_buffer = format_udp_header; - n->unformat_buffer = unformat_udp_header; - pn->unformat_edit = unformat_pg_udp_header; -} - -clib_error_t * -udp_local_init (vlib_main_t * vm) -{ - udp_input_runtime_t *rt; - udp_main_t *um = &udp_main; - int i; - - { - clib_error_t *error; - error = vlib_call_init_function (vm, udp_init); - if (error) - clib_error_report (error); - } - - - for (i = 0; i < 2; i++) - { - um->dst_port_info_by_name[i] = hash_create_string (0, sizeof (uword)); - um->dst_port_info_by_dst_port[i] = hash_create (0, sizeof (uword)); - } - - udp_setup_node (vm, udp4_input_node.index); - udp_setup_node (vm, udp6_input_node.index); - - rt = vlib_node_get_runtime_data (vm, udp4_input_node.index); - - rt->next_by_dst_port = sparse_vec_new - ( /* elt bytes */ sizeof (rt->next_by_dst_port[0]), - /* bits in index */ BITS (((udp_header_t *) 0)->dst_port)); - - rt->punt_unknown = 0; - -#define _(n,s) add_dst_port (um, UDP_DST_PORT_##s, #s, 1 /* is_ip4 */); - foreach_udp4_dst_port -#undef _ - rt = vlib_node_get_runtime_data (vm, udp6_input_node.index); - - rt->next_by_dst_port = sparse_vec_new - ( /* elt bytes */ sizeof (rt->next_by_dst_port[0]), - /* bits in index */ BITS (((udp_header_t *) 0)->dst_port)); - - rt->punt_unknown = 0; - -#define _(n,s) add_dst_port (um, UDP_DST_PORT_##s, #s, 0 /* is_ip4 */); - foreach_udp6_dst_port -#undef _ - ip4_register_protocol (IP_PROTOCOL_UDP, udp4_input_node.index); - /* Note: ip6 differs from ip4, UDP is hotwired to ip6-udp-lookup */ - return 0; -} - -VLIB_INIT_FUNCTION (udp_local_init); - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/ip/udp_packet.h b/src/vnet/ip/udp_packet.h deleted file mode 100644 index beea3059..00000000 --- a/src/vnet/ip/udp_packet.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -/* - * ip4/udp_packet.h: UDP packet format - * - * Copyright (c) 2008 Eliot Dresselhaus - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef included_udp_packet_h -#define included_udp_packet_h - -typedef struct -{ - /* Source and destination port. */ - u16 src_port, dst_port; - - /* Length of UDP header plus payload. */ - u16 length; - - /* Checksum of UDP pseudo-header and data or - zero if checksum is disabled. */ - u16 checksum; -} udp_header_t; - -#endif /* included_udp_packet_h */ - - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/ip/udp_pg.c b/src/vnet/ip/udp_pg.c deleted file mode 100644 index c9d8d38c..00000000 --- a/src/vnet/ip/udp_pg.c +++ /dev/null @@ -1,237 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - * ip/udp_pg: UDP packet-generator interface - * - * Copyright (c) 2008 Eliot Dresselhaus - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
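/*
 * Sketch: filling the four udp_header_t fields above by hand, the way the
 * encap paths do, using vlib_buffer_push_uninit() (the same primitive the
 * new vlib_buffer_push_ip4() relies on). Assumes the payload is already in
 * the buffer and headroom has been reserved; for IPv4 a zero checksum
 * means "no checksum".
 */
static udp_header_t *
push_udp_header (vlib_buffer_t * b, u16 src_port, u16 dst_port,
		 u16 payload_bytes)
{
  udp_header_t *udp = vlib_buffer_push_uninit (b, sizeof (udp_header_t));

  udp->src_port = clib_host_to_net_u16 (src_port);
  udp->dst_port = clib_host_to_net_u16 (dst_port);
  udp->length = clib_host_to_net_u16 (sizeof (udp_header_t) + payload_bytes);
  udp->checksum = 0;
  return udp;
}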
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include -#include /* for unformat_udp_udp_port */ - -#define UDP_PG_EDIT_LENGTH (1 << 0) -#define UDP_PG_EDIT_CHECKSUM (1 << 1) - -always_inline void -udp_pg_edit_function_inline (pg_main_t * pg, - pg_stream_t * s, - pg_edit_group_t * g, - u32 * packets, u32 n_packets, u32 flags) -{ - vlib_main_t *vm = vlib_get_main (); - u32 ip_offset, udp_offset; - - udp_offset = g->start_byte_offset; - ip_offset = (g - 1)->start_byte_offset; - - while (n_packets >= 1) - { - vlib_buffer_t *p0; - ip4_header_t *ip0; - udp_header_t *udp0; - u32 udp_len0; - - p0 = vlib_get_buffer (vm, packets[0]); - n_packets -= 1; - packets += 1; - - ip0 = (void *) (p0->data + ip_offset); - udp0 = (void *) (p0->data + udp_offset); - udp_len0 = clib_net_to_host_u16 (ip0->length) - sizeof (ip0[0]); - - if (flags & UDP_PG_EDIT_LENGTH) - udp0->length = - clib_net_to_host_u16 (vlib_buffer_length_in_chain (vm, p0) - - ip_offset); - - /* Initialize checksum with header. */ - if (flags & UDP_PG_EDIT_CHECKSUM) - { - ip_csum_t sum0; - - sum0 = clib_mem_unaligned (&ip0->src_address, u64); - - sum0 = ip_csum_with_carry - (sum0, clib_host_to_net_u32 (udp_len0 + (ip0->protocol << 16))); - - /* Invalidate possibly old checksum. */ - udp0->checksum = 0; - - sum0 = - ip_incremental_checksum_buffer (vm, p0, udp_offset, udp_len0, - sum0); - - sum0 = ~ip_csum_fold (sum0); - - /* Zero checksum means checksumming disabled. */ - sum0 = sum0 != 0 ? sum0 : 0xffff; - - udp0->checksum = sum0; - } - } -} - -static void -udp_pg_edit_function (pg_main_t * pg, - pg_stream_t * s, - pg_edit_group_t * g, u32 * packets, u32 n_packets) -{ - switch (g->edit_function_opaque) - { - case UDP_PG_EDIT_LENGTH: - udp_pg_edit_function_inline (pg, s, g, packets, n_packets, - UDP_PG_EDIT_LENGTH); - break; - - case UDP_PG_EDIT_CHECKSUM: - udp_pg_edit_function_inline (pg, s, g, packets, n_packets, - UDP_PG_EDIT_CHECKSUM); - break; - - case UDP_PG_EDIT_CHECKSUM | UDP_PG_EDIT_LENGTH: - udp_pg_edit_function_inline (pg, s, g, packets, n_packets, - UDP_PG_EDIT_CHECKSUM | UDP_PG_EDIT_LENGTH); - break; - - default: - ASSERT (0); - break; - } -} - -typedef struct -{ - pg_edit_t src_port, dst_port; - pg_edit_t length; - pg_edit_t checksum; -} pg_udp_header_t; - -static inline void -pg_udp_header_init (pg_udp_header_t * p) -{ - /* Initialize fields that are not bit fields in the IP header. */ -#define _(f) pg_edit_init (&p->f, udp_header_t, f); - _(src_port); - _(dst_port); - _(length); - _(checksum); -#undef _ -} - -uword -unformat_pg_udp_header (unformat_input_t * input, va_list * args) -{ - pg_stream_t *s = va_arg (*args, pg_stream_t *); - pg_udp_header_t *p; - u32 group_index; - - p = pg_create_edit_group (s, sizeof (p[0]), sizeof (udp_header_t), - &group_index); - pg_udp_header_init (p); - - /* Defaults. */ - p->checksum.type = PG_EDIT_UNSPECIFIED; - p->length.type = PG_EDIT_UNSPECIFIED; - - if (!unformat (input, "UDP: %U -> %U", - unformat_pg_edit, - unformat_tcp_udp_port, &p->src_port, - unformat_pg_edit, unformat_tcp_udp_port, &p->dst_port)) - goto error; - - /* Parse options. 
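/*
 * (If the stream definition leaves "length" or "checksum" unspecified, the
 * code below keeps PG_EDIT_UNSPECIFIED, installs udp_pg_edit_function as
 * the group edit function and sets the corresponding UDP_PG_EDIT_* flags
 * in edit_function_opaque, so those fields are computed per generated
 * packet rather than taken from the template.)
 */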
*/ - while (1) - { - if (unformat (input, "length %U", - unformat_pg_edit, unformat_pg_number, &p->length)) - ; - - else if (unformat (input, "checksum %U", - unformat_pg_edit, unformat_pg_number, &p->checksum)) - ; - - /* Can't parse input: try next protocol level. */ - else - break; - } - - { - ip_main_t *im = &ip_main; - u16 dst_port; - tcp_udp_port_info_t *pi; - - pi = 0; - if (p->dst_port.type == PG_EDIT_FIXED) - { - dst_port = pg_edit_get_value (&p->dst_port, PG_EDIT_LO); - pi = ip_get_tcp_udp_port_info (im, dst_port); - } - - if (pi && pi->unformat_pg_edit - && unformat_user (input, pi->unformat_pg_edit, s)) - ; - - else if (!unformat_user (input, unformat_pg_payload, s)) - goto error; - - p = pg_get_edit_group (s, group_index); - if (p->checksum.type == PG_EDIT_UNSPECIFIED - || p->length.type == PG_EDIT_UNSPECIFIED) - { - pg_edit_group_t *g = pg_stream_get_group (s, group_index); - g->edit_function = udp_pg_edit_function; - g->edit_function_opaque = 0; - if (p->checksum.type == PG_EDIT_UNSPECIFIED) - g->edit_function_opaque |= UDP_PG_EDIT_CHECKSUM; - if (p->length.type == PG_EDIT_UNSPECIFIED) - g->edit_function_opaque |= UDP_PG_EDIT_LENGTH; - } - - return 1; - } - -error: - /* Free up any edits we may have added. */ - pg_free_edit_group (s); - return 0; -} - - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/ipsec/ikev2.c b/src/vnet/ipsec/ikev2.c index 09209334..2c1074d8 100644 --- a/src/vnet/ipsec/ikev2.c +++ b/src/vnet/ipsec/ikev2.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/vnet/ipsec/ikev2_cli.c b/src/vnet/ipsec/ikev2_cli.c index 5c88d8d4..05ed4e60 100644 --- a/src/vnet/ipsec/ikev2_cli.c +++ b/src/vnet/ipsec/ikev2_cli.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/vnet/ipsec/ikev2_crypto.c b/src/vnet/ipsec/ikev2_crypto.c index c201d3eb..ca56158f 100644 --- a/src/vnet/ipsec/ikev2_crypto.c +++ b/src/vnet/ipsec/ikev2_crypto.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/vnet/lisp-cp/packets.c b/src/vnet/lisp-cp/packets.c index 3a4f421b..f24024f1 100644 --- a/src/vnet/lisp-cp/packets.c +++ b/src/vnet/lisp-cp/packets.c @@ -15,7 +15,7 @@ #include #include -#include +#include /* Returns IP ID for the packet */ /* static u16 ip_id = 0; @@ -141,61 +141,6 @@ pkt_push_udp (vlib_main_t * vm, vlib_buffer_t * b, u16 sp, u16 dp) return uh; } -void * -pkt_push_ipv4 (vlib_main_t * vm, vlib_buffer_t * b, ip4_address_t * src, - ip4_address_t * dst, int proto) -{ - ip4_header_t *ih; - - /* make some room */ - ih = vlib_buffer_push_uninit (b, sizeof (ip4_header_t)); - - ih->ip_version_and_header_length = 0x45; - ih->tos = 0; - ih->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b)); - - /* iph->fragment_id = clib_host_to_net_u16(get_IP_ID ()); */ - - /* TODO: decide if we allow fragments in case of control */ - ih->flags_and_fragment_offset = clib_host_to_net_u16 (IP_DF); - ih->ttl = 255; - ih->protocol = proto; - ih->src_address.as_u32 = src->as_u32; - ih->dst_address.as_u32 = dst->as_u32; - - ih->checksum = ip4_header_checksum (ih); - return ih; -} - -void * -pkt_push_ipv6 (vlib_main_t * vm, vlib_buffer_t * b, ip6_address_t * src, - ip6_address_t * dst, int proto) -{ - ip6_header_t *ip6h; - u16 payload_length; - - /* make some room */ - ip6h = vlib_buffer_push_uninit (b, sizeof (ip6_header_t)); - - 
ip6h->ip_version_traffic_class_and_flow_label = - clib_host_to_net_u32 (0x6 << 28); - - /* calculate ip6 payload length */ - payload_length = vlib_buffer_length_in_chain (vm, b); - payload_length -= sizeof (*ip6h); - - ip6h->payload_length = clib_host_to_net_u16 (payload_length); - - ip6h->hop_limit = 0xff; - ip6h->protocol = proto; - clib_memcpy (ip6h->src_address.as_u8, src->as_u8, - sizeof (ip6h->src_address)); - clib_memcpy (ip6h->dst_address.as_u8, dst->as_u8, - sizeof (ip6h->src_address)); - - return ip6h; -} - void * pkt_push_ip (vlib_main_t * vm, vlib_buffer_t * b, ip_address_t * src, ip_address_t * dst, u32 proto) @@ -210,12 +155,12 @@ pkt_push_ip (vlib_main_t * vm, vlib_buffer_t * b, ip_address_t * src, switch (ip_addr_version (src)) { case IP4: - return pkt_push_ipv4 (vm, b, &ip_addr_v4 (src), &ip_addr_v4 (dst), - proto); + return vlib_buffer_push_ip4 (vm, b, &ip_addr_v4 (src), + &ip_addr_v4 (dst), proto); break; case IP6: - return pkt_push_ipv6 (vm, b, &ip_addr_v6 (src), &ip_addr_v6 (dst), - proto); + return vlib_buffer_push_ip6 (vm, b, &ip_addr_v6 (src), + &ip_addr_v6 (dst), proto); break; } diff --git a/src/vnet/lisp-cp/packets.h b/src/vnet/lisp-cp/packets.h index 212a1d78..f6da3bf4 100644 --- a/src/vnet/lisp-cp/packets.h +++ b/src/vnet/lisp-cp/packets.h @@ -26,51 +26,6 @@ void *pkt_push_udp_and_ip (vlib_main_t * vm, vlib_buffer_t * b, u16 sp, void *pkt_push_ecm_hdr (vlib_buffer_t * b); -always_inline u8 * -vlib_buffer_get_tail (vlib_buffer_t * b) -{ - return b->data + b->current_data + b->current_length; -} - -always_inline void * -vlib_buffer_put_uninit (vlib_buffer_t * b, u8 size) -{ - /* XXX should make sure there's enough space! */ - void *p = vlib_buffer_get_tail (b); - b->current_length += size; - return p; -} - -always_inline void * -vlib_buffer_push_uninit (vlib_buffer_t * b, u8 size) -{ - /* XXX should make sure there's enough space! */ - ASSERT (b->current_data >= size); - b->current_data -= size; - b->current_length += size; - - return vlib_buffer_get_current (b); -} - -always_inline void * -vlib_buffer_make_headroom (vlib_buffer_t * b, u8 size) -{ - /* XXX should make sure there's enough space! 
*/ - b->current_data += size; - return vlib_buffer_get_current (b); -} - -always_inline void * -vlib_buffer_pull (vlib_buffer_t * b, u8 size) -{ - if (b->current_length < size) - return 0; - - void *data = vlib_buffer_get_current (b); - vlib_buffer_advance (b, size); - return data; -} - /* *INDENT-ON* */ /* diff --git a/src/vnet/lisp-gpe/interface.c b/src/vnet/lisp-gpe/interface.c index 13359277..292c7e6a 100644 --- a/src/vnet/lisp-gpe/interface.c +++ b/src/vnet/lisp-gpe/interface.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/vnet/lisp-gpe/lisp_gpe.h b/src/vnet/lisp-gpe/lisp_gpe.h index c898a7da..b5a50ec6 100644 --- a/src/vnet/lisp-gpe/lisp_gpe.h +++ b/src/vnet/lisp-gpe/lisp_gpe.h @@ -27,10 +27,12 @@ #include #include #include -#include +#include #include #include #include +#include +#include /** IP4-UDP-LISP encap header */ /* *INDENT-OFF* */ diff --git a/src/vnet/lisp-gpe/lisp_gpe_adjacency.c b/src/vnet/lisp-gpe/lisp_gpe_adjacency.c index 65006b81..dbcf7134 100644 --- a/src/vnet/lisp-gpe/lisp_gpe_adjacency.c +++ b/src/vnet/lisp-gpe/lisp_gpe_adjacency.c @@ -25,6 +25,8 @@ #include #include #include +#include +#include /** * Memory pool of all adjacencies diff --git a/src/vnet/session/application.c b/src/vnet/session/application.c new file mode 100644 index 00000000..a561e7d1 --- /dev/null +++ b/src/vnet/session/application.c @@ -0,0 +1,343 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +/* + * Pool from which we allocate all applications + */ +static application_t *app_pool; + +/* + * Hash table of apps by api client index + */ +static uword *app_by_api_client_index; + +int +application_api_queue_is_full (application_t * app) +{ + unix_shared_memory_queue_t *q; + + /* builtin servers are always OK */ + if (app->api_client_index == ~0) + return 0; + + q = vl_api_client_index_to_input_queue (app->api_client_index); + if (!q) + return 1; + + if (q->cursize == q->maxsize) + return 1; + return 0; +} + +static void +application_table_add (application_t * app) +{ + hash_set (app_by_api_client_index, app->api_client_index, app->index); +} + +static void +application_table_del (application_t * app) +{ + hash_unset (app_by_api_client_index, app->api_client_index); +} + +application_t * +application_lookup (u32 api_client_index) +{ + uword *p; + p = hash_get (app_by_api_client_index, api_client_index); + if (p) + return application_get (p[0]); + + return 0; +} + +void +application_del (application_t * app) +{ + session_manager_main_t *smm = vnet_get_session_manager_main (); + api_main_t *am = &api_main; + void *oldheap; + session_manager_t *sm; + + if (app->mode == APP_SERVER) + { + sm = session_manager_get (app->session_manager_index); + session_manager_del (smm, sm); + } + + /* Free the event fifo in the /vpe-api shared-memory segment */ + oldheap = svm_push_data_heap (am->vlib_rp); + if (app->event_queue) + unix_shared_memory_queue_free (app->event_queue); + svm_pop_heap (oldheap); + + application_table_del (app); + + pool_put (app_pool, app); +} + +application_t * +application_new (application_type_t type, session_type_t sst, + u32 api_client_index, u32 flags, session_cb_vft_t * cb_fns) +{ + session_manager_main_t *smm = vnet_get_session_manager_main (); + api_main_t *am = &api_main; + application_t *app; + void *oldheap; + session_manager_t *sm; + + pool_get (app_pool, app); + memset (app, 0, sizeof (*app)); + + /* Allocate event fifo in the /vpe-api shared-memory segment */ + oldheap = svm_push_data_heap (am->vlib_rp); + + /* Allocate server event queue */ + app->event_queue = + unix_shared_memory_queue_init (128 /* nels $$$$ config */ , + sizeof (session_fifo_event_t), + 0 /* consumer pid */ , + 0 + /* (do not) signal when queue non-empty */ + ); + + svm_pop_heap (oldheap); + + /* If a server, allocate session manager */ + if (type == APP_SERVER) + { + pool_get (smm->session_managers, sm); + memset (sm, 0, sizeof (*sm)); + + app->session_manager_index = sm - smm->session_managers; + } + else if (type == APP_CLIENT) + { + /* Allocate connect session manager if needed */ + if (smm->connect_manager_index[sst] == INVALID_INDEX) + connects_session_manager_init (smm, sst); + app->session_manager_index = smm->connect_manager_index[sst]; + } + + app->mode = type; + app->index = application_get_index (app); + app->session_type = sst; + app->api_client_index = api_client_index; + app->flags = flags; + app->cb_fns = *cb_fns; + + /* Add app to lookup by api_client_index table */ + application_table_add (app); + + return app; +} + +application_t * +application_get (u32 index) +{ + return pool_elt_at_index (app_pool, index); +} + +u32 +application_get_index (application_t * app) +{ + return app - app_pool; +} + +int +application_server_init (application_t * server, u32 segment_size, + u32 add_segment_size, u32 rx_fifo_size, + u32 tx_fifo_size, u8 ** segment_name) +{ + session_manager_main_t *smm = vnet_get_session_manager_main (); + session_manager_t *sm; + 
int rv; + + sm = session_manager_get (server->session_manager_index); + + /* Add first segment */ + if ((rv = session_manager_add_first_segment (smm, sm, segment_size, + segment_name))) + { + return rv; + } + + /* Setup session manager */ + sm->add_segment_size = add_segment_size; + sm->rx_fifo_size = rx_fifo_size; + sm->tx_fifo_size = tx_fifo_size; + sm->add_segment = sm->add_segment_size != 0; + return 0; +} + +u8 * +format_application_server (u8 * s, va_list * args) +{ + application_t *srv = va_arg (*args, application_t *); + int verbose = va_arg (*args, int); + vl_api_registration_t *regp; + stream_session_t *listener; + u8 *server_name, *str, *seg_name; + u32 segment_size; + + if (srv == 0) + { + if (verbose) + s = format (s, "%-40s%-20s%-15s%-15s%-10s", "Connection", "Server", + "Segment", "API Client", "Cookie"); + else + s = format (s, "%-40s%-20s", "Connection", "Server"); + + return s; + } + + regp = vl_api_client_index_to_registration (srv->api_client_index); + if (!regp) + server_name = format (0, "%s%c", regp->name, 0); + else + server_name = regp->name; + + listener = stream_session_listener_get (srv->session_type, + srv->session_index); + str = format (0, "%U", format_stream_session, listener, verbose); + + session_manager_get_segment_info (listener->server_segment_index, &seg_name, + &segment_size); + if (verbose) + { + s = format (s, "%-40s%-20s%-20s%-10d%-10d", str, server_name, + seg_name, srv->api_client_index, srv->accept_cookie); + } + else + s = format (s, "%-40s%-20s", str, server_name); + return s; +} + +u8 * +format_application_client (u8 * s, va_list * args) +{ + application_t *client = va_arg (*args, application_t *); + int verbose = va_arg (*args, int); + stream_session_t *session; + u8 *str, *seg_name; + u32 segment_size; + + if (client == 0) + { + if (verbose) + s = + format (s, "%-40s%-20s%-10s", "Connection", "Segment", + "API Client"); + else + s = format (s, "%-40s", "Connection"); + + return s; + } + + session = stream_session_get (client->session_index, client->thread_index); + str = format (0, "%U", format_stream_session, session, verbose); + + session_manager_get_segment_info (session->server_segment_index, &seg_name, + &segment_size); + if (verbose) + { + s = format (s, "%-40s%-20s%-10d%", str, seg_name, + client->api_client_index); + } + else + s = format (s, "%-40s", str); + return s; +} + +static clib_error_t * +show_app_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + application_t *app; + int do_server = 0; + int do_client = 0; + int verbose = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "server")) + do_server = 1; + else if (unformat (input, "client")) + do_client = 1; + else if (unformat (input, "verbose")) + verbose = 1; + else + break; + } + + if (do_server) + { + if (pool_elts (app_pool)) + { + vlib_cli_output (vm, "%U", format_application_server, + 0 /* header */ , + verbose); + /* *INDENT-OFF* */ + pool_foreach (app, app_pool, + ({ + if (app->mode == APP_SERVER) + vlib_cli_output (vm, "%U", format_application_server, app, + verbose); + })); + /* *INDENT-ON* */ + } + else + vlib_cli_output (vm, "No active server bindings"); + } + + if (do_client) + { + if (pool_elts (app_pool)) + { + vlib_cli_output (vm, "%U", format_application_client, + 0 /* header */ , + verbose); + /* *INDENT-OFF* */ + pool_foreach (app, app_pool, + ({ + if (app->mode == APP_CLIENT) + vlib_cli_output (vm, "%U", format_application_client, app, + verbose); + })); + /* *INDENT-ON* 
*/ + } + else + vlib_cli_output (vm, "No active server bindings"); + } + + return 0; +} + +VLIB_CLI_COMMAND (show_app_command, static) = +{ +.path = "show app",.short_help = + "show app [server|client] [verbose]",.function = show_app_command_fn,}; + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/session/application.h b/src/vnet/session/application.h new file mode 100644 index 00000000..027d6967 --- /dev/null +++ b/src/vnet/session/application.h @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef SRC_VNET_SESSION_APPLICATION_H_ +#define SRC_VNET_SESSION_APPLICATION_H_ + +#include +#include + +typedef enum +{ + APP_SERVER, + APP_CLIENT +} application_type_t; + +typedef struct _stream_session_cb_vft +{ + /** Notify server of new segment */ + int (*add_segment_callback) (u32 api_client_index, const u8 * seg_name, + u32 seg_size); + + /** Notify server of newly accepted session */ + int (*session_accept_callback) (stream_session_t * new_session); + + /* Connection request callback */ + int (*session_connected_callback) (u32 api_client_index, + stream_session_t * s, u8 code); + + /** Notify app that session is closing */ + void (*session_disconnect_callback) (stream_session_t * s); + + /** Notify app that session was reset */ + void (*session_reset_callback) (stream_session_t * s); + + /* Direct RX callback, for built-in servers */ + int (*builtin_server_rx_callback) (stream_session_t * session); + + /* Redirect connection to local server */ + int (*redirect_connect_callback) (u32 api_client_index, void *mp); +} session_cb_vft_t; + +typedef struct _application +{ + /** Index in server pool */ + u32 index; + + /** Flags */ + u32 flags; + + /** Binary API connection index, ~0 if internal */ + u32 api_client_index; + + /* */ + u32 api_context; + + /** Application listens for events on this svm queue */ + unix_shared_memory_queue_t *event_queue; + + /** Stream session type */ + u8 session_type; + + /* Stream server mode: accept or connect */ + u8 mode; + + u32 session_manager_index; + + /* + * Bind/Listen specific + */ + + /** Accept cookie, for multiple session flavors ($$$ maybe) */ + u32 accept_cookie; + + /** Index of the listen session or connect session */ + u32 session_index; + + /** Session thread index for client connect sessions */ + u32 thread_index; + + /* + * Callbacks: shoulder-taps for the server/client + */ + session_cb_vft_t cb_fns; +} application_t; + +application_t *application_new (application_type_t type, session_type_t sst, + u32 api_client_index, u32 flags, + session_cb_vft_t * cb_fns); +void application_del (application_t * app); +application_t *application_get (u32 index); +application_t *application_lookup (u32 api_client_index); +u32 application_get_index (application_t * app); + +int +application_server_init (application_t * server, u32 segment_size, + u32 add_segment_size, u32 
rx_fifo_size, + u32 tx_fifo_size, u8 ** segment_name); +int application_api_queue_is_full (application_t * app); + +#endif /* SRC_VNET_SESSION_APPLICATION_H_ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/session/application_interface.c b/src/vnet/session/application_interface.c new file mode 100644 index 00000000..0ea77fd8 --- /dev/null +++ b/src/vnet/session/application_interface.c @@ -0,0 +1,459 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include + +#include +#include +#include +#include + +/** @file + VPP's application/session API bind/unbind/connect/disconnect calls +*/ + +static u8 +ip_is_zero (ip46_address_t * ip46_address, u8 is_ip4) +{ + if (is_ip4) + return (ip46_address->ip4.as_u32 == 0); + else + return (ip46_address->as_u64[0] == 0 && ip46_address->as_u64[1] == 0); +} + +static u8 +ip_is_local (ip46_address_t * ip46_address, u8 is_ip4) +{ + fib_node_index_t fei; + fib_entry_flag_t flags; + fib_prefix_t prefix; + + /* Check if requester is local */ + if (is_ip4) + { + prefix.fp_len = 32; + prefix.fp_proto = FIB_PROTOCOL_IP4; + } + else + { + prefix.fp_len = 128; + prefix.fp_proto = FIB_PROTOCOL_IP6; + } + + clib_memcpy (&prefix.fp_addr, ip46_address, sizeof (ip46_address)); + fei = fib_table_lookup (0, &prefix); + flags = fib_entry_get_flags (fei); + + return (flags & FIB_ENTRY_FLAG_LOCAL); +} + +int +api_parse_session_handle (u64 handle, u32 * session_index, u32 * thread_index) +{ + session_manager_main_t *smm = vnet_get_session_manager_main (); + stream_session_t *pool; + + *thread_index = handle & 0xFFFFFFFF; + *session_index = handle >> 32; + + if (*thread_index >= vec_len (smm->sessions)) + return VNET_API_ERROR_INVALID_VALUE; + + pool = smm->sessions[*thread_index]; + + if (pool_is_free_index (pool, *session_index)) + return VNET_API_ERROR_INVALID_VALUE_2; + + return 0; +} + +int +vnet_bind_i (u32 api_client_index, ip46_address_t * ip46, u16 port_host_order, + session_type_t sst, u64 * options, session_cb_vft_t * cb_fns, + application_t ** app, u32 * len_seg_name, char *seg_name) +{ + u8 *segment_name = 0; + application_t *server = 0; + stream_session_t *listener; + u8 is_ip4; + + listener = + stream_session_lookup_listener (ip46, + clib_host_to_net_u16 (port_host_order), + sst); + + if (listener) + return VNET_API_ERROR_ADDRESS_IN_USE; + + if (application_lookup (api_client_index)) + { + clib_warning ("Only one bind supported for now"); + return VNET_API_ERROR_ADDRESS_IN_USE; + } + + is_ip4 = SESSION_TYPE_IP4_UDP == sst || SESSION_TYPE_IP4_TCP == sst; + if (!ip_is_zero (ip46, is_ip4) && !ip_is_local (ip46, is_ip4)) + return VNET_API_ERROR_INVALID_VALUE; + + /* Allocate and initialize stream server */ + server = application_new (APP_SERVER, sst, api_client_index, + options[SESSION_OPTIONS_FLAGS], cb_fns); + + application_server_init (server, options[SESSION_OPTIONS_SEGMENT_SIZE], + 
options[SESSION_OPTIONS_ADD_SEGMENT_SIZE], + options[SESSION_OPTIONS_RX_FIFO_SIZE], + options[SESSION_OPTIONS_TX_FIFO_SIZE], + &segment_name); + + /* Setup listen path down to transport */ + stream_session_start_listen (server->index, ip46, port_host_order); + + /* + * Return values + */ + + ASSERT (vec_len (segment_name) <= 128); + *len_seg_name = vec_len (segment_name); + memcpy (seg_name, segment_name, *len_seg_name); + *app = server; + + return 0; +} + +int +vnet_unbind_i (u32 api_client_index) +{ + application_t *server; + + /* + * Find the stream_server_t corresponding to the api client + */ + server = application_lookup (api_client_index); + if (!server) + return VNET_API_ERROR_INVALID_VALUE_2; + + /* Clear the listener */ + stream_session_stop_listen (server->index); + application_del (server); + + return 0; +} + +int +vnet_connect_i (u32 api_client_index, u32 api_context, session_type_t sst, + ip46_address_t * ip46, u16 port, u64 * options, void *mp, + session_cb_vft_t * cb_fns) +{ + stream_session_t *listener; + application_t *server, *app; + + /* + * Figure out if connecting to a local server + */ + listener = stream_session_lookup_listener (ip46, + clib_host_to_net_u16 (port), + sst); + if (listener) + { + server = application_get (listener->app_index); + + /* + * Server is willing to have a direct fifo connection created + * instead of going through the state machine, etc. + */ + if (server->flags & SESSION_OPTIONS_FLAGS_USE_FIFO) + return server->cb_fns. + redirect_connect_callback (server->api_client_index, mp); + } + + /* Create client app */ + app = application_new (APP_CLIENT, sst, api_client_index, + options[SESSION_OPTIONS_FLAGS], cb_fns); + + app->api_context = api_context; + + /* + * Not connecting to a local server. Create regular session + */ + stream_session_open (sst, ip46, port, app->index); + + return 0; +} + +/** + * unformat a vnet URI + * + * fifo://name + * tcp://ip46-addr:port + * udp://ip46-addr:port + * + * u8 ip46_address[16]; + * u16 port_in_host_byte_order; + * stream_session_type_t sst; + * u8 *fifo_name; + * + * if (unformat (input, "%U", unformat_vnet_uri, &ip46_address, + * &sst, &port, &fifo_name)) + * etc... 
+ * + */ +uword +unformat_vnet_uri (unformat_input_t * input, va_list * args) +{ + ip46_address_t *address = va_arg (*args, ip46_address_t *); + session_type_t *sst = va_arg (*args, session_type_t *); + u16 *port = va_arg (*args, u16 *); + + if (unformat (input, "tcp://%U/%d", unformat_ip4_address, &address->ip4, + port)) + { + *sst = SESSION_TYPE_IP4_TCP; + return 1; + } + if (unformat (input, "udp://%U/%d", unformat_ip4_address, &address->ip4, + port)) + { + *sst = SESSION_TYPE_IP4_UDP; + return 1; + } + if (unformat (input, "udp://%U/%d", unformat_ip6_address, &address->ip6, + port)) + { + *sst = SESSION_TYPE_IP6_UDP; + return 1; + } + if (unformat (input, "tcp://%U/%d", unformat_ip6_address, &address->ip6, + port)) + { + *sst = SESSION_TYPE_IP6_TCP; + return 1; + } + + return 0; +} + +int +parse_uri (char *uri, session_type_t * sst, ip46_address_t * addr, + u16 * port_number_host_byte_order) +{ + unformat_input_t _input, *input = &_input; + + /* Make sure */ + uri = (char *) format (0, "%s%c", uri, 0); + + /* Parse uri */ + unformat_init_string (input, uri, strlen (uri)); + if (!unformat (input, "%U", unformat_vnet_uri, addr, sst, + port_number_host_byte_order)) + { + unformat_free (input); + return VNET_API_ERROR_INVALID_VALUE; + } + unformat_free (input); + + return 0; +} + +int +vnet_bind_uri (vnet_bind_args_t * a) +{ + application_t *server = 0; + u16 port_host_order; + session_type_t sst = SESSION_N_TYPES; + ip46_address_t ip46; + int rv; + + memset (&ip46, 0, sizeof (ip46)); + rv = parse_uri (a->uri, &sst, &ip46, &port_host_order); + if (rv) + return rv; + + if ((rv = vnet_bind_i (a->api_client_index, &ip46, port_host_order, sst, + a->options, a->session_cb_vft, &server, + &a->segment_name_length, a->segment_name))) + return rv; + + a->server_event_queue_address = (u64) server->event_queue; + return 0; +} + +session_type_t +session_type_from_proto_and_ip (session_api_proto_t proto, u8 is_ip4) +{ + if (proto == SESSION_PROTO_TCP) + { + if (is_ip4) + return SESSION_TYPE_IP4_TCP; + else + return SESSION_TYPE_IP6_TCP; + } + else + { + if (is_ip4) + return SESSION_TYPE_IP4_UDP; + else + return SESSION_TYPE_IP6_UDP; + } + + return SESSION_N_TYPES; +} + +int +vnet_unbind_uri (char *uri, u32 api_client_index) +{ + u16 port_number_host_byte_order; + session_type_t sst = SESSION_N_TYPES; + ip46_address_t ip46_address; + stream_session_t *listener; + int rv; + + rv = parse_uri (uri, &sst, &ip46_address, &port_number_host_byte_order); + if (rv) + return rv; + + listener = + stream_session_lookup_listener (&ip46_address, + clib_host_to_net_u16 + (port_number_host_byte_order), sst); + + if (!listener) + return VNET_API_ERROR_ADDRESS_NOT_IN_USE; + + /* External client? 
*/ + if (api_client_index != ~0) + { + ASSERT (vl_api_client_index_to_registration (api_client_index)); + } + + return vnet_unbind_i (api_client_index); +} + +int +vnet_connect_uri (vnet_connect_args_t * a) +{ + ip46_address_t ip46_address; + u16 port; + session_type_t sst; + application_t *app; + int rv; + + app = application_lookup (a->api_client_index); + if (app) + { + clib_warning ("Already have a connect from this app"); + return VNET_API_ERROR_INVALID_VALUE_2; + } + + /* Parse uri */ + rv = parse_uri (a->uri, &sst, &ip46_address, &port); + if (rv) + return rv; + + return vnet_connect_i (a->api_client_index, a->api_context, sst, + &ip46_address, port, a->options, a->mp, + a->session_cb_vft); +} + +int +vnet_disconnect_session (u32 client_index, u32 session_index, + u32 thread_index) +{ + stream_session_t *session; + + session = stream_session_get (session_index, thread_index); + stream_session_disconnect (session); + + return 0; +} + + +int +vnet_bind (vnet_bind_args_t * a) +{ + application_t *server = 0; + session_type_t sst = SESSION_N_TYPES; + int rv; + + sst = session_type_from_proto_and_ip (a->proto, a->tep.is_ip4); + if ((rv = vnet_bind_i (a->api_client_index, &a->tep.ip, a->tep.port, sst, + a->options, a->session_cb_vft, &server, + &a->segment_name_length, a->segment_name))) + return rv; + + a->server_event_queue_address = (u64) server->event_queue; + a->handle = (u64) a->tep.vrf << 32 | (u64) server->session_index; + return 0; +} + +int +vnet_unbind (vnet_unbind_args_t * a) +{ + application_t *server; + + if (a->api_client_index != ~0) + { + ASSERT (vl_api_client_index_to_registration (a->api_client_index)); + } + + /* Make sure this is the right one */ + server = application_lookup (a->api_client_index); + ASSERT (server->session_index == (0xFFFFFFFF & a->handle)); + + /* TODO use handle to disambiguate namespaces/vrfs */ + return vnet_unbind_i (a->api_client_index); +} + +int +vnet_connect (vnet_connect_args_t * a) +{ + session_type_t sst; + application_t *app; + + app = application_lookup (a->api_client_index); + if (app) + { + clib_warning ("Already have a connect from this app"); + return VNET_API_ERROR_INVALID_VALUE_2; + } + + sst = session_type_from_proto_and_ip (a->proto, a->tep.is_ip4); + return vnet_connect_i (a->api_client_index, a->api_context, sst, &a->tep.ip, + a->tep.port, a->options, a->mp, a->session_cb_vft); +} + +int +vnet_disconnect (vnet_disconnect_args_t * a) +{ + stream_session_t *session; + u32 session_index, thread_index; + + if (api_parse_session_handle (a->handle, &session_index, &thread_index)) + { + clib_warning ("Invalid handle"); + return -1; + } + + session = stream_session_get (session_index, thread_index); + stream_session_disconnect (session); + + return 0; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/session/application_interface.h b/src/vnet/session/application_interface.h new file mode 100644 index 00000000..8d87c067 --- /dev/null +++ b/src/vnet/session/application_interface.h @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_uri_h__ +#define __included_uri_h__ + +#include +#include +#include +#include +#include +#include + +typedef enum _session_api_proto +{ + SESSION_PROTO_TCP, + SESSION_PROTO_UDP +} session_api_proto_t; + +typedef struct _vnet_bind_args_t +{ + union + { + char *uri; + struct + { + transport_endpoint_t tep; + session_api_proto_t proto; + }; + }; + + u32 api_client_index; + u64 *options; + session_cb_vft_t *session_cb_vft; + + /* + * Results + */ + char *segment_name; + u32 segment_name_length; + u64 server_event_queue_address; + u64 handle; +} vnet_bind_args_t; + +typedef struct _vnet_unbind_args_t +{ + union + { + char *uri; + u64 handle; + }; + u32 api_client_index; +} vnet_unbind_args_t; + +typedef struct _vnet_connect_args +{ + union + { + char *uri; + struct + { + transport_endpoint_t tep; + session_api_proto_t proto; + }; + }; + u32 api_client_index; + u32 api_context; + u64 *options; + session_cb_vft_t *session_cb_vft; + + /* Used for redirects */ + void *mp; +} vnet_connect_args_t; + +typedef struct _vnet_disconnect_args_t +{ + u64 handle; + u32 api_client_index; +} vnet_disconnect_args_t; + +/* Bind / connect options */ +typedef enum +{ + SESSION_OPTIONS_FLAGS, + SESSION_OPTIONS_SEGMENT_SIZE, + SESSION_OPTIONS_ADD_SEGMENT_SIZE, + SESSION_OPTIONS_RX_FIFO_SIZE, + SESSION_OPTIONS_TX_FIFO_SIZE, + SESSION_OPTIONS_ACCEPT_COOKIE, + SESSION_OPTIONS_N_OPTIONS +} session_options_index_t; + +/** Server can handle delegated connect requests from local clients */ +#define SESSION_OPTIONS_FLAGS_USE_FIFO (1<<0) + +/** Server wants vpp to add segments when out of memory for fifos */ +#define SESSION_OPTIONS_FLAGS_ADD_SEGMENT (1<<1) + +#define VNET_CONNECT_REDIRECTED 123 + +int vnet_bind_uri (vnet_bind_args_t *); +int vnet_unbind_uri (char *uri, u32 api_client_index); +int vnet_connect_uri (vnet_connect_args_t * a); +int +vnet_disconnect_session (u32 client_index, u32 session_index, + u32 thread_index); + +int vnet_bind (vnet_bind_args_t * a); +int vnet_connect (vnet_connect_args_t * a); +int vnet_unbind (vnet_unbind_args_t * a); +int vnet_disconnect (vnet_disconnect_args_t * a); + +int +api_parse_session_handle (u64 handle, u32 * session_index, + u32 * thread_index); + +#endif /* __included_uri_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/session/hashes.c b/src/vnet/session/hashes.c new file mode 100644 index 00000000..1808dd73 --- /dev/null +++ b/src/vnet/session/hashes.c @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** Generate typed init functions for multiple hash table styles... */ + +#include +#include + +#include + +#undef __included_bihash_template_h__ + +#include +#include + +#include diff --git a/src/vnet/session/node.c b/src/vnet/session/node.c new file mode 100644 index 00000000..e467f4e9 --- /dev/null +++ b/src/vnet/session/node.c @@ -0,0 +1,435 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include +#include +#include + +vlib_node_registration_t session_queue_node; + +typedef struct +{ + u32 session_index; + u32 server_thread_index; +} session_queue_trace_t; + +/* packet trace format function */ +static u8 * +format_session_queue_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + session_queue_trace_t *t = va_arg (*args, session_queue_trace_t *); + + s = format (s, "SESSION_QUEUE: session index %d, server thread index %d", + t->session_index, t->server_thread_index); + return s; +} + +vlib_node_registration_t session_queue_node; + +#define foreach_session_queue_error \ +_(TX, "Packets transmitted") \ +_(TIMER, "Timer events") + +typedef enum +{ +#define _(sym,str) SESSION_QUEUE_ERROR_##sym, + foreach_session_queue_error +#undef _ + SESSION_QUEUE_N_ERROR, +} session_queue_error_t; + +static char *session_queue_error_strings[] = { +#define _(sym,string) string, + foreach_session_queue_error +#undef _ +}; + +static u32 session_type_to_next[] = { + SESSION_QUEUE_NEXT_TCP_IP4_OUTPUT, + SESSION_QUEUE_NEXT_IP4_LOOKUP, + SESSION_QUEUE_NEXT_TCP_IP6_OUTPUT, + SESSION_QUEUE_NEXT_IP6_LOOKUP, +}; + +always_inline int +session_fifo_rx_i (vlib_main_t * vm, vlib_node_runtime_t * node, + session_manager_main_t * smm, session_fifo_event_t * e0, + stream_session_t * s0, u32 thread_index, int *n_tx_packets, + u8 peek_data) +{ + u32 n_trace = vlib_get_trace_count (vm, node); + u32 left_to_snd0, max_len_to_snd0, len_to_deq0, n_bufs, snd_space0; + u32 n_frame_bytes, n_frames_per_evt; + transport_connection_t *tc0; + transport_proto_vft_t *transport_vft; + u32 next_index, next0, *to_next, n_left_to_next, bi0; + vlib_buffer_t *b0; + u32 rx_offset; + u16 snd_mss0; + u8 *data0; + int i; + + next_index = next0 = session_type_to_next[s0->session_type]; + + transport_vft = session_get_transport_vft (s0->session_type); + tc0 = transport_vft->get_connection (s0->connection_index, thread_index); + + /* Make sure we have space to send and there's something to dequeue */ + snd_space0 = transport_vft->send_space (tc0); + snd_mss0 = transport_vft->send_mss (tc0); + + if (snd_space0 == 0 || svm_fifo_max_dequeue (s0->server_tx_fifo) == 0 + || snd_mss0 == 0) + return 0; + + ASSERT (e0->enqueue_length > 0); + + /* Ensure we're not 
writing more than transport window allows */ + max_len_to_snd0 = clib_min (e0->enqueue_length, snd_space0); + + if (peek_data) + { + /* Offset in rx fifo from where to peek data */ + rx_offset = transport_vft->rx_fifo_offset (tc0); + } + + /* TODO check if transport is willing to send len_to_snd0 + * bytes (Nagle) */ + + n_frame_bytes = snd_mss0 * VLIB_FRAME_SIZE; + n_frames_per_evt = ceil ((double) max_len_to_snd0 / n_frame_bytes); + + n_bufs = vec_len (smm->tx_buffers[thread_index]); + left_to_snd0 = max_len_to_snd0; + for (i = 0; i < n_frames_per_evt; i++) + { + /* Make sure we have at least one full frame of buffers ready */ + if (PREDICT_FALSE (n_bufs < VLIB_FRAME_SIZE)) + { + vec_validate (smm->tx_buffers[thread_index], + n_bufs + VLIB_FRAME_SIZE - 1); + n_bufs += + vlib_buffer_alloc (vm, &smm->tx_buffers[thread_index][n_bufs], + VLIB_FRAME_SIZE); + + /* buffer shortage + * XXX 0.9 because when debugging we might not get a full frame */ + if (PREDICT_FALSE (n_bufs < 0.9 * VLIB_FRAME_SIZE)) + { + /* Keep track of how much we've dequeued and exit */ + e0->enqueue_length -= max_len_to_snd0 - left_to_snd0; + return -1; + } + + _vec_len (smm->tx_buffers[thread_index]) = n_bufs; + } + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + while (left_to_snd0 && n_left_to_next) + { + /* Get free buffer */ + n_bufs--; + bi0 = smm->tx_buffers[thread_index][n_bufs]; + _vec_len (smm->tx_buffers[thread_index]) = n_bufs; + + b0 = vlib_get_buffer (vm, bi0); + b0->error = 0; + b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID + | VNET_BUFFER_LOCALLY_ORIGINATED; + b0->current_data = 0; + + /* RX on the local interface. tx in default fib */ + vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + + /* usual speculation, or the enqueue_x1 macro will barf */ + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); + if (PREDICT_FALSE (n_trace > 0)) + { + session_queue_trace_t *t0; + vlib_trace_buffer (vm, node, next_index, b0, + 1 /* follow_chain */ ); + vlib_set_trace_count (vm, node, --n_trace); + t0 = vlib_add_trace (vm, node, b0, sizeof (*t0)); + t0->session_index = s0->session_index; + t0->server_thread_index = s0->thread_index; + } + + if (1) + { + ELOG_TYPE_DECLARE (e) = + { + .format = "evt-dequeue: id %d length %d",.format_args = + "i4i4",}; + struct + { + u32 data[2]; + } *ed; + ed = ELOG_DATA (&vm->elog_main, e); + ed->data[0] = e0->event_id; + ed->data[1] = e0->enqueue_length; + } + + len_to_deq0 = (left_to_snd0 < snd_mss0) ? 
left_to_snd0 : snd_mss0; + + /* Make room for headers */ + data0 = vlib_buffer_make_headroom (b0, MAX_HDRS_LEN); + + /* Dequeue the data + * TODO 1) peek instead of dequeue + * 2) buffer chains */ + if (peek_data) + { + int n_bytes_read; + n_bytes_read = svm_fifo_peek (s0->server_tx_fifo, s0->pid, + rx_offset, len_to_deq0, data0); + if (n_bytes_read < 0) + goto dequeue_fail; + + /* Keep track of progress locally, transport is also supposed to + * increment it independently when pushing header */ + rx_offset += n_bytes_read; + } + else + { + if (svm_fifo_dequeue_nowait (s0->server_tx_fifo, s0->pid, + len_to_deq0, data0) < 0) + goto dequeue_fail; + } + + b0->current_length = len_to_deq0; + + /* Ask transport to push header */ + transport_vft->push_header (tc0, b0); + + left_to_snd0 -= len_to_deq0; + *n_tx_packets = *n_tx_packets + 1; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + /* If we couldn't dequeue all bytes store progress */ + if (max_len_to_snd0 < e0->enqueue_length) + { + e0->enqueue_length -= max_len_to_snd0; + vec_add1 (smm->evts_partially_read[thread_index], *e0); + } + return 0; + +dequeue_fail: + /* Can't read from fifo. Store event rx progress, save as partially read, + * return buff to free list and return */ + e0->enqueue_length -= max_len_to_snd0 - left_to_snd0; + vec_add1 (smm->evts_partially_read[thread_index], *e0); + + to_next -= 1; + n_left_to_next += 1; + _vec_len (smm->tx_buffers[thread_index]) += 1; + + clib_warning ("dequeue fail"); + return 0; +} + +int +session_fifo_rx_peek (vlib_main_t * vm, vlib_node_runtime_t * node, + session_manager_main_t * smm, session_fifo_event_t * e0, + stream_session_t * s0, u32 thread_index, int *n_tx_pkts) +{ + return session_fifo_rx_i (vm, node, smm, e0, s0, thread_index, n_tx_pkts, + 1); +} + +int +session_fifo_rx_dequeue (vlib_main_t * vm, vlib_node_runtime_t * node, + session_manager_main_t * smm, + session_fifo_event_t * e0, stream_session_t * s0, + u32 thread_index, int *n_tx_pkts) +{ + return session_fifo_rx_i (vm, node, smm, e0, s0, thread_index, n_tx_pkts, + 0); +} + +static uword +session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + session_manager_main_t *smm = vnet_get_session_manager_main (); + session_fifo_event_t *my_fifo_events, *e; + u32 n_to_dequeue; + unix_shared_memory_queue_t *q; + int n_tx_packets = 0; + u32 my_thread_index = vm->cpu_index; + int i, rv; + + /* + * Update TCP time + */ + tcp_update_time (vlib_time_now (vm), my_thread_index); + + /* + * Get vpp queue events + */ + q = smm->vpp_event_queues[my_thread_index]; + if (PREDICT_FALSE (q == 0)) + return 0; + + /* min number of events we can dequeue without blocking */ + n_to_dequeue = q->cursize; + if (n_to_dequeue == 0) + return 0; + + my_fifo_events = smm->fifo_events[my_thread_index]; + + /* If we didn't manage to process previous events try going + * over them again without dequeuing new ones. 
+ * XXX: Block senders to sessions that can't keep up */ + if (vec_len (my_fifo_events) >= 100) + goto skip_dequeue; + + /* See you in the next life, don't be late */ + if (pthread_mutex_trylock (&q->mutex)) + return 0; + + for (i = 0; i < n_to_dequeue; i++) + { + vec_add2 (my_fifo_events, e, 1); + unix_shared_memory_queue_sub_raw (q, (u8 *) e); + } + + /* The other side of the connection is not polling */ + if (q->cursize < (q->maxsize / 8)) + (void) pthread_cond_broadcast (&q->condvar); + pthread_mutex_unlock (&q->mutex); + + smm->fifo_events[my_thread_index] = my_fifo_events; + +skip_dequeue: + + for (i = 0; i < n_to_dequeue; i++) + { + svm_fifo_t *f0; /* $$$ prefetch 1 ahead maybe */ + stream_session_t *s0; + u32 server_session_index0, server_thread_index0; + session_fifo_event_t *e0; + + e0 = &my_fifo_events[i]; + f0 = e0->fifo; + server_session_index0 = f0->server_session_index; + server_thread_index0 = f0->server_thread_index; + + /* $$$ add multiple event queues, per vpp worker thread */ + ASSERT (server_thread_index0 == my_thread_index); + + s0 = pool_elt_at_index (smm->sessions[my_thread_index], + server_session_index0); + + ASSERT (s0->thread_index == my_thread_index); + + switch (e0->event_type) + { + case FIFO_EVENT_SERVER_TX: + /* Spray packets in per session type frames, since they go to + * different nodes */ + rv = (smm->session_rx_fns[s0->session_type]) (vm, node, smm, e0, s0, + my_thread_index, + &n_tx_packets); + if (rv < 0) + goto done; + + break; + + default: + clib_warning ("unhandled event type %d", e0->event_type); + } + } + +done: + + /* Couldn't process all events. Probably out of buffers */ + if (PREDICT_FALSE (i < n_to_dequeue)) + { + session_fifo_event_t *partially_read = + smm->evts_partially_read[my_thread_index]; + vec_add (partially_read, &my_fifo_events[i], n_to_dequeue - i); + vec_free (my_fifo_events); + smm->fifo_events[my_thread_index] = partially_read; + smm->evts_partially_read[my_thread_index] = 0; + } + else + { + vec_free (smm->fifo_events[my_thread_index]); + smm->fifo_events[my_thread_index] = + smm->evts_partially_read[my_thread_index]; + smm->evts_partially_read[my_thread_index] = 0; + } + + vlib_node_increment_counter (vm, session_queue_node.index, + SESSION_QUEUE_ERROR_TX, n_tx_packets); + + return n_tx_packets; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (session_queue_node) = +{ + .function = session_queue_node_fn, + .name = "session-queue", + .format_trace = format_session_queue_trace, + .type = VLIB_NODE_TYPE_INPUT, + .n_errors = ARRAY_LEN (session_queue_error_strings), + .error_strings = session_queue_error_strings, + .n_next_nodes = SESSION_QUEUE_N_NEXT, + /* .state = VLIB_NODE_STATE_DISABLED, enable on-demand? */ + /* edit / add dispositions here */ + .next_nodes = + { + [SESSION_QUEUE_NEXT_DROP] = "error-drop", + [SESSION_QUEUE_NEXT_IP4_LOOKUP] = "ip4-lookup", + [SESSION_QUEUE_NEXT_IP6_LOOKUP] = "ip6-lookup", + [SESSION_QUEUE_NEXT_TCP_IP4_OUTPUT] = "tcp4-output", + [SESSION_QUEUE_NEXT_TCP_IP6_OUTPUT] = "tcp6-output", + }, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/session/session.api b/src/vnet/session/session.api new file mode 100644 index 00000000..a7b28c1d --- /dev/null +++ b/src/vnet/session/session.api @@ -0,0 +1,429 @@ +/* + * Copyright (c) 2015-2016 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + /** \brief Bind to a given URI + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param accept_cookie - sender accept cookie, to identify this bind flavor + @param uri - a URI, e.g. "tcp://0.0.0.0/0/80" [ipv4] + "tcp://::/0/80" [ipv6] etc. + @param options - socket options, fifo sizes, etc. +*/ +define bind_uri { + u32 client_index; + u32 context; + u32 accept_cookie; + u32 initial_segment_size; + u8 uri[128]; + u64 options[16]; +}; + +/** \brief Unbind a given URI + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param uri - a URI, e.g. "tcp://0.0.0.0/0/80" [ipv4] + "tcp://::/0/80" [ipv6], etc. + @param options - socket options, fifo sizes, etc. +*/ +define unbind_uri { + u32 client_index; + u32 context; + u8 uri[128]; +}; + +/** \brief Connect to a given URI + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param accept_cookie - sender accept cookie, to identify this bind flavor + @param uri - a URI, e.g. "tcp4://0.0.0.0/0/80" + "tcp6://::/0/80" [ipv6], etc. + @param options - socket options, fifo sizes, etc. 
+*/ +define connect_uri { + u32 client_index; + u32 context; + u8 uri[128]; + u64 client_queue_address; + u64 options[16]; +}; + +/** \brief Bind reply + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param event_queue_address - vpp event queue address or 0 if this + connection shouldn't send events + @param segment_name_length - length of segment name + @param segment_name - name of segment client needs to attach to +*/ +define bind_uri_reply { + u32 context; + i32 retval; + u64 server_event_queue_address; + u8 segment_name_length; + u32 segment_size; + u8 segment_name[128]; +}; + +/** \brief unbind reply + @param context - sender context, to match reply w/ request + @param retval - return code for the request +*/ +define unbind_uri_reply { + u32 context; + i32 retval; +}; + +/** \brief vpp->client, connect reply + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param server_rx_fifo - rx (vpp -> vpp-client) fifo address + @param server_tx_fifo - tx (vpp-client -> vpp) fifo address + @param session_index - session index; + @param session_thread_index - session thread index + @param session_type - session thread type + @param vpp_event_queue_address - vpp's event queue address + @param client_event_queue_address - client's event queue address + @param segment_name_length - non-zero if the client needs to attach to + the fifo segment + @param segment_name - set if the client needs to attach to the segment +*/ +define connect_uri_reply { + u32 context; + i32 retval; + u64 server_rx_fifo; + u64 server_tx_fifo; + u32 session_index; + u32 session_thread_index; + u8 session_type; + u64 client_event_queue_address; + u64 vpp_event_queue_address; + u32 segment_size; + u8 segment_name_length; + u8 segment_name[128]; +}; + +/** \brief vpp->client, please map an additional shared memory segment + @param context - sender context, to match reply w/ request + @param segment_name - +*/ +define map_another_segment { + u32 client_index; + u32 context; + u32 segment_size; + u8 segment_name[128]; +}; + +/** \brief client->vpp + @param context - sender context, to match reply w/ request + @param retval - return code for the request +*/ +define map_another_segment_reply { + u32 context; + i32 retval; +}; + +/** \brief vpp->client, accept this session + @param context - sender context, to match reply w/ request + @param accept_cookie - tells client which bind flavor just occurred + @param rx_fifo_address - rx (vpp -> vpp-client) fifo address + @param tx_fifo_address - tx (vpp-client -> vpp) fifo address + @param session_index - index of new session + @param session_thread_index - thread index of new session + @param vpp_event_queue_address - vpp's event queue address + @param session_type - type of session + +*/ +define accept_session { + u32 client_index; + u32 context; + u32 accept_cookie; + u64 server_rx_fifo; + u64 server_tx_fifo; + u32 session_index; + u32 session_thread_index; + u64 vpp_event_queue_address; + u8 session_type; +}; + +/** \brief client->vpp, reply to an accept message + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param session_index - session index from accept_session / connect_reply + @param session_thread_index - thread index from accept_session / + connect_reply +*/ +define accept_session_reply { + u32 context; + i32 retval; + u8 session_type; + u8 session_thread_index; + u32 session_index; +}; + +/** \brief 
bidirectional disconnect API + @param client_index - opaque cookie to identify the sender + client to vpp direction only + @param context - sender context, to match reply w/ request + @param session_index - cookie #1 from accept_session / connect_reply + @param session_thread_index - cookie #2 +*/ +define disconnect_session { + u32 client_index; + u32 context; + u32 session_index; + u32 session_thread_index; +}; + +/** \brief bidirectional disconnect reply API + @param client_index - opaque cookie to identify the sender + client to vpp direction only + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param session_index - session index from accept_session / connect_reply + @param session_thread_index - thread index from accept_session / + connect_reply +*/ +define disconnect_session_reply { + u32 client_index; + u32 context; + i32 retval; + u32 session_index; + u32 session_thread_index; +}; + +/** \brief vpp->client reset session API + @param client_index - opaque cookie to identify the sender + client to vpp direction only + @param context - sender context, to match reply w/ request + @param session_index - session index from accept_session / connect_reply + @param session_thread_index - thread index from accept_session / + connect_reply +*/ +define reset_session { + u32 client_index; + u32 context; + u32 session_index; + u32 session_thread_index; +}; + +/** \brief client->vpp reset session reply + @param client_index - opaque cookie to identify the sender + client to vpp direction only + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param session_index - session index from accept_session / connect_reply + @param session_thread_index - thread index from accept_session / + connect_reply +*/ +define reset_session_reply { + u32 client_index; + u32 context; + i32 retval; + u32 session_index; + u32 session_thread_index; +}; + +/** \brief Bind to an ip:port pair for a given transport protocol + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param vrf - bind namespace + @param is_ip4 - flag that is 1 if ip address family is IPv4 + @param ip - ip address + @param port - port + @param proto - protocol 0 - TCP 1 - UDP + @param options - socket options, fifo sizes, etc. +*/ +define bind_sock { + u32 client_index; + u32 context; + u32 vrf; + u8 is_ip4; + u8 ip[16]; + u16 port; + u8 proto; + u64 options[16]; +}; + +/** \brief Unbind + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param handle - bind handle obtained from bind reply +*/ +define unbind_sock { + u32 client_index; + u32 context; + u64 handle; +}; + +/** \brief Connect to a remote peer + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param vrf - connection namespace + @param is_ip4 - flag that is 1 if ip address family is IPv4 + @param ip - ip address + @param port - port + @param proto - protocol 0 - TCP 1 - UDP + @param client_queue_address - client's API queue address. Non-zero when + used to perform redirects + @param options - socket options, fifo sizes, etc. 
+*/ +define connect_sock { + u32 client_index; + u32 context; + u32 vrf; + u8 is_ip4; + u8 ip[16]; + u16 port; + u8 proto; + u64 client_queue_address; + u64 options[16]; +}; + +/** \brief Bind reply + @param context - sender context, to match reply w/ request + @param handle - bind handle + @param retval - return code for the request + @param event_queue_address - vpp event queue address or 0 if this + connection shouldn't send events + @param segment_name_length - length of segment name + @param segment_name - name of segment client needs to attach to +*/ +define bind_sock_reply { + u32 context; + u64 handle; + i32 retval; + u64 server_event_queue_address; + u32 segment_size; + u8 segment_name_length; + u8 segment_name[128]; +}; + +/** \brief unbind reply + @param context - sender context, to match reply w/ request + @param retval - return code for the request +*/ +define unbind_sock_reply { + u32 context; + i32 retval; +}; + +/** \brief vpp/server->client, connect reply + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param handle - connection handle + @param server_rx_fifo - rx (vpp -> vpp-client) fifo address + @param server_tx_fifo - tx (vpp-client -> vpp) fifo address + @param vpp_event_queue_address - vpp's event queue address + @param client_event_queue_address - client's event queue address + @param segment_name_length - non-zero if the client needs to attach to + the fifo segment + @param segment_name - set if the client needs to attach to the segment +*/ +define connect_sock_reply { + u32 context; + i32 retval; + u64 handle; + u64 server_rx_fifo; + u64 server_tx_fifo; + u64 client_event_queue_address; + u64 vpp_event_queue_address; + u32 segment_size; + u8 segment_name_length; + u8 segment_name[128]; +}; + +/** \brief bidirectional disconnect API + @param client_index - opaque cookie to identify the sender + client to vpp direction only + @param context - sender context, to match reply w/ request + @param handle - session handle obtained through accept/connect +*/ +define disconnect_sock { + u32 client_index; + u32 context; + u64 handle; +}; + +/** \brief bidirectional disconnect reply API + @param client_index - opaque cookie to identify the sender + client to vpp direction only + @param client_context - sender context, to match reply w/ request + @param handle - session handle obtained through accept/connect +*/ +define disconnect_sock_reply { + u32 client_index; + u32 context; + i32 retval; + u64 handle; +}; + +/** \brief vpp->client, accept this session + @param context - sender context, to match reply w/ request + @param accept_cookie - tells client which bind flavor just occurred + @param handle - session handle obtained through accept/connect + @param rx_fifo_address - rx (vpp -> vpp-client) fifo address + @param tx_fifo_address - tx (vpp-client -> vpp) fifo address + @param vpp_event_queue_address - vpp's event queue address +*/ +define accept_sock { + u32 client_index; + u32 context; + u32 accept_cookie; + u64 handle; + u64 server_rx_fifo; + u64 server_tx_fifo; + u64 vpp_event_queue_address; +}; + +/** \brief client->vpp, reply to an accept message + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param handle - session handle obtained through accept/connect +*/ +define accept_sock_reply { + u32 context; + i32 retval; + u64 handle; +}; + +/** \brief vpp->client reset session API + @param client_index - opaque cookie to identify the sender + client to 
vpp direction only + @param context - sender context, to match reply w/ request + @param handle - session handle obtained through accept/connect +*/ +define reset_sock { + u32 client_index; + u32 context; + u64 handle; +}; + +/** \brief client->vpp reset session reply + @param client_index - opaque cookie to identify the sender + client to vpp direction only + @param context - sender context, to match reply w/ request + @param handle - session handle obtained through accept/connect +*/ +define reset_sock_reply { + u32 client_index; + u32 context; + i32 retval; + u64 handle; +}; +/* + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ \ No newline at end of file diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c new file mode 100644 index 00000000..539da613 --- /dev/null +++ b/src/vnet/session/session.c @@ -0,0 +1,1286 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @file + * @brief Session and session manager + */ + +#include +#include +#include +#include +#include + +/** + * Per-type vector of transport protocol virtual function tables + */ +static transport_proto_vft_t *tp_vfts; + +session_manager_main_t session_manager_main; + +/* + * Session lookup key; (src-ip, dst-ip, src-port, dst-port, session-type) + * Value: (owner thread index << 32 | session_index); + */ +static void +stream_session_table_add_for_tc (u8 sst, transport_connection_t * tc, + u64 value) +{ + session_manager_main_t *smm = &session_manager_main; + session_kv4_t kv4; + session_kv6_t kv6; + + switch (sst) + { + case SESSION_TYPE_IP4_UDP: + case SESSION_TYPE_IP4_TCP: + make_v4_ss_kv_from_tc (&kv4, tc); + kv4.value = value; + clib_bihash_add_del_16_8 (&smm->v4_session_hash, &kv4, 1 /* is_add */ ); + break; + case SESSION_TYPE_IP6_UDP: + case SESSION_TYPE_IP6_TCP: + make_v6_ss_kv_from_tc (&kv6, tc); + kv6.value = value; + clib_bihash_add_del_48_8 (&smm->v6_session_hash, &kv6, 1 /* is_add */ ); + break; + default: + clib_warning ("Session type not supported"); + ASSERT (0); + } +} + +void +stream_session_table_add (session_manager_main_t * smm, stream_session_t * s, + u64 value) +{ + transport_connection_t *tc; + + tc = tp_vfts[s->session_type].get_connection (s->connection_index, + s->thread_index); + stream_session_table_add_for_tc (s->session_type, tc, value); +} + +static void +stream_session_half_open_table_add (u8 sst, transport_connection_t * tc, + u64 value) +{ + session_manager_main_t *smm = &session_manager_main; + session_kv4_t kv4; + session_kv6_t kv6; + + switch (sst) + { + case SESSION_TYPE_IP4_UDP: + case SESSION_TYPE_IP4_TCP: + make_v4_ss_kv_from_tc (&kv4, tc); + kv4.value = value; + clib_bihash_add_del_16_8 (&smm->v4_half_open_hash, &kv4, + 1 /* is_add */ ); + break; + case SESSION_TYPE_IP6_UDP: + case SESSION_TYPE_IP6_TCP: + make_v6_ss_kv_from_tc (&kv6, tc); + kv6.value = value; + clib_bihash_add_del_48_8 (&smm->v6_half_open_hash, &kv6, + 1 /* is_add */ ); + break; + default: + clib_warning 
("Session type not supported"); + ASSERT (0); + } +} + +static int +stream_session_table_del_for_tc (session_manager_main_t * smm, u8 sst, + transport_connection_t * tc) +{ + session_kv4_t kv4; + session_kv6_t kv6; + + switch (sst) + { + case SESSION_TYPE_IP4_UDP: + case SESSION_TYPE_IP4_TCP: + make_v4_ss_kv_from_tc (&kv4, tc); + return clib_bihash_add_del_16_8 (&smm->v4_session_hash, &kv4, + 0 /* is_add */ ); + break; + case SESSION_TYPE_IP6_UDP: + case SESSION_TYPE_IP6_TCP: + make_v6_ss_kv_from_tc (&kv6, tc); + return clib_bihash_add_del_48_8 (&smm->v6_session_hash, &kv6, + 0 /* is_add */ ); + break; + default: + clib_warning ("Session type not supported"); + ASSERT (0); + } + + return 0; +} + +static int +stream_session_table_del (session_manager_main_t * smm, stream_session_t * s) +{ + transport_connection_t *ts; + + ts = tp_vfts[s->session_type].get_connection (s->connection_index, + s->thread_index); + return stream_session_table_del_for_tc (smm, s->session_type, ts); +} + +static void +stream_session_half_open_table_del (session_manager_main_t * smm, u8 sst, + transport_connection_t * tc) +{ + session_kv4_t kv4; + session_kv6_t kv6; + + switch (sst) + { + case SESSION_TYPE_IP4_UDP: + case SESSION_TYPE_IP4_TCP: + make_v4_ss_kv_from_tc (&kv4, tc); + clib_bihash_add_del_16_8 (&smm->v4_half_open_hash, &kv4, + 0 /* is_add */ ); + break; + case SESSION_TYPE_IP6_UDP: + case SESSION_TYPE_IP6_TCP: + make_v6_ss_kv_from_tc (&kv6, tc); + clib_bihash_add_del_48_8 (&smm->v6_half_open_hash, &kv6, + 0 /* is_add */ ); + break; + default: + clib_warning ("Session type not supported"); + ASSERT (0); + } +} + +stream_session_t * +stream_session_lookup_listener4 (ip4_address_t * lcl, u16 lcl_port, u8 proto) +{ + session_manager_main_t *smm = &session_manager_main; + session_kv4_t kv4; + int rv; + + make_v4_listener_kv (&kv4, lcl, lcl_port, proto); + rv = clib_bihash_search_inline_16_8 (&smm->v4_session_hash, &kv4); + if (rv == 0) + return pool_elt_at_index (smm->listen_sessions[proto], (u32) kv4.value); + + /* Zero out the lcl ip */ + kv4.key[0] = 0; + rv = clib_bihash_search_inline_16_8 (&smm->v4_session_hash, &kv4); + if (rv == 0) + return pool_elt_at_index (smm->listen_sessions[proto], kv4.value); + + return 0; +} + +/** Looks up a session based on the 5-tuple passed as argument. 
+ * + * First it tries to find an established session, if this fails, it tries + * finding a listener session if this fails, it tries a lookup with a + * wildcarded local source (listener bound to all interfaces) + */ +stream_session_t * +stream_session_lookup4 (ip4_address_t * lcl, ip4_address_t * rmt, + u16 lcl_port, u16 rmt_port, u8 proto, + u32 my_thread_index) +{ + session_manager_main_t *smm = &session_manager_main; + session_kv4_t kv4; + int rv; + + /* Lookup session amongst established ones */ + make_v4_ss_kv (&kv4, lcl, rmt, lcl_port, rmt_port, proto); + rv = clib_bihash_search_inline_16_8 (&smm->v4_session_hash, &kv4); + if (rv == 0) + return stream_session_get_tsi (kv4.value, my_thread_index); + + /* If nothing is found, check if any listener is available */ + return stream_session_lookup_listener4 (lcl, lcl_port, proto); +} + +stream_session_t * +stream_session_lookup_listener6 (ip6_address_t * lcl, u16 lcl_port, u8 proto) +{ + session_manager_main_t *smm = &session_manager_main; + session_kv6_t kv6; + int rv; + + make_v6_listener_kv (&kv6, lcl, lcl_port, proto); + rv = clib_bihash_search_inline_48_8 (&smm->v6_session_hash, &kv6); + if (rv == 0) + return pool_elt_at_index (smm->listen_sessions[proto], kv6.value); + + /* Zero out the lcl ip */ + kv6.key[0] = kv6.key[1] = 0; + rv = clib_bihash_search_inline_48_8 (&smm->v6_session_hash, &kv6); + if (rv == 0) + return pool_elt_at_index (smm->listen_sessions[proto], kv6.value); + + return 0; +} + +/* Looks up a session based on the 5-tuple passed as argument. + * First it tries to find an established session, if this fails, it tries + * finding a listener session if this fails, it tries a lookup with a + * wildcarded local source (listener bound to all interfaces) */ +stream_session_t * +stream_session_lookup6 (ip6_address_t * lcl, ip6_address_t * rmt, + u16 lcl_port, u16 rmt_port, u8 proto, + u32 my_thread_index) +{ + session_manager_main_t *smm = vnet_get_session_manager_main (); + session_kv6_t kv6; + int rv; + + make_v6_ss_kv (&kv6, lcl, rmt, lcl_port, rmt_port, proto); + rv = clib_bihash_search_inline_48_8 (&smm->v6_session_hash, &kv6); + if (rv == 0) + return stream_session_get_tsi (kv6.value, my_thread_index); + + /* If nothing is found, check if any listener is available */ + return stream_session_lookup_listener6 (lcl, lcl_port, proto); +} + +stream_session_t * +stream_session_lookup_listener (ip46_address_t * lcl, u16 lcl_port, u8 proto) +{ + switch (proto) + { + case SESSION_TYPE_IP4_UDP: + case SESSION_TYPE_IP4_TCP: + return stream_session_lookup_listener4 (&lcl->ip4, lcl_port, proto); + break; + case SESSION_TYPE_IP6_UDP: + case SESSION_TYPE_IP6_TCP: + return stream_session_lookup_listener6 (&lcl->ip6, lcl_port, proto); + break; + } + return 0; +} + +static u64 +stream_session_half_open_lookup (session_manager_main_t * smm, + ip46_address_t * lcl, ip46_address_t * rmt, + u16 lcl_port, u16 rmt_port, u8 proto) +{ + session_kv4_t kv4; + session_kv6_t kv6; + int rv; + + switch (proto) + { + case SESSION_TYPE_IP4_UDP: + case SESSION_TYPE_IP4_TCP: + make_v4_ss_kv (&kv4, &lcl->ip4, &rmt->ip4, lcl_port, rmt_port, proto); + rv = clib_bihash_search_inline_16_8 (&smm->v4_half_open_hash, &kv4); + + if (rv == 0) + return kv4.value; + + return (u64) ~ 0; + break; + case SESSION_TYPE_IP6_UDP: + case SESSION_TYPE_IP6_TCP: + make_v6_ss_kv (&kv6, &lcl->ip6, &rmt->ip6, lcl_port, rmt_port, proto); + rv = clib_bihash_search_inline_48_8 (&smm->v6_half_open_hash, &kv6); + + if (rv == 0) + return kv6.value; + + return (u64) ~ 0; + break; + } + 
return 0; +} + +transport_connection_t * +stream_session_lookup_transport4 (session_manager_main_t * smm, + ip4_address_t * lcl, ip4_address_t * rmt, + u16 lcl_port, u16 rmt_port, u8 proto, + u32 my_thread_index) +{ + session_kv4_t kv4; + stream_session_t *s; + int rv; + + /* Lookup session amongst established ones */ + make_v4_ss_kv (&kv4, lcl, rmt, lcl_port, rmt_port, proto); + rv = clib_bihash_search_inline_16_8 (&smm->v4_session_hash, &kv4); + if (rv == 0) + { + s = stream_session_get_tsi (kv4.value, my_thread_index); + + return tp_vfts[s->session_type].get_connection (s->connection_index, + my_thread_index); + } + + /* If nothing is found, check if any listener is available */ + s = stream_session_lookup_listener4 (lcl, lcl_port, proto); + if (s) + return tp_vfts[s->session_type].get_listener (s->connection_index); + + /* Finally, try half-open connections */ + rv = clib_bihash_search_inline_16_8 (&smm->v4_half_open_hash, &kv4); + if (rv == 0) + return tp_vfts[proto].get_half_open (kv4.value & 0xFFFFFFFF); + + return 0; +} + +transport_connection_t * +stream_session_lookup_transport6 (session_manager_main_t * smm, + ip6_address_t * lcl, ip6_address_t * rmt, + u16 lcl_port, u16 rmt_port, u8 proto, + u32 my_thread_index) +{ + stream_session_t *s; + session_kv6_t kv6; + int rv; + + make_v6_ss_kv (&kv6, lcl, rmt, lcl_port, rmt_port, proto); + rv = clib_bihash_search_inline_48_8 (&smm->v6_session_hash, &kv6); + if (rv == 0) + { + s = stream_session_get_tsi (kv6.value, my_thread_index); + + return tp_vfts[s->session_type].get_connection (s->connection_index, + my_thread_index); + } + + /* If nothing is found, check if any listener is available */ + s = stream_session_lookup_listener6 (lcl, lcl_port, proto); + if (s) + return tp_vfts[s->session_type].get_listener (s->connection_index); + + /* Finally, try half-open connections */ + rv = clib_bihash_search_inline_48_8 (&smm->v6_half_open_hash, &kv6); + if (rv == 0) + return tp_vfts[s->session_type].get_half_open (kv6.value & 0xFFFFFFFF); + + return 0; +} + +/** + * Allocate vpp event queue (once) per worker thread + */ +void +vpp_session_event_queue_allocate (session_manager_main_t * smm, + u32 thread_index) +{ + api_main_t *am = &api_main; + void *oldheap; + + if (smm->vpp_event_queues[thread_index] == 0) + { + /* Allocate event fifo in the /vpe-api shared-memory segment */ + oldheap = svm_push_data_heap (am->vlib_rp); + + smm->vpp_event_queues[thread_index] = + unix_shared_memory_queue_init (2048 /* nels $$$$ config */ , + sizeof (session_fifo_event_t), + 0 /* consumer pid */ , + 0 + /* (do not) send signal when queue non-empty */ + ); + + svm_pop_heap (oldheap); + } +} + +void +session_manager_get_segment_info (u32 index, u8 ** name, u32 * size) +{ + svm_fifo_segment_private_t *s; + s = svm_fifo_get_segment (index); + *name = s->h->segment_name; + *size = s->ssvm.ssvm_size; +} + +always_inline int +session_manager_add_segment_i (session_manager_main_t * smm, + session_manager_t * sm, + u32 segment_size, u8 * segment_name) +{ + svm_fifo_segment_create_args_t _ca, *ca = &_ca; + int rv; + + memset (ca, 0, sizeof (*ca)); + + ca->segment_name = (char *) segment_name; + ca->segment_size = segment_size; + + rv = svm_fifo_segment_create (ca); + if (rv) + { + clib_warning ("svm_fifo_segment_create ('%s', %d) failed", + ca->segment_name, ca->segment_size); + vec_free (segment_name); + return -1; + } + + vec_add1 (sm->segment_indices, ca->new_segment_index); + + return 0; +} + +static int +session_manager_add_segment (session_manager_main_t * smm, + 
session_manager_t * sm) +{ + u8 *segment_name; + svm_fifo_segment_create_args_t _ca, *ca = &_ca; + u32 add_segment_size; + u32 default_segment_size = 128 << 10; + + memset (ca, 0, sizeof (*ca)); + segment_name = format (0, "%d-%d%c", getpid (), + smm->unique_segment_name_counter++, 0); + add_segment_size = + sm->add_segment_size ? sm->add_segment_size : default_segment_size; + + return session_manager_add_segment_i (smm, sm, add_segment_size, + segment_name); +} + +int +session_manager_add_first_segment (session_manager_main_t * smm, + session_manager_t * sm, u32 segment_size, + u8 ** segment_name) +{ + svm_fifo_segment_create_args_t _ca, *ca = &_ca; + memset (ca, 0, sizeof (*ca)); + *segment_name = format (0, "%d-%d%c", getpid (), + smm->unique_segment_name_counter++, 0); + return session_manager_add_segment_i (smm, sm, segment_size, *segment_name); +} + +void +session_manager_del (session_manager_main_t * smm, session_manager_t * sm) +{ + u32 *deleted_sessions = 0; + u32 *deleted_thread_indices = 0; + int i, j; + + /* Across all fifo segments used by the server */ + for (j = 0; j < vec_len (sm->segment_indices); j++) + { + svm_fifo_segment_private_t *fifo_segment; + svm_fifo_t **fifos; + /* Vector of fifos allocated in the segment */ + fifo_segment = svm_fifo_get_segment (sm->segment_indices[j]); + fifos = (svm_fifo_t **) fifo_segment->h->fifos; + + /* + * Remove any residual sessions from the session lookup table + * Don't bother deleting the individual fifos, we're going to + * throw away the fifo segment in a minute. + */ + for (i = 0; i < vec_len (fifos); i++) + { + svm_fifo_t *fifo; + u32 session_index, thread_index; + stream_session_t *session; + + fifo = fifos[i]; + session_index = fifo->server_session_index; + thread_index = fifo->server_thread_index; + + session = pool_elt_at_index (smm->sessions[thread_index], + session_index); + + /* Add to the deleted_sessions vector (once!) */ + if (!session->is_deleted) + { + session->is_deleted = 1; + vec_add1 (deleted_sessions, + session - smm->sessions[thread_index]); + vec_add1 (deleted_thread_indices, thread_index); + } + } + + for (i = 0; i < vec_len (deleted_sessions); i++) + { + stream_session_t *session; + + session = + pool_elt_at_index (smm->sessions[deleted_thread_indices[i]], + deleted_sessions[i]); + + /* Instead of directly removing the session call disconnect */ + stream_session_disconnect (session); + + /* + stream_session_table_del (smm, session); + pool_put(smm->sessions[deleted_thread_indices[i]], session); + */ + } + + vec_reset_length (deleted_sessions); + vec_reset_length (deleted_thread_indices); + + /* Instead of removing the segment, test when removing the session if + * the segment can be removed + */ + /* svm_fifo_segment_delete (fifo_segment); */ + } + + vec_free (deleted_sessions); + vec_free (deleted_thread_indices); +} + +int +session_manager_allocate_session_fifos (session_manager_main_t * smm, + session_manager_t * sm, + svm_fifo_t ** server_rx_fifo, + svm_fifo_t ** server_tx_fifo, + u32 * fifo_segment_index, + u8 * added_a_segment) +{ + svm_fifo_segment_private_t *fifo_segment; + u32 fifo_size, default_fifo_size = 8192 /* TODO config */ ; + int i; + + *added_a_segment = 0; + + /* Allocate svm fifos */ + ASSERT (vec_len (sm->segment_indices)); + +again: + for (i = 0; i < vec_len (sm->segment_indices); i++) + { + *fifo_segment_index = sm->segment_indices[i]; + fifo_segment = svm_fifo_get_segment (*fifo_segment_index); + + fifo_size = sm->rx_fifo_size; + fifo_size = (fifo_size == 0) ? 
default_fifo_size : fifo_size; + *server_rx_fifo = svm_fifo_segment_alloc_fifo (fifo_segment, fifo_size); + + fifo_size = sm->tx_fifo_size; + fifo_size = (fifo_size == 0) ? default_fifo_size : fifo_size; + *server_tx_fifo = svm_fifo_segment_alloc_fifo (fifo_segment, fifo_size); + + if (*server_rx_fifo == 0) + { + /* This would be very odd, but handle it... */ + if (*server_tx_fifo != 0) + { + svm_fifo_segment_free_fifo (fifo_segment, *server_tx_fifo); + *server_tx_fifo = 0; + } + continue; + } + if (*server_tx_fifo == 0) + { + if (*server_rx_fifo != 0) + { + svm_fifo_segment_free_fifo (fifo_segment, *server_rx_fifo); + *server_rx_fifo = 0; + } + continue; + } + break; + } + + /* See if we're supposed to create another segment */ + if (*server_rx_fifo == 0) + { + if (sm->add_segment) + { + if (*added_a_segment) + { + clib_warning ("added a segment, still cant allocate a fifo"); + return SESSION_ERROR_NEW_SEG_NO_SPACE; + } + + if (session_manager_add_segment (smm, sm)) + return VNET_API_ERROR_URI_FIFO_CREATE_FAILED; + + *added_a_segment = 1; + goto again; + } + else + return SESSION_ERROR_NO_SPACE; + } + return 0; +} + +int +stream_session_create_i (session_manager_main_t * smm, application_t * app, + transport_connection_t * tc, + stream_session_t ** ret_s) +{ + int rv; + svm_fifo_t *server_rx_fifo = 0, *server_tx_fifo = 0; + u32 fifo_segment_index; + u32 pool_index, seg_size; + stream_session_t *s; + u64 value; + u32 thread_index = tc->thread_index; + session_manager_t *sm; + u8 segment_added; + u8 *seg_name; + + sm = session_manager_get (app->session_manager_index); + + /* Check the API queue */ + if (app->mode == APP_SERVER && application_api_queue_is_full (app)) + return SESSION_ERROR_API_QUEUE_FULL; + + if ((rv = session_manager_allocate_session_fifos (smm, sm, &server_rx_fifo, + &server_tx_fifo, + &fifo_segment_index, + &segment_added))) + return rv; + + if (segment_added && app->mode == APP_SERVER) + { + /* Send an API message to the external server, to map new segment */ + ASSERT (app->cb_fns.add_segment_callback); + + session_manager_get_segment_info (fifo_segment_index, &seg_name, + &seg_size); + if (app->cb_fns.add_segment_callback (app->api_client_index, seg_name, + seg_size)) + return VNET_API_ERROR_URI_FIFO_CREATE_FAILED; + } + + /* Create the session */ + pool_get (smm->sessions[thread_index], s); + memset (s, 0, sizeof (*s)); + + /* Initialize backpointers */ + pool_index = s - smm->sessions[thread_index]; + server_rx_fifo->server_session_index = pool_index; + server_rx_fifo->server_thread_index = thread_index; + + server_tx_fifo->server_session_index = pool_index; + server_tx_fifo->server_thread_index = thread_index; + + s->server_rx_fifo = server_rx_fifo; + s->server_tx_fifo = server_tx_fifo; + + /* Initialize state machine, such as it is... */ + s->session_type = app->session_type; + s->session_state = SESSION_STATE_CONNECTING; + s->app_index = application_get_index (app); + s->server_segment_index = fifo_segment_index; + s->thread_index = thread_index; + s->session_index = pool_index; + + /* Attach transport to session */ + s->connection_index = tc->c_index; + + /* Attach session to transport */ + tc->s_index = s->session_index; + + /* Add to the main lookup table */ + value = (((u64) thread_index) << 32) | (u64) s->session_index; + stream_session_table_add_for_tc (app->session_type, tc, value); + + *ret_s = s; + + return 0; +} + +/* + * Enqueue data for delivery to session peer. 
Does not notify the peer of the enqueue
+ * event, but on request it can queue notification events for later delivery
+ * by calling session_manager_flush_enqueue_events().
+ *
+ * @param tc Transport connection to which the data is to be enqueued
+ * @param data Data to be enqueued
+ * @param len Length of data to be enqueued
+ * @param queue_event Flag to indicate if peer is to be notified or if event
+ * is to be queued. The former is useful when more data is
+ * enqueued and only one event is to be generated.
+ * @return Number of bytes enqueued or a negative value if enqueueing failed.
+ */
+int
+stream_session_enqueue_data (transport_connection_t * tc, u8 * data, u16 len,
+ u8 queue_event)
+{
+ stream_session_t *s;
+ int enqueued;
+
+ s = stream_session_get (tc->s_index, tc->thread_index);
+
+ /* Make sure there's enough space left. We might've filled the pipes */
+ if (PREDICT_FALSE (len > svm_fifo_max_enqueue (s->server_rx_fifo)))
+ return -1;
+
+ enqueued = svm_fifo_enqueue_nowait (s->server_rx_fifo, s->pid, len, data);
+
+ if (queue_event)
+ {
+ /* Queue RX event on this fifo. Eventually these will need to be flushed
+ * by calling session_manager_flush_enqueue_events () */
+ session_manager_main_t *smm = vnet_get_session_manager_main ();
+ u32 thread_index = s->thread_index;
+ u32 my_enqueue_epoch = smm->current_enqueue_epoch[thread_index];
+
+ if (s->enqueue_epoch != my_enqueue_epoch)
+ {
+ s->enqueue_epoch = my_enqueue_epoch;
+ vec_add1 (smm->session_indices_to_enqueue_by_thread[thread_index],
+ s - smm->sessions[thread_index]);
+ }
+ }
+
+ return enqueued;
+}
+
+/** Check if we have space in rx fifo to push more bytes */
+u8
+stream_session_no_space (transport_connection_t * tc, u32 thread_index,
+ u16 data_len)
+{
+ stream_session_t *s = stream_session_get (tc->c_index, thread_index);
+
+ if (PREDICT_FALSE (s->session_state != SESSION_STATE_READY))
+ return 1;
+
+ if (data_len > svm_fifo_max_enqueue (s->server_rx_fifo))
+ return 1;
+
+ return 0;
+}
+
+u32
+stream_session_peek_bytes (transport_connection_t * tc, u8 * buffer,
+ u32 offset, u32 max_bytes)
+{
+ stream_session_t *s = stream_session_get (tc->s_index, tc->thread_index);
+ return svm_fifo_peek (s->server_tx_fifo, s->pid, offset, max_bytes, buffer);
+}
+
+u32
+stream_session_dequeue_drop (transport_connection_t * tc, u32 max_bytes)
+{
+ stream_session_t *s = stream_session_get (tc->s_index, tc->thread_index);
+ return svm_fifo_dequeue_drop (s->server_tx_fifo, s->pid, max_bytes);
+}
+
+/**
+ * Notify session peer that new data has been enqueued.
+ *
+ * @param s Stream session for which the event is to be generated.
+ * @param block Flag to indicate if call should block if event queue is full.
+ *
+ * @return 0 on success or a negative number if notification failed.
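+ *
+ * Note: when the application's event queue is full and @a block is not set,
+ * the event is dropped and a negative value is returned.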
+ */ +static int +stream_session_enqueue_notify (stream_session_t * s, u8 block) +{ + application_t *app; + session_fifo_event_t evt; + unix_shared_memory_queue_t *q; + static u32 serial_number; + + if (PREDICT_FALSE (s->session_state == SESSION_STATE_CLOSED)) + return 0; + + /* Get session's server */ + app = application_get (s->app_index); + + /* Fabricate event */ + evt.fifo = s->server_rx_fifo; + evt.event_type = FIFO_EVENT_SERVER_RX; + evt.event_id = serial_number++; + evt.enqueue_length = svm_fifo_max_dequeue (s->server_rx_fifo); + + /* Add event to server's event queue */ + q = app->event_queue; + + /* Based on request block (or not) for lack of space */ + if (block || PREDICT_TRUE (q->cursize < q->maxsize)) + unix_shared_memory_queue_add (app->event_queue, (u8 *) & evt, + 0 /* do wait for mutex */ ); + else + return -1; + + if (1) + { + ELOG_TYPE_DECLARE (e) = + { + .format = "evt-enqueue: id %d length %d",.format_args = "i4i4",}; + struct + { + u32 data[2]; + } *ed; + ed = ELOG_DATA (&vlib_global_main.elog_main, e); + ed->data[0] = evt.event_id; + ed->data[1] = evt.enqueue_length; + } + + return 0; +} + +/** + * Flushes queue of sessions that are to be notified of new data + * enqueued events. + * + * @param thread_index Thread index for which the flush is to be performed. + * @return 0 on success or a positive number indicating the number of + * failures due to API queue being full. + */ +int +session_manager_flush_enqueue_events (u32 thread_index) +{ + session_manager_main_t *smm = &session_manager_main; + u32 *session_indices_to_enqueue; + int i, errors = 0; + + session_indices_to_enqueue = + smm->session_indices_to_enqueue_by_thread[thread_index]; + + for (i = 0; i < vec_len (session_indices_to_enqueue); i++) + { + stream_session_t *s0; + + /* Get session */ + s0 = stream_session_get (session_indices_to_enqueue[i], thread_index); + if (stream_session_enqueue_notify (s0, 0 /* don't block */ )) + { + errors++; + } + } + + vec_reset_length (session_indices_to_enqueue); + + smm->session_indices_to_enqueue_by_thread[thread_index] = + session_indices_to_enqueue; + + /* Increment enqueue epoch for next round */ + smm->current_enqueue_epoch[thread_index]++; + + return errors; +} + +/* + * Start listening on server's ip/port pair for requested transport. + * + * Creates a 'dummy' stream session with state LISTENING to be used in session + * lookups, prior to establishing connection. Requests transport to build + * it's own specific listening connection. 
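+ *
+ * A minimal usage sketch (names are illustrative only: srv_index is an
+ * existing application index, ip an initialized ip46_address_t, port the
+ * listen port):
+ *
+ *   stream_session_start_listen (srv_index, &ip, port);
+ *   ...
+ *   stream_session_stop_listen (srv_index);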
+ */
+int
+stream_session_start_listen (u32 server_index, ip46_address_t * ip, u16 port)
+{
+ session_manager_main_t *smm = &session_manager_main;
+ stream_session_t *s;
+ transport_connection_t *tc;
+ application_t *srv;
+ u32 tci;
+
+ srv = application_get (server_index);
+
+ pool_get (smm->listen_sessions[srv->session_type], s);
+ memset (s, 0, sizeof (*s));
+
+ s->session_type = srv->session_type;
+ s->session_state = SESSION_STATE_LISTENING;
+ s->session_index = s - smm->listen_sessions[srv->session_type];
+ s->app_index = srv->index;
+
+ /* Transport bind/listen */
+ tci = tp_vfts[srv->session_type].bind (smm->vlib_main, s->session_index, ip,
+ port);
+
+ /* Attach transport to session */
+ s->connection_index = tci;
+ tc = tp_vfts[srv->session_type].get_listener (tci);
+
+ srv->session_index = s->session_index;
+
+ /* Add to the main lookup table */
+ stream_session_table_add_for_tc (s->session_type, tc, s->session_index);
+
+ return 0;
+}
+
+void
+stream_session_stop_listen (u32 server_index)
+{
+ session_manager_main_t *smm = &session_manager_main;
+ stream_session_t *listener;
+ transport_connection_t *tc;
+ application_t *srv;
+
+ srv = application_get (server_index);
+ listener = pool_elt_at_index (smm->listen_sessions[srv->session_type],
+ srv->session_index);
+
+ tc = tp_vfts[srv->session_type].get_listener (listener->connection_index);
+ stream_session_table_del_for_tc (smm, listener->session_type, tc);
+
+ tp_vfts[srv->session_type].unbind (smm->vlib_main,
+ listener->connection_index);
+ pool_put (smm->listen_sessions[srv->session_type], listener);
+}
+
+int
+connect_server_add_segment_cb (application_t * ss, char *segment_name,
+ u32 segment_size)
+{
+ /* Should never be called; assert if it ever is */
+ ASSERT (0);
+ return 0;
+}
+
+void
+connects_session_manager_init (session_manager_main_t * smm, u8 session_type)
+{
+ session_manager_t *sm;
+ u32 connect_fifo_size = 8 << 10; /* Config? 
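Hard-coded to 8kB for now; should come from configuration eventually.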
*/ + u32 default_segment_size = 1 << 20; + + pool_get (smm->session_managers, sm); + memset (sm, 0, sizeof (*sm)); + + sm->add_segment_size = default_segment_size; + sm->rx_fifo_size = connect_fifo_size; + sm->tx_fifo_size = connect_fifo_size; + sm->add_segment = 1; + + session_manager_add_segment (smm, sm); + smm->connect_manager_index[session_type] = sm - smm->session_managers; +} + +void +stream_session_connect_notify (transport_connection_t * tc, u8 sst, + u8 is_fail) +{ + session_manager_main_t *smm = &session_manager_main; + application_t *app; + stream_session_t *new_s = 0; + u64 value; + + value = stream_session_half_open_lookup (smm, &tc->lcl_ip, &tc->rmt_ip, + tc->lcl_port, tc->rmt_port, + tc->proto); + if (value == HALF_OPEN_LOOKUP_INVALID_VALUE) + { + clib_warning ("This can't be good!"); + return; + } + + app = application_get (value >> 32); + + if (!is_fail) + { + /* Create new session (server segments are allocated if needed) */ + if (stream_session_create_i (smm, app, tc, &new_s)) + return; + + app->session_index = stream_session_get_index (new_s); + app->thread_index = new_s->thread_index; + + /* Allocate vpp event queue for this thread if needed */ + vpp_session_event_queue_allocate (smm, tc->thread_index); + } + + /* Notify client */ + app->cb_fns.session_connected_callback (app->api_client_index, new_s, + is_fail); + + /* Cleanup session lookup */ + stream_session_half_open_table_del (smm, sst, tc); +} + +void +stream_session_accept_notify (transport_connection_t * tc) +{ + application_t *server; + stream_session_t *s; + + s = stream_session_get (tc->s_index, tc->thread_index); + server = application_get (s->app_index); + server->cb_fns.session_accept_callback (s); +} + +/** + * Notification from transport that connection is being closed. + * + * A disconnect is sent to application but state is not removed. Once + * disconnect is acknowledged by application, session disconnect is called. + * Ultimately this leads to close being called on transport (passive close). + */ +void +stream_session_disconnect_notify (transport_connection_t * tc) +{ + application_t *server; + stream_session_t *s; + + s = stream_session_get (tc->s_index, tc->thread_index); + server = application_get (s->app_index); + server->cb_fns.session_disconnect_callback (s); +} + +/** + * Cleans up session and associated app if needed. 
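+ *
+ * Removes the session from the lookup table and frees its rx/tx fifos. For
+ * client sessions the owning application is deleted as well; for server
+ * sessions the fifo segment is deleted once it holds no more fifos (the
+ * first segment is always kept).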
+ */ +void +stream_session_delete (stream_session_t * s) +{ + session_manager_main_t *smm = vnet_get_session_manager_main (); + svm_fifo_segment_private_t *fifo_segment; + application_t *app; + int rv; + + /* delete from the main lookup table */ + rv = stream_session_table_del (smm, s); + + if (rv) + clib_warning ("hash delete error, rv %d", rv); + + /* Cleanup fifo segments */ + fifo_segment = svm_fifo_get_segment (s->server_segment_index); + svm_fifo_segment_free_fifo (fifo_segment, s->server_rx_fifo); + svm_fifo_segment_free_fifo (fifo_segment, s->server_tx_fifo); + + /* Cleanup app if client */ + app = application_get (s->app_index); + if (app->mode == APP_CLIENT) + { + application_del (app); + } + else if (app->mode == APP_SERVER) + { + session_manager_t *sm; + svm_fifo_segment_private_t *fifo_segment; + svm_fifo_t **fifos; + u32 fifo_index; + + sm = session_manager_get (app->session_manager_index); + + /* Delete fifo */ + fifo_segment = svm_fifo_get_segment (s->server_segment_index); + fifos = (svm_fifo_t **) fifo_segment->h->fifos; + + fifo_index = svm_fifo_segment_index (fifo_segment); + + /* Remove segment only if it holds no fifos and not the first */ + if (sm->segment_indices[0] != fifo_index && vec_len (fifos) == 0) + svm_fifo_segment_delete (fifo_segment); + } + + pool_put (smm->sessions[s->thread_index], s); +} + +/** + * Notification from transport that connection is being deleted + * + * This should be called only on previously fully established sessions. For + * instance failed connects should call stream_session_connect_notify and + * indicate that the connect has failed. + */ +void +stream_session_delete_notify (transport_connection_t * tc) +{ + stream_session_t *s; + + s = stream_session_get_if_valid (tc->s_index, tc->thread_index); + if (!s) + { + clib_warning ("Surprised!"); + return; + } + stream_session_delete (s); +} + +/** + * Notify application that connection has been reset. + */ +void +stream_session_reset_notify (transport_connection_t * tc) +{ + stream_session_t *s; + application_t *app; + s = stream_session_get (tc->s_index, tc->thread_index); + + app = application_get (s->app_index); + app->cb_fns.session_reset_callback (s); +} + +/** + * Accept a stream session. Optionally ping the server by callback. 
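+ *
+ * Creates the session (allocating fifos from the owning application's
+ * session manager), makes sure the worker's vpp event queue exists and, if
+ * @a notify is set, immediately invokes the server's accept callback.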
+ */ +int +stream_session_accept (transport_connection_t * tc, u32 listener_index, + u8 sst, u8 notify) +{ + session_manager_main_t *smm = &session_manager_main; + application_t *server; + stream_session_t *s, *listener; + + int rv; + + /* Find the server */ + listener = pool_elt_at_index (smm->listen_sessions[sst], listener_index); + server = application_get (listener->app_index); + + if ((rv = stream_session_create_i (smm, server, tc, &s))) + return rv; + + /* Allocate vpp event queue for this thread if needed */ + vpp_session_event_queue_allocate (smm, tc->thread_index); + + /* Shoulder-tap the server */ + if (notify) + { + server->cb_fns.session_accept_callback (s); + } + + return 0; +} + +void +stream_session_open (u8 sst, ip46_address_t * addr, u16 port_host_byte_order, + u32 app_index) +{ + transport_connection_t *tc; + u32 tci; + u64 value; + + /* Ask transport to open connection */ + tci = tp_vfts[sst].open (addr, port_host_byte_order); + + /* Get transport connection */ + tc = tp_vfts[sst].get_half_open (tci); + + /* Store api_client_index and transport connection index */ + value = (((u64) app_index) << 32) | (u64) tc->c_index; + + /* Add to the half-open lookup table */ + stream_session_half_open_table_add (sst, tc, value); +} + +/** + * Disconnect session and propagate to transport. This should eventually + * result in a delete notification that allows us to cleanup session state. + * Called for both active/passive disconnects. + */ +void +stream_session_disconnect (stream_session_t * s) +{ + tp_vfts[s->session_type].close (s->connection_index, s->thread_index); + s->session_state = SESSION_STATE_CLOSED; +} + +/** + * Cleanup transport and session state. + */ +void +stream_session_cleanup (stream_session_t * s) +{ + tp_vfts[s->session_type].cleanup (s->connection_index, s->thread_index); + stream_session_delete (s); +} + +void +session_register_transport (u8 type, const transport_proto_vft_t * vft) +{ + session_manager_main_t *smm = vnet_get_session_manager_main (); + + vec_validate (tp_vfts, type); + tp_vfts[type] = *vft; + + /* If an offset function is provided, then peek instead of dequeue */ + smm->session_rx_fns[type] = + (vft->rx_fifo_offset) ? 
session_fifo_rx_peek : session_fifo_rx_dequeue; +} + +transport_proto_vft_t * +session_get_transport_vft (u8 type) +{ + if (type >= vec_len (tp_vfts)) + return 0; + return &tp_vfts[type]; +} + +static clib_error_t * +session_manager_main_init (vlib_main_t * vm) +{ + u32 num_threads; + vlib_thread_main_t *vtm = vlib_get_thread_main (); + session_manager_main_t *smm = &session_manager_main; + int i; + + smm->vlib_main = vm; + smm->vnet_main = vnet_get_main (); + + num_threads = 1 /* main thread */ + vtm->n_threads; + + if (num_threads < 1) + return clib_error_return (0, "n_thread_stacks not set"); + + /* $$$ config parameters */ + svm_fifo_segment_init (0x200000000ULL /* first segment base VA */ , + 20 /* timeout in seconds */ ); + + /* configure per-thread ** vectors */ + vec_validate (smm->sessions, num_threads - 1); + vec_validate (smm->session_indices_to_enqueue_by_thread, num_threads - 1); + vec_validate (smm->tx_buffers, num_threads - 1); + vec_validate (smm->fifo_events, num_threads - 1); + vec_validate (smm->evts_partially_read, num_threads - 1); + vec_validate (smm->current_enqueue_epoch, num_threads - 1); + vec_validate (smm->vpp_event_queues, num_threads - 1); + + /* $$$$ preallocate hack config parameter */ + for (i = 0; i < 200000; i++) + { + stream_session_t *ss; + pool_get (smm->sessions[0], ss); + memset (ss, 0, sizeof (*ss)); + } + + for (i = 0; i < 200000; i++) + pool_put_index (smm->sessions[0], i); + + clib_bihash_init_16_8 (&smm->v4_session_hash, "v4 session table", + 200000 /* $$$$ config parameter nbuckets */ , + (64 << 20) /*$$$ config parameter table size */ ); + clib_bihash_init_48_8 (&smm->v6_session_hash, "v6 session table", + 200000 /* $$$$ config parameter nbuckets */ , + (64 << 20) /*$$$ config parameter table size */ ); + + clib_bihash_init_16_8 (&smm->v4_half_open_hash, "v4 half-open table", + 200000 /* $$$$ config parameter nbuckets */ , + (64 << 20) /*$$$ config parameter table size */ ); + clib_bihash_init_48_8 (&smm->v6_half_open_hash, "v6 half-open table", + 200000 /* $$$$ config parameter nbuckets */ , + (64 << 20) /*$$$ config parameter table size */ ); + + for (i = 0; i < SESSION_N_TYPES; i++) + smm->connect_manager_index[i] = INVALID_INDEX; + + return 0; +} + +VLIB_INIT_FUNCTION (session_manager_main_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/session/session.h b/src/vnet/session/session.h new file mode 100644 index 00000000..cf14cca9 --- /dev/null +++ b/src/vnet/session/session.h @@ -0,0 +1,380 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __included_session_h__ +#define __included_session_h__ + +#include +#include +#include +#include +#include + +#define HALF_OPEN_LOOKUP_INVALID_VALUE ((u64)~0) +#define INVALID_INDEX ((u32)~0) + +/* TODO decide how much since we have pre-data as well */ +#define MAX_HDRS_LEN 100 /* Max number of bytes for headers */ + +typedef enum +{ + FIFO_EVENT_SERVER_RX, + FIFO_EVENT_SERVER_TX, + FIFO_EVENT_TIMEOUT, + FIFO_EVENT_SERVER_EXIT, +} fifo_event_type_t; + +#define foreach_session_input_error \ +_(NO_SESSION, "No session drops") \ +_(NO_LISTENER, "No listener for dst port drops") \ +_(ENQUEUED, "Packets pushed into rx fifo") \ +_(NOT_READY, "Session not ready packets") \ +_(FIFO_FULL, "Packets dropped for lack of rx fifo space") \ +_(EVENT_FIFO_FULL, "Events not sent for lack of event fifo space") \ +_(API_QUEUE_FULL, "Sessions not created for lack of API queue space") \ +_(NEW_SEG_NO_SPACE, "Created segment, couldn't allocate a fifo pair") \ +_(NO_SPACE, "Couldn't allocate a fifo pair") + +typedef enum +{ +#define _(sym,str) SESSION_ERROR_##sym, + foreach_session_input_error +#undef _ + SESSION_N_ERROR, +} session_error_t; + +/* Event queue input node static next indices */ +typedef enum +{ + SESSION_QUEUE_NEXT_DROP, + SESSION_QUEUE_NEXT_TCP_IP4_OUTPUT, + SESSION_QUEUE_NEXT_IP4_LOOKUP, + SESSION_QUEUE_NEXT_TCP_IP6_OUTPUT, + SESSION_QUEUE_NEXT_IP6_LOOKUP, + SESSION_QUEUE_N_NEXT, +} session_queue_next_t; + +#define foreach_session_type \ + _(IP4_TCP, ip4_tcp) \ + _(IP4_UDP, ip4_udp) \ + _(IP6_TCP, ip6_tcp) \ + _(IP6_UDP, ip6_udp) + +typedef enum +{ +#define _(A, a) SESSION_TYPE_##A, + foreach_session_type +#undef _ + SESSION_N_TYPES, +} session_type_t; + +/* + * Application session state + */ +typedef enum +{ + SESSION_STATE_LISTENING, + SESSION_STATE_CONNECTING, + SESSION_STATE_READY, + SESSION_STATE_CLOSED, + SESSION_STATE_N_STATES, +} stream_session_state_t; + +typedef CLIB_PACKED (struct + { + svm_fifo_t * fifo; + u8 event_type; + /* $$$$ for event logging */ + u16 event_id; + u32 enqueue_length; + }) session_fifo_event_t; + +typedef struct _stream_session_t +{ + /** Type */ + u8 session_type; + + /** State */ + u8 session_state; + + /** Session index in per_thread pool */ + u32 session_index; + + /** Transport specific */ + u32 connection_index; + + u8 thread_index; + + /** Application specific */ + u32 pid; + + /** fifo pointers. Once allocated, these do not move */ + svm_fifo_t *server_rx_fifo; + svm_fifo_t *server_tx_fifo; + + /** To avoid n**2 "one event per frame" check */ + u8 enqueue_epoch; + + /** used during unbind processing */ + u8 is_deleted; + + /** stream server pool index */ + u32 app_index; + + /** svm segment index */ + u32 server_segment_index; +} stream_session_t; + +typedef struct _session_manager +{ + /** segments mapped by this server */ + u32 *segment_indices; + + /** Session fifo sizes. 
They are provided for binds and take default + * values for connects */ + u32 rx_fifo_size; + u32 tx_fifo_size; + + /** Configured additional segment size */ + u32 add_segment_size; + + /** Flag that indicates if additional segments should be created */ + u8 add_segment; +} session_manager_t; + +/* Forward definition */ +typedef struct _session_manager_main session_manager_main_t; + +typedef int + (session_fifo_rx_fn) (vlib_main_t * vm, vlib_node_runtime_t * node, + session_manager_main_t * smm, + session_fifo_event_t * e0, stream_session_t * s0, + u32 thread_index, int *n_tx_pkts); + +extern session_fifo_rx_fn session_fifo_rx_peek; +extern session_fifo_rx_fn session_fifo_rx_dequeue; + +struct _session_manager_main +{ + /** Lookup tables for established sessions and listeners */ + clib_bihash_16_8_t v4_session_hash; + clib_bihash_48_8_t v6_session_hash; + + /** Lookup tables for half-open sessions */ + clib_bihash_16_8_t v4_half_open_hash; + clib_bihash_48_8_t v6_half_open_hash; + + /** Per worker thread session pools */ + stream_session_t **sessions; + + /** Pool of listen sessions. Same type as stream sessions to ease lookups */ + stream_session_t *listen_sessions[SESSION_N_TYPES]; + + /** Sparse vector to map dst port to stream server */ + u16 *stream_server_by_dst_port[SESSION_N_TYPES]; + + /** per-worker enqueue epoch counters */ + u8 *current_enqueue_epoch; + + /** Per-worker thread vector of sessions to enqueue */ + u32 **session_indices_to_enqueue_by_thread; + + /** per-worker tx buffer free lists */ + u32 **tx_buffers; + + /** Per worker-thread vector of partially read events */ + session_fifo_event_t **evts_partially_read; + + /** per-worker active event vectors */ + session_fifo_event_t **fifo_events; + + /** vpp fifo event queue */ + unix_shared_memory_queue_t **vpp_event_queues; + + /** Unique segment name counter */ + u32 unique_segment_name_counter; + + /* Connection manager used by incoming connects */ + u32 connect_manager_index[SESSION_N_TYPES]; + + session_manager_t *session_managers; + + /** Per transport rx function that can either dequeue or peek */ + session_fifo_rx_fn *session_rx_fns[SESSION_N_TYPES]; + + /* Convenience */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; +}; + +extern session_manager_main_t session_manager_main; + +/* + * Session manager function + */ +always_inline session_manager_main_t * +vnet_get_session_manager_main () +{ + return &session_manager_main; +} + +always_inline session_manager_t * +session_manager_get (u32 index) +{ + return pool_elt_at_index (session_manager_main.session_managers, index); +} + +always_inline unix_shared_memory_queue_t * +session_manager_get_vpp_event_queue (u32 thread_index) +{ + return session_manager_main.vpp_event_queues[thread_index]; +} + +always_inline session_manager_t * +connects_session_manager_get (session_manager_main_t * smm, + session_type_t session_type) +{ + return pool_elt_at_index (smm->session_managers, + smm->connect_manager_index[session_type]); +} + +void session_manager_get_segment_info (u32 index, u8 ** name, u32 * size); +int session_manager_flush_enqueue_events (u32 thread_index); +int +session_manager_add_first_segment (session_manager_main_t * smm, + session_manager_t * sm, u32 segment_size, + u8 ** segment_name); +void +session_manager_del (session_manager_main_t * smm, session_manager_t * sm); +void +connects_session_manager_init (session_manager_main_t * smm, u8 session_type); + +/* + * Stream session functions + */ + +stream_session_t *stream_session_lookup_listener4 (ip4_address_t 
* lcl, + u16 lcl_port, u8 proto); +stream_session_t *stream_session_lookup4 (ip4_address_t * lcl, + ip4_address_t * rmt, u16 lcl_port, + u16 rmt_port, u8 proto, + u32 thread_index); +stream_session_t *stream_session_lookup_listener6 (ip6_address_t * lcl, + u16 lcl_port, u8 proto); +stream_session_t *stream_session_lookup6 (ip6_address_t * lcl, + ip6_address_t * rmt, u16 lcl_port, + u16 rmt_port, u8, u32 thread_index); +transport_connection_t + * stream_session_lookup_transport4 (session_manager_main_t * smm, + ip4_address_t * lcl, + ip4_address_t * rmt, u16 lcl_port, + u16 rmt_port, u8 proto, + u32 thread_index); +transport_connection_t + * stream_session_lookup_transport6 (session_manager_main_t * smm, + ip6_address_t * lcl, + ip6_address_t * rmt, u16 lcl_port, + u16 rmt_port, u8 proto, + u32 thread_index); +stream_session_t *stream_session_lookup_listener (ip46_address_t * lcl, + u16 lcl_port, u8 proto); + +always_inline stream_session_t * +stream_session_get_tsi (u64 ti_and_si, u32 thread_index) +{ + ASSERT ((u32) (ti_and_si >> 32) == thread_index); + return pool_elt_at_index (session_manager_main.sessions[thread_index], + ti_and_si & 0xFFFFFFFFULL); +} + +always_inline stream_session_t * +stream_session_get (u64 si, u32 thread_index) +{ + return pool_elt_at_index (session_manager_main.sessions[thread_index], si); +} + +always_inline stream_session_t * +stream_session_get_if_valid (u64 si, u32 thread_index) +{ + if (thread_index >= vec_len (session_manager_main.sessions)) + return 0; + + if (pool_is_free_index (session_manager_main.sessions[thread_index], si)) + return 0; + + return pool_elt_at_index (session_manager_main.sessions[thread_index], si); +} + +always_inline stream_session_t * +stream_session_listener_get (u8 sst, u64 si) +{ + return pool_elt_at_index (session_manager_main.listen_sessions[sst], si); +} + +always_inline u32 +stream_session_get_index (stream_session_t * s) +{ + if (s->session_state == SESSION_STATE_LISTENING) + return s - session_manager_main.listen_sessions[s->session_type]; + + return s - session_manager_main.sessions[s->thread_index]; +} + +always_inline u32 +stream_session_max_enqueue (transport_connection_t * tc) +{ + stream_session_t *s = stream_session_get (tc->s_index, tc->thread_index); + return svm_fifo_max_enqueue (s->server_rx_fifo); +} + +int +stream_session_enqueue_data (transport_connection_t * tc, u8 * data, u16 len, + u8 queue_event); +u32 +stream_session_peek_bytes (transport_connection_t * tc, u8 * buffer, + u32 offset, u32 max_bytes); +u32 stream_session_dequeue_drop (transport_connection_t * tc, u32 max_bytes); + +void +stream_session_connect_notify (transport_connection_t * tc, u8 sst, + u8 is_fail); +void stream_session_accept_notify (transport_connection_t * tc); +void stream_session_disconnect_notify (transport_connection_t * tc); +void stream_session_delete_notify (transport_connection_t * tc); +void stream_session_reset_notify (transport_connection_t * tc); +int +stream_session_accept (transport_connection_t * tc, u32 listener_index, + u8 sst, u8 notify); +void stream_session_open (u8 sst, ip46_address_t * addr, + u16 port_host_byte_order, u32 api_client_index); +void stream_session_disconnect (stream_session_t * s); +void stream_session_cleanup (stream_session_t * s); +int +stream_session_start_listen (u32 server_index, ip46_address_t * ip, u16 port); +void stream_session_stop_listen (u32 server_index); + +u8 *format_stream_session (u8 * s, va_list * args); + +void session_register_transport (u8 type, const transport_proto_vft_t * 
vft); +transport_proto_vft_t *session_get_transport_vft (u8 type); + +#endif /* __included_session_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/session/session_api.c b/src/vnet/session/session_api.c new file mode 100644 index 00000000..9d068684 --- /dev/null +++ b/src/vnet/session/session_api.c @@ -0,0 +1,821 @@ +/* + * Copyright (c) 2015-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include "application_interface.h" + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include +#undef vl_printfun + +#include + +#define foreach_session_api_msg \ +_(MAP_ANOTHER_SEGMENT_REPLY, map_another_segment_reply) \ +_(BIND_URI, bind_uri) \ +_(UNBIND_URI, unbind_uri) \ +_(CONNECT_URI, connect_uri) \ +_(DISCONNECT_SESSION, disconnect_session) \ +_(DISCONNECT_SESSION_REPLY, disconnect_session_reply) \ +_(ACCEPT_SESSION_REPLY, accept_session_reply) \ +_(RESET_SESSION_REPLY, reset_session_reply) \ +_(BIND_SOCK, bind_sock) \ +_(UNBIND_SOCK, unbind_sock) \ +_(CONNECT_SOCK, connect_sock) \ +_(DISCONNECT_SOCK, disconnect_sock) \ +_(DISCONNECT_SOCK_REPLY, disconnect_sock_reply) \ +_(ACCEPT_SOCK_REPLY, accept_sock_reply) \ +_(RESET_SOCK_REPLY, reset_sock_reply) \ + +static int +send_add_segment_callback (u32 api_client_index, const u8 * segment_name, + u32 segment_size) +{ + vl_api_map_another_segment_t *mp; + unix_shared_memory_queue_t *q; + + q = vl_api_client_index_to_input_queue (api_client_index); + + if (!q) + return -1; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_MAP_ANOTHER_SEGMENT); + mp->segment_size = segment_size; + strncpy ((char *) mp->segment_name, (char *) segment_name, + sizeof (mp->segment_name) - 1); + + vl_msg_api_send_shmem (q, (u8 *) & mp); + + return 0; +} + +static int +send_session_accept_uri_callback (stream_session_t * s) +{ + vl_api_accept_session_t *mp; + unix_shared_memory_queue_t *q, *vpp_queue; + application_t *server = application_get (s->app_index); + + q = vl_api_client_index_to_input_queue (server->api_client_index); + vpp_queue = session_manager_get_vpp_event_queue (s->thread_index); + + if (!q) + return -1; + + mp = vl_msg_api_alloc (sizeof (*mp)); + mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_ACCEPT_SESSION); + + /* Note: session_type is the first octet in all types of sessions */ + + mp->accept_cookie = server->accept_cookie; + mp->server_rx_fifo = (u64) s->server_rx_fifo; + mp->server_tx_fifo = (u64) s->server_tx_fifo; + mp->session_thread_index = s->thread_index; + mp->session_index = s->session_index; + mp->session_type = 
s->session_type; + mp->vpp_event_queue_address = (u64) vpp_queue; + vl_msg_api_send_shmem (q, (u8 *) & mp); + + return 0; +} + +static void +send_session_disconnect_uri_callback (stream_session_t * s) +{ + vl_api_disconnect_session_t *mp; + unix_shared_memory_queue_t *q; + application_t *app = application_get (s->app_index); + + q = vl_api_client_index_to_input_queue (app->api_client_index); + + if (!q) + return; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_DISCONNECT_SESSION); + + mp->session_thread_index = s->thread_index; + mp->session_index = s->session_index; + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +static int +send_session_connected_uri_callback (u32 api_client_index, + stream_session_t * s, u8 is_fail) +{ + vl_api_connect_uri_reply_t *mp; + unix_shared_memory_queue_t *q; + application_t *app = application_lookup (api_client_index); + u8 *seg_name; + unix_shared_memory_queue_t *vpp_queue; + + q = vl_api_client_index_to_input_queue (app->api_client_index); + + if (!q) + return -1; + + mp = vl_msg_api_alloc (sizeof (*mp)); + mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_CONNECT_URI_REPLY); + mp->context = app->api_context; + mp->retval = is_fail; + if (!is_fail) + { + vpp_queue = session_manager_get_vpp_event_queue (s->thread_index); + mp->server_rx_fifo = (u64) s->server_rx_fifo; + mp->server_tx_fifo = (u64) s->server_tx_fifo; + mp->session_thread_index = s->thread_index; + mp->session_index = s->session_index; + mp->session_type = s->session_type; + mp->vpp_event_queue_address = (u64) vpp_queue; + mp->client_event_queue_address = (u64) app->event_queue; + + session_manager_get_segment_info (s->server_segment_index, &seg_name, + &mp->segment_size); + mp->segment_name_length = vec_len (seg_name); + if (mp->segment_name_length) + clib_memcpy (mp->segment_name, seg_name, mp->segment_name_length); + } + + vl_msg_api_send_shmem (q, (u8 *) & mp); + + /* Remove client if connect failed */ + if (is_fail) + application_del (app); + + return 0; +} + +/** + * Redirect a connect_uri message to the indicated server. + * Only sent if the server has bound the related port with + * URI_OPTIONS_FLAGS_USE_FIFO + */ +static int +redirect_connect_uri_callback (u32 server_api_client_index, void *mp_arg) +{ + vl_api_connect_uri_t *mp = mp_arg; + unix_shared_memory_queue_t *server_q, *client_q; + vlib_main_t *vm = vlib_get_main (); + f64 timeout = vlib_time_now (vm) + 0.5; + int rv = 0; + + server_q = vl_api_client_index_to_input_queue (server_api_client_index); + + if (!server_q) + { + rv = VNET_API_ERROR_INVALID_VALUE; + goto out; + } + + client_q = vl_api_client_index_to_input_queue (mp->client_index); + if (!client_q) + { + rv = VNET_API_ERROR_INVALID_VALUE_2; + goto out; + } + + /* Tell the server the client's API queue address, so it can reply */ + mp->client_queue_address = (u64) client_q; + + /* + * Bounce message handlers MUST NOT block the data-plane. 
+ * Spin waiting for the queue lock, but + */ + + while (vlib_time_now (vm) < timeout) + { + rv = + unix_shared_memory_queue_add (server_q, (u8 *) & mp, 1 /*nowait */ ); + switch (rv) + { + /* correctly enqueued */ + case 0: + return VNET_CONNECT_REDIRECTED; + + /* continue spinning, wait for pthread_mutex_trylock to work */ + case -1: + continue; + + /* queue stuffed, drop the msg */ + case -2: + rv = VNET_API_ERROR_QUEUE_FULL; + goto out; + } + } +out: + /* Dispose of the message */ + vl_msg_api_free (mp); + return rv; +} + +static u64 +make_session_handle (stream_session_t * s) +{ + return (u64) s->session_index << 32 | (u64) s->thread_index; +} + +static int +send_session_accept_callback (stream_session_t * s) +{ + vl_api_accept_sock_t *mp; + unix_shared_memory_queue_t *q, *vpp_queue; + application_t *server = application_get (s->app_index); + + q = vl_api_client_index_to_input_queue (server->api_client_index); + vpp_queue = session_manager_get_vpp_event_queue (s->thread_index); + + if (!q) + return -1; + + mp = vl_msg_api_alloc (sizeof (*mp)); + mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_ACCEPT_SOCK); + + /* Note: session_type is the first octet in all types of sessions */ + + mp->accept_cookie = server->accept_cookie; + mp->server_rx_fifo = (u64) s->server_rx_fifo; + mp->server_tx_fifo = (u64) s->server_tx_fifo; + mp->handle = make_session_handle (s); + mp->vpp_event_queue_address = (u64) vpp_queue; + vl_msg_api_send_shmem (q, (u8 *) & mp); + + return 0; +} + +static int +send_session_connected_callback (u32 api_client_index, stream_session_t * s, + u8 is_fail) +{ + vl_api_connect_sock_reply_t *mp; + unix_shared_memory_queue_t *q; + application_t *app = application_lookup (api_client_index); + u8 *seg_name; + unix_shared_memory_queue_t *vpp_queue; + + q = vl_api_client_index_to_input_queue (app->api_client_index); + + if (!q) + return -1; + + mp = vl_msg_api_alloc (sizeof (*mp)); + mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_CONNECT_SOCK_REPLY); + mp->context = app->api_context; + mp->retval = is_fail; + if (!is_fail) + { + vpp_queue = session_manager_get_vpp_event_queue (s->thread_index); + mp->server_rx_fifo = (u64) s->server_rx_fifo; + mp->server_tx_fifo = (u64) s->server_tx_fifo; + mp->handle = make_session_handle (s); + mp->vpp_event_queue_address = (u64) vpp_queue; + mp->client_event_queue_address = (u64) app->event_queue; + + session_manager_get_segment_info (s->server_segment_index, &seg_name, + &mp->segment_size); + mp->segment_name_length = vec_len (seg_name); + if (mp->segment_name_length) + clib_memcpy (mp->segment_name, seg_name, mp->segment_name_length); + } + + vl_msg_api_send_shmem (q, (u8 *) & mp); + + /* Remove client if connect failed */ + if (is_fail) + application_del (app); + + return 0; +} + +static void +send_session_disconnect_callback (stream_session_t * s) +{ + vl_api_disconnect_sock_t *mp; + unix_shared_memory_queue_t *q; + application_t *app = application_get (s->app_index); + + q = vl_api_client_index_to_input_queue (app->api_client_index); + + if (!q) + return; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_DISCONNECT_SOCK); + + mp->handle = make_session_handle (s); + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +/** + * Redirect a connect_uri message to the indicated server. 
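+ * (This is the socket-API counterpart of redirect_connect_uri_callback.)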
+ * Only sent if the server has bound the related port with + * URI_OPTIONS_FLAGS_USE_FIFO + */ +static int +redirect_connect_callback (u32 server_api_client_index, void *mp_arg) +{ + vl_api_connect_sock_t *mp = mp_arg; + unix_shared_memory_queue_t *server_q, *client_q; + vlib_main_t *vm = vlib_get_main (); + f64 timeout = vlib_time_now (vm) + 0.5; + int rv = 0; + + server_q = vl_api_client_index_to_input_queue (server_api_client_index); + + if (!server_q) + { + rv = VNET_API_ERROR_INVALID_VALUE; + goto out; + } + + client_q = vl_api_client_index_to_input_queue (mp->client_index); + if (!client_q) + { + rv = VNET_API_ERROR_INVALID_VALUE_2; + goto out; + } + + /* Tell the server the client's API queue address, so it can reply */ + mp->client_queue_address = (u64) client_q; + + /* + * Bounce message handlers MUST NOT block the data-plane. + * Spin waiting for the queue lock, but + */ + + while (vlib_time_now (vm) < timeout) + { + rv = + unix_shared_memory_queue_add (server_q, (u8 *) & mp, 1 /*nowait */ ); + switch (rv) + { + /* correctly enqueued */ + case 0: + return VNET_CONNECT_REDIRECTED; + + /* continue spinning, wait for pthread_mutex_trylock to work */ + case -1: + continue; + + /* queue stuffed, drop the msg */ + case -2: + rv = VNET_API_ERROR_QUEUE_FULL; + goto out; + } + } +out: + /* Dispose of the message */ + vl_msg_api_free (mp); + return rv; +} + +static session_cb_vft_t uri_session_cb_vft = { + .session_accept_callback = send_session_accept_uri_callback, + .session_disconnect_callback = send_session_disconnect_uri_callback, + .session_connected_callback = send_session_connected_uri_callback, + .add_segment_callback = send_add_segment_callback, + .redirect_connect_callback = redirect_connect_uri_callback +}; + +static session_cb_vft_t session_cb_vft = { + .session_accept_callback = send_session_accept_callback, + .session_disconnect_callback = send_session_disconnect_callback, + .session_connected_callback = send_session_connected_callback, + .add_segment_callback = send_add_segment_callback, + .redirect_connect_callback = redirect_connect_callback +}; + +static int +api_session_not_valid (u32 session_index, u32 thread_index) +{ + session_manager_main_t *smm = vnet_get_session_manager_main (); + stream_session_t *pool; + + if (thread_index >= vec_len (smm->sessions)) + return VNET_API_ERROR_INVALID_VALUE; + + pool = smm->sessions[thread_index]; + + if (pool_is_free_index (pool, session_index)) + return VNET_API_ERROR_INVALID_VALUE_2; + + return 0; +} + +static void +vl_api_bind_uri_t_handler (vl_api_bind_uri_t * mp) +{ + vl_api_bind_uri_reply_t *rmp; + vnet_bind_args_t _a, *a = &_a; + char segment_name[128]; + u32 segment_name_length; + int rv; + + _Static_assert (sizeof (u64) * SESSION_OPTIONS_N_OPTIONS <= + sizeof (mp->options), + "Out of options, fix api message definition"); + + segment_name_length = ARRAY_LEN (segment_name); + + memset (a, 0, sizeof (*a)); + + a->uri = (char *) mp->uri; + a->api_client_index = mp->client_index; + a->options = mp->options; + a->segment_name = segment_name; + a->segment_name_length = segment_name_length; + a->session_cb_vft = &uri_session_cb_vft; + + a->options[SESSION_OPTIONS_SEGMENT_SIZE] = mp->initial_segment_size; + a->options[SESSION_OPTIONS_ACCEPT_COOKIE] = mp->accept_cookie; + rv = vnet_bind_uri (a); + + /* *INDENT-OFF* */ + REPLY_MACRO2 (VL_API_BIND_URI_REPLY, ({ + rmp->retval = rv; + if (!rv) + { + rmp->segment_name_length = 0; + /* $$$$ policy? 
*/ + rmp->segment_size = mp->initial_segment_size; + if (segment_name_length) + { + memcpy (rmp->segment_name, segment_name, segment_name_length); + rmp->segment_name_length = segment_name_length; + } + rmp->server_event_queue_address = a->server_event_queue_address; + } + })); + /* *INDENT-ON* */ + +} + +static void +vl_api_unbind_uri_t_handler (vl_api_unbind_uri_t * mp) +{ + vl_api_unbind_uri_reply_t *rmp; + int rv; + + rv = vnet_unbind_uri ((char *) mp->uri, mp->client_index); + + REPLY_MACRO (VL_API_UNBIND_URI_REPLY); +} + +static void +vl_api_connect_uri_t_handler (vl_api_connect_uri_t * mp) +{ + vnet_connect_args_t _a, *a = &_a; + + a->uri = (char *) mp->uri; + a->api_client_index = mp->client_index; + a->api_context = mp->context; + a->options = mp->options; + a->session_cb_vft = &uri_session_cb_vft; + a->mp = mp; + vnet_connect_uri (a); +} + +static void +vl_api_disconnect_session_t_handler (vl_api_disconnect_session_t * mp) +{ + vl_api_disconnect_session_reply_t *rmp; + int rv; + + rv = api_session_not_valid (mp->session_index, mp->session_thread_index); + if (!rv) + rv = vnet_disconnect_session (mp->client_index, mp->session_index, + mp->session_thread_index); + + REPLY_MACRO (VL_API_DISCONNECT_SESSION_REPLY); +} + +static void +vl_api_disconnect_session_reply_t_handler (vl_api_disconnect_session_reply_t * + mp) +{ + if (api_session_not_valid (mp->session_index, mp->session_thread_index)) + { + clib_warning ("Invalid session!"); + return; + } + + /* Client objected to disconnecting the session, log and continue */ + if (mp->retval) + { + clib_warning ("client retval %d", mp->retval); + return; + } + + /* Disconnect has been confirmed. Confirm close to transport */ + vnet_disconnect_session (mp->client_index, mp->session_index, + mp->session_thread_index); +} + +static void +vl_api_reset_session_reply_t_handler (vl_api_reset_session_reply_t * mp) +{ + stream_session_t *s; + + if (api_session_not_valid (mp->session_index, mp->session_thread_index)) + { + clib_warning ("Invalid session!"); + return; + } + + /* Client objected to resetting the session, log and continue */ + if (mp->retval) + { + clib_warning ("client retval %d", mp->retval); + return; + } + + s = stream_session_get (mp->session_index, mp->session_thread_index); + + /* This comes as a response to a reset, transport only waiting for + * confirmation to remove connection state, no need to disconnect */ + stream_session_cleanup (s); +} + +static void +vl_api_accept_session_reply_t_handler (vl_api_accept_session_reply_t * mp) +{ + stream_session_t *s; + int rv; + + if (api_session_not_valid (mp->session_index, mp->session_thread_index)) + return; + + s = stream_session_get (mp->session_index, mp->session_thread_index); + rv = mp->retval; + + if (rv) + { + /* Server isn't interested, kill the session */ + stream_session_disconnect (s); + return; + } + + s->session_state = SESSION_STATE_READY; +} + +static void +vl_api_map_another_segment_reply_t_handler (vl_api_map_another_segment_reply_t + * mp) +{ + clib_warning ("not implemented"); +} + +static void +vl_api_bind_sock_t_handler (vl_api_bind_sock_t * mp) +{ + vl_api_bind_sock_reply_t *rmp; + vnet_bind_args_t _a, *a = &_a; + char segment_name[128]; + u32 segment_name_length; + int rv; + + STATIC_ASSERT (sizeof (u64) * SESSION_OPTIONS_N_OPTIONS <= + sizeof (mp->options), + "Out of options, fix api message definition"); + + segment_name_length = ARRAY_LEN (segment_name); + + memset (a, 0, sizeof (*a)); + + clib_memcpy (&a->tep.ip, mp->ip, + (mp->is_ip4 ? 
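For now the reply simply echoes the client's requested initial segment size.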
sizeof (ip4_address_t) : + sizeof (ip6_address_t))); + a->tep.is_ip4 = mp->is_ip4; + a->tep.port = mp->port; + a->tep.vrf = mp->vrf; + + a->api_client_index = mp->client_index; + a->options = mp->options; + a->segment_name = segment_name; + a->segment_name_length = segment_name_length; + a->session_cb_vft = &session_cb_vft; + + rv = vnet_bind_uri (a); + + /* *INDENT-OFF* */ + REPLY_MACRO2 (VL_API_BIND_SOCK_REPLY, ({ + rmp->retval = rv; + if (!rv) + { + rmp->segment_name_length = 0; + rmp->segment_size = mp->options[SESSION_OPTIONS_SEGMENT_SIZE]; + if (segment_name_length) + { + memcpy(rmp->segment_name, segment_name, segment_name_length); + rmp->segment_name_length = segment_name_length; + } + rmp->server_event_queue_address = a->server_event_queue_address; + } + })); + /* *INDENT-ON* */ +} + +static void +vl_api_unbind_sock_t_handler (vl_api_unbind_sock_t * mp) +{ + vl_api_unbind_sock_reply_t *rmp; + vnet_unbind_args_t _a, *a = &_a; + int rv; + + a->api_client_index = mp->client_index; + a->handle = mp->handle; + + rv = vnet_unbind (a); + + REPLY_MACRO (VL_API_UNBIND_SOCK_REPLY); +} + +static void +vl_api_connect_sock_t_handler (vl_api_connect_sock_t * mp) +{ + vnet_connect_args_t _a, *a = &_a; + + clib_memcpy (&a->tep.ip, mp->ip, + (mp->is_ip4 ? sizeof (ip4_address_t) : + sizeof (ip6_address_t))); + a->tep.is_ip4 = mp->is_ip4; + a->tep.port = mp->port; + a->tep.vrf = mp->vrf; + a->options = mp->options; + a->session_cb_vft = &session_cb_vft; + a->api_context = mp->context; + a->mp = mp; + + vnet_connect (a); +} + +static void +vl_api_disconnect_sock_t_handler (vl_api_disconnect_sock_t * mp) +{ + vnet_disconnect_args_t _a, *a = &_a; + vl_api_disconnect_sock_reply_t *rmp; + int rv; + + a->api_client_index = mp->client_index; + a->handle = mp->handle; + rv = vnet_disconnect (a); + + REPLY_MACRO (VL_API_DISCONNECT_SOCK_REPLY); +} + +static void +vl_api_disconnect_sock_reply_t_handler (vl_api_disconnect_sock_reply_t * mp) +{ + vnet_disconnect_args_t _a, *a = &_a; + + /* Client objected to disconnecting the session, log and continue */ + if (mp->retval) + { + clib_warning ("client retval %d", mp->retval); + return; + } + + a->api_client_index = mp->client_index; + a->handle = mp->handle; + + vnet_disconnect (a); +} + +static void +vl_api_reset_sock_reply_t_handler (vl_api_reset_sock_reply_t * mp) +{ + stream_session_t *s; + u32 session_index, thread_index; + + /* Client objected to resetting the session, log and continue */ + if (mp->retval) + { + clib_warning ("client retval %d", mp->retval); + return; + } + + if (api_parse_session_handle (mp->handle, &session_index, &thread_index)) + { + clib_warning ("Invalid handle"); + return; + } + + s = stream_session_get (session_index, thread_index); + + /* This comes as a response to a reset, transport only waiting for + * confirmation to remove connection state, no need to disconnect */ + stream_session_cleanup (s); +} + +static void +vl_api_accept_sock_reply_t_handler (vl_api_accept_sock_reply_t * mp) +{ + stream_session_t *s; + u32 session_index, thread_index; + + if (api_parse_session_handle (mp->handle, &session_index, &thread_index)) + { + clib_warning ("Invalid handle"); + return; + } + s = stream_session_get (session_index, thread_index); + + if (mp->retval) + { + /* Server isn't interested, kill the session */ + stream_session_disconnect (s); + return; + } + + s->session_state = SESSION_STATE_READY; +} + +#define vl_msg_name_crc_list +#include +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (api_main_t * am) +{ +#define 
_(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id); + foreach_vl_msg_name_crc_session; +#undef _ +} + +/* + * session_api_hookup + * Add uri's API message handlers to the table. + * vlib has alread mapped shared memory and + * added the client registration handlers. + * See .../open-repo/vlib/memclnt_vlib.c:memclnt_process() + */ +static clib_error_t * +session_api_hookup (vlib_main_t * vm) +{ + api_main_t *am = &api_main; + +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_session_api_msg; +#undef _ + + /* + * Messages which bounce off the data-plane to + * an API client. Simply tells the message handling infra not + * to free the message. + * + * Bounced message handlers MUST NOT block the data plane + */ + am->message_bounce[VL_API_CONNECT_URI] = 1; + am->message_bounce[VL_API_CONNECT_SOCK] = 1; + + /* + * Set up the (msg_name, crc, message-id) table + */ + setup_message_id_table (am); + + return 0; +} + +VLIB_API_INIT_FUNCTION (session_api_hookup); +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/session/session_cli.c b/src/vnet/session/session_cli.c new file mode 100644 index 00000000..b2943a1c --- /dev/null +++ b/src/vnet/session/session_cli.c @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include + +/** + * Format stream session as per the following format + * + * verbose: + * "Connection", "Rx fifo", "Tx fifo", "Session Index" + * non-verbose: + * "Connection" + */ +u8 * +format_stream_session (u8 * s, va_list * args) +{ + stream_session_t *ss = va_arg (*args, stream_session_t *); + int verbose = va_arg (*args, int); + transport_proto_vft_t *tp_vft; + u8 *str = 0; + + tp_vft = session_get_transport_vft (ss->session_type); + + if (verbose) + str = format (0, "%-20llp%-20llp%-15lld", ss->server_rx_fifo, + ss->server_tx_fifo, stream_session_get_index (ss)); + + if (ss->session_state == SESSION_STATE_READY) + { + s = format (s, "%-40U%v", tp_vft->format_connection, + ss->connection_index, ss->thread_index, str); + } + else if (ss->session_state == SESSION_STATE_LISTENING) + { + s = format (s, "%-40U%v", tp_vft->format_listener, ss->connection_index, + str); + } + else if (ss->session_state == SESSION_STATE_READY) + { + s = + format (s, "%-40U%v", tp_vft->format_half_open, ss->connection_index, + str); + } + else if (ss->session_state == SESSION_STATE_CLOSED) + { + s = format (s, "[CL] %-40U%v", tp_vft->format_connection, + ss->connection_index, ss->thread_index, str); + } + else + { + clib_warning ("Session in unknown state!"); + } + + vec_free (str); + + return s; +} + +static clib_error_t * +show_session_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + session_manager_main_t *smm = &session_manager_main; + int verbose = 0, i; + stream_session_t *pool; + stream_session_t *s; + u8 *str = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "verbose")) + verbose = 1; + else + break; + } + + for (i = 0; i < vec_len (smm->sessions); i++) + { + u32 once_per_pool; + pool = smm->sessions[i]; + + once_per_pool = 1; + + if (pool_elts (pool)) + { + + vlib_cli_output (vm, "Thread %d: %d active sessions", + i, pool_elts (pool)); + if (verbose) + { + if (once_per_pool) + { + str = format (str, "%-40s%-20s%-20s%-15s", + "Connection", "Rx fifo", "Tx fifo", + "Session Index"); + vlib_cli_output (vm, "%v", str); + vec_reset_length (str); + once_per_pool = 0; + } + + /* *INDENT-OFF* */ + pool_foreach (s, pool, + ({ + vlib_cli_output (vm, "%U", format_stream_session, s, verbose); + })); + /* *INDENT-ON* */ + } + } + else + vlib_cli_output (vm, "Thread %d: no active sessions", i); + } + vec_free (str); + + return 0; +} + +VLIB_CLI_COMMAND (show_uri_command, static) = +{ +.path = "show session",.short_help = "show session [verbose]",.function = + show_session_command_fn,}; + + +static clib_error_t * +clear_session_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + session_manager_main_t *smm = &session_manager_main; + u32 thread_index = 0; + u32 session_index = ~0; + stream_session_t *pool, *session; + application_t *server; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "thread %d", &thread_index)) + ; + else if (unformat (input, "session %d", &session_index)) + ; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + if (session_index == ~0) + return clib_error_return (0, "session required, but not set."); + + if (thread_index > vec_len (smm->sessions)) + return clib_error_return (0, "thread %d out of range [0-%d]", + thread_index, vec_len (smm->sessions)); + + pool = smm->sessions[thread_index]; + + if (pool_is_free_index (pool, session_index)) + return 
clib_error_return (0, "session %d not active", session_index); + + session = pool_elt_at_index (pool, session_index); + server = application_get (session->app_index); + + /* Disconnect both app and transport */ + server->cb_fns.session_disconnect_callback (session); + + return 0; +} + +VLIB_CLI_COMMAND (clear_uri_session_command, static) = +{ +.path = "clear session",.short_help = + "clear session thread session ",.function = + clear_session_command_fn,}; + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/session/transport.c b/src/vnet/session/transport.c new file mode 100644 index 00000000..abd94ba4 --- /dev/null +++ b/src/vnet/session/transport.c @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +u32 +transport_endpoint_lookup (transport_endpoint_table_t *ht, ip46_address_t *ip, + u16 port) +{ + clib_bihash_kv_24_8_t kv; + int rv; + + kv.key[0] = ip->as_u64[0]; + kv.key[1] = ip->as_u64[1]; + kv.key[2] = port; + + rv = clib_bihash_search_inline_24_8 (ht, &kv); + if (rv == 0) + return kv.value; + + return TRANSPORT_ENDPOINT_INVALID_INDEX; +} + +void +transport_endpoint_table_add (transport_endpoint_table_t *ht, + transport_endpoint_t *te, u32 value) +{ + clib_bihash_kv_24_8_t kv; + + kv.key[0] = te->ip.as_u64[0]; + kv.key[1] = te->ip.as_u64[1]; + kv.key[2] = te->port; + kv.value = value; + + clib_bihash_add_del_24_8 (ht, &kv, 1); +} + +void +transport_endpoint_table_del (transport_endpoint_table_t *ht, + transport_endpoint_t *te) +{ + clib_bihash_kv_24_8_t kv; + + kv.key[0] = te->ip.as_u64[0]; + kv.key[1] = te->ip.as_u64[1]; + kv.key[2] = te->port; + + clib_bihash_add_del_24_8 (ht, &kv, 0); +} + + + diff --git a/src/vnet/session/transport.h b/src/vnet/session/transport.h new file mode 100644 index 00000000..2d4415ba --- /dev/null +++ b/src/vnet/session/transport.h @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef VNET_VNET_URI_TRANSPORT_H_ +#define VNET_VNET_URI_TRANSPORT_H_ + +#include +#include +#include +#include + +/* + * Protocol independent transport properties associated to a session + */ +typedef struct _transport_connection +{ + ip46_address_t rmt_ip; /**< Remote IP */ + ip46_address_t lcl_ip; /**< Local IP */ + u16 lcl_port; /**< Local port */ + u16 rmt_port; /**< Remote port */ + u8 proto; /**< Transport protocol id */ + + u32 s_index; /**< Parent session index */ + u32 c_index; /**< Connection index in transport pool */ + u8 is_ip4; /**< Flag if IP4 connection */ + u32 thread_index; /**< Worker-thread index */ + + /** Macros for 'derived classes' where base is named "connection" */ +#define c_lcl_ip connection.lcl_ip +#define c_rmt_ip connection.rmt_ip +#define c_lcl_ip4 connection.lcl_ip.ip4 +#define c_rmt_ip4 connection.rmt_ip.ip4 +#define c_lcl_ip6 connection.lcl_ip.ip6 +#define c_rmt_ip6 connection.rmt_ip.ip6 +#define c_lcl_port connection.lcl_port +#define c_rmt_port connection.rmt_port +#define c_proto connection.proto +#define c_state connection.state +#define c_s_index connection.s_index +#define c_c_index connection.c_index +#define c_is_ip4 connection.is_ip4 +#define c_thread_index connection.thread_index +} transport_connection_t; + +/* + * Transport protocol virtual function table + */ +typedef struct _transport_proto_vft +{ + /* + * Setup + */ + u32 (*bind) (vlib_main_t *, u32, ip46_address_t *, u16); + u32 (*unbind) (vlib_main_t *, u32); + int (*open) (ip46_address_t * addr, u16 port_host_byte_order); + void (*close) (u32 conn_index, u32 thread_index); + void (*cleanup) (u32 conn_index, u32 thread_index); + + /* + * Transmission + */ + u32 (*push_header) (transport_connection_t * tconn, vlib_buffer_t * b); + u16 (*send_mss) (transport_connection_t * tc); + u32 (*send_space) (transport_connection_t * tc); + u32 (*rx_fifo_offset) (transport_connection_t * tc); + + /* + * Connection retrieval + */ + transport_connection_t *(*get_connection) (u32 conn_idx, u32 thread_idx); + transport_connection_t *(*get_listener) (u32 conn_index); + transport_connection_t *(*get_half_open) (u32 conn_index); + + /* + * Format + */ + u8 *(*format_connection) (u8 * s, va_list * args); + u8 *(*format_listener) (u8 * s, va_list * args); + u8 *(*format_half_open) (u8 * s, va_list * args); + +} transport_proto_vft_t; + +/* 16 octets */ +typedef CLIB_PACKED (struct + { + union + { + struct + { + ip4_address_t src; ip4_address_t dst; + u16 src_port; + u16 dst_port; + /* align by making this 4 octets even though its a 1-bit field + * NOTE: avoid key overlap with other transports that use 5 tuples for + * session identification. 
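+ *
+ * Taken together the v4 key is 16 octets (two u64s), i.e. exactly the
+ * key size of the clib_bihash_kv_16_8_t (session_kv4_t) filled in by the
+ * make_v4_*_kv helpers below.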
+ */ + u32 proto; + }; + u64 as_u64[2]; + }; + }) v4_connection_key_t; + +typedef CLIB_PACKED (struct + { + union + { + struct + { + /* 48 octets */ + ip6_address_t src; ip6_address_t dst; + u16 src_port; + u16 dst_port; u32 proto; u8 unused_for_now[8]; + }; u64 as_u64[6]; + }; + }) v6_connection_key_t; + +typedef clib_bihash_kv_16_8_t session_kv4_t; +typedef clib_bihash_kv_48_8_t session_kv6_t; + +always_inline void +make_v4_ss_kv (session_kv4_t * kv, ip4_address_t * lcl, ip4_address_t * rmt, + u16 lcl_port, u16 rmt_port, u8 proto) +{ + v4_connection_key_t key; + memset (&key, 0, sizeof (v4_connection_key_t)); + + key.src.as_u32 = lcl->as_u32; + key.dst.as_u32 = rmt->as_u32; + key.src_port = lcl_port; + key.dst_port = rmt_port; + key.proto = proto; + + kv->key[0] = key.as_u64[0]; + kv->key[1] = key.as_u64[1]; + kv->value = ~0ULL; +} + +always_inline void +make_v4_listener_kv (session_kv4_t * kv, ip4_address_t * lcl, u16 lcl_port, + u8 proto) +{ + v4_connection_key_t key; + memset (&key, 0, sizeof (v4_connection_key_t)); + + key.src.as_u32 = lcl->as_u32; + key.dst.as_u32 = 0; + key.src_port = lcl_port; + key.dst_port = 0; + key.proto = proto; + + kv->key[0] = key.as_u64[0]; + kv->key[1] = key.as_u64[1]; + kv->value = ~0ULL; +} + +always_inline void +make_v4_ss_kv_from_tc (session_kv4_t * kv, transport_connection_t * t) +{ + return make_v4_ss_kv (kv, &t->lcl_ip.ip4, &t->rmt_ip.ip4, t->lcl_port, + t->rmt_port, t->proto); +} + +always_inline void +make_v6_ss_kv (session_kv6_t * kv, ip6_address_t * lcl, ip6_address_t * rmt, + u16 lcl_port, u16 rmt_port, u8 proto) +{ + v6_connection_key_t key; + memset (&key, 0, sizeof (v6_connection_key_t)); + + key.src.as_u64[0] = lcl->as_u64[0]; + key.src.as_u64[1] = lcl->as_u64[1]; + key.dst.as_u64[0] = rmt->as_u64[0]; + key.dst.as_u64[1] = rmt->as_u64[1]; + key.src_port = lcl_port; + key.dst_port = rmt_port; + key.proto = proto; + + kv->key[0] = key.as_u64[0]; + kv->key[1] = key.as_u64[1]; + kv->value = ~0ULL; +} + +always_inline void +make_v6_listener_kv (session_kv6_t * kv, ip6_address_t * lcl, u16 lcl_port, + u8 proto) +{ + v6_connection_key_t key; + memset (&key, 0, sizeof (v6_connection_key_t)); + + key.src.as_u64[0] = lcl->as_u64[0]; + key.src.as_u64[1] = lcl->as_u64[1]; + key.dst.as_u64[0] = 0; + key.dst.as_u64[1] = 0; + key.src_port = lcl_port; + key.dst_port = 0; + key.proto = proto; + + kv->key[0] = key.as_u64[0]; + kv->key[1] = key.as_u64[1]; + kv->value = ~0ULL; +} + +always_inline void +make_v6_ss_kv_from_tc (session_kv6_t * kv, transport_connection_t * t) +{ + make_v6_ss_kv (kv, &t->lcl_ip.ip6, &t->rmt_ip.ip6, t->lcl_port, + t->rmt_port, t->proto); +} + +typedef struct _transport_endpoint +{ + ip46_address_t ip; + u16 port; + u8 is_ip4; + u32 vrf; +} transport_endpoint_t; + +typedef clib_bihash_24_8_t transport_endpoint_table_t; + +#define TRANSPORT_ENDPOINT_INVALID_INDEX ((u32)~0) + +u32 +transport_endpoint_lookup (transport_endpoint_table_t * ht, + ip46_address_t * ip, u16 port); +void transport_endpoint_table_add (transport_endpoint_table_t * ht, + transport_endpoint_t * te, u32 value); +void transport_endpoint_table_del (transport_endpoint_table_t * ht, + transport_endpoint_t * te); + +#endif /* VNET_VNET_URI_TRANSPORT_H_ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c new file mode 100644 index 00000000..0f9b7097 --- /dev/null +++ b/src/vnet/tcp/tcp.c @@ -0,0 +1,708 @@ +/* + * Copyright (c) 2016 Cisco and/or 
its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +tcp_main_t tcp_main; + +static u32 +tcp_connection_bind (vlib_main_t * vm, u32 session_index, ip46_address_t * ip, + u16 port_host_byte_order, u8 is_ip4) +{ + tcp_main_t *tm = &tcp_main; + tcp_connection_t *listener; + + pool_get (tm->listener_pool, listener); + memset (listener, 0, sizeof (*listener)); + + listener->c_c_index = listener - tm->listener_pool; + listener->c_lcl_port = clib_host_to_net_u16 (port_host_byte_order); + + if (is_ip4) + listener->c_lcl_ip4.as_u32 = ip->ip4.as_u32; + else + clib_memcpy (&listener->c_lcl_ip6, &ip->ip6, sizeof (ip6_address_t)); + + listener->c_s_index = session_index; + listener->c_proto = SESSION_TYPE_IP4_TCP; + listener->state = TCP_STATE_LISTEN; + listener->c_is_ip4 = 1; + + return listener->c_c_index; +} + +u32 +tcp_session_bind_ip4 (vlib_main_t * vm, u32 session_index, + ip46_address_t * ip, u16 port_host_byte_order) +{ + return tcp_connection_bind (vm, session_index, ip, port_host_byte_order, 1); +} + +u32 +tcp_session_bind_ip6 (vlib_main_t * vm, u32 session_index, + ip46_address_t * ip, u16 port_host_byte_order) +{ + return tcp_connection_bind (vm, session_index, ip, port_host_byte_order, 0); + +} + +static void +tcp_session_unbind (u32 listener_index) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + pool_put_index (tm->listener_pool, listener_index); +} + +u32 +tcp_session_unbind_ip4 (vlib_main_t * vm, u32 listener_index) +{ + tcp_session_unbind (listener_index); + return 0; +} + +u32 +tcp_session_unbind_ip6 (vlib_main_t * vm, u32 listener_index) +{ + tcp_session_unbind (listener_index); + return 0; +} + +transport_connection_t * +tcp_session_get_listener (u32 listener_index) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + tcp_connection_t *tc; + tc = pool_elt_at_index (tm->listener_pool, listener_index); + return &tc->connection; +} + +/** + * Cleans up connection state. + * + * No notifications. + */ +void +tcp_connection_cleanup (tcp_connection_t * tc) +{ + tcp_main_t *tm = &tcp_main; + u32 tepi; + transport_endpoint_t *tep; + + /* Cleanup local endpoint if this was an active connect */ + tepi = transport_endpoint_lookup (&tm->local_endpoints_table, &tc->c_lcl_ip, + tc->c_lcl_port); + + /*XXX lock */ + if (tepi != TRANSPORT_ENDPOINT_INVALID_INDEX) + { + tep = pool_elt_at_index (tm->local_endpoints, tepi); + transport_endpoint_table_del (&tm->local_endpoints_table, tep); + pool_put (tm->local_endpoints, tep); + } + + /* Make sure all timers are cleared */ + tcp_connection_timers_reset (tc); + + /* Check if half-open */ + if (tc->state == TCP_STATE_SYN_SENT) + pool_put (tm->half_open_connections, tc); + else + pool_put (tm->connections[tc->c_thread_index], tc); +} + +/** + * Connection removal. + * + * This should be called only once connection enters CLOSED state. Note + * that it notifies the session of the removal event, so if the goal is to + * just remove the connection, call tcp_connection_cleanup instead. 
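+ *
+ * Minimal caller sketch (illustrative only; the caller shown here is
+ * hypothetical):
+ *
+ *   tc = tcp_connection_get (conn_index, thread_index);
+ *   if (tc->state == TCP_STATE_CLOSED)
+ *     tcp_connection_del (tc);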
+ */ +void +tcp_connection_del (tcp_connection_t * tc) +{ + stream_session_delete_notify (&tc->connection); + tcp_connection_cleanup (tc); +} + +/** + * Begin connection closing procedure. + * + * If at the end the connection is not in CLOSED state, it is not removed. + * Instead, we rely on on TCP to advance through state machine to either + * 1) LAST_ACK (passive close) whereby when the last ACK is received + * tcp_connection_del is called. This notifies session of the delete and + * calls cleanup. + * 2) TIME_WAIT (active close) whereby after 2MSL the 2MSL timer triggers + * and cleanup is called. + */ +void +tcp_connection_close (tcp_connection_t * tc) +{ + /* Send FIN if needed */ + if (tc->state == TCP_STATE_ESTABLISHED || tc->state == TCP_STATE_SYN_RCVD + || tc->state == TCP_STATE_CLOSE_WAIT) + tcp_send_fin (tc); + + /* Switch state */ + if (tc->state == TCP_STATE_ESTABLISHED || tc->state == TCP_STATE_SYN_RCVD) + tc->state = TCP_STATE_FIN_WAIT_1; + else if (tc->state == TCP_STATE_SYN_SENT) + tc->state = TCP_STATE_CLOSED; + else if (tc->state == TCP_STATE_CLOSE_WAIT) + tc->state = TCP_STATE_LAST_ACK; + + /* Half-close connections are not supported XXX */ + + if (tc->state == TCP_STATE_CLOSED) + tcp_connection_del (tc); +} + +void +tcp_session_close (u32 conn_index, u32 thread_index) +{ + tcp_connection_t *tc; + tc = tcp_connection_get (conn_index, thread_index); + tcp_connection_close (tc); +} + +void +tcp_session_cleanup (u32 conn_index, u32 thread_index) +{ + tcp_connection_t *tc; + tc = tcp_connection_get (conn_index, thread_index); + tcp_connection_cleanup (tc); +} + +void * +ip_interface_get_first_ip (u32 sw_if_index, u8 is_ip4) +{ + ip_lookup_main_t *lm4 = &ip4_main.lookup_main; + ip_lookup_main_t *lm6 = &ip6_main.lookup_main; + ip_interface_address_t *ia = 0; + + if (is_ip4) + { + /* *INDENT-OFF* */ + foreach_ip_interface_address (lm4, ia, sw_if_index, 1 /* unnumbered */ , + ({ + return ip_interface_address_get_address (lm4, ia); + })); + /* *INDENT-ON* */ + } + else + { + /* *INDENT-OFF* */ + foreach_ip_interface_address (lm6, ia, sw_if_index, 1 /* unnumbered */ , + ({ + return ip_interface_address_get_address (lm6, ia); + })); + /* *INDENT-ON* */ + } + + return 0; +} + +/** + * Allocate local port and add if successful add entry to local endpoint + * table to mark the pair as used. + */ +u16 +tcp_allocate_local_port (tcp_main_t * tm, ip46_address_t * ip) +{ + u8 unique = 0; + transport_endpoint_t *tep; + u32 time_now, tei; + u16 min = 1024, max = 65535, tries; /* XXX configurable ? */ + + tries = max - min; + time_now = tcp_time_now (); + + /* Start at random point or max */ + pool_get (tm->local_endpoints, tep); + clib_memcpy (&tep->ip, ip, sizeof (*ip)); + tep->port = random_u32 (&time_now) << 16; + tep->port = tep->port < min ? 
max : tep->port; + + /* Search for first free slot */ + while (tries) + { + tei = transport_endpoint_lookup (&tm->local_endpoints_table, &tep->ip, + tep->port); + if (tei == TRANSPORT_ENDPOINT_INVALID_INDEX) + { + unique = 1; + break; + } + + tep->port--; + + if (tep->port < min) + tep->port = max; + + tries--; + } + + if (unique) + { + transport_endpoint_table_add (&tm->local_endpoints_table, tep, + tep - tm->local_endpoints); + + return tep->port; + } + + /* Failed */ + pool_put (tm->local_endpoints, tep); + return -1; +} + +/** + * Initialize all connection timers as invalid + */ +void +tcp_connection_timers_init (tcp_connection_t * tc) +{ + int i; + + /* Set all to invalid */ + for (i = 0; i < TCP_N_TIMERS; i++) + { + tc->timers[i] = TCP_TIMER_HANDLE_INVALID; + } + + tc->rto = TCP_RTO_INIT; +} + +/** + * Stop all connection timers + */ +void +tcp_connection_timers_reset (tcp_connection_t * tc) +{ + int i; + for (i = 0; i < TCP_N_TIMERS; i++) + { + tcp_timer_reset (tc, i); + } +} + +/** Initialize tcp connection variables + * + * Should be called after having received a msg from the peer, i.e., a SYN or + * a SYNACK, such that connection options have already been exchanged. */ +void +tcp_connection_init_vars (tcp_connection_t * tc) +{ + tcp_connection_timers_init (tc); + tcp_set_snd_mss (tc); + tc->sack_sb.head = TCP_INVALID_SACK_HOLE_INDEX; + tcp_cc_init (tc); +} + +int +tcp_connection_open (ip46_address_t * rmt_addr, u16 rmt_port, u8 is_ip4) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + tcp_connection_t *tc; + fib_prefix_t prefix; + u32 fei, sw_if_index; + ip46_address_t lcl_addr; + u16 lcl_port; + + /* + * Find the local address and allocate port + */ + memset (&lcl_addr, 0, sizeof (lcl_addr)); + + /* Find a FIB path to the destination */ + clib_memcpy (&prefix.fp_addr, rmt_addr, sizeof (*rmt_addr)); + prefix.fp_proto = is_ip4 ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6; + prefix.fp_len = is_ip4 ? 32 : 128; + + fei = fib_table_lookup (0, &prefix); + + /* Couldn't find route to destination. Bail out. 
*/ + if (fei == FIB_NODE_INDEX_INVALID) + return -1; + + sw_if_index = fib_entry_get_resolving_interface (fei); + + if (sw_if_index == (u32) ~ 0) + return -1; + + if (is_ip4) + { + ip4_address_t *ip4; + ip4 = ip_interface_get_first_ip (sw_if_index, 1); + lcl_addr.ip4.as_u32 = ip4->as_u32; + } + else + { + ip6_address_t *ip6; + ip6 = ip_interface_get_first_ip (sw_if_index, 0); + clib_memcpy (&lcl_addr.ip6, ip6, sizeof (*ip6)); + } + + /* Allocate source port */ + lcl_port = tcp_allocate_local_port (tm, &lcl_addr); + if (lcl_port < 1) + return -1; + + /* + * Create connection and send SYN + */ + + pool_get (tm->half_open_connections, tc); + memset (tc, 0, sizeof (*tc)); + + clib_memcpy (&tc->c_rmt_ip, rmt_addr, sizeof (ip46_address_t)); + clib_memcpy (&tc->c_lcl_ip, &lcl_addr, sizeof (ip46_address_t)); + tc->c_rmt_port = clib_host_to_net_u16 (rmt_port); + tc->c_lcl_port = clib_host_to_net_u16 (lcl_port); + tc->c_c_index = tc - tm->half_open_connections; + tc->c_is_ip4 = is_ip4; + + /* The other connection vars will be initialized after SYN ACK */ + tcp_connection_timers_init (tc); + + tcp_send_syn (tc); + + tc->state = TCP_STATE_SYN_SENT; + + return tc->c_c_index; +} + +int +tcp_session_open_ip4 (ip46_address_t * addr, u16 port) +{ + return tcp_connection_open (addr, port, 1); +} + +int +tcp_session_open_ip6 (ip46_address_t * addr, u16 port) +{ + return tcp_connection_open (addr, port, 0); +} + +u8 * +format_tcp_session_ip4 (u8 * s, va_list * args) +{ + u32 tci = va_arg (*args, u32); + u32 thread_index = va_arg (*args, u32); + tcp_connection_t *tc; + + tc = tcp_connection_get (tci, thread_index); + + s = format (s, "[%s] %U:%d->%U:%d", "tcp", format_ip4_address, + &tc->c_lcl_ip4, clib_net_to_host_u16 (tc->c_lcl_port), + format_ip4_address, &tc->c_rmt_ip4, + clib_net_to_host_u16 (tc->c_rmt_port)); + + return s; +} + +u8 * +format_tcp_session_ip6 (u8 * s, va_list * args) +{ + u32 tci = va_arg (*args, u32); + u32 thread_index = va_arg (*args, u32); + tcp_connection_t *tc = tcp_connection_get (tci, thread_index); + s = format (s, "[%s] %U:%d->%U:%d", "tcp", format_ip6_address, + &tc->c_lcl_ip6, clib_net_to_host_u16 (tc->c_lcl_port), + format_ip6_address, &tc->c_rmt_ip6, + clib_net_to_host_u16 (tc->c_rmt_port)); + return s; +} + +u8 * +format_tcp_listener_session_ip4 (u8 * s, va_list * args) +{ + u32 tci = va_arg (*args, u32); + tcp_connection_t *tc = tcp_listener_get (tci); + s = format (s, "[%s] %U:%d->%U:%d", "tcp", format_ip4_address, + &tc->c_lcl_ip4, clib_net_to_host_u16 (tc->c_lcl_port), + format_ip4_address, &tc->c_rmt_ip4, + clib_net_to_host_u16 (tc->c_rmt_port)); + return s; +} + +u8 * +format_tcp_listener_session_ip6 (u8 * s, va_list * args) +{ + u32 tci = va_arg (*args, u32); + tcp_connection_t *tc = tcp_listener_get (tci); + s = format (s, "[%s] %U:%d->%U:%d", "tcp", format_ip6_address, + &tc->c_lcl_ip6, clib_net_to_host_u16 (tc->c_lcl_port), + format_ip6_address, &tc->c_rmt_ip6, + clib_net_to_host_u16 (tc->c_rmt_port)); + return s; +} + +u8 * +format_tcp_half_open_session_ip4 (u8 * s, va_list * args) +{ + u32 tci = va_arg (*args, u32); + tcp_connection_t *tc = tcp_half_open_connection_get (tci); + s = format (s, "[%s] %U:%d->%U:%d", "tcp", format_ip4_address, + &tc->c_lcl_ip4, clib_net_to_host_u16 (tc->c_lcl_port), + format_ip4_address, &tc->c_rmt_ip4, + clib_net_to_host_u16 (tc->c_rmt_port)); + return s; +} + +u8 * +format_tcp_half_open_session_ip6 (u8 * s, va_list * args) +{ + u32 tci = va_arg (*args, u32); + tcp_connection_t *tc = tcp_half_open_connection_get (tci); + s = format 
(s, "[%s] %U:%d->%U:%d", "tcp", format_ip6_address, + &tc->c_lcl_ip6, clib_net_to_host_u16 (tc->c_lcl_port), + format_ip6_address, &tc->c_rmt_ip6, + clib_net_to_host_u16 (tc->c_rmt_port)); + return s; +} + +transport_connection_t * +tcp_session_get_transport (u32 conn_index, u32 thread_index) +{ + tcp_connection_t *tc = tcp_connection_get (conn_index, thread_index); + return &tc->connection; +} + +transport_connection_t * +tcp_half_open_session_get_transport (u32 conn_index) +{ + tcp_connection_t *tc = tcp_half_open_connection_get (conn_index); + return &tc->connection; +} + +u16 +tcp_session_send_mss (transport_connection_t * trans_conn) +{ + tcp_connection_t *tc = (tcp_connection_t *) trans_conn; + return tc->snd_mss; +} + +u32 +tcp_session_send_space (transport_connection_t * trans_conn) +{ + tcp_connection_t *tc = (tcp_connection_t *) trans_conn; + return tcp_available_snd_space (tc); +} + +u32 +tcp_session_rx_fifo_offset (transport_connection_t * trans_conn) +{ + tcp_connection_t *tc = (tcp_connection_t *) trans_conn; + return (tc->snd_una_max - tc->snd_una); +} + +/* *INDENT-OFF* */ +const static transport_proto_vft_t tcp4_proto = { + .bind = tcp_session_bind_ip4, + .unbind = tcp_session_unbind_ip4, + .push_header = tcp_push_header, + .get_connection = tcp_session_get_transport, + .get_listener = tcp_session_get_listener, + .get_half_open = tcp_half_open_session_get_transport, + .open = tcp_session_open_ip4, + .close = tcp_session_close, + .cleanup = tcp_session_cleanup, + .send_mss = tcp_session_send_mss, + .send_space = tcp_session_send_space, + .rx_fifo_offset = tcp_session_rx_fifo_offset, + .format_connection = format_tcp_session_ip4, + .format_listener = format_tcp_listener_session_ip4, + .format_half_open = format_tcp_half_open_session_ip4 +}; + +const static transport_proto_vft_t tcp6_proto = { + .bind = tcp_session_bind_ip6, + .unbind = tcp_session_unbind_ip6, + .push_header = tcp_push_header, + .get_connection = tcp_session_get_transport, + .get_listener = tcp_session_get_listener, + .get_half_open = tcp_half_open_session_get_transport, + .open = tcp_session_open_ip6, + .close = tcp_session_close, + .cleanup = tcp_session_cleanup, + .send_mss = tcp_session_send_mss, + .send_space = tcp_session_send_space, + .rx_fifo_offset = tcp_session_rx_fifo_offset, + .format_connection = format_tcp_session_ip6, + .format_listener = format_tcp_listener_session_ip6, + .format_half_open = format_tcp_half_open_session_ip6 +}; +/* *INDENT-ON* */ + +void +tcp_timer_keep_handler (u32 conn_index) +{ + u32 cpu_index = os_get_cpu_number (); + tcp_connection_t *tc; + + tc = tcp_connection_get (conn_index, cpu_index); + tc->timers[TCP_TIMER_KEEP] = TCP_TIMER_HANDLE_INVALID; + + tcp_connection_close (tc); +} + +void +tcp_timer_establish_handler (u32 conn_index) +{ + tcp_connection_t *tc; + u8 sst; + + tc = tcp_half_open_connection_get (conn_index); + tc->timers[TCP_TIMER_ESTABLISH] = TCP_TIMER_HANDLE_INVALID; + + ASSERT (tc->state == TCP_STATE_SYN_SENT); + + sst = tc->c_is_ip4 ? 
SESSION_TYPE_IP4_TCP : SESSION_TYPE_IP6_TCP; + stream_session_connect_notify (&tc->connection, sst, 1 /* fail */ ); + + tcp_connection_cleanup (tc); +} + +void +tcp_timer_2msl_handler (u32 conn_index) +{ + u32 cpu_index = os_get_cpu_number (); + tcp_connection_t *tc; + + tc = tcp_connection_get (conn_index, cpu_index); + tc->timers[TCP_TIMER_2MSL] = TCP_TIMER_HANDLE_INVALID; + + tcp_connection_del (tc); +} + +/* *INDENT-OFF* */ +static timer_expiration_handler *timer_expiration_handlers[TCP_N_TIMERS] = +{ + tcp_timer_retransmit_handler, + tcp_timer_delack_handler, + 0, + tcp_timer_keep_handler, + tcp_timer_2msl_handler, + tcp_timer_retransmit_syn_handler, + tcp_timer_establish_handler +}; +/* *INDENT-ON* */ + +static void +tcp_expired_timers_dispatch (u32 * expired_timers) +{ + int i; + u32 connection_index, timer_id; + + for (i = 0; i < vec_len (expired_timers); i++) + { + /* Get session index and timer id */ + connection_index = expired_timers[i] & 0x0FFFFFFF; + timer_id = expired_timers[i] >> 28; + + /* Handle expiration */ + (*timer_expiration_handlers[timer_id]) (connection_index); + } +} + +void +tcp_initialize_timer_wheels (tcp_main_t * tm) +{ + tw_timer_wheel_16t_2w_512sl_t *tw; + vec_foreach (tw, tm->timer_wheels) + { + tw_timer_wheel_init_16t_2w_512sl (tw, tcp_expired_timers_dispatch, + 100e-3 /* timer period 100ms */ , ~0); + tw->last_run_time = vlib_time_now (tm->vlib_main); + } +} + +clib_error_t * +tcp_init (vlib_main_t * vm) +{ + ip_main_t *im = &ip_main; + ip_protocol_info_t *pi; + tcp_main_t *tm = vnet_get_tcp_main (); + vlib_thread_main_t *vtm = vlib_get_thread_main (); + clib_error_t *error = 0; + u32 num_threads; + + tm->vlib_main = vm; + tm->vnet_main = vnet_get_main (); + + if ((error = vlib_call_init_function (vm, ip_main_init))) + return error; + if ((error = vlib_call_init_function (vm, ip4_lookup_init))) + return error; + if ((error = vlib_call_init_function (vm, ip6_lookup_init))) + return error; + + /* + * Registrations + */ + + /* Register with IP */ + pi = ip_get_protocol_info (im, IP_PROTOCOL_TCP); + if (pi == 0) + return clib_error_return (0, "TCP protocol info AWOL"); + pi->format_header = format_tcp_header; + pi->unformat_pg_edit = unformat_pg_tcp_header; + + ip4_register_protocol (IP_PROTOCOL_TCP, tcp4_input_node.index); + + /* Register as transport with URI */ + session_register_transport (SESSION_TYPE_IP4_TCP, &tcp4_proto); + session_register_transport (SESSION_TYPE_IP6_TCP, &tcp6_proto); + + /* + * Initialize data structures + */ + + num_threads = 1 /* main thread */ + vtm->n_threads; + vec_validate (tm->connections, num_threads - 1); + + /* Initialize per worker thread tx buffers (used for control messages) */ + vec_validate (tm->tx_buffers, num_threads - 1); + + /* Initialize timer wheels */ + vec_validate (tm->timer_wheels, num_threads - 1); + tcp_initialize_timer_wheels (tm); + + vec_validate (tm->delack_connections, num_threads - 1); + + /* Initialize clocks per tick for TCP timestamp. Used to compute + * monotonically increasing timestamps. 
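+ * With TCP_TSTAMP_RESOLUTION equal to TCP_TICK (10 ms), tcp_time_now ()
+ * simply scales clib_cpu_time_now () by tstamp_ticks_per_clock to obtain
+ * the timestamp clock.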
*/ + tm->tstamp_ticks_per_clock = vm->clib_time.seconds_per_clock + / TCP_TSTAMP_RESOLUTION; + + clib_bihash_init_24_8 (&tm->local_endpoints_table, "local endpoint table", + 200000 /* $$$$ config parameter nbuckets */ , + (64 << 20) /*$$$ config parameter table size */ ); + + return error; +} + +VLIB_INIT_FUNCTION (tcp_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h new file mode 100644 index 00000000..22f00a63 --- /dev/null +++ b/src/vnet/tcp/tcp.h @@ -0,0 +1,624 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _vnet_tcp_h_ +#define _vnet_tcp_h_ + +#include +#include +#include +#include +#include +#include + +#define TCP_TICK 10e-3 /**< TCP tick period (s) */ +#define THZ 1/TCP_TICK /**< TCP tick frequency */ +#define TCP_TSTAMP_RESOLUTION TCP_TICK /**< Time stamp resolution */ +#define TCP_PAWS_IDLE 24 * 24 * 60 * 60 * THZ /**< 24 days */ +#define TCP_MAX_OPTION_SPACE 40 + +#define TCP_DUPACK_THRESHOLD 3 +#define TCP_DEFAULT_RX_FIFO_SIZE 64 << 10 + +/** TCP FSM state definitions as per RFC793. */ +#define foreach_tcp_fsm_state \ + _(CLOSED, "CLOSED") \ + _(LISTEN, "LISTEN") \ + _(SYN_SENT, "SYN_SENT") \ + _(SYN_RCVD, "SYN_RCVD") \ + _(ESTABLISHED, "ESTABLISHED") \ + _(CLOSE_WAIT, "CLOSE_WAIT") \ + _(FIN_WAIT_1, "FIN_WAIT_1") \ + _(LAST_ACK, "LAST_ACK") \ + _(CLOSING, "CLOSING") \ + _(FIN_WAIT_2, "FIN_WAIT_2") \ + _(TIME_WAIT, "TIME_WAIT") + +typedef enum _tcp_state +{ +#define _(sym, str) TCP_STATE_##sym, + foreach_tcp_fsm_state +#undef _ + TCP_N_STATES +} tcp_state_t; + +format_function_t format_tcp_state; + +/** TCP timers */ +#define foreach_tcp_timer \ + _(RETRANSMIT, "RETRANSMIT") \ + _(DELACK, "DELAYED ACK") \ + _(PERSIST, "PERSIST") \ + _(KEEP, "KEEP") \ + _(2MSL, "2MSL") \ + _(RETRANSMIT_SYN, "RETRANSMIT_SYN") \ + _(ESTABLISH, "ESTABLISH") + +typedef enum _tcp_timers +{ +#define _(sym, str) TCP_TIMER_##sym, + foreach_tcp_timer +#undef _ + TCP_N_TIMERS +} tcp_timers_e; + +typedef void (timer_expiration_handler) (u32 index); + +extern timer_expiration_handler tcp_timer_delack_handler; +extern timer_expiration_handler tcp_timer_retransmit_handler; +extern timer_expiration_handler tcp_timer_retransmit_syn_handler; + +#define TCP_TIMER_HANDLE_INVALID ((u32) ~0) + +/* Timer delays as multiples of 100ms */ +#define TCP_TO_TIMER_TICK TCP_TICK*10 /* Period for converting from TCP + * ticks to timer units */ +#define TCP_DELACK_TIME 1 /* 0.1s */ +#define TCP_ESTABLISH_TIME 750 /* 75s */ +#define TCP_2MSL_TIME 300 /* 30s */ + +#define TCP_RTO_MAX 60 * THZ /* Min max RTO (60s) as per RFC6298 */ +#define TCP_RTT_MAX 30 * THZ /* 30s (probably too much) */ +#define TCP_RTO_SYN_RETRIES 3 /* SYN retries without doubling RTO */ +#define TCP_RTO_INIT 1 * THZ /* Initial retransmit timer */ + +void tcp_update_time (f64 now, u32 thread_index); + +/** TCP connection flags */ +#define 
foreach_tcp_connection_flag \ + _(DELACK, "Delay ACK") \ + _(SNDACK, "Send ACK") \ + _(BURSTACK, "Burst ACK set") \ + _(SENT_RCV_WND0, "Sent 0 receive window") \ + _(RECOVERY, "Recovery on") \ + _(FAST_RECOVERY, "Fast Recovery on") + +typedef enum _tcp_connection_flag_bits +{ +#define _(sym, str) TCP_CONN_##sym##_BIT, + foreach_tcp_connection_flag +#undef _ + TCP_CONN_N_FLAG_BITS +} tcp_connection_flag_bits_e; + +typedef enum _tcp_connection_flag +{ +#define _(sym, str) TCP_CONN_##sym = 1 << TCP_CONN_##sym##_BIT, + foreach_tcp_connection_flag +#undef _ + TCP_CONN_N_FLAGS +} tcp_connection_flags_e; + +/** TCP buffer flags */ +#define foreach_tcp_buf_flag \ + _ (ACK) /**< Sending ACK. */ \ + _ (DUPACK) /**< Sending DUPACK. */ \ + +enum +{ +#define _(f) TCP_BUF_BIT_##f, + foreach_tcp_buf_flag +#undef _ + TCP_N_BUF_BITS, +}; + +enum +{ +#define _(f) TCP_BUF_FLAG_##f = 1 << TCP_BUF_BIT_##f, + foreach_tcp_buf_flag +#undef _ +}; + +#define TCP_MAX_SACK_BLOCKS 5 /**< Max number of SACK blocks stored */ +#define TCP_INVALID_SACK_HOLE_INDEX ((u32)~0) + +typedef struct _sack_scoreboard_hole +{ + u32 next; /**< Index for next entry in linked list */ + u32 prev; /**< Index for previous entry in linked list */ + u32 start; /**< Start sequence number */ + u32 end; /**< End sequence number */ +} sack_scoreboard_hole_t; + +typedef struct _sack_scoreboard +{ + sack_scoreboard_hole_t *holes; /**< Pool of holes */ + u32 head; /**< Index to first entry */ + u32 sacked_bytes; /**< Number of bytes sacked in sb */ +} sack_scoreboard_t; + +typedef enum _tcp_cc_algorithm_type +{ + TCP_CC_NEWRENO, +} tcp_cc_algorithm_type_e; + +typedef struct _tcp_cc_algorithm tcp_cc_algorithm_t; + +typedef enum _tcp_cc_ack_t +{ + TCP_CC_ACK, + TCP_CC_DUPACK, + TCP_CC_PARTIALACK +} tcp_cc_ack_t; + +typedef struct _tcp_connection +{ + transport_connection_t connection; /**< Common transport data. First! */ + + u8 state; /**< TCP state as per tcp_state_t */ + u16 flags; /**< Connection flags (see tcp_conn_flags_e) */ + u32 timers[TCP_N_TIMERS]; /**< Timer handles into timer wheel */ + + /* TODO RFC4898 */ + + /** Send sequence variables RFC793 */ + u32 snd_una; /**< oldest unacknowledged sequence number */ + u32 snd_una_max; /**< newest unacknowledged sequence number + 1*/ + u32 snd_wnd; /**< send window */ + u32 snd_wl1; /**< seq number used for last snd.wnd update */ + u32 snd_wl2; /**< ack number used for last snd.wnd update */ + u32 snd_nxt; /**< next seq number to be sent */ + + /** Receive sequence variables RFC793 */ + u32 rcv_nxt; /**< next sequence number expected */ + u32 rcv_wnd; /**< receive window we expect */ + + u32 rcv_las; /**< rcv_nxt at last ack sent/rcv_wnd update */ + u32 iss; /**< initial sent sequence */ + u32 irs; /**< initial remote sequence */ + + /* Options */ + tcp_options_t opt; /**< TCP connection options parsed */ + u8 rcv_wscale; /**< Window scale to advertise to peer */ + u8 snd_wscale; /**< Window scale to use when sending */ + u32 tsval_recent; /**< Last timestamp received */ + u32 tsval_recent_age; /**< When last updated tstamp_recent*/ + + sack_block_t *snd_sacks; /**< Vector of SACKs to send. XXX Fixed size? 
*/ + sack_scoreboard_t sack_sb; /**< SACK "scoreboard" that tracks holes */ + + u8 rcv_dupacks; /**< Number of DUPACKs received */ + u8 snt_dupacks; /**< Number of DUPACKs sent in a burst */ + + /* Congestion control */ + u32 cwnd; /**< Congestion window */ + u32 ssthresh; /**< Slow-start threshold */ + u32 prev_ssthresh; /**< ssthresh before congestion */ + u32 bytes_acked; /**< Bytes acknowledged by current segment */ + u32 rtx_bytes; /**< Retransmitted bytes */ + u32 tsecr_last_ack; /**< Timestamp echoed to us in last health ACK */ + tcp_cc_algorithm_t *cc_algo; /**< Congestion control algorithm */ + + /* RTT and RTO */ + u32 rto; /**< Retransmission timeout */ + u32 rto_boff; /**< Index for RTO backoff */ + u32 srtt; /**< Smoothed RTT */ + u32 rttvar; /**< Smoothed mean RTT difference. Approximates variance */ + u32 rtt_ts; /**< Timestamp for tracked ACK */ + u32 rtt_seq; /**< Sequence number for tracked ACK */ + + u16 snd_mss; /**< Send MSS */ +} tcp_connection_t; + +struct _tcp_cc_algorithm +{ + void (*rcv_ack) (tcp_connection_t * tc); + void (*rcv_cong_ack) (tcp_connection_t * tc, tcp_cc_ack_t ack); + void (*congestion) (tcp_connection_t * tc); + void (*recovered) (tcp_connection_t * tc); + void (*init) (tcp_connection_t * tc); +}; + +#define tcp_fastrecovery_on(tc) (tc)->flags |= TCP_CONN_FAST_RECOVERY +#define tcp_fastrecovery_off(tc) (tc)->flags &= ~TCP_CONN_FAST_RECOVERY +#define tcp_in_fastrecovery(tc) ((tc)->flags & TCP_CONN_FAST_RECOVERY) +#define tcp_in_recovery(tc) ((tc)->flags & (TCP_CONN_FAST_RECOVERY | TCP_CONN_RECOVERY)) +#define tcp_recovery_off(tc) ((tc)->flags &= ~(TCP_CONN_FAST_RECOVERY | TCP_CONN_RECOVERY)) +#define tcp_in_slowstart(tc) (tc->cwnd < tc->ssthresh) + +typedef enum +{ + TCP_IP4, + TCP_IP6, + TCP_N_AF, +} tcp_af_t; + +typedef enum _tcp_error +{ +#define tcp_error(n,s) TCP_ERROR_##n, +#include +#undef tcp_error + TCP_N_ERROR, +} tcp_error_t; + +typedef struct _tcp_lookup_dispatch +{ + u8 next, error; +} tcp_lookup_dispatch_t; + +typedef struct _tcp_main +{ + /* Per-worker thread tcp connection pools */ + tcp_connection_t **connections; + + /* Pool of listeners. 
*/ + tcp_connection_t *listener_pool; + + /** Dispatch table by state and flags */ + tcp_lookup_dispatch_t dispatch_table[TCP_N_STATES][64]; + + u8 log2_tstamp_clocks_per_tick; + f64 tstamp_ticks_per_clock; + + /** per-worker tx buffer free lists */ + u32 **tx_buffers; + + /* Per worker-thread timer wheel for connections timers */ + tw_timer_wheel_16t_2w_512sl_t *timer_wheels; + + /* Convenience per worker-thread vector of connections to DELACK */ + u32 **delack_connections; + + /* Pool of half-open connections on which we've sent a SYN */ + tcp_connection_t *half_open_connections; + + /* Pool of local TCP endpoints */ + transport_endpoint_t *local_endpoints; + + /* Local endpoints lookup table */ + transport_endpoint_table_t local_endpoints_table; + + /* Congestion control algorithms registered */ + tcp_cc_algorithm_t *cc_algos; + + /* convenience */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; + ip4_main_t *ip4_main; + ip6_main_t *ip6_main; +} tcp_main_t; + +extern tcp_main_t tcp_main; +extern vlib_node_registration_t tcp4_input_node; +extern vlib_node_registration_t tcp6_input_node; +extern vlib_node_registration_t tcp4_output_node; +extern vlib_node_registration_t tcp6_output_node; + +always_inline tcp_main_t * +vnet_get_tcp_main () +{ + return &tcp_main; +} + +always_inline tcp_connection_t * +tcp_connection_get (u32 conn_index, u32 thread_index) +{ + return pool_elt_at_index (tcp_main.connections[thread_index], conn_index); +} + +always_inline tcp_connection_t * +tcp_connection_get_if_valid (u32 conn_index, u32 thread_index) +{ + if (tcp_main.connections[thread_index] == 0) + return 0; + if (pool_is_free_index (tcp_main.connections[thread_index], conn_index)) + return 0; + return pool_elt_at_index (tcp_main.connections[thread_index], conn_index); +} + +void tcp_connection_close (tcp_connection_t * tc); +void tcp_connection_cleanup (tcp_connection_t * tc); +void tcp_connection_del (tcp_connection_t * tc); + +always_inline tcp_connection_t * +tcp_listener_get (u32 tli) +{ + return pool_elt_at_index (tcp_main.listener_pool, tli); +} + +always_inline tcp_connection_t * +tcp_half_open_connection_get (u32 conn_index) +{ + return pool_elt_at_index (tcp_main.half_open_connections, conn_index); +} + +void tcp_make_ack (tcp_connection_t * ts, vlib_buffer_t * b); +void tcp_make_finack (tcp_connection_t * tc, vlib_buffer_t * b); +void tcp_make_synack (tcp_connection_t * ts, vlib_buffer_t * b); +void tcp_send_reset (vlib_buffer_t * pkt, u8 is_ip4); +void tcp_send_syn (tcp_connection_t * tc); +void tcp_send_fin (tcp_connection_t * tc); +void tcp_set_snd_mss (tcp_connection_t * tc); + +always_inline u32 +tcp_end_seq (tcp_header_t * th, u32 len) +{ + return th->seq_number + tcp_is_syn (th) + tcp_is_fin (th) + len; +} + +/* Modulo arithmetic for TCP sequence numbers */ +#define seq_lt(_s1, _s2) ((i32)((_s1)-(_s2)) < 0) +#define seq_leq(_s1, _s2) ((i32)((_s1)-(_s2)) <= 0) +#define seq_gt(_s1, _s2) ((i32)((_s1)-(_s2)) > 0) +#define seq_geq(_s1, _s2) ((i32)((_s1)-(_s2)) >= 0) + +/* Modulo arithmetic for timestamps */ +#define timestamp_lt(_t1, _t2) ((i32)((_t1)-(_t2)) < 0) +#define timestamp_leq(_t1, _t2) ((i32)((_t1)-(_t2)) <= 0) + +always_inline u32 +tcp_flight_size (const tcp_connection_t * tc) +{ + return tc->snd_una_max - tc->snd_una - tc->sack_sb.sacked_bytes + + tc->rtx_bytes; +} + +/** + * Initial cwnd as per RFC5681 + */ +always_inline u32 +tcp_initial_cwnd (const tcp_connection_t * tc) +{ + if (tc->snd_mss > 2190) + return 2 * tc->snd_mss; + else if (tc->snd_mss > 1095) + return 3 * 
tc->snd_mss; + else + return 4 * tc->snd_mss; +} + +always_inline u32 +tcp_loss_wnd (const tcp_connection_t * tc) +{ + return tc->snd_mss; +} + +always_inline u32 +tcp_available_wnd (const tcp_connection_t * tc) +{ + return clib_min (tc->cwnd, tc->snd_wnd); +} + +always_inline u32 +tcp_available_snd_space (const tcp_connection_t * tc) +{ + u32 available_wnd = tcp_available_wnd (tc); + u32 flight_size = tcp_flight_size (tc); + + if (available_wnd <= flight_size) + return 0; + + return available_wnd - flight_size; +} + +void tcp_retransmit_first_unacked (tcp_connection_t * tc); + +void tcp_fast_retransmit (tcp_connection_t * tc); + +always_inline u32 +tcp_time_now (void) +{ + return clib_cpu_time_now () * tcp_main.tstamp_ticks_per_clock; +} + +u32 tcp_push_header (transport_connection_t * tconn, vlib_buffer_t * b); + +u32 +tcp_prepare_retransmit_segment (tcp_connection_t * tc, vlib_buffer_t * b, + u32 max_bytes); + +void tcp_connection_timers_init (tcp_connection_t * tc); +void tcp_connection_timers_reset (tcp_connection_t * tc); + +void tcp_connection_init_vars (tcp_connection_t * tc); + +always_inline void +tcp_connection_force_ack (tcp_connection_t * tc, vlib_buffer_t * b) +{ + /* Reset flags, make sure ack is sent */ + tc->flags = TCP_CONN_SNDACK; + vnet_buffer (b)->tcp.flags &= ~TCP_BUF_FLAG_DUPACK; +} + +always_inline void +tcp_timer_set (tcp_connection_t * tc, u8 timer_id, u32 interval) +{ + tc->timers[timer_id] + = tw_timer_start_16t_2w_512sl (&tcp_main.timer_wheels[tc->c_thread_index], + tc->c_c_index, timer_id, interval); +} + +always_inline void +tcp_retransmit_timer_set (tcp_main_t * tm, tcp_connection_t * tc) +{ + /* XXX Switch to faster TW */ + tcp_timer_set (tc, TCP_TIMER_RETRANSMIT, + clib_max (tc->rto * TCP_TO_TIMER_TICK, 1)); +} + +always_inline void +tcp_timer_reset (tcp_connection_t * tc, u8 timer_id) +{ + if (tc->timers[timer_id] == TCP_TIMER_HANDLE_INVALID) + return; + + tw_timer_stop_16t_2w_512sl (&tcp_main.timer_wheels[tc->c_thread_index], + tc->timers[timer_id]); + tc->timers[timer_id] = TCP_TIMER_HANDLE_INVALID; +} + +always_inline void +tcp_timer_update (tcp_connection_t * tc, u8 timer_id, u32 interval) +{ + if (tc->timers[timer_id] != TCP_TIMER_HANDLE_INVALID) + tw_timer_stop_16t_2w_512sl (&tcp_main.timer_wheels[tc->c_thread_index], + tc->timers[timer_id]); + tc->timers[timer_id] = + tw_timer_start_16t_2w_512sl (&tcp_main.timer_wheels[tc->c_thread_index], + tc->c_c_index, timer_id, interval); +} + +always_inline u8 +tcp_timer_is_active (tcp_connection_t * tc, tcp_timers_e timer) +{ + return tc->timers[timer] != TCP_TIMER_HANDLE_INVALID; +} + +void +scoreboard_remove_hole (sack_scoreboard_t * sb, + sack_scoreboard_hole_t * hole); + +always_inline sack_scoreboard_hole_t * +scoreboard_next_hole (sack_scoreboard_t * sb, sack_scoreboard_hole_t * hole) +{ + if (hole->next != TCP_INVALID_SACK_HOLE_INDEX) + return pool_elt_at_index (sb->holes, hole->next); + return 0; +} + +always_inline sack_scoreboard_hole_t * +scoreboard_first_hole (sack_scoreboard_t * sb) +{ + if (sb->head != TCP_INVALID_SACK_HOLE_INDEX) + return pool_elt_at_index (sb->holes, sb->head); + return 0; +} + +always_inline void +scoreboard_clear (sack_scoreboard_t * sb) +{ + sack_scoreboard_hole_t *hole = scoreboard_first_hole (sb); + while ((hole = scoreboard_first_hole (sb))) + { + scoreboard_remove_hole (sb, hole); + } +} + +always_inline u32 +scoreboard_hole_bytes (sack_scoreboard_hole_t * hole) +{ + return hole->end - hole->start; +} + +always_inline void +tcp_cc_algo_register (tcp_cc_algorithm_type_e 
type, + const tcp_cc_algorithm_t * vft) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + vec_validate (tm->cc_algos, type); + + tm->cc_algos[type] = *vft; +} + +always_inline tcp_cc_algorithm_t * +tcp_cc_algo_get (tcp_cc_algorithm_type_e type) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + return &tm->cc_algos[type]; +} + +void tcp_cc_init (tcp_connection_t * tc); + +/** + * Push TCP header to buffer + * + * @param vm - vlib_main + * @param b - buffer to write the header to + * @param sp_net - source port net order + * @param dp_net - destination port net order + * @param seq - sequence number net order + * @param ack - ack number net order + * @param tcp_hdr_opts_len - header and options length in bytes + * @param flags - header flags + * @param wnd - window size + * + * @return - pointer to start of TCP header + */ +always_inline void * +vlib_buffer_push_tcp_net_order (vlib_buffer_t * b, u16 sp, u16 dp, u32 seq, + u32 ack, u8 tcp_hdr_opts_len, u8 flags, + u16 wnd) +{ + tcp_header_t *th; + + th = vlib_buffer_push_uninit (b, tcp_hdr_opts_len); + + th->src_port = sp; + th->dst_port = dp; + th->seq_number = seq; + th->ack_number = ack; + th->data_offset_and_reserved = (tcp_hdr_opts_len >> 2) << 4; + th->flags = flags; + th->window = wnd; + th->checksum = 0; + th->urgent_pointer = 0; + return th; +} + +/** + * Push TCP header to buffer + * + * @param vm - vlib_main + * @param b - buffer to write the header to + * @param sp_net - source port net order + * @param dp_net - destination port net order + * @param seq - sequence number host order + * @param ack - ack number host order + * @param tcp_hdr_opts_len - header and options length in bytes + * @param flags - header flags + * @param wnd - window size + * + * @return - pointer to start of TCP header + */ +always_inline void * +vlib_buffer_push_tcp (vlib_buffer_t * b, u16 sp_net, u16 dp_net, u32 seq, + u32 ack, u8 tcp_hdr_opts_len, u8 flags, u16 wnd) +{ + return vlib_buffer_push_tcp_net_order (b, sp_net, dp_net, + clib_host_to_net_u32 (seq), + clib_host_to_net_u32 (ack), + tcp_hdr_opts_len, flags, + clib_host_to_net_u16 (wnd)); +} + +#endif /* _vnet_tcp_h_ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/tcp/tcp_error.def b/src/vnet/tcp/tcp_error.def new file mode 100644 index 00000000..cff5ec13 --- /dev/null +++ b/src/vnet/tcp/tcp_error.def @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +tcp_error (NONE, "no error") +tcp_error (NO_LISTENER, "no listener for dst port") +tcp_error (LOOKUP_DROPS, "lookup drops") +tcp_error (DISPATCH, "Dispatch error") +tcp_error (ENQUEUED, "Packets pushed into rx fifo") +tcp_error (PURE_ACK, "Pure acks") +tcp_error (SYNS_RCVD, "SYNs received") +tcp_error (SYN_ACKS_RCVD, "SYN-ACKs received") +tcp_error (NOT_READY, "Session not ready for packets") +tcp_error (FIFO_FULL, "Packets dropped for lack of rx fifo space") +tcp_error (EVENT_FIFO_FULL, "Events not sent for lack of event fifo space") +tcp_error (API_QUEUE_FULL, "Sessions not created for lack of API queue space") +tcp_error (CREATE_SESSION_FAIL, "Sessions couldn't be allocated") +tcp_error (SEGMENT_INVALID, "Invalid segment") +tcp_error (ACK_INVALID, "Invalid ACK") +tcp_error (ACK_DUP, "Duplicate ACK") +tcp_error (ACK_OLD, "Old ACK") +tcp_error (PKTS_SENT, "Packets sent") +tcp_error (FILTERED_DUPACKS, "Filtered duplicate ACKs") +tcp_error (RST_SENT, "Resets sent") \ No newline at end of file diff --git a/src/vnet/tcp/tcp_format.c b/src/vnet/tcp/tcp_format.c new file mode 100644 index 00000000..7136741d --- /dev/null +++ b/src/vnet/tcp/tcp_format.c @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * tcp/tcp_format.c: tcp formatting + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include + +static u8 * +format_tcp_flags (u8 * s, va_list * args) +{ + int flags = va_arg (*args, int); + +#define _(f) if (flags & TCP_FLAG_##f) s = format (s, "%s, ", #f); + foreach_tcp_flag +#undef _ + return s; +} + +/* Format TCP header. */ +u8 * +format_tcp_header (u8 * s, va_list * args) +{ + tcp_header_t *tcp = va_arg (*args, tcp_header_t *); + u32 max_header_bytes = va_arg (*args, u32); + u32 header_bytes; + uword indent; + + /* Nothing to do. 
*/ + if (max_header_bytes < sizeof (tcp[0])) + return format (s, "TCP header truncated"); + + indent = format_get_indent (s); + indent += 2; + header_bytes = tcp_header_bytes (tcp); + + s = format (s, "TCP: %d -> %d", clib_net_to_host_u16 (tcp->src), + clib_net_to_host_u16 (tcp->dst)); + + s = format (s, "\n%Useq. 0x%08x ack 0x%08x", format_white_space, indent, + clib_net_to_host_u32 (tcp->seq_number), + clib_net_to_host_u32 (tcp->ack_number)); + + s = format (s, "\n%Uflags %U, tcp header: %d bytes", format_white_space, + indent, format_tcp_flags, tcp->flags, header_bytes); + + s = format (s, "\n%Uwindow %d, checksum 0x%04x", format_white_space, indent, + clib_net_to_host_u16 (tcp->window), + clib_net_to_host_u16 (tcp->checksum)); + + +#if 0 + /* Format TCP options. */ + { + u8 *o; + u8 *option_start = (void *) (tcp + 1); + u8 *option_end = (void *) tcp + header_bytes; + + for (o = option_start; o < option_end;) + { + u32 length = o[1]; + switch (o[0]) + { + case TCP_OPTION_END: + length = 1; + o = option_end; + break; + + case TCP_OPTION_NOOP: + length = 1; + break; + + } + } + } +#endif + + /* Recurse into next protocol layer. */ + if (max_header_bytes != 0 && header_bytes < max_header_bytes) + { + ip_main_t *im = &ip_main; + tcp_udp_port_info_t *pi; + + pi = ip_get_tcp_udp_port_info (im, tcp->dst); + + if (pi && pi->format_header) + s = format (s, "\n%U%U", format_white_space, indent - 2, + pi->format_header, + /* next protocol header */ (void *) tcp + header_bytes, + max_header_bytes - header_bytes); + } + + return s; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c new file mode 100644 index 00000000..daa0683b --- /dev/null +++ b/src/vnet/tcp/tcp_input.c @@ -0,0 +1,2316 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include + +static char *tcp_error_strings[] = { +#define tcp_error(n,s) s, +#include +#undef tcp_error +}; + +/* All TCP nodes have the same outgoing arcs */ +#define foreach_tcp_state_next \ + _ (DROP, "error-drop") \ + _ (TCP4_OUTPUT, "tcp4-output") \ + _ (TCP6_OUTPUT, "tcp6-output") + +typedef enum _tcp_established_next +{ +#define _(s,n) TCP_ESTABLISHED_NEXT_##s, + foreach_tcp_state_next +#undef _ + TCP_ESTABLISHED_N_NEXT, +} tcp_established_next_t; + +typedef enum _tcp_rcv_process_next +{ +#define _(s,n) TCP_RCV_PROCESS_NEXT_##s, + foreach_tcp_state_next +#undef _ + TCP_RCV_PROCESS_N_NEXT, +} tcp_rcv_process_next_t; + +typedef enum _tcp_syn_sent_next +{ +#define _(s,n) TCP_SYN_SENT_NEXT_##s, + foreach_tcp_state_next +#undef _ + TCP_SYN_SENT_N_NEXT, +} tcp_syn_sent_next_t; + +typedef enum _tcp_listen_next +{ +#define _(s,n) TCP_LISTEN_NEXT_##s, + foreach_tcp_state_next +#undef _ + TCP_LISTEN_N_NEXT, +} tcp_listen_next_t; + +/* Generic, state independent indices */ +typedef enum _tcp_state_next +{ +#define _(s,n) TCP_NEXT_##s, + foreach_tcp_state_next +#undef _ + TCP_STATE_N_NEXT, +} tcp_state_next_t; + +#define tcp_next_output(is_ip4) (is_ip4 ? TCP_NEXT_TCP4_OUTPUT \ + : TCP_NEXT_TCP6_OUTPUT) + +vlib_node_registration_t tcp4_established_node; +vlib_node_registration_t tcp6_established_node; + +/** + * Validate segment sequence number. As per RFC793: + * + * Segment Receive Test + * Length Window + * ------- ------- ------------------------------------------- + * 0 0 SEG.SEQ = RCV.NXT + * 0 >0 RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND + * >0 0 not acceptable + * >0 >0 RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND + * or RCV.NXT =< SEG.SEQ+SEG.LEN-1 < RCV.NXT+RCV.WND + * + * This ultimately consists in checking if segment falls within the window. + * The one important difference compared to RFC793 is that we use rcv_las, + * or the rcv_nxt at last ack sent instead of rcv_nxt since that's the + * peer's reference when computing our receive window. + * + * This accepts only segments within the window. 
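 *
 * Editorial illustration (not part of this patch), with made-up numbers:
 * with rcv_nxt = rcv_las = 1000 and rcv_wnd = 100, a segment with
 * seq = 1050 and end_seq = 1080 is accepted, while seq = 990 (left of
 * rcv_nxt) or end_seq = 1101 (right of rcv_las + rcv_wnd) is not, i.e.
 *
 *   accept = seq_geq (seq, rcv_nxt) && seq_leq (end_seq, rcv_las + rcv_wnd)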
+ */ +always_inline u8 +tcp_segment_in_rcv_wnd (tcp_connection_t * tc, u32 seq, u32 end_seq) +{ + return seq_leq (end_seq, tc->rcv_las + tc->rcv_wnd) + && seq_geq (seq, tc->rcv_nxt); +} + +void +tcp_options_parse (tcp_header_t * th, tcp_options_t * to) +{ + const u8 *data; + u8 opt_len, opts_len, kind; + int j; + sack_block_t b; + + opts_len = (tcp_doff (th) << 2) - sizeof (tcp_header_t); + data = (const u8 *) (th + 1); + + /* Zero out all flags but those set in SYN */ + to->flags &= (TCP_OPTS_FLAG_SACK_PERMITTED | TCP_OPTS_FLAG_WSCALE); + + for (; opts_len > 0; opts_len -= opt_len, data += opt_len) + { + kind = data[0]; + + /* Get options length */ + if (kind == TCP_OPTION_EOL) + break; + else if (kind == TCP_OPTION_NOOP) + opt_len = 1; + else + { + /* broken options */ + if (opts_len < 2) + break; + opt_len = data[1]; + + /* weird option length */ + if (opt_len < 2 || opt_len > opts_len) + break; + } + + /* Parse options */ + switch (kind) + { + case TCP_OPTION_MSS: + if ((opt_len == TCP_OPTION_LEN_MSS) && tcp_syn (th)) + { + to->flags |= TCP_OPTS_FLAG_MSS; + to->mss = clib_net_to_host_u16 (*(u16 *) (data + 2)); + } + break; + case TCP_OPTION_WINDOW_SCALE: + if ((opt_len == TCP_OPTION_LEN_WINDOW_SCALE) && tcp_syn (th)) + { + to->flags |= TCP_OPTS_FLAG_WSCALE; + to->wscale = data[2]; + if (to->wscale > TCP_MAX_WND_SCALE) + { + clib_warning ("Illegal window scaling value: %d", + to->wscale); + to->wscale = TCP_MAX_WND_SCALE; + } + } + break; + case TCP_OPTION_TIMESTAMP: + if (opt_len == TCP_OPTION_LEN_TIMESTAMP) + { + to->flags |= TCP_OPTS_FLAG_TSTAMP; + to->tsval = clib_net_to_host_u32 (*(u32 *) (data + 2)); + to->tsecr = clib_net_to_host_u32 (*(u32 *) (data + 6)); + } + break; + case TCP_OPTION_SACK_PERMITTED: + if (opt_len == TCP_OPTION_LEN_SACK_PERMITTED && tcp_syn (th)) + to->flags |= TCP_OPTS_FLAG_SACK_PERMITTED; + break; + case TCP_OPTION_SACK_BLOCK: + /* If SACK permitted was not advertised or a SYN, break */ + if ((to->flags & TCP_OPTS_FLAG_SACK_PERMITTED) == 0 || tcp_syn (th)) + break; + + /* If too short or not correctly formatted, break */ + if (opt_len < 10 || ((opt_len - 2) % TCP_OPTION_LEN_SACK_BLOCK)) + break; + + to->flags |= TCP_OPTS_FLAG_SACK; + to->n_sack_blocks = (opt_len - 2) / TCP_OPTION_LEN_SACK_BLOCK; + vec_reset_length (to->sacks); + for (j = 0; j < to->n_sack_blocks; j++) + { + b.start = clib_net_to_host_u32 (*(u32 *) (data + 2 + 4 * j)); + b.end = clib_net_to_host_u32 (*(u32 *) (data + 6 + 4 * j)); + vec_add1 (to->sacks, b); + } + break; + default: + /* Nothing to see here */ + continue; + } + } +} + +always_inline int +tcp_segment_check_paws (tcp_connection_t * tc) +{ + /* XXX normally test for timestamp should be lt instead of leq, but for + * local testing this is not enough */ + return tcp_opts_tstamp (&tc->opt) && tc->tsval_recent + && timestamp_lt (tc->opt.tsval, tc->tsval_recent); +} + +/** + * Validate incoming segment as per RFC793 p. 69 and RFC1323 p. 19 + * + * It first verifies if segment has a wrapped sequence number (PAWS) and then + * does the processing associated to the first four steps (ignoring security + * and precedence): sequence number, rst bit and syn bit checks. + * + * @return 0 if segments passes validation. 
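 *
 * Editorial outline of the checks performed below (a summary of the
 * function body, not new behavior): segments carrying none of ACK/RST/SYN
 * are dropped outright; then, in order:
 *
 *   0. PAWS (RFC1323): on failure drop, but still send an ACK unless RST
 *   1. sequence number: segment must fall within the receive window
 *   2. RST bit: notify the session and move to CLOSED
 *   3. security/precedence: skipped
 *   4. SYN bit: an in-window SYN elicits a reset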
+ */ +static int +tcp_segment_validate (vlib_main_t * vm, tcp_connection_t * tc0, + vlib_buffer_t * b0, tcp_header_t * th0, u32 * next0) +{ + u8 paws_failed; + + if (PREDICT_FALSE (!tcp_ack (th0) && !tcp_rst (th0) && !tcp_syn (th0))) + return -1; + + tcp_options_parse (th0, &tc0->opt); + + /* RFC1323: Check against wrapped sequence numbers (PAWS). If we have + * timestamp to echo and it's less than tsval_recent, drop segment + * but still send an ACK in order to retain TCP's mechanism for detecting + * and recovering from half-open connections */ + paws_failed = tcp_segment_check_paws (tc0); + if (paws_failed) + { + clib_warning ("paws failed"); + + /* If it just so happens that a segment updates tsval_recent for a + * segment over 24 days old, invalidate tsval_recent. */ + if (timestamp_lt (tc0->tsval_recent_age + TCP_PAWS_IDLE, + tcp_time_now ())) + { + /* Age isn't reset until we get a valid tsval (bsd inspired) */ + tc0->tsval_recent = 0; + } + else + { + /* Drop after ack if not rst */ + if (!tcp_rst (th0)) + { + tcp_make_ack (tc0, b0); + *next0 = tcp_next_output (tc0->c_is_ip4); + return -1; + } + } + } + + /* 1st: check sequence number */ + if (!tcp_segment_in_rcv_wnd (tc0, vnet_buffer (b0)->tcp.seq_number, + vnet_buffer (b0)->tcp.seq_end)) + { + if (!tcp_rst (th0)) + { + /* Send dup ack */ + tcp_make_ack (tc0, b0); + *next0 = tcp_next_output (tc0->c_is_ip4); + } + return -1; + } + + /* 2nd: check the RST bit */ + if (tcp_rst (th0)) + { + /* Notify session that connection has been reset. Switch + * state to closed and await for session to do the cleanup. */ + stream_session_reset_notify (&tc0->connection); + tc0->state = TCP_STATE_CLOSED; + return -1; + } + + /* 3rd: check security and precedence (skip) */ + + /* 4th: check the SYN bit */ + if (tcp_syn (th0)) + { + tcp_send_reset (b0, tc0->c_is_ip4); + return -1; + } + + /* If PAWS passed and segment in window, save timestamp */ + if (!paws_failed) + { + tc0->tsval_recent = tc0->opt.tsval; + tc0->tsval_recent_age = tcp_time_now (); + } + + return 0; +} + +always_inline int +tcp_rcv_ack_is_acceptable (tcp_connection_t * tc0, vlib_buffer_t * tb0) +{ + /* SND.UNA =< SEG.ACK =< SND.NXT */ + return (seq_leq (tc0->snd_una, vnet_buffer (tb0)->tcp.ack_number) + && seq_leq (vnet_buffer (tb0)->tcp.ack_number, tc0->snd_nxt)); +} + +/** + * Compute smoothed RTT as per VJ's '88 SIGCOMM and RFC6298 + * + * Note that although the original article, srtt and rttvar are scaled + * to minimize round-off errors, here we don't. Instead, we rely on + * better precision time measurements. + * + * TODO support us rtt resolution + */ +static void +tcp_estimate_rtt (tcp_connection_t * tc, u32 mrtt) +{ + int err; + + if (tc->srtt != 0) + { + err = mrtt - tc->srtt; + tc->srtt += err >> 3; + + /* XXX Drop in RTT results in RTTVAR increase and bigger RTO. + * The increase should be bound */ + tc->rttvar += (clib_abs (err) - tc->rttvar) >> 2; + } + else + { + /* First measurement. */ + tc->srtt = mrtt; + tc->rttvar = mrtt << 1; + } +} + +/** Update RTT estimate and RTO timer + * + * Measure RTT: We have two sources of RTT measurements: TSOPT and ACK + * timing. Middle boxes are known to fiddle with TCP options so we + * should give higher priority to ACK timing. + * + * return 1 if valid rtt 0 otherwise + */ +static int +tcp_update_rtt (tcp_connection_t * tc, u32 ack) +{ + u32 mrtt = 0; + + /* Karn's rule, part 1. Don't use retransmitted segments to estimate + * RTT because they're ambiguous. 
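 *
 * For reference, a hedged restatement of the RFC6298 recurrences that the
 * shifts in tcp_estimate_rtt above and the RTO computation below implement
 * (err >> 3 is alpha = 1/8, >> 2 is beta = 1/4, rttvar << 2 is 4 * RTTVAR):
 *
 *   err     = mrtt - srtt
 *   srtt   += err / 8
 *   rttvar += (|err| - rttvar) / 4
 *   rto     = min (srtt + 4 * rttvar, TCP_RTO_MAX)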
*/ + if (tc->rtt_seq && seq_gt (ack, tc->rtt_seq) && !tc->rto_boff) + { + mrtt = tcp_time_now () - tc->rtt_ts; + tc->rtt_seq = 0; + } + + /* As per RFC7323 TSecr can be used for RTTM only if the segment advances + * snd_una, i.e., the left side of the send window: + * seq_lt (tc->snd_una, ack). Note: last condition could be dropped, we don't + * try to update rtt for dupacks */ + else if (tcp_opts_tstamp (&tc->opt) && tc->opt.tsecr && tc->bytes_acked) + { + mrtt = tcp_time_now () - tc->opt.tsecr; + } + + /* Ignore dubious measurements */ + if (mrtt == 0 || mrtt > TCP_RTT_MAX) + return 0; + + tcp_estimate_rtt (tc, mrtt); + + tc->rto = clib_min (tc->srtt + (tc->rttvar << 2), TCP_RTO_MAX); + + return 1; +} + +/** + * Dequeue bytes that have been acked and while at it update RTT estimates. + */ +static void +tcp_dequeue_acked (tcp_connection_t * tc, u32 ack) +{ + /* Dequeue the newly ACKed bytes */ + stream_session_dequeue_drop (&tc->connection, tc->bytes_acked); + + /* Update rtt and rto */ + if (tcp_update_rtt (tc, ack)) + { + /* Good ACK received and valid RTT, make sure retransmit backoff is 0 */ + tc->rto_boff = 0; + } +} + +/** Check if dupack as per RFC5681 Sec. 2 */ +always_inline u8 +tcp_ack_is_dupack (tcp_connection_t * tc, vlib_buffer_t * b, u32 new_snd_wnd) +{ + return ((vnet_buffer (b)->tcp.ack_number == tc->snd_una) + && seq_gt (tc->snd_una_max, tc->snd_una) + && (vnet_buffer (b)->tcp.seq_end == vnet_buffer (b)->tcp.seq_number) + && (new_snd_wnd == tc->snd_wnd)); +} + +void +scoreboard_remove_hole (sack_scoreboard_t * sb, sack_scoreboard_hole_t * hole) +{ + sack_scoreboard_hole_t *next, *prev; + + if (hole->next != TCP_INVALID_SACK_HOLE_INDEX) + { + next = pool_elt_at_index (sb->holes, hole->next); + next->prev = hole->prev; + } + + if (hole->prev != TCP_INVALID_SACK_HOLE_INDEX) + { + prev = pool_elt_at_index (sb->holes, hole->prev); + prev->next = hole->next; + } + else + { + sb->head = hole->next; + } + + pool_put (sb->holes, hole); +} + +sack_scoreboard_hole_t * +scoreboard_insert_hole (sack_scoreboard_t * sb, sack_scoreboard_hole_t * prev, + u32 start, u32 end) +{ + sack_scoreboard_hole_t *hole, *next; + u32 hole_index; + + pool_get (sb->holes, hole); + memset (hole, 0, sizeof (*hole)); + + hole->start = start; + hole->end = end; + hole_index = hole - sb->holes; + + if (prev) + { + hole->prev = prev - sb->holes; + hole->next = prev->next; + + if ((next = scoreboard_next_hole (sb, hole))) + next->prev = hole_index; + + prev->next = hole_index; + } + else + { + sb->head = hole_index; + hole->prev = TCP_INVALID_SACK_HOLE_INDEX; + hole->next = TCP_INVALID_SACK_HOLE_INDEX; + } + + return hole; +} + +static void +tcp_rcv_sacks (tcp_connection_t * tc, u32 ack) +{ + sack_scoreboard_t *sb = &tc->sack_sb; + sack_block_t *blk, tmp; + sack_scoreboard_hole_t *hole, *next_hole; + u32 blk_index = 0; + int i, j; + + if (!tcp_opts_sack (tc) && sb->head == TCP_INVALID_SACK_HOLE_INDEX) + return; + + /* Remove invalid blocks */ + vec_foreach (blk, tc->opt.sacks) + { + if (seq_lt (blk->start, blk->end) + && seq_gt (blk->start, tc->snd_una) + && seq_gt (blk->start, ack) && seq_lt (blk->end, tc->snd_nxt)) + continue; + + vec_del1 (tc->opt.sacks, blk - tc->opt.sacks); + } + + /* Add block for cumulative ack */ + if (seq_gt (ack, tc->snd_una)) + { + tmp.start = tc->snd_una; + tmp.end = ack; + vec_add1 (tc->opt.sacks, tmp); + } + + if (vec_len (tc->opt.sacks) == 0) + return; + + /* Make sure blocks are ordered */ + for (i = 0; i < vec_len (tc->opt.sacks); i++) + for (j = i; j < vec_len 
(tc->opt.sacks); j++) + if (seq_lt (tc->opt.sacks[j].start, tc->opt.sacks[i].start)) + { + tmp = tc->opt.sacks[i]; + tc->opt.sacks[i] = tc->opt.sacks[j]; + tc->opt.sacks[j] = tmp; + } + + /* If no holes, insert the first that covers all outstanding bytes */ + if (sb->head == TCP_INVALID_SACK_HOLE_INDEX) + { + scoreboard_insert_hole (sb, 0, tc->snd_una, tc->snd_una_max); + } + + /* Walk the holes with the SACK blocks */ + hole = pool_elt_at_index (sb->holes, sb->head); + while (hole && blk_index < vec_len (tc->opt.sacks)) + { + blk = &tc->opt.sacks[blk_index]; + + if (seq_leq (blk->start, hole->start)) + { + /* Block covers hole. Remove hole */ + if (seq_geq (blk->end, hole->end)) + { + next_hole = scoreboard_next_hole (sb, hole); + + /* Byte accounting */ + if (seq_lt (hole->end, ack)) + { + /* Bytes lost because snd wnd left edge advances */ + if (seq_lt (next_hole->start, ack)) + sb->sacked_bytes -= next_hole->start - hole->end; + else + sb->sacked_bytes -= ack - hole->end; + } + else + { + sb->sacked_bytes += scoreboard_hole_bytes (hole); + } + + scoreboard_remove_hole (sb, hole); + hole = next_hole; + } + /* Partial overlap */ + else + { + sb->sacked_bytes += blk->end - hole->start; + hole->start = blk->end; + blk_index++; + } + } + else + { + /* Hole must be split */ + if (seq_leq (blk->end, hole->end)) + { + sb->sacked_bytes += blk->end - blk->start; + scoreboard_insert_hole (sb, hole, blk->end, hole->end); + hole->end = blk->start - 1; + blk_index++; + } + else + { + sb->sacked_bytes += hole->end - blk->start + 1; + hole->end = blk->start - 1; + hole = scoreboard_next_hole (sb, hole); + } + } + } +} + +/** Update snd_wnd + * + * If (SND.WL1 < SEG.SEQ or (SND.WL1 = SEG.SEQ and SND.WL2 =< SEG.ACK)), set + * SND.WND <- SEG.WND, set SND.WL1 <- SEG.SEQ, and set SND.WL2 <- SEG.ACK */ +static void +tcp_update_snd_wnd (tcp_connection_t * tc, u32 seq, u32 ack, u32 snd_wnd) +{ + if (tc->snd_wl1 < seq || (tc->snd_wl1 == seq && tc->snd_wl2 <= ack)) + { + tc->snd_wnd = snd_wnd; + tc->snd_wl1 = seq; + tc->snd_wl2 = ack; + } +} + +static void +tcp_cc_congestion (tcp_connection_t * tc) +{ + tc->cc_algo->congestion (tc); +} + +static void +tcp_cc_recover (tcp_connection_t * tc) +{ + if (tcp_in_fastrecovery (tc)) + { + tc->cc_algo->recovered (tc); + tcp_recovery_off (tc); + } + else if (tcp_in_recovery (tc)) + { + tcp_recovery_off (tc); + tc->cwnd = tcp_loss_wnd (tc); + } +} + +static void +tcp_cc_rcv_ack (tcp_connection_t * tc) +{ + u8 partial_ack; + + if (tcp_in_recovery (tc)) + { + partial_ack = seq_lt (tc->snd_una, tc->snd_una_max); + if (!partial_ack) + { + /* Clear retransmitted bytes. */ + tc->rtx_bytes = 0; + tcp_cc_recover (tc); + } + else + { + /* Clear retransmitted bytes. XXX should we clear all? 
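 *
 * Editorial note on the two branches below, a hedged paraphrase of
 * RFC6582 (NewReno); the window arithmetic itself lives in the
 * congestion-control callback (see newreno_rcv_cong_ack later in this
 * patch):
 *
 *   partial ACK:  stay in recovery, deflate cwnd by the newly acked
 *                 bytes via rcv_cong_ack, retransmit the first
 *                 unacknowledged segment
 *   full ACK:     clear the retransmitted-bytes count and exit
 *                 recovery (tcp_cc_recover)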
*/ + tc->rtx_bytes = 0; + tc->cc_algo->rcv_cong_ack (tc, TCP_CC_PARTIALACK); + + /* Retransmit first unacked segment */ + tcp_retransmit_first_unacked (tc); + } + } + else + { + tc->cc_algo->rcv_ack (tc); + } + + tc->rcv_dupacks = 0; + tc->tsecr_last_ack = tc->opt.tsecr; +} + +static void +tcp_cc_rcv_dupack (tcp_connection_t * tc, u32 ack) +{ + ASSERT (tc->snd_una == ack); + + tc->rcv_dupacks++; + if (tc->rcv_dupacks == TCP_DUPACK_THRESHOLD) + { + /* RFC6582 NewReno heuristic to avoid multiple fast retransmits */ + if (tc->opt.tsecr != tc->tsecr_last_ack) + { + tc->rcv_dupacks = 0; + return; + } + + tcp_fastrecovery_on (tc); + + /* Handle congestion and dupack */ + tcp_cc_congestion (tc); + tc->cc_algo->rcv_cong_ack (tc, TCP_CC_DUPACK); + + tcp_fast_retransmit (tc); + + /* Post retransmit update cwnd to ssthresh and account for the + * three segments that have left the network and should've been + * buffered at the receiver */ + tc->cwnd = tc->ssthresh + TCP_DUPACK_THRESHOLD * tc->snd_mss; + } + else if (tc->rcv_dupacks > TCP_DUPACK_THRESHOLD) + { + ASSERT (tcp_in_fastrecovery (tc)); + + tc->cc_algo->rcv_cong_ack (tc, TCP_CC_DUPACK); + } +} + +void +tcp_cc_init (tcp_connection_t * tc) +{ + tc->cc_algo = tcp_cc_algo_get (TCP_CC_NEWRENO); + tc->cc_algo->init (tc); +} + +static int +tcp_rcv_ack (tcp_connection_t * tc, vlib_buffer_t * b, + tcp_header_t * th, u32 * next, u32 * error) +{ + u32 new_snd_wnd; + + /* If the ACK acks something not yet sent (SEG.ACK > SND.NXT) then send an + * ACK, drop the segment, and return */ + if (seq_gt (vnet_buffer (b)->tcp.ack_number, tc->snd_nxt)) + { + tcp_make_ack (tc, b); + *next = tcp_next_output (tc->c_is_ip4); + *error = TCP_ERROR_ACK_INVALID; + return -1; + } + + /* If old ACK, discard */ + if (seq_lt (vnet_buffer (b)->tcp.ack_number, tc->snd_una)) + { + *error = TCP_ERROR_ACK_OLD; + return -1; + } + + if (tcp_opts_sack_permitted (&tc->opt)) + tcp_rcv_sacks (tc, vnet_buffer (b)->tcp.ack_number); + + new_snd_wnd = clib_net_to_host_u32 (th->window) << tc->snd_wscale; + + if (tcp_ack_is_dupack (tc, b, new_snd_wnd)) + { + tcp_cc_rcv_dupack (tc, vnet_buffer (b)->tcp.ack_number); + *error = TCP_ERROR_ACK_DUP; + return -1; + } + + /* Valid ACK */ + tc->bytes_acked = vnet_buffer (b)->tcp.ack_number - tc->snd_una; + tc->snd_una = vnet_buffer (b)->tcp.ack_number; + + /* Dequeue ACKed packet and update RTT */ + tcp_dequeue_acked (tc, vnet_buffer (b)->tcp.ack_number); + + tcp_update_snd_wnd (tc, vnet_buffer (b)->tcp.seq_number, + vnet_buffer (b)->tcp.ack_number, new_snd_wnd); + + /* Updates congestion control (slow start/congestion avoidance) */ + tcp_cc_rcv_ack (tc); + + /* If everything has been acked, stop retransmit timer + * otherwise update */ + if (tc->snd_una == tc->snd_una_max) + tcp_timer_reset (tc, TCP_TIMER_RETRANSMIT); + else + tcp_timer_update (tc, TCP_TIMER_RETRANSMIT, tc->rto); + + return 0; +} + +/** + * Build SACK list as per RFC2018. + * + * Makes sure the first block contains the segment that generated the current + * ACK and the following ones are the ones most recently reported in SACK + * blocks. + * + * @param tc TCP connection for which the SACK list is updated + * @param start Start sequence number of the newest SACK block + * @param end End sequence of the newest SACK block + */ +static void +tcp_update_sack_list (tcp_connection_t * tc, u32 start, u32 end) +{ + sack_block_t *new_list = 0, block; + u32 n_elts; + int i; + u8 new_head = 0; + + /* If the first segment is ooo add it to the list. 
Last write might've moved + * rcv_nxt over the first segment. */ + if (seq_lt (tc->rcv_nxt, start)) + { + block.start = start; + block.end = end; + vec_add1 (new_list, block); + new_head = 1; + } + + /* Find the blocks still worth keeping. */ + for (i = 0; i < vec_len (tc->snd_sacks); i++) + { + /* Discard if: + * 1) rcv_nxt advanced beyond current block OR + * 2) Segment overlapped by the first segment, i.e., it has been merged + * into it.*/ + if (seq_leq (tc->snd_sacks[i].start, tc->rcv_nxt) + || seq_leq (tc->snd_sacks[i].start, end)) + continue; + + /* Save subsequent segments to new SACK list. */ + n_elts = clib_min (vec_len (tc->snd_sacks) - i, + TCP_MAX_SACK_BLOCKS - new_head); + vec_insert_elts (new_list, &tc->snd_sacks[i], n_elts, new_head); + break; + } + + /* Replace old vector with new one */ + vec_free (tc->snd_sacks); + tc->snd_sacks = new_list; +} + +/** Enqueue data for delivery to application */ +always_inline u32 +tcp_session_enqueue_data (tcp_connection_t * tc, vlib_buffer_t * b, + u16 data_len) +{ + int written; + + /* Pure ACK. Update rcv_nxt and be done. */ + if (PREDICT_FALSE (data_len == 0)) + { + tc->rcv_nxt = vnet_buffer (b)->tcp.seq_end; + return TCP_ERROR_PURE_ACK; + } + + written = stream_session_enqueue_data (&tc->connection, + vlib_buffer_get_current (b), + data_len, 1 /* queue event */ ); + + /* Update rcv_nxt */ + if (PREDICT_TRUE (written == data_len)) + { + tc->rcv_nxt = vnet_buffer (b)->tcp.seq_end; + } + /* If more data written than expected, account for out-of-order bytes. */ + else if (written > data_len) + { + tc->rcv_nxt = vnet_buffer (b)->tcp.seq_end + written - data_len; + + /* Send ACK confirming the update */ + tc->flags |= TCP_CONN_SNDACK; + + /* Update SACK list if need be */ + if (tcp_opts_sack_permitted (&tc->opt)) + { + /* Remove SACK blocks that have been delivered */ + tcp_update_sack_list (tc, tc->rcv_nxt, tc->rcv_nxt); + } + } + else + { + ASSERT (0); + return TCP_ERROR_FIFO_FULL; + } + + return TCP_ERROR_ENQUEUED; +} + +/** Enqueue out-of-order data */ +always_inline u32 +tcp_session_enqueue_ooo (tcp_connection_t * tc, vlib_buffer_t * b, + u16 data_len) +{ + stream_session_t *s0; + u32 offset, seq; + + s0 = stream_session_get (tc->c_s_index, tc->c_thread_index); + seq = vnet_buffer (b)->tcp.seq_number; + offset = seq - tc->rcv_nxt; + + if (svm_fifo_enqueue_with_offset (s0->server_rx_fifo, s0->pid, offset, + data_len, vlib_buffer_get_current (b))) + return TCP_ERROR_FIFO_FULL; + + /* Update SACK list if in use */ + if (tcp_opts_sack_permitted (&tc->opt)) + { + ooo_segment_t *newest; + u32 start, end; + + /* Get the newest segment from the fifo */ + newest = svm_fifo_newest_ooo_segment (s0->server_rx_fifo); + start = tc->rcv_nxt + ooo_segment_offset (s0->server_rx_fifo, newest); + end = tc->rcv_nxt + ooo_segment_end_offset (s0->server_rx_fifo, newest); + + tcp_update_sack_list (tc, start, end); + } + + return TCP_ERROR_ENQUEUED; +} + +/** + * Check if ACK could be delayed. DELACK timer is set only after frame is + * processed so this can return true for a full bursts of packets. + */ +always_inline int +tcp_can_delack (tcp_connection_t * tc) +{ + /* If there's no DELACK timer set and the last window sent wasn't 0 we + * can safely delay. 
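 *
 * Editorial restatement of the predicate below (no new behavior): the ACK
 * may be delayed only when
 *
 *   !delack_timer_active && !sent_zero_rcv_wnd && !ack_already_pending
 *
 * i.e. the DELACK timer is not already running and neither
 * TCP_CONN_SENT_RCV_WND0 nor TCP_CONN_SNDACK is set on the connection.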
*/ + if (!tcp_timer_is_active (tc, TCP_TIMER_DELACK) + && (tc->flags & TCP_CONN_SENT_RCV_WND0) == 0 + && (tc->flags & TCP_CONN_SNDACK) == 0) + return 1; + + return 0; +} + +static int +tcp_segment_rcv (tcp_main_t * tm, tcp_connection_t * tc, vlib_buffer_t * b, + u16 n_data_bytes, u32 * next0) +{ + u32 error = 0; + + /* Handle out-of-order data */ + if (PREDICT_FALSE (vnet_buffer (b)->tcp.seq_number != tc->rcv_nxt)) + { + error = tcp_session_enqueue_ooo (tc, b, n_data_bytes); + + /* Don't send more than 3 dupacks per burst + * XXX decide if this is good */ + if (tc->snt_dupacks < 3) + { + /* RFC2581: Send DUPACK for fast retransmit */ + tcp_make_ack (tc, b); + *next0 = tcp_next_output (tc->c_is_ip4); + + /* Mark as DUPACK. We may filter these in output if + * the burst fills the holes. */ + vnet_buffer (b)->tcp.flags = TCP_BUF_FLAG_DUPACK; + + tc->snt_dupacks++; + } + + goto done; + } + + /* In order data, enqueue. Fifo figures out by itself if any out-of-order + * segments can be enqueued after fifo tail offset changes. */ + error = tcp_session_enqueue_data (tc, b, n_data_bytes); + + /* Check if ACK can be delayed */ + if (tcp_can_delack (tc)) + { + /* Nothing to do for pure ACKs */ + if (n_data_bytes == 0) + goto done; + + /* If connection has not been previously marked for delay ack + * add it to the list and flag it */ + if (!tc->flags & TCP_CONN_DELACK) + { + vec_add1 (tm->delack_connections[tc->c_thread_index], + tc->c_c_index); + tc->flags |= TCP_CONN_DELACK; + } + } + else + { + /* Check if a packet has already been enqueued to output for burst. + * If yes, then drop this one, otherwise, let it pass through to + * output */ + if ((tc->flags & TCP_CONN_BURSTACK) == 0) + { + *next0 = tcp_next_output (tc->c_is_ip4); + tcp_make_ack (tc, b); + error = TCP_ERROR_ENQUEUED; + + /* TODO: maybe add counter to ensure N acks will be sent/burst */ + tc->flags |= TCP_CONN_BURSTACK; + } + } + +done: + return error; +} + +void +delack_timers_init (tcp_main_t * tm, u32 thread_index) +{ + tcp_connection_t *tc; + u32 i, *conns; + tw_timer_wheel_16t_2w_512sl_t *tw; + + tw = &tm->timer_wheels[thread_index]; + conns = tm->delack_connections[thread_index]; + for (i = 0; i < vec_len (conns); i++) + { + tc = pool_elt_at_index (tm->connections[thread_index], conns[i]); + ASSERT (0 != tc); + + tc->timers[TCP_TIMER_DELACK] + = tw_timer_start_16t_2w_512sl (tw, conns[i], + TCP_TIMER_DELACK, TCP_DELACK_TIME); + } + vec_reset_length (tm->delack_connections[thread_index]); +} + +always_inline uword +tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_ip4) +{ + u32 n_left_from, next_index, *from, *to_next; + u32 my_thread_index = vm->cpu_index, errors = 0; + tcp_main_t *tm = vnet_get_tcp_main (); + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + tcp_header_t *th0 = 0; + tcp_connection_t *tc0; + ip4_header_t *ip40; + ip6_header_t *ip60; + u32 n_advance_bytes0, n_data_bytes0; + u32 next0 = TCP_ESTABLISHED_NEXT_DROP, error0 = TCP_ERROR_ENQUEUED; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + tc0 = tcp_connection_get (vnet_buffer (b0)->tcp.connection_index, + my_thread_index); 
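
	  /* Editorial worked example for the length arithmetic below
	   * (illustrative numbers only): an IPv4 packet with
	   * ip->length = 60, a 20-byte IP header and a 20-byte TCP header
	   * gives
	   *
	   *   n_advance_bytes0 = 20 + 20 = 40   (headers to skip)
	   *   n_data_bytes0    = 60 - 40 = 20   (payload handed to the fifo)
	   */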
+ + /* Checksum computed by ipx_local no need to compute again */ + + if (is_ip4) + { + ip40 = vlib_buffer_get_current (b0); + th0 = ip4_next_header (ip40); + n_advance_bytes0 = (ip4_header_bytes (ip40) + + tcp_header_bytes (th0)); + n_data_bytes0 = clib_net_to_host_u16 (ip40->length) + - n_advance_bytes0; + } + else + { + ip60 = vlib_buffer_get_current (b0); + th0 = ip6_next_header (ip60); + n_advance_bytes0 = tcp_header_bytes (th0); + n_data_bytes0 = clib_net_to_host_u16 (ip60->payload_length) + - n_advance_bytes0; + n_advance_bytes0 += sizeof (ip60[0]); + } + + /* SYNs, FINs and data consume sequence numbers */ + vnet_buffer (b0)->tcp.seq_end = vnet_buffer (b0)->tcp.seq_number + + tcp_is_syn (th0) + tcp_is_fin (th0) + n_data_bytes0; + + /* TODO header prediction fast path */ + + /* 1-4: check SEQ, RST, SYN */ + if (PREDICT_FALSE (tcp_segment_validate (vm, tc0, b0, th0, &next0))) + { + error0 = TCP_ERROR_SEGMENT_INVALID; + goto drop; + } + + /* 5: check the ACK field */ + if (tcp_rcv_ack (tc0, b0, th0, &next0, &error0)) + { + goto drop; + } + + /* 6: check the URG bit TODO */ + + /* 7: process the segment text */ + vlib_buffer_advance (b0, n_advance_bytes0); + error0 = tcp_segment_rcv (tm, tc0, b0, n_data_bytes0, &next0); + + /* 8: check the FIN bit */ + if (tcp_fin (th0)) + { + /* Send ACK and enter CLOSE-WAIT */ + tcp_make_ack (tc0, b0); + tcp_connection_force_ack (tc0, b0); + next0 = tcp_next_output (tc0->c_is_ip4); + tc0->state = TCP_STATE_CLOSE_WAIT; + stream_session_disconnect_notify (&tc0->connection); + } + + drop: + b0->error = node->errors[error0]; + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + errors = session_manager_flush_enqueue_events (my_thread_index); + if (errors) + { + if (is_ip4) + vlib_node_increment_counter (vm, tcp4_established_node.index, + TCP_ERROR_EVENT_FIFO_FULL, errors); + else + vlib_node_increment_counter (vm, tcp6_established_node.index, + TCP_ERROR_EVENT_FIFO_FULL, errors); + } + + delack_timers_init (tm, my_thread_index); + + return from_frame->n_vectors; +} + +static uword +tcp4_established (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_established_inline (vm, node, from_frame, 1 /* is_ip4 */ ); +} + +static uword +tcp6_established (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_established_inline (vm, node, from_frame, 0 /* is_ip4 */ ); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp4_established_node) = +{ + .function = tcp4_established, + .name = "tcp4-established", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR,.error_strings = tcp_error_strings, + .n_next_nodes = TCP_ESTABLISHED_N_NEXT, + .next_nodes = + { +#define _(s,n) [TCP_ESTABLISHED_NEXT_##s] = n, + foreach_tcp_state_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (tcp4_established_node, tcp4_established); + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp6_established_node) = +{ + .function = tcp6_established, + .name = "tcp6-established", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_ESTABLISHED_N_NEXT, + .next_nodes = + { +#define _(s,n) [TCP_ESTABLISHED_NEXT_##s] = n, + foreach_tcp_state_next +#undef _ + }, +}; +/* *INDENT-ON* */ + + +VLIB_NODE_FUNCTION_MULTIARCH (tcp6_established_node, tcp6_established); + +vlib_node_registration_t tcp4_syn_sent_node; +vlib_node_registration_t tcp6_syn_sent_node; + +always_inline uword +tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_ip4) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + u32 n_left_from, next_index, *from, *to_next; + u32 my_thread_index = vm->cpu_index, errors = 0; + u8 sst = is_ip4 ? SESSION_TYPE_IP4_TCP : SESSION_TYPE_IP6_TCP; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0, ack0, seq0; + vlib_buffer_t *b0; + tcp_header_t *tcp0 = 0; + tcp_connection_t *tc0; + ip4_header_t *ip40; + ip6_header_t *ip60; + u32 n_advance_bytes0, n_data_bytes0; + tcp_connection_t *new_tc0; + u32 next0 = TCP_SYN_SENT_NEXT_DROP, error0 = TCP_ERROR_ENQUEUED; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + tc0 = + tcp_half_open_connection_get (vnet_buffer (b0)-> + tcp.connection_index); + + ack0 = vnet_buffer (b0)->tcp.ack_number; + seq0 = vnet_buffer (b0)->tcp.seq_number; + + /* Checksum computed by ipx_local no need to compute again */ + + if (is_ip4) + { + ip40 = vlib_buffer_get_current (b0); + tcp0 = ip4_next_header (ip40); + n_advance_bytes0 = (ip4_header_bytes (ip40) + + tcp_header_bytes (tcp0)); + n_data_bytes0 = clib_net_to_host_u16 (ip40->length) + - n_advance_bytes0; + } + else + { + ip60 = vlib_buffer_get_current (b0); + tcp0 = ip6_next_header (ip60); + n_advance_bytes0 = tcp_header_bytes (tcp0); + n_data_bytes0 = clib_net_to_host_u16 (ip60->payload_length) + - n_advance_bytes0; + n_advance_bytes0 += sizeof (ip60[0]); + } + + if (PREDICT_FALSE + (!tcp_ack (tcp0) && !tcp_rst (tcp0) && !tcp_syn (tcp0))) + goto drop; + + /* SYNs, FINs and data consume sequence numbers */ + vnet_buffer (b0)->tcp.seq_end = seq0 + tcp_is_syn (tcp0) + + tcp_is_fin (tcp0) + n_data_bytes0; + + /* + * 1. check the ACK bit + */ + + /* + * If the ACK bit is set + * If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send a reset (unless + * the RST bit is set, if so drop the segment and return) + * + * and discard the segment. Return. + * If SND.UNA =< SEG.ACK =< SND.NXT then the ACK is acceptable. + */ + if (tcp_ack (tcp0)) + { + if (ack0 <= tc0->iss || ack0 > tc0->snd_nxt) + { + if (!tcp_rst (tcp0)) + tcp_send_reset (b0, is_ip4); + + goto drop; + } + + /* Make sure ACK is valid */ + if (tc0->snd_una > ack0) + goto drop; + } + + /* + * 2. check the RST bit + */ + + if (tcp_rst (tcp0)) + { + /* If ACK is acceptable, signal client that peer is not + * willing to accept connection and drop connection*/ + if (tcp_ack (tcp0)) + { + stream_session_connect_notify (&tc0->connection, sst, + 1 /* fail */ ); + tcp_connection_cleanup (tc0); + } + goto drop; + } + + /* + * 3. check the security and precedence (skipped) + */ + + /* + * 4. check the SYN bit + */ + + /* No SYN flag. Drop. 
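 *
 * Worked example for the ACK-acceptability test in step 1 above
 * (illustrative numbers): after sending a SYN with ISS = 100 we have
 * snd_nxt = 101, so only SEG.ACK = 101 is acceptable; SEG.ACK = 100
 * (<= ISS) or 102 (> SND.NXT) triggers a reset unless the segment
 * itself carries RST.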
*/ + if (!tcp_syn (tcp0)) + goto drop; + + /* Stop connection establishment and retransmit timers */ + tcp_timer_reset (tc0, TCP_TIMER_ESTABLISH); + tcp_timer_reset (tc0, TCP_TIMER_RETRANSMIT_SYN); + + /* Valid SYN or SYN-ACK. Move connection from half-open pool to + * current thread pool. */ + pool_get (tm->connections[my_thread_index], new_tc0); + clib_memcpy (new_tc0, tc0, sizeof (*new_tc0)); + + new_tc0->c_thread_index = my_thread_index; + + /* Cleanup half-open connection XXX lock */ + pool_put (tm->half_open_connections, tc0); + + new_tc0->rcv_nxt = vnet_buffer (b0)->tcp.seq_end; + new_tc0->irs = seq0; + + /* Parse options */ + tcp_options_parse (tcp0, &new_tc0->opt); + tcp_connection_init_vars (new_tc0); + + if (tcp_opts_tstamp (&new_tc0->opt)) + { + new_tc0->tsval_recent = new_tc0->opt.tsval; + new_tc0->tsval_recent_age = tcp_time_now (); + } + + if (tcp_opts_wscale (&new_tc0->opt)) + new_tc0->snd_wscale = new_tc0->opt.wscale; + + new_tc0->snd_wnd = clib_net_to_host_u32 (tcp0->window) + << new_tc0->snd_wscale; + new_tc0->snd_wl1 = seq0; + new_tc0->snd_wl2 = ack0; + + /* SYN-ACK: See if we can switch to ESTABLISHED state */ + if (tcp_ack (tcp0)) + { + /* Our SYN is ACKed: we have iss < ack = snd_una */ + + /* TODO Dequeue acknowledged segments if we support Fast Open */ + new_tc0->snd_una = ack0; + new_tc0->state = TCP_STATE_ESTABLISHED; + + /* Notify app that we have connection */ + stream_session_connect_notify (&new_tc0->connection, sst, 0); + + /* Make sure after data segment processing ACK is sent */ + new_tc0->flags |= TCP_CONN_SNDACK; + } + /* SYN: Simultaneous open. Change state to SYN-RCVD and send SYN-ACK */ + else + { + new_tc0->state = TCP_STATE_SYN_RCVD; + + /* Notify app that we have connection XXX */ + stream_session_connect_notify (&new_tc0->connection, sst, 0); + + tcp_make_synack (new_tc0, b0); + next0 = tcp_next_output (is_ip4); + + goto drop; + } + + /* Read data, if any */ + if (n_data_bytes0) + { + error0 = + tcp_segment_rcv (tm, new_tc0, b0, n_data_bytes0, &next0); + if (error0 == TCP_ERROR_PURE_ACK) + error0 = TCP_ERROR_SYN_ACKS_RCVD; + } + else + { + tcp_make_ack (new_tc0, b0); + next0 = tcp_next_output (new_tc0->c_is_ip4); + } + + drop: + + b0->error = error0 ? node->errors[error0] : 0; + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + errors = session_manager_flush_enqueue_events (my_thread_index); + if (errors) + { + if (is_ip4) + vlib_node_increment_counter (vm, tcp4_established_node.index, + TCP_ERROR_EVENT_FIFO_FULL, errors); + else + vlib_node_increment_counter (vm, tcp6_established_node.index, + TCP_ERROR_EVENT_FIFO_FULL, errors); + } + + return from_frame->n_vectors; +} + +static uword +tcp4_syn_sent (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_syn_sent_inline (vm, node, from_frame, 1 /* is_ip4 */ ); +} + +static uword +tcp6_syn_sent_rcv (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_syn_sent_inline (vm, node, from_frame, 0 /* is_ip4 */ ); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp4_syn_sent_node) = +{ + .function = tcp4_syn_sent, + .name = "tcp4-syn-sent", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_SYN_SENT_N_NEXT, + .next_nodes = + { +#define _(s,n) [TCP_SYN_SENT_NEXT_##s] = n, + foreach_tcp_state_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (tcp4_syn_sent_node, tcp4_syn_sent); + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp6_syn_sent_node) = +{ + .function = tcp6_syn_sent_rcv, + .name = "tcp6-syn-sent", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_SYN_SENT_N_NEXT, + .next_nodes = + { +#define _(s,n) [TCP_SYN_SENT_NEXT_##s] = n, + foreach_tcp_state_next +#undef _ + } +,}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (tcp6_syn_sent_node, tcp6_syn_sent_rcv); +/** + * Handles reception for all states except LISTEN, SYN-SEND and ESTABLISHED + * as per RFC793 p. 64 + */ +always_inline uword +tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_ip4) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + u32 n_left_from, next_index, *from, *to_next; + u32 my_thread_index = vm->cpu_index, errors = 0; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + tcp_header_t *tcp0 = 0; + tcp_connection_t *tc0; + ip4_header_t *ip40; + ip6_header_t *ip60; + u32 n_advance_bytes0, n_data_bytes0; + u32 next0 = TCP_RCV_PROCESS_NEXT_DROP, error0 = TCP_ERROR_ENQUEUED; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + tc0 = tcp_connection_get (vnet_buffer (b0)->tcp.connection_index, + my_thread_index); + + /* Checksum computed by ipx_local no need to compute again */ + + if (is_ip4) + { + ip40 = vlib_buffer_get_current (b0); + tcp0 = ip4_next_header (ip40); + n_advance_bytes0 = (ip4_header_bytes (ip40) + + tcp_header_bytes (tcp0)); + n_data_bytes0 = clib_net_to_host_u16 (ip40->length) + - n_advance_bytes0; + } + else + { + ip60 = vlib_buffer_get_current (b0); + tcp0 = ip6_next_header (ip60); + n_advance_bytes0 = tcp_header_bytes (tcp0); + n_data_bytes0 = clib_net_to_host_u16 (ip60->payload_length) + - n_advance_bytes0; + n_advance_bytes0 += sizeof (ip60[0]); + } + + /* SYNs, FINs and data consume sequence numbers */ + vnet_buffer (b0)->tcp.seq_end = vnet_buffer (b0)->tcp.seq_number + + tcp_is_syn (tcp0) + tcp_is_fin (tcp0) + n_data_bytes0; + + /* + * Special treatment for CLOSED + */ + switch (tc0->state) + { + case TCP_STATE_CLOSED: + goto drop; + break; + } + + /* + * For all other states (except LISTEN) + */ + + /* 1-4: check SEQ, RST, SYN */ + if (PREDICT_FALSE + (tcp_segment_validate (vm, tc0, b0, tcp0, &next0))) + { + error0 = TCP_ERROR_SEGMENT_INVALID; + goto drop; + } + + /* 5: check the ACK field */ + switch (tc0->state) + { + case TCP_STATE_SYN_RCVD: + /* + * If the segment acknowledgment is not acceptable, form a + * reset segment, + * + * and send it. 
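 *
 * (Per RFC793 the reset for an unacceptable ACK in SYN-RECEIVED takes
 * the form <SEQ=SEG.ACK><CTL=RST>; tcp_send_reset below is assumed to
 * build it that way.)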
+ */ + if (!tcp_rcv_ack_is_acceptable (tc0, b0)) + { + tcp_send_reset (b0, is_ip4); + goto drop; + } + /* Switch state to ESTABLISHED */ + tc0->state = TCP_STATE_ESTABLISHED; + + /* Initialize session variables */ + tc0->snd_una = vnet_buffer (b0)->tcp.ack_number; + tc0->snd_wnd = clib_net_to_host_u32 (tcp0->window) + << tc0->opt.wscale; + tc0->snd_wl1 = vnet_buffer (b0)->tcp.seq_number; + tc0->snd_wl2 = vnet_buffer (b0)->tcp.ack_number; + + /* Shoulder tap the server */ + stream_session_accept_notify (&tc0->connection); + + tcp_timer_reset (tc0, TCP_TIMER_RETRANSMIT_SYN); + break; + case TCP_STATE_ESTABLISHED: + /* We can get packets in established state here because they + * were enqueued before state change */ + if (tcp_rcv_ack (tc0, b0, tcp0, &next0, &error0)) + goto drop; + + break; + case TCP_STATE_FIN_WAIT_1: + /* In addition to the processing for the ESTABLISHED state, if + * our FIN is now acknowledged then enter FIN-WAIT-2 and + * continue processing in that state. */ + if (tcp_rcv_ack (tc0, b0, tcp0, &next0, &error0)) + goto drop; + tc0->state = TCP_STATE_FIN_WAIT_2; + /* Stop all timers, 2MSL will be set lower */ + tcp_connection_timers_reset (tc0); + break; + case TCP_STATE_FIN_WAIT_2: + /* In addition to the processing for the ESTABLISHED state, if + * the retransmission queue is empty, the user's CLOSE can be + * acknowledged ("ok") but do not delete the TCB. */ + if (tcp_rcv_ack (tc0, b0, tcp0, &next0, &error0)) + goto drop; + /* check if rtx queue is empty and ack CLOSE TODO */ + break; + case TCP_STATE_CLOSE_WAIT: + /* Do the same processing as for the ESTABLISHED state. */ + if (tcp_rcv_ack (tc0, b0, tcp0, &next0, &error0)) + goto drop; + break; + case TCP_STATE_CLOSING: + /* In addition to the processing for the ESTABLISHED state, if + * the ACK acknowledges our FIN then enter the TIME-WAIT state, + * otherwise ignore the segment. */ + if (tcp_rcv_ack (tc0, b0, tcp0, &next0, &error0)) + goto drop; + + /* XXX test that send queue empty */ + tc0->state = TCP_STATE_TIME_WAIT; + goto drop; + + break; + case TCP_STATE_LAST_ACK: + /* The only thing that can arrive in this state is an + * acknowledgment of our FIN. If our FIN is now acknowledged, + * delete the TCB, enter the CLOSED state, and return. */ + + if (!tcp_rcv_ack_is_acceptable (tc0, b0)) + goto drop; + + tcp_connection_del (tc0); + goto drop; + + break; + case TCP_STATE_TIME_WAIT: + /* The only thing that can arrive in this state is a + * retransmission of the remote FIN. Acknowledge it, and restart + * the 2 MSL timeout. */ + + /* TODO */ + goto drop; + break; + default: + ASSERT (0); + } + + /* 6: check the URG bit TODO */ + + /* 7: process the segment text */ + switch (tc0->state) + { + case TCP_STATE_ESTABLISHED: + case TCP_STATE_FIN_WAIT_1: + case TCP_STATE_FIN_WAIT_2: + error0 = tcp_segment_rcv (tm, tc0, b0, n_data_bytes0, &next0); + break; + case TCP_STATE_CLOSE_WAIT: + case TCP_STATE_CLOSING: + case TCP_STATE_LAST_ACK: + case TCP_STATE_TIME_WAIT: + /* This should not occur, since a FIN has been received from the + * remote side. Ignore the segment text. 
*/ + break; + } + + /* 8: check the FIN bit */ + if (!tcp_fin (tcp0)) + goto drop; + + switch (tc0->state) + { + case TCP_STATE_ESTABLISHED: + case TCP_STATE_SYN_RCVD: + /* Send FIN-ACK notify app and enter CLOSE-WAIT */ + tcp_connection_timers_reset (tc0); + tcp_make_finack (tc0, b0); + next0 = tcp_next_output (tc0->c_is_ip4); + stream_session_disconnect_notify (&tc0->connection); + tc0->state = TCP_STATE_CLOSE_WAIT; + break; + case TCP_STATE_CLOSE_WAIT: + case TCP_STATE_CLOSING: + case TCP_STATE_LAST_ACK: + /* move along .. */ + break; + case TCP_STATE_FIN_WAIT_1: + tc0->state = TCP_STATE_TIME_WAIT; + tcp_connection_timers_reset (tc0); + tcp_timer_set (tc0, TCP_TIMER_2MSL, TCP_2MSL_TIME); + break; + case TCP_STATE_FIN_WAIT_2: + /* Got FIN, send ACK! */ + tc0->state = TCP_STATE_TIME_WAIT; + tcp_timer_set (tc0, TCP_TIMER_2MSL, TCP_2MSL_TIME); + tcp_make_ack (tc0, b0); + next0 = tcp_next_output (is_ip4); + break; + case TCP_STATE_TIME_WAIT: + /* Remain in the TIME-WAIT state. Restart the 2 MSL time-wait + * timeout. + */ + tcp_timer_update (tc0, TCP_TIMER_2MSL, TCP_2MSL_TIME); + break; + } + + b0->error = error0 ? node->errors[error0] : 0; + + drop: + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + errors = session_manager_flush_enqueue_events (my_thread_index); + if (errors) + { + if (is_ip4) + vlib_node_increment_counter (vm, tcp4_established_node.index, + TCP_ERROR_EVENT_FIFO_FULL, errors); + else + vlib_node_increment_counter (vm, tcp6_established_node.index, + TCP_ERROR_EVENT_FIFO_FULL, errors); + } + + return from_frame->n_vectors; +} + +static uword +tcp4_rcv_process (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_rcv_process_inline (vm, node, from_frame, 1 /* is_ip4 */ ); +} + +static uword +tcp6_rcv_process (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_rcv_process_inline (vm, node, from_frame, 0 /* is_ip4 */ ); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp4_rcv_process_node) = +{ + .function = tcp4_rcv_process, + .name = "tcp4-rcv-process", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_RCV_PROCESS_N_NEXT, + .next_nodes = + { +#define _(s,n) [TCP_RCV_PROCESS_NEXT_##s] = n, + foreach_tcp_state_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (tcp4_rcv_process_node, tcp4_rcv_process); + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp6_rcv_process_node) = +{ + .function = tcp6_rcv_process, + .name = "tcp6-rcv-process", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_RCV_PROCESS_N_NEXT, + .next_nodes = + { +#define _(s,n) [TCP_RCV_PROCESS_NEXT_##s] = n, + foreach_tcp_state_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (tcp6_rcv_process_node, tcp6_rcv_process); + +vlib_node_registration_t tcp4_listen_node; +vlib_node_registration_t tcp6_listen_node; + +/** + * LISTEN state processing as per RFC 793 p. 
65 + */ +always_inline uword +tcp46_listen_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_ip4) +{ + u32 n_left_from, next_index, *from, *to_next; + u32 my_thread_index = vm->cpu_index; + tcp_main_t *tm = vnet_get_tcp_main (); + u8 sst = is_ip4 ? SESSION_TYPE_IP4_TCP : SESSION_TYPE_IP6_TCP; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + tcp_header_t *th0 = 0; + tcp_connection_t *lc0; + ip4_header_t *ip40; + ip6_header_t *ip60; + tcp_connection_t *child0; + u32 error0 = TCP_ERROR_SYNS_RCVD, next0 = TCP_LISTEN_NEXT_DROP; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + lc0 = tcp_listener_get (vnet_buffer (b0)->tcp.connection_index); + + if (is_ip4) + { + ip40 = vlib_buffer_get_current (b0); + th0 = ip4_next_header (ip40); + } + else + { + ip60 = vlib_buffer_get_current (b0); + th0 = ip6_next_header (ip60); + } + + /* Create child session. For syn-flood protection use filter */ + + /* 1. first check for an RST */ + if (tcp_rst (th0)) + goto drop; + + /* 2. second check for an ACK */ + if (tcp_ack (th0)) + { + tcp_send_reset (b0, is_ip4); + goto drop; + } + + /* 3. check for a SYN (did that already) */ + + /* Create child session and send SYN-ACK */ + pool_get (tm->connections[my_thread_index], child0); + memset (child0, 0, sizeof (*child0)); + + child0->c_c_index = child0 - tm->connections[my_thread_index]; + child0->c_lcl_port = lc0->c_lcl_port; + child0->c_rmt_port = th0->src_port; + child0->c_is_ip4 = is_ip4; + child0->c_thread_index = my_thread_index; + + if (is_ip4) + { + child0->c_lcl_ip4.as_u32 = ip40->dst_address.as_u32; + child0->c_rmt_ip4.as_u32 = ip40->src_address.as_u32; + } + else + { + clib_memcpy (&child0->c_lcl_ip6, &ip60->dst_address, + sizeof (ip6_address_t)); + clib_memcpy (&child0->c_rmt_ip6, &ip60->src_address, + sizeof (ip6_address_t)); + } + + if (stream_session_accept (&child0->connection, lc0->c_s_index, sst, + 0 /* notify */ )) + { + error0 = TCP_ERROR_CREATE_SESSION_FAIL; + goto drop; + } + + tcp_options_parse (th0, &child0->opt); + tcp_connection_init_vars (child0); + + child0->irs = vnet_buffer (b0)->tcp.seq_number; + child0->rcv_nxt = vnet_buffer (b0)->tcp.seq_number + 1; + child0->state = TCP_STATE_SYN_RCVD; + + /* RFC1323: TSval timestamps sent on {SYN} and {SYN,ACK} + * segments are used to initialize PAWS. */ + if (tcp_opts_tstamp (&child0->opt)) + { + child0->tsval_recent = child0->opt.tsval; + child0->tsval_recent_age = tcp_time_now (); + } + + /* Reuse buffer to make syn-ack and send */ + tcp_make_synack (child0, b0); + next0 = tcp_next_output (is_ip4); + + drop: + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + + } + + b0->error = error0 ? 
node->errors[error0] : 0; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return from_frame->n_vectors; +} + +static uword +tcp4_listen (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_listen_inline (vm, node, from_frame, 1 /* is_ip4 */ ); +} + +static uword +tcp6_listen (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_listen_inline (vm, node, from_frame, 0 /* is_ip4 */ ); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp4_listen_node) = +{ + .function = tcp4_listen, + .name = "tcp4-listen", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_LISTEN_N_NEXT, + .next_nodes = + { +#define _(s,n) [TCP_LISTEN_NEXT_##s] = n, + foreach_tcp_state_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (tcp4_listen_node, tcp4_listen); + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp6_listen_node) = +{ + .function = tcp6_listen, + .name = "tcp6-listen", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_LISTEN_N_NEXT, + .next_nodes = + { +#define _(s,n) [TCP_LISTEN_NEXT_##s] = n, + foreach_tcp_state_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (tcp6_listen_node, tcp6_listen); + +vlib_node_registration_t tcp4_input_node; +vlib_node_registration_t tcp6_input_node; + +typedef enum _tcp_input_next +{ + TCP_INPUT_NEXT_DROP, + TCP_INPUT_NEXT_LISTEN, + TCP_INPUT_NEXT_RCV_PROCESS, + TCP_INPUT_NEXT_SYN_SENT, + TCP_INPUT_NEXT_ESTABLISHED, + TCP_INPUT_NEXT_RESET, + TCP_INPUT_N_NEXT +} tcp_input_next_t; + +#define foreach_tcp4_input_next \ + _ (DROP, "error-drop") \ + _ (LISTEN, "tcp4-listen") \ + _ (RCV_PROCESS, "tcp4-rcv-process") \ + _ (SYN_SENT, "tcp4-syn-sent") \ + _ (ESTABLISHED, "tcp4-established") \ + _ (RESET, "tcp4-reset") + +#define foreach_tcp6_input_next \ + _ (DROP, "error-drop") \ + _ (LISTEN, "tcp6-listen") \ + _ (RCV_PROCESS, "tcp6-rcv-process") \ + _ (SYN_SENT, "tcp6-syn-sent") \ + _ (ESTABLISHED, "tcp6-established") \ + _ (RESET, "tcp6-reset") + +typedef struct +{ + u16 src_port; + u16 dst_port; + u8 state; +} tcp_rx_trace_t; + +const char *tcp_fsm_states[] = { +#define _(sym, str) str, + foreach_tcp_fsm_state +#undef _ +}; + +u8 * +format_tcp_state (u8 * s, va_list * args) +{ + tcp_state_t *state = va_arg (*args, tcp_state_t *); + + if (state[0] < TCP_N_STATES) + s = format (s, "%s", tcp_fsm_states[state[0]]); + else + s = format (s, "UNKNOWN"); + + return s; +} + +u8 * +format_tcp_rx_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + tcp_rx_trace_t *t = va_arg (*args, tcp_rx_trace_t *); + + s = format (s, "TCP: src-port %d dst-port %U%s\n", + clib_net_to_host_u16 (t->src_port), + clib_net_to_host_u16 (t->dst_port), format_tcp_state, t->state); + + return s; +} + +#define filter_flags (TCP_FLAG_SYN|TCP_FLAG_ACK|TCP_FLAG_RST|TCP_FLAG_FIN) + +always_inline uword +tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_ip4) +{ + u32 n_left_from, next_index, *from, *to_next; + u32 my_thread_index = vm->cpu_index; + tcp_main_t *tm = vnet_get_tcp_main (); + session_manager_main_t 
*ssm = vnet_get_session_manager_main (); + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + tcp_header_t *tcp0 = 0; + tcp_connection_t *tc0; + ip4_header_t *ip40; + ip6_header_t *ip60; + u32 error0 = TCP_ERROR_NO_LISTENER, next0 = TCP_INPUT_NEXT_DROP; + u8 flags0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + if (is_ip4) + { + ip40 = vlib_buffer_get_current (b0); + tcp0 = ip4_next_header (ip40); + + /* lookup session */ + tc0 = + (tcp_connection_t *) stream_session_lookup_transport4 (ssm, + &ip40->dst_address, + &ip40->src_address, + tcp0->dst_port, + tcp0->src_port, + SESSION_TYPE_IP4_TCP, + my_thread_index); + } + else + { + ip60 = vlib_buffer_get_current (b0); + tcp0 = ip6_next_header (ip60); + tc0 = + (tcp_connection_t *) stream_session_lookup_transport6 (ssm, + &ip60->src_address, + &ip60->dst_address, + tcp0->src_port, + tcp0->dst_port, + SESSION_TYPE_IP6_TCP, + my_thread_index); + } + + /* Session exists */ + if (PREDICT_TRUE (0 != tc0)) + { + /* Save connection index */ + vnet_buffer (b0)->tcp.connection_index = tc0->c_c_index; + vnet_buffer (b0)->tcp.seq_number = + clib_net_to_host_u32 (tcp0->seq_number); + vnet_buffer (b0)->tcp.ack_number = + clib_net_to_host_u32 (tcp0->ack_number); + + flags0 = tcp0->flags & filter_flags; + next0 = tm->dispatch_table[tc0->state][flags0].next; + error0 = tm->dispatch_table[tc0->state][flags0].error; + + if (PREDICT_FALSE (error0 == TCP_ERROR_DISPATCH)) + { + /* Overload tcp flags to store state */ + vnet_buffer (b0)->tcp.flags = tc0->state; + } + } + else + { + /* Send reset */ + next0 = TCP_INPUT_NEXT_RESET; + error0 = TCP_ERROR_NO_LISTENER; + vnet_buffer (b0)->tcp.flags = 0; + } + + b0->error = error0 ? node->errors[error0] : 0; + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return from_frame->n_vectors; +} + +static uword +tcp4_input (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_input_inline (vm, node, from_frame, 1 /* is_ip4 */ ); +} + +static uword +tcp6_input (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_input_inline (vm, node, from_frame, 0 /* is_ip4 */ ); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp4_input_node) = +{ + .function = tcp4_input, + .name = "tcp4-input", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_INPUT_N_NEXT, + .next_nodes = + { +#define _(s,n) [TCP_INPUT_NEXT_##s] = n, + foreach_tcp4_input_next +#undef _ + }, + .format_buffer = format_tcp_header, + .format_trace = format_tcp_rx_trace, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (tcp4_input_node, tcp4_input); + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp6_input_node) = +{ + .function = tcp6_input, + .name = "tcp6-input", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_INPUT_N_NEXT, + .next_nodes = + { +#define _(s,n) [TCP_INPUT_NEXT_##s] = n, + foreach_tcp6_input_next +#undef _ + }, + .format_buffer = format_tcp_header, + .format_trace = format_tcp_rx_trace, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (tcp6_input_node, tcp6_input); +void +tcp_update_time (f64 now, u32 thread_index) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + tw_timer_expire_timers_16t_2w_512sl (&tm->timer_wheels[thread_index], now); +} + +static void +tcp_dispatch_table_init (tcp_main_t * tm) +{ + int i, j; + for (i = 0; i < ARRAY_LEN (tm->dispatch_table); i++) + for (j = 0; j < ARRAY_LEN (tm->dispatch_table[i]); j++) + { + tm->dispatch_table[i][j].next = TCP_INPUT_NEXT_DROP; + tm->dispatch_table[i][j].error = TCP_ERROR_DISPATCH; + } + +#define _(t,f,n,e) \ +do { \ + tm->dispatch_table[TCP_STATE_##t][f].next = (n); \ + tm->dispatch_table[TCP_STATE_##t][f].error = (e); \ +} while (0) + + /* SYNs for new connections -> tcp-listen. */ + _(LISTEN, TCP_FLAG_SYN, TCP_INPUT_NEXT_LISTEN, TCP_ERROR_NONE); + /* ACK for for a SYN-ACK -> tcp-rcv-process. */ + _(SYN_RCVD, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); + /* SYN-ACK for a SYN */ + _(SYN_SENT, TCP_FLAG_SYN | TCP_FLAG_ACK, TCP_INPUT_NEXT_SYN_SENT, + TCP_ERROR_NONE); + _(SYN_SENT, TCP_FLAG_ACK, TCP_INPUT_NEXT_SYN_SENT, TCP_ERROR_NONE); + _(SYN_SENT, TCP_FLAG_RST, TCP_INPUT_NEXT_SYN_SENT, TCP_ERROR_NONE); + _(SYN_SENT, TCP_FLAG_RST | TCP_FLAG_ACK, TCP_INPUT_NEXT_SYN_SENT, + TCP_ERROR_NONE); + /* ACK for for established connection -> tcp-established. */ + _(ESTABLISHED, TCP_FLAG_ACK, TCP_INPUT_NEXT_ESTABLISHED, TCP_ERROR_NONE); + /* FIN for for established connection -> tcp-established. */ + _(ESTABLISHED, TCP_FLAG_FIN, TCP_INPUT_NEXT_ESTABLISHED, TCP_ERROR_NONE); + _(ESTABLISHED, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_ESTABLISHED, + TCP_ERROR_NONE); + /* ACK or FIN-ACK to our FIN */ + _(FIN_WAIT_1, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); + _(FIN_WAIT_1, TCP_FLAG_ACK | TCP_FLAG_FIN, TCP_INPUT_NEXT_RCV_PROCESS, + TCP_ERROR_NONE); + /* FIN in reply to our FIN from the other side */ + _(FIN_WAIT_1, TCP_FLAG_FIN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); + /* FIN confirming that the peer (app) has closed */ + _(FIN_WAIT_2, TCP_FLAG_FIN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); + _(FIN_WAIT_2, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, + TCP_ERROR_NONE); + _(LAST_ACK, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); +#undef _ +} + +clib_error_t * +tcp_input_init (vlib_main_t * vm) +{ + clib_error_t *error = 0; + tcp_main_t *tm = vnet_get_tcp_main (); + + if ((error = vlib_call_init_function (vm, tcp_init))) + return error; + + /* Initialize dispatch table. */ + tcp_dispatch_table_init (tm); + + return error; +} + +VLIB_INIT_FUNCTION (tcp_input_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/tcp/tcp_newreno.c b/src/vnet/tcp/tcp_newreno.c new file mode 100644 index 00000000..856dffe4 --- /dev/null +++ b/src/vnet/tcp/tcp_newreno.c @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +void +newreno_congestion (tcp_connection_t * tc) +{ + tc->prev_ssthresh = tc->ssthresh; + tc->ssthresh = clib_max (tcp_flight_size (tc) / 2, 2 * tc->snd_mss); +} + +void +newreno_recovered (tcp_connection_t * tc) +{ + tc->cwnd = tc->ssthresh; +} + +void +newreno_rcv_ack (tcp_connection_t * tc) +{ + if (tcp_in_slowstart (tc)) + { + tc->cwnd += clib_min (tc->snd_mss, tc->bytes_acked); + } + else + { + /* Round up to 1 if needed */ + tc->cwnd += clib_max (tc->snd_mss * tc->snd_mss / tc->cwnd, 1); + } +} + +void +newreno_rcv_cong_ack (tcp_connection_t * tc, tcp_cc_ack_t ack_type) +{ + if (ack_type == TCP_CC_DUPACK) + { + tc->cwnd += tc->snd_mss; + } + else if (ack_type == TCP_CC_PARTIALACK) + { + tc->cwnd -= tc->bytes_acked; + if (tc->bytes_acked > tc->snd_mss) + tc->bytes_acked += tc->snd_mss; + } +} + +void +newreno_conn_init (tcp_connection_t * tc) +{ + tc->ssthresh = tc->snd_wnd; + tc->cwnd = tcp_initial_cwnd (tc); +} + +const static tcp_cc_algorithm_t tcp_newreno = { + .congestion = newreno_congestion, + .recovered = newreno_recovered, + .rcv_ack = newreno_rcv_ack, + .rcv_cong_ack = newreno_rcv_cong_ack, + .init = newreno_conn_init +}; + +clib_error_t * +newreno_init (vlib_main_t * vm) +{ + clib_error_t *error = 0; + + tcp_cc_algo_register (TCP_CC_NEWRENO, &tcp_newreno); + + return error; +} + +VLIB_INIT_FUNCTION (newreno_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c new file mode 100644 index 00000000..dbcf1f74 --- /dev/null +++ b/src/vnet/tcp/tcp_output.c @@ -0,0 +1,1412 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
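/*
 * Sketch of how an alternative congestion-control algorithm could hook
 * into the same vtable NewReno fills in above. TCP_CC_EXAMPLE and the
 * callback policies are hypothetical placeholders; only the
 * tcp_cc_algorithm_t fields and tcp_cc_algo_register() come from this
 * patch.
 */
static void
example_cc_congestion (tcp_connection_t * tc)
{
  tc->ssthresh = clib_max (tc->cwnd / 2, 2 * tc->snd_mss);
}

static void
example_cc_recovered (tcp_connection_t * tc)
{
  tc->cwnd = tc->ssthresh;
}

static void
example_cc_rcv_ack (tcp_connection_t * tc)
{
  tc->cwnd += tc->snd_mss;      /* placeholder growth policy */
}

static void
example_cc_rcv_cong_ack (tcp_connection_t * tc, tcp_cc_ack_t ack_type)
{
  /* placeholder: no special handling of dup/partial ACKs */
}

static void
example_cc_conn_init (tcp_connection_t * tc)
{
  tc->ssthresh = tc->snd_wnd;
  tc->cwnd = tcp_initial_cwnd (tc);
}

const static tcp_cc_algorithm_t tcp_example_cc = {
  .congestion = example_cc_congestion,
  .recovered = example_cc_recovered,
  .rcv_ack = example_cc_rcv_ack,
  .rcv_cong_ack = example_cc_rcv_cong_ack,
  .init = example_cc_conn_init,
};

/* registration would mirror newreno_init(), e.g.:
 *   tcp_cc_algo_register (TCP_CC_EXAMPLE, &tcp_example_cc);
 */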
+ */ + +#include +#include + +vlib_node_registration_t tcp4_output_node; +vlib_node_registration_t tcp6_output_node; + +typedef enum _tcp_output_nect +{ + TCP_OUTPUT_NEXT_DROP, + TCP_OUTPUT_NEXT_IP_LOOKUP, + TCP_OUTPUT_N_NEXT +} tcp_output_next_t; + +#define foreach_tcp4_output_next \ + _ (DROP, "error-drop") \ + _ (IP_LOOKUP, "ip4-lookup") + +#define foreach_tcp6_output_next \ + _ (DROP, "error-drop") \ + _ (IP_LOOKUP, "ip6-lookup") + +static char *tcp_error_strings[] = { +#define tcp_error(n,s) s, +#include +#undef tcp_error +}; + +typedef struct +{ + u16 src_port; + u16 dst_port; + u8 state; +} tcp_tx_trace_t; + +u16 dummy_mtu = 400; + +u8 * +format_tcp_tx_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + + s = format (s, "TBD\n"); + + return s; +} + +void +tcp_set_snd_mss (tcp_connection_t * tc) +{ + u16 snd_mss; + + /* TODO find our iface MTU */ + snd_mss = dummy_mtu; + + /* TODO cache mss and consider PMTU discovery */ + snd_mss = tc->opt.mss < snd_mss ? tc->opt.mss : snd_mss; + + tc->snd_mss = snd_mss; + + if (tc->snd_mss == 0) + { + clib_warning ("snd mss is 0"); + tc->snd_mss = dummy_mtu; + } +} + +static u8 +tcp_window_compute_scale (u32 available_space) +{ + u8 wnd_scale = 0; + while (wnd_scale < TCP_MAX_WND_SCALE + && (available_space >> wnd_scale) > TCP_WND_MAX) + wnd_scale++; + return wnd_scale; +} + +/** + * Compute initial window and scale factor. As per RFC1323, window field in + * SYN and SYN-ACK segments is never scaled. + */ +u32 +tcp_initial_window_to_advertise (tcp_connection_t * tc) +{ + u32 available_space; + + /* Initial wnd for SYN. Fifos are not allocated yet. + * Use some predefined value */ + if (tc->state != TCP_STATE_SYN_RCVD) + { + return TCP_DEFAULT_RX_FIFO_SIZE; + } + + available_space = stream_session_max_enqueue (&tc->connection); + tc->rcv_wscale = tcp_window_compute_scale (available_space); + tc->rcv_wnd = clib_min (available_space, TCP_WND_MAX << tc->rcv_wscale); + + return clib_min (tc->rcv_wnd, TCP_WND_MAX); +} + +/** + * Compute and return window to advertise, scaled as per RFC1323 + */ +u32 +tcp_window_to_advertise (tcp_connection_t * tc, tcp_state_t state) +{ + u32 available_space, wnd, scaled_space; + + if (state != TCP_STATE_ESTABLISHED) + return tcp_initial_window_to_advertise (tc); + + available_space = stream_session_max_enqueue (&tc->connection); + scaled_space = available_space >> tc->rcv_wscale; + + /* Need to update scale */ + if (PREDICT_FALSE ((scaled_space == 0 && available_space != 0)) + || (scaled_space >= TCP_WND_MAX)) + tc->rcv_wscale = tcp_window_compute_scale (available_space); + + wnd = clib_min (available_space, TCP_WND_MAX << tc->rcv_wscale); + tc->rcv_wnd = wnd; + + return wnd >> tc->rcv_wscale; +} + +/** + * Write TCP options to segment. 
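/*
 * Worked sketch of the two window helpers above: with roughly 1 MB of
 * available fifo space the scale factor computes to 5, so the 16-bit
 * window field on the wire carries 32768 (32768 << 5 == 1 MB). Purely
 * illustrative; actual values depend on the configured fifo sizes.
 */
static void
tcp_window_scale_example (void)
{
  u32 available_space = 1 << 20;        /* 1 MB of enqueue space */
  u8 wscale = tcp_window_compute_scale (available_space);       /* -> 5 */
  u32 wnd = clib_min (available_space, TCP_WND_MAX << wscale);  /* 1 MB */

  ASSERT (wscale == 5);
  ASSERT ((wnd >> wscale) == 32768);    /* value written to the header */
}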
+ */ +u32 +tcp_options_write (u8 * data, tcp_options_t * opts) +{ + u32 opts_len = 0; + u32 buf, seq_len = 4; + + if (tcp_opts_mss (opts)) + { + *data++ = TCP_OPTION_MSS; + *data++ = TCP_OPTION_LEN_MSS; + buf = clib_host_to_net_u16 (opts->mss); + clib_memcpy (data, &buf, sizeof (opts->mss)); + data += sizeof (opts->mss); + opts_len += TCP_OPTION_LEN_MSS; + } + + if (tcp_opts_wscale (opts)) + { + *data++ = TCP_OPTION_WINDOW_SCALE; + *data++ = TCP_OPTION_LEN_WINDOW_SCALE; + *data++ = opts->wscale; + opts_len += TCP_OPTION_LEN_WINDOW_SCALE; + } + + if (tcp_opts_sack_permitted (opts)) + { + *data++ = TCP_OPTION_SACK_PERMITTED; + *data++ = TCP_OPTION_LEN_SACK_PERMITTED; + opts_len += TCP_OPTION_LEN_SACK_PERMITTED; + } + + if (tcp_opts_tstamp (opts)) + { + *data++ = TCP_OPTION_TIMESTAMP; + *data++ = TCP_OPTION_LEN_TIMESTAMP; + buf = clib_host_to_net_u32 (opts->tsval); + clib_memcpy (data, &buf, sizeof (opts->tsval)); + data += sizeof (opts->tsval); + buf = clib_host_to_net_u32 (opts->tsecr); + clib_memcpy (data, &buf, sizeof (opts->tsecr)); + data += sizeof (opts->tsecr); + opts_len += TCP_OPTION_LEN_TIMESTAMP; + } + + if (tcp_opts_sack (opts)) + { + int i; + u32 n_sack_blocks = clib_min (vec_len (opts->sacks), + TCP_OPTS_MAX_SACK_BLOCKS); + + if (n_sack_blocks != 0) + { + *data++ = TCP_OPTION_SACK_BLOCK; + *data++ = 2 + n_sack_blocks * TCP_OPTION_LEN_SACK_BLOCK; + for (i = 0; i < n_sack_blocks; i++) + { + buf = clib_host_to_net_u32 (opts->sacks[i].start); + clib_memcpy (data, &buf, seq_len); + data += seq_len; + buf = clib_host_to_net_u32 (opts->sacks[i].end); + clib_memcpy (data, &buf, seq_len); + data += seq_len; + } + opts_len += 2 + n_sack_blocks * TCP_OPTION_LEN_SACK_BLOCK; + } + } + + /* Terminate TCP options */ + if (opts_len % 4) + { + *data++ = TCP_OPTION_EOL; + opts_len += TCP_OPTION_LEN_EOL; + } + + /* Pad with zeroes to a u32 boundary */ + while (opts_len % 4) + { + *data++ = TCP_OPTION_NOOP; + opts_len += TCP_OPTION_LEN_NOOP; + } + return opts_len; +} + +always_inline int +tcp_make_syn_options (tcp_options_t * opts, u32 initial_wnd) +{ + u8 len = 0; + + opts->flags |= TCP_OPTS_FLAG_MSS; + opts->mss = dummy_mtu; /*XXX discover that */ + len += TCP_OPTION_LEN_MSS; + + opts->flags |= TCP_OPTS_FLAG_WSCALE; + opts->wscale = tcp_window_compute_scale (initial_wnd); + len += TCP_OPTION_LEN_WINDOW_SCALE; + + opts->flags |= TCP_OPTS_FLAG_TSTAMP; + opts->tsval = tcp_time_now (); + opts->tsecr = 0; + len += TCP_OPTION_LEN_TIMESTAMP; + + opts->flags |= TCP_OPTS_FLAG_SACK_PERMITTED; + len += TCP_OPTION_LEN_SACK_PERMITTED; + + /* Align to needed boundary */ + len += (TCP_OPTS_ALIGN - len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN; + return len; +} + +always_inline int +tcp_make_synack_options (tcp_connection_t * tc, tcp_options_t * opts) +{ + u8 len = 0; + + opts->flags |= TCP_OPTS_FLAG_MSS; + opts->mss = dummy_mtu; /*XXX discover that */ + len += TCP_OPTION_LEN_MSS; + + if (tcp_opts_wscale (&tc->opt)) + { + opts->flags |= TCP_OPTS_FLAG_WSCALE; + opts->wscale = tc->rcv_wscale; + len += TCP_OPTION_LEN_WINDOW_SCALE; + } + + if (tcp_opts_tstamp (&tc->opt)) + { + opts->flags |= TCP_OPTS_FLAG_TSTAMP; + opts->tsval = tcp_time_now (); + opts->tsecr = tc->tsval_recent; + len += TCP_OPTION_LEN_TIMESTAMP; + } + + if (tcp_opts_sack_permitted (&tc->opt)) + { + opts->flags |= TCP_OPTS_FLAG_SACK_PERMITTED; + len += TCP_OPTION_LEN_SACK_PERMITTED; + } + + /* Align to needed boundary */ + len += (TCP_OPTS_ALIGN - len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN; + return len; +} + +always_inline int +tcp_make_established_options 
(tcp_connection_t * tc, tcp_options_t * opts) +{ + u8 len = 0; + + opts->flags = 0; + + if (tcp_opts_tstamp (&tc->opt)) + { + opts->flags |= TCP_OPTS_FLAG_TSTAMP; + opts->tsval = tcp_time_now (); + opts->tsecr = tc->tsval_recent; + len += TCP_OPTION_LEN_TIMESTAMP; + } + if (tcp_opts_sack_permitted (&tc->opt)) + { + if (vec_len (tc->snd_sacks)) + { + opts->flags |= TCP_OPTS_FLAG_SACK; + opts->sacks = tc->snd_sacks; + opts->n_sack_blocks = vec_len (tc->snd_sacks); + len += 2 + TCP_OPTION_LEN_SACK_BLOCK * opts->n_sack_blocks; + } + } + + /* Align to needed boundary */ + len += (TCP_OPTS_ALIGN - len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN; + return len; +} + +always_inline int +tcp_make_options (tcp_connection_t * tc, tcp_options_t * opts, + tcp_state_t state) +{ + switch (state) + { + case TCP_STATE_ESTABLISHED: + case TCP_STATE_FIN_WAIT_1: + return tcp_make_established_options (tc, opts); + case TCP_STATE_SYN_RCVD: + return tcp_make_synack_options (tc, opts); + case TCP_STATE_SYN_SENT: + return tcp_make_syn_options (opts, + tcp_initial_window_to_advertise (tc)); + default: + clib_warning ("Not handled!"); + return 0; + } +} + +#define tcp_get_free_buffer_index(tm, bidx) \ +do { \ + u32 *my_tx_buffers, n_free_buffers; \ + u32 cpu_index = tm->vlib_main->cpu_index; \ + my_tx_buffers = tm->tx_buffers[cpu_index]; \ + if (PREDICT_FALSE(vec_len (my_tx_buffers) == 0)) \ + { \ + n_free_buffers = 32; /* TODO config or macro */ \ + vec_validate (my_tx_buffers, n_free_buffers - 1); \ + _vec_len(my_tx_buffers) = vlib_buffer_alloc_from_free_list ( \ + tm->vlib_main, my_tx_buffers, n_free_buffers, \ + VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); \ + tm->tx_buffers[cpu_index] = my_tx_buffers; \ + } \ + /* buffer shortage */ \ + if (PREDICT_FALSE (vec_len (my_tx_buffers) == 0)) \ + return; \ + *bidx = my_tx_buffers[_vec_len (my_tx_buffers)-1]; \ + _vec_len (my_tx_buffers) -= 1; \ +} while (0) + +always_inline void +tcp_reuse_buffer (vlib_main_t * vm, vlib_buffer_t * b) +{ + vlib_buffer_t *it = b; + do + { + it->current_data = 0; + it->current_length = 0; + it->total_length_not_including_first_buffer = 0; + } + while ((it->flags & VLIB_BUFFER_NEXT_PRESENT) + && (it = vlib_get_buffer (vm, it->next_buffer))); + + /* Leave enough space for headers */ + vlib_buffer_make_headroom (b, MAX_HDRS_LEN); +} + +/** + * Prepare ACK + */ +void +tcp_make_ack_i (tcp_connection_t * tc, vlib_buffer_t * b, tcp_state_t state, + u8 flags) +{ + tcp_options_t _snd_opts, *snd_opts = &_snd_opts; + u8 tcp_opts_len, tcp_hdr_opts_len; + tcp_header_t *th; + u16 wnd; + + wnd = tcp_window_to_advertise (tc, state); + + /* Make and write options */ + tcp_opts_len = tcp_make_established_options (tc, snd_opts); + tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t); + + th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->snd_nxt, + tc->rcv_nxt, tcp_hdr_opts_len, flags, wnd); + + tcp_options_write ((u8 *) (th + 1), snd_opts); + + /* Mark as ACK */ + vnet_buffer (b)->tcp.connection_index = tc->c_c_index; +} + +/** + * Convert buffer to ACK + */ +void +tcp_make_ack (tcp_connection_t * tc, vlib_buffer_t * b) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + vlib_main_t *vm = tm->vlib_main; + + tcp_reuse_buffer (vm, b); + tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, TCP_FLAG_ACK); + vnet_buffer (b)->tcp.flags = TCP_BUF_FLAG_ACK; +} + +/** + * Convert buffer to FIN-ACK + */ +void +tcp_make_finack (tcp_connection_t * tc, vlib_buffer_t * b) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + vlib_main_t *vm = tm->vlib_main; + + tcp_reuse_buffer (vm, b); + 
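/*
 * Sketch of how the option builders above keep the header 4-byte
 * aligned: whatever combination of options is emitted, the returned
 * length is already padded to TCP_OPTS_ALIGN, so the data offset
 * derived from it stays valid. The function name is illustrative only.
 */
static void
tcp_established_header_len_example (tcp_connection_t * tc)
{
  tcp_options_t opts;
  u8 opts_len = tcp_make_established_options (tc, &opts);
  u8 tcp_hdr_opts_len = opts_len + sizeof (tcp_header_t);  /* e.g. 12 + 20 */

  ASSERT (opts_len % TCP_OPTS_ALIGN == 0);      /* padded by the builder */
  /* tcp_hdr_opts_len is what tcp_make_ack_i passes to vlib_buffer_push_tcp */
}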
tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, TCP_FLAG_ACK | TCP_FLAG_FIN); + + /* Reset flags, make sure ack is sent */ + tc->flags = TCP_CONN_SNDACK; + vnet_buffer (b)->tcp.flags &= ~TCP_BUF_FLAG_DUPACK; + + tc->snd_nxt += 1; +} + +/** + * Convert buffer to SYN-ACK + */ +void +tcp_make_synack (tcp_connection_t * tc, vlib_buffer_t * b) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + vlib_main_t *vm = tm->vlib_main; + tcp_options_t _snd_opts, *snd_opts = &_snd_opts; + u8 tcp_opts_len, tcp_hdr_opts_len; + tcp_header_t *th; + u16 initial_wnd; + u32 time_now; + + memset (snd_opts, 0, sizeof (*snd_opts)); + + tcp_reuse_buffer (vm, b); + + /* Set random initial sequence */ + time_now = tcp_time_now (); + + tc->iss = random_u32 (&time_now); + tc->snd_una = tc->iss; + tc->snd_nxt = tc->iss + 1; + tc->snd_una_max = tc->snd_nxt; + + initial_wnd = tcp_initial_window_to_advertise (tc); + + /* Make and write options */ + tcp_opts_len = tcp_make_synack_options (tc, snd_opts); + tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t); + + th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->iss, + tc->rcv_nxt, tcp_hdr_opts_len, + TCP_FLAG_SYN | TCP_FLAG_ACK, initial_wnd); + + tcp_options_write ((u8 *) (th + 1), snd_opts); + + vnet_buffer (b)->tcp.connection_index = tc->c_c_index; + vnet_buffer (b)->tcp.flags = TCP_BUF_FLAG_ACK; + + /* Init retransmit timer */ + tcp_retransmit_timer_set (tm, tc); +} + +always_inline void +tcp_enqueue_to_ip_lookup (vlib_main_t * vm, vlib_buffer_t * b, u32 bi, + u8 is_ip4) +{ + u32 *to_next, next_index; + vlib_frame_t *f; + + b->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; + b->error = 0; + + /* Default FIB for now */ + vnet_buffer (b)->sw_if_index[VLIB_TX] = 0; + + /* Send to IP lookup */ + next_index = is_ip4 ? ip4_lookup_node.index : ip6_lookup_node.index; + f = vlib_get_frame_to_node (vm, next_index); + + /* Enqueue the packet */ + to_next = vlib_frame_vector_args (f); + to_next[0] = bi; + f->n_vectors = 1; + vlib_put_frame_to_node (vm, next_index, f); +} + +int +tcp_make_reset_in_place (vlib_main_t * vm, vlib_buffer_t * b0, + tcp_state_t state, u32 my_thread_index, u8 is_ip4) +{ + u8 tcp_hdr_len = sizeof (tcp_header_t); + ip4_header_t *ih4; + ip6_header_t *ih6; + tcp_header_t *th0; + ip4_address_t src_ip40; + ip6_address_t src_ip60; + u16 src_port0; + u32 tmp; + + /* Find IP and TCP headers */ + if (is_ip4) + { + ih4 = vlib_buffer_get_current (b0); + th0 = ip4_next_header (ih4); + } + else + { + ih6 = vlib_buffer_get_current (b0); + th0 = ip6_next_header (ih6); + } + + /* Swap src and dst ip */ + if (is_ip4) + { + ASSERT ((ih4->ip_version_and_header_length & 0xF0) == 0x40); + src_ip40.as_u32 = ih4->src_address.as_u32; + ih4->src_address.as_u32 = ih4->dst_address.as_u32; + ih4->dst_address.as_u32 = src_ip40.as_u32; + + /* Chop the end of the pkt */ + b0->current_length += ip4_header_bytes (ih4) + tcp_hdr_len; + } + else + { + ASSERT ((ih6->ip_version_traffic_class_and_flow_label & 0xF0) == 0x60); + clib_memcpy (&src_ip60, &ih6->src_address, sizeof (ip6_address_t)); + clib_memcpy (&ih6->src_address, &ih6->dst_address, + sizeof (ip6_address_t)); + clib_memcpy (&ih6->dst_address, &src_ip60, sizeof (ip6_address_t)); + + /* Chop the end of the pkt */ + b0->current_length += sizeof (ip6_header_t) + tcp_hdr_len; + } + + /* Try to determine what/why we're actually resetting and swap + * src and dst ports */ + if (state == TCP_STATE_CLOSED) + { + if (!tcp_syn (th0)) + return -1; + + tmp = clib_net_to_host_u32 (th0->seq_number); + + /* Got a SYN for no listener. 
*/ + th0->flags = TCP_FLAG_RST | TCP_FLAG_ACK; + th0->ack_number = clib_host_to_net_u32 (tmp + 1); + th0->seq_number = 0; + + } + else if (state >= TCP_STATE_SYN_SENT) + { + th0->flags = TCP_FLAG_RST | TCP_FLAG_ACK; + th0->seq_number = th0->ack_number; + th0->ack_number = 0; + } + + src_port0 = th0->src_port; + th0->src_port = th0->dst_port; + th0->dst_port = src_port0; + th0->window = 0; + th0->data_offset_and_reserved = (tcp_hdr_len >> 2) << 4; + th0->urgent_pointer = 0; + + /* Compute checksum */ + if (is_ip4) + { + th0->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ih4); + } + else + { + int bogus = ~0; + th0->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b0, ih6, &bogus); + ASSERT (!bogus); + } + + return 0; +} + +/** + * Send reset without reusing existing buffer + */ +void +tcp_send_reset (vlib_buffer_t * pkt, u8 is_ip4) +{ + vlib_buffer_t *b; + u32 bi; + tcp_main_t *tm = vnet_get_tcp_main (); + vlib_main_t *vm = tm->vlib_main; + u8 tcp_hdr_len, flags = 0; + tcp_header_t *th, *pkt_th; + u32 seq, ack; + ip4_header_t *ih4, *pkt_ih4; + ip6_header_t *ih6, *pkt_ih6; + + tcp_get_free_buffer_index (tm, &bi); + b = vlib_get_buffer (vm, bi); + + /* Leave enough space for headers */ + vlib_buffer_make_headroom (b, MAX_HDRS_LEN); + + /* Make and write options */ + tcp_hdr_len = sizeof (tcp_header_t); + + if (is_ip4) + { + pkt_ih4 = vlib_buffer_get_current (pkt); + pkt_th = ip4_next_header (pkt_ih4); + } + else + { + pkt_ih6 = vlib_buffer_get_current (pkt); + pkt_th = ip6_next_header (pkt_ih6); + } + + if (tcp_ack (pkt_th)) + { + flags = TCP_FLAG_RST; + seq = pkt_th->ack_number; + ack = 0; + } + else + { + flags = TCP_FLAG_RST | TCP_FLAG_ACK; + seq = 0; + ack = clib_host_to_net_u32 (vnet_buffer (pkt)->tcp.seq_end); + } + + th = vlib_buffer_push_tcp_net_order (b, pkt_th->dst_port, pkt_th->src_port, + seq, ack, tcp_hdr_len, flags, 0); + + /* Swap src and dst ip */ + if (is_ip4) + { + ASSERT ((pkt_ih4->ip_version_and_header_length & 0xF0) == 0x40); + ih4 = vlib_buffer_push_ip4 (vm, b, &pkt_ih4->dst_address, + &pkt_ih4->src_address, IP_PROTOCOL_TCP); + th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih4); + } + else + { + int bogus = ~0; + pkt_ih6 = (ip6_header_t *) (pkt_th - 1); + ASSERT ((pkt_ih6->ip_version_traffic_class_and_flow_label & 0xF0) == + 0x60); + ih6 = + vlib_buffer_push_ip6 (vm, b, &pkt_ih6->dst_address, + &pkt_ih6->src_address, IP_PROTOCOL_TCP); + th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ih6, &bogus); + ASSERT (!bogus); + } + + tcp_enqueue_to_ip_lookup (vm, b, bi, is_ip4); +} + +void +tcp_push_ip_hdr (tcp_main_t * tm, tcp_connection_t * tc, vlib_buffer_t * b) +{ + tcp_header_t *th = vlib_buffer_get_current (b); + + if (tc->c_is_ip4) + { + ip4_header_t *ih; + ih = vlib_buffer_push_ip4 (tm->vlib_main, b, &tc->c_lcl_ip4, + &tc->c_rmt_ip4, IP_PROTOCOL_TCP); + th->checksum = ip4_tcp_udp_compute_checksum (tm->vlib_main, b, ih); + } + else + { + ip6_header_t *ih; + int bogus = ~0; + + ih = vlib_buffer_push_ip6 (tm->vlib_main, b, &tc->c_lcl_ip6, + &tc->c_rmt_ip6, IP_PROTOCOL_TCP); + th->checksum = ip6_tcp_udp_icmp_compute_checksum (tm->vlib_main, b, ih, + &bogus); + ASSERT (!bogus); + } +} + +/** + * Send SYN + * + * Builds a SYN packet for a half-open connection and sends it to ipx_lookup. + * The packet is not forwarded through tcpx_output to avoid doing lookups + * in the half_open pool. 
+ */ +void +tcp_send_syn (tcp_connection_t * tc) +{ + vlib_buffer_t *b; + u32 bi; + tcp_main_t *tm = vnet_get_tcp_main (); + vlib_main_t *vm = tm->vlib_main; + u8 tcp_hdr_opts_len, tcp_opts_len; + tcp_header_t *th; + u32 time_now; + u16 initial_wnd; + tcp_options_t snd_opts; + + tcp_get_free_buffer_index (tm, &bi); + b = vlib_get_buffer (vm, bi); + + /* Leave enough space for headers */ + vlib_buffer_make_headroom (b, MAX_HDRS_LEN); + + /* Set random initial sequence */ + time_now = tcp_time_now (); + + tc->iss = random_u32 (&time_now); + tc->snd_una = tc->iss; + tc->snd_una_max = tc->snd_nxt = tc->iss + 1; + + initial_wnd = tcp_initial_window_to_advertise (tc); + + /* Make and write options */ + memset (&snd_opts, 0, sizeof (snd_opts)); + tcp_opts_len = tcp_make_syn_options (&snd_opts, initial_wnd); + tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t); + + th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->iss, + tc->rcv_nxt, tcp_hdr_opts_len, TCP_FLAG_SYN, + initial_wnd); + + tcp_options_write ((u8 *) (th + 1), &snd_opts); + + /* Measure RTT with this */ + tc->rtt_ts = tcp_time_now (); + tc->rtt_seq = tc->snd_nxt; + + /* Start retransmit trimer */ + tcp_timer_set (tc, TCP_TIMER_RETRANSMIT_SYN, tc->rto * TCP_TO_TIMER_TICK); + tc->rto_boff = 0; + + /* Set the connection establishment timer */ + tcp_timer_set (tc, TCP_TIMER_ESTABLISH, TCP_ESTABLISH_TIME); + + tcp_push_ip_hdr (tm, tc, b); + tcp_enqueue_to_ip_lookup (vm, b, bi, tc->c_is_ip4); +} + +always_inline void +tcp_enqueue_to_output (vlib_main_t * vm, vlib_buffer_t * b, u32 bi, u8 is_ip4) +{ + u32 *to_next, next_index; + vlib_frame_t *f; + + b->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; + b->error = 0; + + /* Decide where to send the packet */ + next_index = is_ip4 ? tcp4_output_node.index : tcp6_output_node.index; + f = vlib_get_frame_to_node (vm, next_index); + + /* Enqueue the packet */ + to_next = vlib_frame_vector_args (f); + to_next[0] = bi; + f->n_vectors = 1; + vlib_put_frame_to_node (vm, next_index, f); +} + +/** + * Send FIN + */ +void +tcp_send_fin (tcp_connection_t * tc) +{ + vlib_buffer_t *b; + u32 bi; + tcp_main_t *tm = vnet_get_tcp_main (); + vlib_main_t *vm = tm->vlib_main; + + tcp_get_free_buffer_index (tm, &bi); + b = vlib_get_buffer (vm, bi); + + /* Leave enough space for headers */ + vlib_buffer_make_headroom (b, MAX_HDRS_LEN); + + tcp_make_finack (tc, b); + + tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4); +} + +always_inline u8 +tcp_make_state_flags (tcp_state_t next_state) +{ + switch (next_state) + { + case TCP_STATE_ESTABLISHED: + return TCP_FLAG_ACK; + case TCP_STATE_SYN_RCVD: + return TCP_FLAG_SYN | TCP_FLAG_ACK; + case TCP_STATE_SYN_SENT: + return TCP_FLAG_SYN; + case TCP_STATE_LAST_ACK: + case TCP_STATE_FIN_WAIT_1: + return TCP_FLAG_FIN; + default: + clib_warning ("Shouldn't be here!"); + } + return 0; +} + +/** + * Push TCP header and update connection variables + */ +static void +tcp_push_hdr_i (tcp_connection_t * tc, vlib_buffer_t * b, + tcp_state_t next_state) +{ + u32 advertise_wnd, data_len; + u8 tcp_opts_len, tcp_hdr_opts_len, opts_write_len, flags; + tcp_options_t _snd_opts, *snd_opts = &_snd_opts; + tcp_header_t *th; + + data_len = b->current_length; + vnet_buffer (b)->tcp.flags = 0; + + /* Make and write options */ + memset (snd_opts, 0, sizeof (*snd_opts)); + tcp_opts_len = tcp_make_options (tc, snd_opts, next_state); + tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t); + + /* Get rcv window to advertise */ + advertise_wnd = tcp_window_to_advertise (tc, next_state); + 
flags = tcp_make_state_flags (next_state); + + /* Push header and options */ + th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->snd_nxt, + tc->rcv_nxt, tcp_hdr_opts_len, flags, + advertise_wnd); + + opts_write_len = tcp_options_write ((u8 *) (th + 1), snd_opts); + + ASSERT (opts_write_len == tcp_opts_len); + + /* Tag the buffer with the connection index */ + vnet_buffer (b)->tcp.connection_index = tc->c_c_index; + + tc->snd_nxt += data_len; +} + +/* Send delayed ACK when timer expires */ +void +tcp_timer_delack_handler (u32 index) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + vlib_main_t *vm = tm->vlib_main; + u32 thread_index = os_get_cpu_number (); + tcp_connection_t *tc; + vlib_buffer_t *b; + u32 bi; + + tc = tcp_connection_get (index, thread_index); + + /* Get buffer */ + tcp_get_free_buffer_index (tm, &bi); + b = vlib_get_buffer (vm, bi); + + /* Fill in the ACK */ + tcp_make_ack (tc, b); + + tc->timers[TCP_TIMER_DELACK] = TCP_TIMER_HANDLE_INVALID; + tc->flags &= ~TCP_CONN_DELACK; + + tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4); +} + +/** Build a retransmit segment + * + * @return the number of bytes in the segment or 0 if there's nothing to + * retransmit + * */ +u32 +tcp_prepare_retransmit_segment (tcp_connection_t * tc, vlib_buffer_t * b, + u32 max_bytes) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + vlib_main_t *vm = tm->vlib_main; + u32 n_bytes, offset = 0; + sack_scoreboard_hole_t *hole; + u32 hole_size; + + tcp_reuse_buffer (vm, b); + + ASSERT (tc->state == TCP_STATE_ESTABLISHED); + ASSERT (max_bytes != 0); + + if (tcp_opts_sack_permitted (&tc->opt)) + { + /* XXX get first hole not retransmitted yet */ + hole = scoreboard_first_hole (&tc->sack_sb); + if (!hole) + return 0; + + offset = hole->start - tc->snd_una; + hole_size = hole->end - hole->start; + + ASSERT (hole_size); + + if (hole_size < max_bytes) + max_bytes = hole_size; + } + else + { + if (seq_geq (tc->snd_nxt, tc->snd_una_max)) + return 0; + } + + n_bytes = stream_session_peek_bytes (&tc->connection, + vlib_buffer_get_current (b), offset, + max_bytes); + ASSERT (n_bytes != 0); + + tc->snd_nxt += n_bytes; + tcp_push_hdr_i (tc, b, tc->state); + + return n_bytes; +} + +static void +tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + vlib_main_t *vm = tm->vlib_main; + u32 thread_index = os_get_cpu_number (); + tcp_connection_t *tc; + vlib_buffer_t *b; + u32 bi, max_bytes, snd_space; + + if (is_syn) + { + tc = tcp_half_open_connection_get (index); + } + else + { + tc = tcp_connection_get (index, thread_index); + } + + /* Make sure timer handle is set to invalid */ + tc->timers[TCP_TIMER_RETRANSMIT] = TCP_TIMER_HANDLE_INVALID; + + /* Increment RTO backoff (also equal to number of retries) */ + tc->rto_boff += 1; + + /* Go back to first un-acked byte */ + tc->snd_nxt = tc->snd_una; + + /* Get buffer */ + tcp_get_free_buffer_index (tm, &bi); + b = vlib_get_buffer (vm, bi); + + if (tc->state == TCP_STATE_ESTABLISHED) + { + tcp_fastrecovery_off (tc); + + /* Exponential backoff */ + tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX); + + /* Figure out what and how many bytes we can send */ + snd_space = tcp_available_snd_space (tc); + max_bytes = clib_min (tc->snd_mss, snd_space); + tcp_prepare_retransmit_segment (tc, b, max_bytes); + + tc->rtx_bytes += max_bytes; + + /* No fancy recovery for now! 
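/*
 * Numeric sketch of the backoff the retransmit handler above applies:
 * the RTO doubles on every expiry and saturates at TCP_RTO_MAX, while
 * rto_boff counts the retries. The helper is illustrative only.
 */
static void
tcp_rto_backoff_example (tcp_connection_t * tc)
{
  tc->rto_boff += 1;                              /* retry counter */
  tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX); /* 1, 2, 4, ... capped */
}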
*/ + scoreboard_clear (&tc->sack_sb); + } + else + { + /* Retransmit for SYN/SYNACK */ + ASSERT (tc->state == TCP_STATE_SYN_RCVD + || tc->state == TCP_STATE_SYN_SENT); + + /* Try without increasing RTO a number of times. If this fails, + * start growing RTO exponentially */ + if (tc->rto_boff > TCP_RTO_SYN_RETRIES) + tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX); + + vlib_buffer_make_headroom (b, MAX_HDRS_LEN); + tcp_push_hdr_i (tc, b, tc->state); + } + + if (!is_syn) + { + tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4); + + /* Re-enable retransmit timer */ + tcp_retransmit_timer_set (tm, tc); + } + else + { + ASSERT (tc->state == TCP_STATE_SYN_SENT); + + /* This goes straight to ipx_lookup */ + tcp_push_ip_hdr (tm, tc, b); + tcp_enqueue_to_ip_lookup (vm, b, bi, tc->c_is_ip4); + + /* Re-enable retransmit timer */ + tcp_timer_set (tc, TCP_TIMER_RETRANSMIT_SYN, + tc->rto * TCP_TO_TIMER_TICK); + } +} + +void +tcp_timer_retransmit_handler (u32 index) +{ + tcp_timer_retransmit_handler_i (index, 0); +} + +void +tcp_timer_retransmit_syn_handler (u32 index) +{ + tcp_timer_retransmit_handler_i (index, 1); +} + +/** + * Retansmit first unacked segment */ +void +tcp_retransmit_first_unacked (tcp_connection_t * tc) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + u32 snd_nxt = tc->snd_nxt; + vlib_buffer_t *b; + u32 bi; + + tc->snd_nxt = tc->snd_una; + + /* Get buffer */ + tcp_get_free_buffer_index (tm, &bi); + b = vlib_get_buffer (tm->vlib_main, bi); + + tcp_prepare_retransmit_segment (tc, b, tc->snd_mss); + tcp_enqueue_to_output (tm->vlib_main, b, bi, tc->c_is_ip4); + + tc->snd_nxt = snd_nxt; + tc->rtx_bytes += tc->snd_mss; +} + +void +tcp_fast_retransmit (tcp_connection_t * tc) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + u32 snd_space, max_bytes, n_bytes, bi; + vlib_buffer_t *b; + + ASSERT (tcp_in_fastrecovery (tc)); + + clib_warning ("fast retransmit!"); + + /* Start resending from first un-acked segment */ + tc->snd_nxt = tc->snd_una; + + snd_space = tcp_available_snd_space (tc); + + while (snd_space) + { + tcp_get_free_buffer_index (tm, &bi); + b = vlib_get_buffer (tm->vlib_main, bi); + + max_bytes = clib_min (tc->snd_mss, snd_space); + n_bytes = tcp_prepare_retransmit_segment (tc, b, max_bytes); + + /* Nothing left to retransmit */ + if (n_bytes == 0) + return; + + tcp_enqueue_to_output (tm->vlib_main, b, bi, tc->c_is_ip4); + + snd_space -= n_bytes; + } + + /* If window allows, send new data */ + tc->snd_nxt = tc->snd_una_max; +} + +always_inline u32 +tcp_session_has_ooo_data (tcp_connection_t * tc) +{ + stream_session_t *s = + stream_session_get (tc->c_s_index, tc->c_thread_index); + return svm_fifo_has_ooo_data (s->server_rx_fifo); +} + +always_inline uword +tcp46_output_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_ip4) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + u32 n_left_from, next_index, *from, *to_next; + u32 my_thread_index = vm->cpu_index; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + tcp_connection_t *tc0; + tcp_header_t *th0; + u32 error0 = TCP_ERROR_PKTS_SENT, next0 = TCP_OUTPUT_NEXT_IP_LOOKUP; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + 
tc0 = tcp_connection_get (vnet_buffer (b0)->tcp.connection_index, + my_thread_index); + th0 = vlib_buffer_get_current (b0); + + if (is_ip4) + { + ip4_header_t *ih0; + ih0 = vlib_buffer_push_ip4 (vm, b0, &tc0->c_lcl_ip4, + &tc0->c_rmt_ip4, IP_PROTOCOL_TCP); + th0->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ih0); + } + else + { + ip6_header_t *ih0; + int bogus = ~0; + + ih0 = vlib_buffer_push_ip6 (vm, b0, &tc0->c_lcl_ip6, + &tc0->c_rmt_ip6, IP_PROTOCOL_TCP); + th0->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b0, ih0, + &bogus); + ASSERT (!bogus); + } + + /* Filter out DUPACKs if there are no OOO segments left */ + if (PREDICT_FALSE + (vnet_buffer (b0)->tcp.flags & TCP_BUF_FLAG_DUPACK)) + { + tc0->snt_dupacks--; + ASSERT (tc0->snt_dupacks >= 0); + if (!tcp_session_has_ooo_data (tc0)) + { + error0 = TCP_ERROR_FILTERED_DUPACKS; + next0 = TCP_OUTPUT_NEXT_DROP; + goto done; + } + } + + /* Retransmitted SYNs do reach this but it should be harmless */ + tc0->rcv_las = tc0->rcv_nxt; + + /* Stop DELACK timer and fix flags */ + tc0->flags &= + ~(TCP_CONN_SNDACK | TCP_CONN_DELACK | TCP_CONN_BURSTACK); + if (tcp_timer_is_active (tc0, TCP_TIMER_DELACK)) + { + tcp_timer_reset (tc0, TCP_TIMER_DELACK); + } + + /* If not retransmitting + * 1) update snd_una_max (SYN, SYNACK, new data, FIN) + * 2) If we're not tracking an ACK, start tracking */ + if (seq_lt (tc0->snd_una_max, tc0->snd_nxt)) + { + tc0->snd_una_max = tc0->snd_nxt; + if (tc0->rtt_ts == 0) + { + tc0->rtt_ts = tcp_time_now (); + tc0->rtt_seq = tc0->snd_nxt; + } + } + + /* Set the retransmit timer if not set already and not + * doing a pure ACK */ + if (!tcp_timer_is_active (tc0, TCP_TIMER_RETRANSMIT) + && tc0->snd_nxt != tc0->snd_una) + { + tcp_retransmit_timer_set (tm, tc0); + tc0->rto_boff = 0; + } + + /* set fib index to default and lookup node */ + /* XXX network virtualization (vrf/vni) */ + vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + + b0->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; + + done: + b0->error = error0 != 0 ? node->errors[error0] : 0; + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return from_frame->n_vectors; +} + +static uword +tcp4_output (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_output_inline (vm, node, from_frame, 1 /* is_ip4 */ ); +} + +static uword +tcp6_output (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_output_inline (vm, node, from_frame, 0 /* is_ip4 */ ); +} + +VLIB_REGISTER_NODE (tcp4_output_node) = +{ + .function = tcp4_output,.name = "tcp4-output", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32),.n_errors = TCP_N_ERROR,.error_strings = + tcp_error_strings,.n_next_nodes = TCP_OUTPUT_N_NEXT,.next_nodes = + { +#define _(s,n) [TCP_OUTPUT_NEXT_##s] = n, + foreach_tcp4_output_next +#undef _ + } +,.format_buffer = format_tcp_header,.format_trace = format_tcp_tx_trace,}; + +VLIB_NODE_FUNCTION_MULTIARCH (tcp4_output_node, tcp4_output) +VLIB_REGISTER_NODE (tcp6_output_node) = +{ + .function = tcp6_output,.name = "tcp6-output", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32),.n_errors = TCP_N_ERROR,.error_strings = + tcp_error_strings,.n_next_nodes = TCP_OUTPUT_N_NEXT,.next_nodes = + { +#define _(s,n) [TCP_OUTPUT_NEXT_##s] = n, + foreach_tcp6_output_next +#undef _ + } +,.format_buffer = format_tcp_header,.format_trace = format_tcp_tx_trace,}; + +VLIB_NODE_FUNCTION_MULTIARCH (tcp6_output_node, tcp6_output) u32 +tcp_push_header (transport_connection_t * tconn, vlib_buffer_t * b) +{ + tcp_connection_t *tc; + + tc = (tcp_connection_t *) tconn; + tcp_push_hdr_i (tc, b, TCP_STATE_ESTABLISHED); + return 0; +} + +typedef enum _tcp_reset_next +{ + TCP_RESET_NEXT_DROP, + TCP_RESET_NEXT_IP_LOOKUP, + TCP_RESET_N_NEXT +} tcp_reset_next_t; + +#define foreach_tcp4_reset_next \ + _(DROP, "error-drop") \ + _(IP_LOOKUP, "ip4-lookup") + +#define foreach_tcp6_reset_next \ + _(DROP, "error-drop") \ + _(IP_LOOKUP, "ip6-lookup") + +static uword +tcp46_send_reset_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame, u8 is_ip4) +{ + u32 n_left_from, next_index, *from, *to_next; + u32 my_thread_index = vm->cpu_index; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 error0 = TCP_ERROR_RST_SENT, next0 = TCP_RESET_NEXT_IP_LOOKUP; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + if (tcp_make_reset_in_place (vm, b0, vnet_buffer (b0)->tcp.flags, + my_thread_index, is_ip4)) + { + error0 = TCP_ERROR_LOOKUP_DROPS; + next0 = TCP_RESET_NEXT_DROP; + goto done; + } + + /* Prepare to send to IP lookup */ + vnet_buffer (b0)->sw_if_index[VLIB_TX] = 0; + next0 = TCP_RESET_NEXT_IP_LOOKUP; + + done: + b0->error = error0 != 0 ? 
node->errors[error0] : 0; + b0->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return from_frame->n_vectors; +} + +static uword +tcp4_send_reset (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_send_reset_inline (vm, node, from_frame, 1); +} + +static uword +tcp6_send_reset (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_send_reset_inline (vm, node, from_frame, 0); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp4_reset_node) = { + .function = tcp4_send_reset, + .name = "tcp4-reset", + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_RESET_N_NEXT, + .next_nodes = { +#define _(s,n) [TCP_RESET_NEXT_##s] = n, + foreach_tcp4_reset_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp6_reset_node) = { + .function = tcp6_send_reset, + .name = "tcp6-reset", + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_RESET_N_NEXT, + .next_nodes = { +#define _(s,n) [TCP_RESET_NEXT_##s] = n, + foreach_tcp6_reset_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/tcp/tcp_packet.h b/src/vnet/tcp/tcp_packet.h new file mode 100644 index 00000000..866c5fd6 --- /dev/null +++ b/src/vnet/tcp/tcp_packet.h @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_tcp_packet_h +#define included_tcp_packet_h + +#include + +/* TCP flags bit 0 first. */ +#define foreach_tcp_flag \ + _ (FIN) /**< No more data from sender. */ \ + _ (SYN) /**< Synchronize sequence numbers. */ \ + _ (RST) /**< Reset the connection. */ \ + _ (PSH) /**< Push function. */ \ + _ (ACK) /**< Ack field significant. */ \ + _ (URG) /**< Urgent pointer field significant. */ \ + _ (ECE) /**< ECN-echo. Receiver got CE packet */ \ + _ (CWR) /**< Sender reduced congestion window */ + +enum +{ +#define _(f) TCP_FLAG_BIT_##f, + foreach_tcp_flag +#undef _ + TCP_N_FLAG_BITS, +}; + +enum +{ +#define _(f) TCP_FLAG_##f = 1 << TCP_FLAG_BIT_##f, + foreach_tcp_flag +#undef _ +}; + +typedef struct _tcp_header +{ + union + { + struct + { + u16 src_port; /**< Source port. */ + u16 dst_port; /**< Destination port. */ + }; + struct + { + u16 src, dst; + }; + }; + + u32 seq_number; /**< Sequence number of the first data octet in this + * segment, except when SYN is present. If SYN + * is present the seq number is is the ISN and the + * first data octet is ISN+1 */ + u32 ack_number; /**< Acknowledgement number if ACK is set. 
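/*
 * Sketch of what the flag X-macros above expand to: FIN lands on bit 0,
 * SYN on bit 1, and so on through CWR on bit 7, which is why a literal
 * 0x02 test for SYN (used by the SYN filter later in this patch) matches
 * TCP_FLAG_SYN.
 */
static void
tcp_flag_layout_example (void)
{
  ASSERT (TCP_FLAG_BIT_FIN == 0 && TCP_FLAG_BIT_SYN == 1);
  ASSERT (TCP_FLAG_FIN == 0x01 && TCP_FLAG_SYN == 0x02);
  ASSERT ((TCP_FLAG_SYN | TCP_FLAG_ACK) == 0x12);       /* SYN-ACK */
}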
It contains + * the value of the next sequence number the sender + * of the segment is expecting to receive. */ + u8 data_offset_and_reserved; + u8 flags; /**< Flags: see the macro above */ + u16 window; /**< Number of bytes sender is willing to receive. */ + + u16 checksum; /**< Checksum of TCP pseudo header and data. */ + u16 urgent_pointer; /**< Seq number of the byte after the urgent data. */ +} __attribute__ ((packed)) tcp_header_t; + +/* Flag tests that return 0 or !0 */ +#define tcp_doff(_th) ((_th)->data_offset_and_reserved >> 4) +#define tcp_fin(_th) ((_th)->flags & TCP_FLAG_FIN) +#define tcp_syn(_th) ((_th)->flags & TCP_FLAG_SYN) +#define tcp_rst(_th) ((_th)->flags & TCP_FLAG_RST) +#define tcp_psh(_th) ((_th)->flags & TCP_FLAG_PSH) +#define tcp_ack(_th) ((_th)->flags & TCP_FLAG_ACK) +#define tcp_urg(_th) ((_th)->flags & TCP_FLAG_URG) +#define tcp_ece(_th) ((_th)->flags & TCP_FLAG_ECE) +#define tcp_cwr(_th) ((_th)->flags & TCP_FLAG_CWR) + +/* Flag tests that return 0 or 1 */ +#define tcp_is_syn(_th) !!((_th)->flags & TCP_FLAG_SYN) +#define tcp_is_fin(_th) !!((_th)->flags & TCP_FLAG_FIN) + +always_inline int +tcp_header_bytes (tcp_header_t * t) +{ + return tcp_doff (t) * sizeof (u32); +} + +/* + * TCP options. + */ + +typedef enum tcp_option_type +{ + TCP_OPTION_EOL = 0, /**< End of options. */ + TCP_OPTION_NOOP = 1, /**< No operation. */ + TCP_OPTION_MSS = 2, /**< Limit MSS. */ + TCP_OPTION_WINDOW_SCALE = 3, /**< Window scale. */ + TCP_OPTION_SACK_PERMITTED = 4, /**< Selective Ack permitted. */ + TCP_OPTION_SACK_BLOCK = 5, /**< Selective Ack block. */ + TCP_OPTION_TIMESTAMP = 8, /**< Timestamps. */ + TCP_OPTION_UTO = 28, /**< User timeout. */ + TCP_OPTION_AO = 29, /**< Authentication Option. */ +} tcp_option_type_t; + +#define foreach_tcp_options_flag \ + _ (MSS) /**< MSS advertised in SYN */ \ + _ (TSTAMP) /**< Timestamp capability advertised in SYN */ \ + _ (WSCALE) /**< Wnd scale capability advertised in SYN */ \ + _ (SACK_PERMITTED) /**< SACK capability advertised in SYN */ \ + _ (SACK) /**< SACK present */ + +enum +{ +#define _(f) TCP_OPTS_FLAG_BIT_##f, + foreach_tcp_options_flag +#undef _ + TCP_OPTIONS_N_FLAG_BITS, +}; + +enum +{ +#define _(f) TCP_OPTS_FLAG_##f = 1 << TCP_OPTS_FLAG_BIT_##f, + foreach_tcp_options_flag +#undef _ +}; + +typedef struct _sack_block +{ + u32 start; /**< Start sequence number */ + u32 end; /**< End sequence number */ +} sack_block_t; + +typedef struct +{ + u8 flags; /** Option flags, see above */ + + /* Received options */ + u16 mss; /**< Maximum segment size advertised by peer */ + u8 wscale; /**< Window scale advertised by peer */ + u32 tsval; /**< Peer's timestamp value */ + u32 tsecr; /**< Echoed/reflected time stamp */ + sack_block_t *sacks; /**< SACK blocks received */ + u8 n_sack_blocks; /**< Number of SACKs blocks */ +} tcp_options_t; + +/* Flag tests that return 0 or !0 */ +#define tcp_opts_mss(_to) ((_to)->flags & TCP_OPTS_FLAG_MSS) +#define tcp_opts_tstamp(_to) ((_to)->flags & TCP_OPTS_FLAG_TSTAMP) +#define tcp_opts_wscale(_to) ((_to)->flags & TCP_OPTS_FLAG_WSCALE) +#define tcp_opts_sack(_to) ((_to)->flags & TCP_OPTS_FLAG_SACK) +#define tcp_opts_sack_permitted(_to) ((_to)->flags & TCP_OPTS_FLAG_SACK_PERMITTED) + +/* TCP option lengths */ +#define TCP_OPTION_LEN_EOL 1 +#define TCP_OPTION_LEN_NOOP 1 +#define TCP_OPTION_LEN_MSS 4 +#define TCP_OPTION_LEN_WINDOW_SCALE 3 +#define TCP_OPTION_LEN_SACK_PERMITTED 2 +#define TCP_OPTION_LEN_TIMESTAMP 10 +#define TCP_OPTION_LEN_SACK_BLOCK 8 + +#define TCP_WND_MAX 65535U +#define TCP_MAX_WND_SCALE 14 
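/*
 * Worked sketch of the data-offset accessors above: a segment carrying
 * 12 bytes of options encodes a data offset of 8, i.e. 32 header bytes.
 */
static void
tcp_header_bytes_example (void)
{
  tcp_header_t th;

  memset (&th, 0, sizeof (th));
  th.data_offset_and_reserved = 8 << 4; /* 20 fixed + 12 option bytes */
  ASSERT (tcp_doff (&th) == 8);
  ASSERT (tcp_header_bytes (&th) == 32);
}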
/* See RFC 1323 */ +#define TCP_OPTS_ALIGN 4 +#define TCP_OPTS_MAX_SACK_BLOCKS 3 +#endif /* included_tcp_packet_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/tcp/tcp_pg.c b/src/vnet/tcp/tcp_pg.c new file mode 100644 index 00000000..dc324049 --- /dev/null +++ b/src/vnet/tcp/tcp_pg.c @@ -0,0 +1,236 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/tcp_pg: TCP packet-generator interface + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include + +/* TCP flags bit 0 first. */ +#define foreach_tcp_flag \ + _ (FIN) \ + _ (SYN) \ + _ (RST) \ + _ (PSH) \ + _ (ACK) \ + _ (URG) \ + _ (ECE) \ + _ (CWR) + +static void +tcp_pg_edit_function (pg_main_t * pg, + pg_stream_t * s, + pg_edit_group_t * g, + u32 * packets, + u32 n_packets) +{ + vlib_main_t * vm = vlib_get_main(); + u32 ip_offset, tcp_offset; + + tcp_offset = g->start_byte_offset; + ip_offset = (g-1)->start_byte_offset; + + while (n_packets >= 1) + { + vlib_buffer_t * p0; + ip4_header_t * ip0; + tcp_header_t * tcp0; + ip_csum_t sum0; + u32 tcp_len0; + + p0 = vlib_get_buffer (vm, packets[0]); + n_packets -= 1; + packets += 1; + + ASSERT (p0->current_data == 0); + ip0 = (void *) (p0->data + ip_offset); + tcp0 = (void *) (p0->data + tcp_offset); + tcp_len0 = clib_net_to_host_u16 (ip0->length) - sizeof (ip0[0]); + + /* Initialize checksum with header. */ + if (BITS (sum0) == 32) + { + sum0 = clib_mem_unaligned (&ip0->src_address, u32); + sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32)); + } + else + sum0 = clib_mem_unaligned (&ip0->src_address, u64); + + sum0 = ip_csum_with_carry + (sum0, clib_host_to_net_u32 (tcp_len0 + (ip0->protocol << 16))); + + /* Invalidate possibly old checksum. 
*/ + tcp0->checksum = 0; + + sum0 = ip_incremental_checksum_buffer (vm, p0, tcp_offset, tcp_len0, sum0); + + tcp0->checksum = ~ ip_csum_fold (sum0); + } +} + +typedef struct { + pg_edit_t src, dst; + pg_edit_t seq_number, ack_number; + pg_edit_t data_offset_and_reserved; +#define _(f) pg_edit_t f##_flag; + foreach_tcp_flag +#undef _ + pg_edit_t window; + pg_edit_t checksum; + pg_edit_t urgent_pointer; +} pg_tcp_header_t; + +static inline void +pg_tcp_header_init (pg_tcp_header_t * p) +{ + /* Initialize fields that are not bit fields in the IP header. */ +#define _(f) pg_edit_init (&p->f, tcp_header_t, f); + _ (src); + _ (dst); + _ (seq_number); + _ (ack_number); + _ (window); + _ (checksum); + _ (urgent_pointer); +#undef _ + + /* Initialize bit fields. */ +#define _(f) \ + pg_edit_init_bitfield (&p->f##_flag, tcp_header_t, \ + flags, \ + TCP_FLAG_BIT_##f, 1); + + foreach_tcp_flag +#undef _ + + pg_edit_init_bitfield (&p->data_offset_and_reserved, tcp_header_t, + data_offset_and_reserved, + 4, 4); +} + +uword +unformat_pg_tcp_header (unformat_input_t * input, va_list * args) +{ + pg_stream_t * s = va_arg (*args, pg_stream_t *); + pg_tcp_header_t * p; + u32 group_index; + + p = pg_create_edit_group (s, sizeof (p[0]), sizeof (tcp_header_t), + &group_index); + pg_tcp_header_init (p); + + /* Defaults. */ + pg_edit_set_fixed (&p->seq_number, 0); + pg_edit_set_fixed (&p->ack_number, 0); + + pg_edit_set_fixed (&p->data_offset_and_reserved, + sizeof (tcp_header_t) / sizeof (u32)); + + pg_edit_set_fixed (&p->window, 4096); + pg_edit_set_fixed (&p->urgent_pointer, 0); + +#define _(f) pg_edit_set_fixed (&p->f##_flag, 0); + foreach_tcp_flag +#undef _ + + p->checksum.type = PG_EDIT_UNSPECIFIED; + + if (! unformat (input, "TCP: %U -> %U", + unformat_pg_edit, + unformat_tcp_udp_port, &p->src, + unformat_pg_edit, + unformat_tcp_udp_port, &p->dst)) + goto error; + + /* Parse options. */ + while (1) + { + if (unformat (input, "window %U", + unformat_pg_edit, + unformat_pg_number, &p->window)) + ; + + else if (unformat (input, "checksum %U", + unformat_pg_edit, + unformat_pg_number, &p->checksum)) + ; + + /* Flags. */ +#define _(f) else if (unformat (input, #f)) pg_edit_set_fixed (&p->f##_flag, 1); + foreach_tcp_flag +#undef _ + + /* Can't parse input: try next protocol level. */ + else + break; + } + + { + ip_main_t * im = &ip_main; + u16 dst_port; + tcp_udp_port_info_t * pi; + + pi = 0; + if (p->dst.type == PG_EDIT_FIXED) + { + dst_port = pg_edit_get_value (&p->dst, PG_EDIT_LO); + pi = ip_get_tcp_udp_port_info (im, dst_port); + } + + if (pi && pi->unformat_pg_edit + && unformat_user (input, pi->unformat_pg_edit, s)) + ; + + else if (! unformat_user (input, unformat_pg_payload, s)) + goto error; + + if (p->checksum.type == PG_EDIT_UNSPECIFIED) + { + pg_edit_group_t * g = pg_stream_get_group (s, group_index); + g->edit_function = tcp_pg_edit_function; + g->edit_function_opaque = 0; + } + + return 1; + } + + error: + /* Free up any edits we may have added. */ + pg_free_edit_group (s); + return 0; +} + diff --git a/src/vnet/tcp/tcp_syn_filter4.c b/src/vnet/tcp/tcp_syn_filter4.c new file mode 100644 index 00000000..c7605a30 --- /dev/null +++ b/src/vnet/tcp/tcp_syn_filter4.c @@ -0,0 +1,542 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include + +typedef struct +{ + f64 next_reset; + f64 reset_interval; + u8 *syn_counts; +} syn_filter4_runtime_t; + +typedef struct +{ + u32 next_index; + int not_a_syn; + u8 filter_value; +} syn_filter4_trace_t; + +/* packet trace format function */ +static u8 * +format_syn_filter4_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + syn_filter4_trace_t *t = va_arg (*args, syn_filter4_trace_t *); + + s = format (s, "SYN_FILTER4: next index %d, %s", + t->next_index, t->not_a_syn ? "not a syn" : "syn"); + if (t->not_a_syn == 0) + s = format (s, ", filter value %d\n", t->filter_value); + else + s = format (s, "\n"); + return s; +} + +static vlib_node_registration_t syn_filter4_node; + +#define foreach_syn_filter_error \ +_(THROTTLED, "TCP SYN packet throttle drops") \ +_(OK, "TCP SYN packets passed") + +typedef enum +{ +#define _(sym,str) SYN_FILTER_ERROR_##sym, + foreach_syn_filter_error +#undef _ + SYN_FILTER_N_ERROR, +} syn_filter_error_t; + +static char *syn_filter4_error_strings[] = { +#define _(sym,string) string, + foreach_syn_filter_error +#undef _ +}; + +typedef enum +{ + SYN_FILTER_NEXT_DROP, + SYN_FILTER_N_NEXT, +} syn_filter_next_t; + +extern vnet_feature_arc_registration_t vnet_feat_arc_ip4_local; + +static uword +syn_filter4_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, *from, *to_next; + syn_filter_next_t next_index; + u32 ok_syn_packets = 0; + vnet_feature_main_t *fm = &feature_main; + u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index; + vnet_feature_config_main_t *cm = &fm->feature_config_mains[arc_index]; + syn_filter4_runtime_t *rt = (syn_filter4_runtime_t *) node->runtime_data; + f64 now = vlib_time_now (vm); + /* Shut up spurious gcc warnings. */ + u8 *c0 = 0, *c1 = 0, *c2 = 0, *c3 = 0; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + if (now > rt->next_reset) + { + memset (rt->syn_counts, 0, vec_len (rt->syn_counts)); + rt->next_reset = now + rt->reset_interval; + } + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 8 && n_left_to_next >= 4) + { + u32 bi0, bi1, bi2, bi3; + vlib_buffer_t *b0, *b1, *b2, *b3; + u32 next0, next1, next2, next3; + ip4_header_t *ip0, *ip1, *ip2, *ip3; + tcp_header_t *tcp0, *tcp1, *tcp2, *tcp3; + u32 not_a_syn0 = 1, not_a_syn1 = 1, not_a_syn2 = 1, not_a_syn3 = 1; + u64 hash0, hash1, hash2, hash3; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t *p4, *p5, *p6, *p7; + + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + p6 = vlib_get_buffer (vm, from[6]); + p7 = vlib_get_buffer (vm, from[7]); + + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + vlib_prefetch_buffer_header (p6, LOAD); + vlib_prefetch_buffer_header (p7, LOAD); + + CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + to_next[2] = bi2 = from[2]; + to_next[3] = bi3 = from[3]; + from += 4; + to_next += 4; + n_left_from -= 4; + n_left_to_next -= 4; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); + + vnet_get_config_data + (&cm->config_main, &b0->current_config_index, + &next0, 0 /* sizeof (c0[0]) */ ); + vnet_get_config_data + (&cm->config_main, &b1->current_config_index, + &next1, 0 /* sizeof (c0[0]) */ ); + vnet_get_config_data + (&cm->config_main, &b2->current_config_index, + &next2, 0 /* sizeof (c0[0]) */ ); + vnet_get_config_data + (&cm->config_main, &b3->current_config_index, + &next3, 0 /* sizeof (c0[0]) */ ); + + /* Not TCP? */ + ip0 = vlib_buffer_get_current (b0); + if (ip0->protocol != IP_PROTOCOL_TCP) + goto trace00; + + tcp0 = ip4_next_header (ip0); + /* + * Not a SYN? + * $$$$ hack: the TCP bitfield flags seem not to compile + * correct code. + */ + if (PREDICT_TRUE (!(tcp0->flags & 0x2))) + goto trace00; + + not_a_syn0 = 0; + hash0 = clib_xxhash ((u64) ip0->src_address.as_u32); + c0 = &rt->syn_counts[hash0 & (_vec_len (rt->syn_counts) - 1)]; + if (PREDICT_FALSE (*c0 >= 0x80)) + { + next0 = SYN_FILTER_NEXT_DROP; + b0->error = node->errors[SYN_FILTER_ERROR_THROTTLED]; + goto trace00; + } + *c0 += 1; + ok_syn_packets++; + + trace00: + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + syn_filter4_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->not_a_syn = not_a_syn0; + t->next_index = next0; + t->filter_value = not_a_syn0 ? 0 : *c0; + } + + /* Not TCP? */ + ip1 = vlib_buffer_get_current (b1); + if (ip1->protocol != IP_PROTOCOL_TCP) + goto trace01; + + tcp1 = ip4_next_header (ip1); + /* + * Not a SYN? + * $$$$ hack: the TCP bitfield flags seem not to compile + * correct code. + */ + if (PREDICT_TRUE (!(tcp1->flags & 0x2))) + goto trace01; + + not_a_syn1 = 0; + hash1 = clib_xxhash ((u64) ip1->src_address.as_u32); + c1 = &rt->syn_counts[hash1 & (_vec_len (rt->syn_counts) - 1)]; + if (PREDICT_FALSE (*c1 >= 0x80)) + { + next1 = SYN_FILTER_NEXT_DROP; + b1->error = node->errors[SYN_FILTER_ERROR_THROTTLED]; + goto trace01; + } + *c1 += 1; + ok_syn_packets++; + + trace01: + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b1->flags & VLIB_BUFFER_IS_TRACED))) + { + syn_filter4_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->not_a_syn = not_a_syn1; + t->next_index = next1; + t->filter_value = not_a_syn1 ? 0 : *c1; + } + + /* Not TCP? */ + ip2 = vlib_buffer_get_current (b2); + if (ip2->protocol != IP_PROTOCOL_TCP) + goto trace02; + + tcp2 = ip4_next_header (ip2); + /* + * Not a SYN? + * $$$$ hack: the TCP bitfield flags seem not to compile + * correct code. 
+ */ + if (PREDICT_TRUE (!(tcp2->flags & 0x2))) + goto trace02; + + not_a_syn2 = 0; + hash2 = clib_xxhash ((u64) ip2->src_address.as_u32); + c2 = &rt->syn_counts[hash2 & (_vec_len (rt->syn_counts) - 1)]; + if (PREDICT_FALSE (*c2 >= 0x80)) + { + next2 = SYN_FILTER_NEXT_DROP; + b2->error = node->errors[SYN_FILTER_ERROR_THROTTLED]; + goto trace02; + } + *c2 += 1; + ok_syn_packets++; + + trace02: + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b2->flags & VLIB_BUFFER_IS_TRACED))) + { + syn_filter4_trace_t *t = + vlib_add_trace (vm, node, b2, sizeof (*t)); + t->not_a_syn = not_a_syn2; + t->next_index = next2; + t->filter_value = not_a_syn2 ? 0 : *c2; + } + + /* Not TCP? */ + ip3 = vlib_buffer_get_current (b3); + if (ip3->protocol != IP_PROTOCOL_TCP) + goto trace03; + + tcp3 = ip4_next_header (ip3); + /* + * Not a SYN? + * $$$$ hack: the TCP bitfield flags seem not to compile + * correct code. + */ + if (PREDICT_TRUE (!(tcp3->flags & 0x2))) + goto trace03; + + not_a_syn3 = 0; + hash3 = clib_xxhash ((u64) ip3->src_address.as_u32); + c3 = &rt->syn_counts[hash3 & (_vec_len (rt->syn_counts) - 1)]; + if (PREDICT_FALSE (*c3 >= 0x80)) + { + next3 = SYN_FILTER_NEXT_DROP; + b3->error = node->errors[SYN_FILTER_ERROR_THROTTLED]; + goto trace03; + } + *c3 += 1; + ok_syn_packets++; + + trace03: + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b3->flags & VLIB_BUFFER_IS_TRACED))) + { + syn_filter4_trace_t *t = + vlib_add_trace (vm, node, b3, sizeof (*t)); + t->not_a_syn = not_a_syn3; + t->next_index = next3; + t->filter_value = not_a_syn3 ? 0 : *c3; + } + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, bi2, bi3, + next0, next1, next2, next3); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0; + ip4_header_t *ip0; + tcp_header_t *tcp0; + u32 not_a_syn0 = 1; + u32 hash0; + u8 *c0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + vnet_get_config_data + (&cm->config_main, &b0->current_config_index, + &next0, 0 /* sizeof (c0[0]) */ ); + + /* Not TCP? */ + ip0 = vlib_buffer_get_current (b0); + if (ip0->protocol != IP_PROTOCOL_TCP) + goto trace0; + + tcp0 = ip4_next_header (ip0); + /* + * Not a SYN? + * $$$$ hack: the TCP bitfield flags seem not to compile + * correct code. + */ + if (PREDICT_TRUE (!(tcp0->flags & 0x2))) + goto trace0; + + not_a_syn0 = 0; + hash0 = clib_xxhash ((u64) ip0->src_address.as_u32); + c0 = &rt->syn_counts[hash0 & (_vec_len (rt->syn_counts) - 1)]; + if (PREDICT_FALSE (*c0 >= 0x80)) + { + next0 = SYN_FILTER_NEXT_DROP; + b0->error = node->errors[SYN_FILTER_ERROR_THROTTLED]; + goto trace0; + } + *c0 += 1; + ok_syn_packets++; + + trace0: + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + syn_filter4_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->not_a_syn = not_a_syn0; + t->next_index = next0; + t->filter_value = not_a_syn0 ? 
0 : *c0; + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, syn_filter4_node.index, + SYN_FILTER_ERROR_OK, ok_syn_packets); + return frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (syn_filter4_node, static) = +{ + .function = syn_filter4_node_fn, + .name = "syn-filter-4", + .vector_size = sizeof (u32), + .format_trace = format_syn_filter4_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .runtime_data_bytes = sizeof (syn_filter4_runtime_t), + .n_errors = ARRAY_LEN(syn_filter4_error_strings), + .error_strings = syn_filter4_error_strings, + + .n_next_nodes = SYN_FILTER_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [SYN_FILTER_NEXT_DROP] = "error-drop", + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (syn_filter4_node, syn_filter4_node_fn); + +/* *INDENT-OFF* */ +VNET_FEATURE_INIT (syn_filter_4, static) = +{ + .arc_name = "ip4-local", + .node_name = "syn-filter-4", + .runs_before = VNET_FEATURES("ip4-local-end-of-arc"), +}; +/* *INDENT-ON* */ + +int +syn_filter_enable_disable (u32 sw_if_index, int enable_disable) +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_sw_interface_t *sw; + int rv = 0; + + /* Utterly wrong? */ + if (pool_is_free_index (vnm->interface_main.sw_interfaces, sw_if_index)) + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + + /* Not a physical port? */ + sw = vnet_get_sw_interface (vnm, sw_if_index); + if (sw->type != VNET_SW_INTERFACE_TYPE_HARDWARE) + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + + if (enable_disable) + { + vlib_main_t *vm = vlib_get_main (); + syn_filter4_runtime_t *rt; + + rt = vlib_node_get_runtime_data (vm, syn_filter4_node.index); + vec_validate (rt->syn_counts, 1023); + /* + * Given perfect dispersion / optimal hashing results: + * Allow 128k (successful) syns/sec. 1024 buckets, each of which + * absorbs 128 syns before filtering. Reset table once a second. + * Reality bites, let's try resetting once every 100ms. 
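+ * (Back-of-the-envelope: 1024 buckets x 128 SYNs per bucket is ~131k
+ * SYNs admitted per interval; at the 100ms interval set below that is
+ * roughly 1.3M SYNs/sec in aggregate, assuming an even hash spread.)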
+ */ + rt->reset_interval = 0.1; /* reset interval in seconds */ + } + + rv = vnet_feature_enable_disable ("ip4-local", "syn-filter-4", + sw_if_index, enable_disable, 0, 0); + + return rv; +} + +static clib_error_t * +syn_filter_enable_disable_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t *vnm = vnet_get_main (); + u32 sw_if_index = ~0; + int enable_disable = 1; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "disable")) + enable_disable = 0; + else if (unformat (input, "%U", unformat_vnet_sw_interface, + vnm, &sw_if_index)) + ; + else + break; + } + + if (sw_if_index == ~0) + return clib_error_return (0, "Please specify an interface..."); + + rv = syn_filter_enable_disable (sw_if_index, enable_disable); + + switch (rv) + { + case 0: + break; + + case VNET_API_ERROR_INVALID_SW_IF_INDEX: + return clib_error_return + (0, "Invalid interface, only works on physical ports"); + break; + + case VNET_API_ERROR_UNIMPLEMENTED: + return clib_error_return (0, + "Device driver doesn't support redirection"); + break; + + case VNET_API_ERROR_INVALID_VALUE: + return clib_error_return (0, "feature arc not found"); + + case VNET_API_ERROR_INVALID_VALUE_2: + return clib_error_return (0, "feature node not found"); + + default: + return clib_error_return (0, "syn_filter_enable_disable returned %d", + rv); + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (sr_content_command, static) = +{ + .path = "ip syn filter", + .short_help = "ip syn filter [disable]", + .function = syn_filter_enable_disable_command_fn, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/tcp/tcp_timer.h b/src/vnet/tcp/tcp_timer.h new file mode 100644 index 00000000..fa25268c --- /dev/null +++ b/src/vnet/tcp/tcp_timer.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_tcp_timer_h__ +#define __included_tcp_timer_h__ + +#include +#include + +#endif /* __included_tcp_timer_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/udp/builtin_server.c b/src/vnet/udp/builtin_server.c new file mode 100644 index 00000000..afa66ba4 --- /dev/null +++ b/src/vnet/udp/builtin_server.c @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** @file + udp builtin server +*/ + +#include +#include +#include + +/** per-worker built-in server copy buffers */ +u8 **copy_buffers; + +static int +builtin_session_create_callback (stream_session_t * s) +{ + /* Simple version: declare session ready-to-go... */ + s->session_state = SESSION_STATE_READY; + return 0; +} + +static void +builtin_session_disconnect_callback (stream_session_t * s) +{ + stream_session_disconnect (s); +} + +static int +builtin_server_rx_callback (stream_session_t * s) +{ + svm_fifo_t *rx_fifo, *tx_fifo; + u32 this_transfer; + int actual_transfer; + u8 *my_copy_buffer; + session_fifo_event_t evt; + unix_shared_memory_queue_t *q; + + my_copy_buffer = copy_buffers[s->thread_index]; + rx_fifo = s->server_rx_fifo; + tx_fifo = s->server_tx_fifo; + + this_transfer = svm_fifo_max_enqueue (tx_fifo) + < svm_fifo_max_dequeue (rx_fifo) ? + svm_fifo_max_enqueue (tx_fifo) : svm_fifo_max_dequeue (rx_fifo); + + vec_validate (my_copy_buffer, this_transfer - 1); + _vec_len (my_copy_buffer) = this_transfer; + + actual_transfer = svm_fifo_dequeue_nowait (rx_fifo, 0, this_transfer, + my_copy_buffer); + ASSERT (actual_transfer == this_transfer); + actual_transfer = svm_fifo_enqueue_nowait (tx_fifo, 0, this_transfer, + my_copy_buffer); + + copy_buffers[s->thread_index] = my_copy_buffer; + + /* Fabricate TX event, send to ourselves */ + evt.fifo = tx_fifo; + evt.event_type = FIFO_EVENT_SERVER_TX; + /* $$$$ for event logging */ + evt.enqueue_length = actual_transfer; + evt.event_id = 0; + q = session_manager_get_vpp_event_queue (s->thread_index); + unix_shared_memory_queue_add (q, (u8 *) & evt, 0 /* do wait for mutex */ ); + + return 0; +} + +/* *INDENT-OFF* */ +static session_cb_vft_t builtin_server = { + .session_accept_callback = builtin_session_create_callback, + .session_disconnect_callback = builtin_session_disconnect_callback, + .builtin_server_rx_callback = builtin_server_rx_callback +}; +/* *INDENT-ON* */ + +static int +bind_builtin_uri_server (u8 * uri) +{ + vnet_bind_args_t _a, *a = &_a; + char segment_name[128]; + u32 segment_name_length; + int rv; + u64 options[16]; + + segment_name_length = ARRAY_LEN (segment_name); + + memset (a, 0, sizeof (*a)); + memset (options, 0, sizeof (options)); + + a->uri = (char *) uri; + a->api_client_index = ~0; /* built-in server */ + a->segment_name = segment_name; + a->segment_name_length = segment_name_length; + a->session_cb_vft = &builtin_server; + + options[SESSION_OPTIONS_ACCEPT_COOKIE] = 0x12345678; + options[SESSION_OPTIONS_SEGMENT_SIZE] = (2 << 30); /*$$$$ config / arg */ + a->options = options; + + rv = vnet_bind_uri (a); + + return rv; +} + +static int +unbind_builtin_uri_server (u8 * uri) +{ + int rv; + + rv = vnet_unbind_uri ((char *) uri, ~0 /* client_index */ ); + + return rv; +} + +static clib_error_t * +builtin_server_init (vlib_main_t * vm) +{ + vlib_thread_main_t *vtm = vlib_get_thread_main (); + u32 num_threads; + + num_threads = 1 /* main thread */ + vtm->n_threads; + + vec_validate (copy_buffers, num_threads - 1); + return 0; +} + +VLIB_INIT_FUNCTION (builtin_server_init); + +static clib_error_t * +builtin_uri_bind_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u8 *uri = 0; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "uri %s", &uri)) + ; + else + break; + } + + if (uri == 0) + return clib_error_return (0, 
"uri to bind not specified..."); + + rv = bind_builtin_uri_server (uri); + + vec_free (uri); + + switch (rv) + { + case 0: + break; + + default: + return clib_error_return (0, "bind_uri_server returned %d", rv); + break; + } + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (builtin_uri_bind_command, static) = +{ + .path = "builtin uri bind", + .short_help = "builtin uri bind", + .function = builtin_uri_bind_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +builtin_uri_unbind_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u8 *uri = 0; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "uri %s", &uri)) + ; + else + break; + } + + if (uri == 0) + return clib_error_return (0, "uri to unbind not specified..."); + + rv = unbind_builtin_uri_server (uri); + + vec_free (uri); + + switch (rv) + { + case 0: + break; + + default: + return clib_error_return (0, "unbind_uri_server returned %d", rv); + break; + } + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (builtin_uri_unbind_command, static) = +{ + .path = "builtin uri unbind", + .short_help = "builtin uri unbind", + .function = builtin_uri_unbind_command_fn, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/udp/udp.c b/src/vnet/udp/udp.c new file mode 100644 index 00000000..9e740466 --- /dev/null +++ b/src/vnet/udp/udp.c @@ -0,0 +1,342 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** @file + udp state machine, etc. 
+*/ + +#include +#include +#include +#include + +udp_uri_main_t udp_uri_main; + +u32 +udp_session_bind_ip4 (vlib_main_t * vm, u32 session_index, + ip46_address_t * ip, u16 port_number_host_byte_order) +{ + udp_uri_main_t *um = vnet_get_udp_main (); + udp_connection_t *listener; + + pool_get (um->udp_listeners, listener); + memset (listener, 0, sizeof (udp_connection_t)); + listener->c_lcl_port = clib_host_to_net_u16 (port_number_host_byte_order); + listener->c_lcl_ip4.as_u32 = ip->ip4.as_u32; + listener->c_proto = SESSION_TYPE_IP4_UDP; + udp_register_dst_port (um->vlib_main, port_number_host_byte_order, + udp4_uri_input_node.index, 1 /* is_ipv4 */ ); + return 0; +} + +u32 +udp_session_bind_ip6 (vlib_main_t * vm, u32 session_index, + ip46_address_t * ip, u16 port_number_host_byte_order) +{ + udp_uri_main_t *um = vnet_get_udp_main (); + udp_connection_t *listener; + + pool_get (um->udp_listeners, listener); + listener->c_lcl_port = clib_host_to_net_u16 (port_number_host_byte_order); + clib_memcpy (&listener->c_lcl_ip6, &ip->ip6, sizeof (ip6_address_t)); + listener->c_proto = SESSION_TYPE_IP6_UDP; + udp_register_dst_port (um->vlib_main, port_number_host_byte_order, + udp4_uri_input_node.index, 0 /* is_ipv4 */ ); + return 0; +} + +u32 +udp_session_unbind_ip4 (vlib_main_t * vm, u32 listener_index) +{ + udp_connection_t *listener; + listener = udp_listener_get (listener_index); + + /* deregister the udp_local mapping */ + udp_unregister_dst_port (vm, listener->c_lcl_port, 1 /* is_ipv4 */ ); + return 0; +} + +u32 +udp_session_unbind_ip6 (vlib_main_t * vm, u32 listener_index) +{ + udp_connection_t *listener; + + listener = udp_listener_get (listener_index); + + /* deregister the udp_local mapping */ + udp_unregister_dst_port (vm, listener->c_lcl_port, 0 /* is_ipv4 */ ); + return 0; +} + +transport_connection_t * +udp_session_get_listener (u32 listener_index) +{ + udp_connection_t *us; + + us = udp_listener_get (listener_index); + return &us->connection; +} + +u32 +udp_push_header (transport_connection_t * tconn, vlib_buffer_t * b) +{ + udp_connection_t *us; + u8 *data; + udp_header_t *udp; + + us = (udp_connection_t *) tconn; + + if (tconn->is_ip4) + { + ip4_header_t *ip; + + data = vlib_buffer_get_current (b); + udp = (udp_header_t *) (data - sizeof (udp_header_t)); + ip = (ip4_header_t *) ((u8 *) udp - sizeof (ip4_header_t)); + + /* Build packet header, swap rx key src + dst fields */ + ip->src_address.as_u32 = us->c_lcl_ip4.as_u32; + ip->dst_address.as_u32 = us->c_rmt_ip4.as_u32; + ip->ip_version_and_header_length = 0x45; + ip->ttl = 254; + ip->protocol = IP_PROTOCOL_UDP; + ip->length = clib_host_to_net_u16 (b->current_length + sizeof (*udp)); + ip->checksum = ip4_header_checksum (ip); + + udp->src_port = us->c_lcl_port; + udp->dst_port = us->c_rmt_port; + udp->length = clib_host_to_net_u16 (b->current_length); + udp->checksum = 0; + + b->current_length = sizeof (*ip) + sizeof (*udp); + return SESSION_QUEUE_NEXT_IP4_LOOKUP; + } + else + { + vlib_main_t *vm = vlib_get_main (); + ip6_header_t *ip; + u16 payload_length; + int bogus = ~0; + + data = vlib_buffer_get_current (b); + udp = (udp_header_t *) (data - sizeof (udp_header_t)); + ip = (ip6_header_t *) ((u8 *) udp - sizeof (ip6_header_t)); + + /* Build packet header, swap rx key src + dst fields */ + clib_memcpy (&ip->src_address, &us->c_lcl_ip6, sizeof (ip6_address_t)); + clib_memcpy (&ip->dst_address, &us->c_rmt_ip6, sizeof (ip6_address_t)); + + ip->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 (0x6 << 28); + + 
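/* version 6 in the top nibble; traffic class and flow label left zero */ +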
ip->hop_limit = 0xff; + ip->protocol = IP_PROTOCOL_UDP; + + payload_length = vlib_buffer_length_in_chain (vm, b); + payload_length -= sizeof (*ip); + + ip->payload_length = clib_host_to_net_u16 (payload_length); + + udp->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ip, &bogus); + ASSERT (!bogus); + + udp->src_port = us->c_lcl_port; + udp->dst_port = us->c_rmt_port; + udp->length = clib_host_to_net_u16 (b->current_length); + udp->checksum = 0; + + b->current_length = sizeof (*ip) + sizeof (*udp); + + return SESSION_QUEUE_NEXT_IP6_LOOKUP; + } +} + +transport_connection_t * +udp_session_get (u32 connection_index, u32 my_thread_index) +{ + udp_uri_main_t *um = vnet_get_udp_main (); + + udp_connection_t *us; + us = + pool_elt_at_index (um->udp_sessions[my_thread_index], connection_index); + return &us->connection; +} + +void +udp_session_close (u32 connection_index, u32 my_thread_index) +{ + udp_uri_main_t *um = vnet_get_udp_main (); + pool_put_index (um->udp_sessions[my_thread_index], connection_index); +} + +u8 * +format_udp_session_ip4 (u8 * s, va_list * args) +{ + u32 uci = va_arg (*args, u32); + u32 thread_index = va_arg (*args, u32); + udp_connection_t *u4; + + u4 = udp_connection_get (uci, thread_index); + + s = format (s, "[%s] %U:%d->%U:%d", "udp", format_ip4_address, + &u4->c_lcl_ip4, clib_net_to_host_u16 (u4->c_lcl_port), + format_ip4_address, &u4->c_rmt_ip4, + clib_net_to_host_u16 (u4->c_rmt_port)); + return s; +} + +u8 * +format_udp_session_ip6 (u8 * s, va_list * args) +{ + u32 uci = va_arg (*args, u32); + u32 thread_index = va_arg (*args, u32); + udp_connection_t *tc = udp_connection_get (uci, thread_index); + s = format (s, "[%s] %U:%d->%U:%d", "udp", format_ip6_address, + &tc->c_lcl_ip6, clib_net_to_host_u16 (tc->c_lcl_port), + format_ip6_address, &tc->c_rmt_ip6, + clib_net_to_host_u16 (tc->c_rmt_port)); + return s; +} + +u8 * +format_udp_listener_session_ip4 (u8 * s, va_list * args) +{ + u32 tci = va_arg (*args, u32); + udp_connection_t *tc = udp_listener_get (tci); + s = format (s, "[%s] %U:%d->%U:%d", "udp", format_ip4_address, + &tc->c_lcl_ip4, clib_net_to_host_u16 (tc->c_lcl_port), + format_ip4_address, &tc->c_rmt_ip4, + clib_net_to_host_u16 (tc->c_rmt_port)); + return s; +} + +u8 * +format_udp_listener_session_ip6 (u8 * s, va_list * args) +{ + u32 tci = va_arg (*args, u32); + udp_connection_t *tc = udp_listener_get (tci); + s = format (s, "[%s] %U:%d->%U:%d", "udp", format_ip6_address, + &tc->c_lcl_ip6, clib_net_to_host_u16 (tc->c_lcl_port), + format_ip6_address, &tc->c_rmt_ip6, + clib_net_to_host_u16 (tc->c_rmt_port)); + return s; +} + +u16 +udp_send_mss_uri (transport_connection_t * t) +{ + /* TODO figure out MTU of output interface */ + return 400; +} + +u32 +udp_send_space_uri (transport_connection_t * t) +{ + /* No constraint on TX window */ + return ~0; +} + +int +udp_open_connection (ip46_address_t * addr, u16 port) +{ + clib_warning ("Not implemented"); + return 0; +} + +/* *INDENT-OFF* */ +const static transport_proto_vft_t udp4_proto = { + .bind = udp_session_bind_ip4, + .open = udp_open_connection, + .unbind = udp_session_unbind_ip4, + .push_header = udp_push_header, + .get_connection = udp_session_get, + .get_listener = udp_session_get_listener, + .close = udp_session_close, + .send_mss = udp_send_mss_uri, + .send_space = udp_send_space_uri, + .format_connection = format_udp_session_ip4, + .format_listener = format_udp_listener_session_ip4 +}; + +const static transport_proto_vft_t udp6_proto = { + .bind = udp_session_bind_ip6, + .open = 
udp_open_connection, + .unbind = udp_session_unbind_ip6, + .push_header = udp_push_header, + .get_connection = udp_session_get, + .get_listener = udp_session_get_listener, + .close = udp_session_close, + .send_mss = udp_send_mss_uri, + .send_space = udp_send_space_uri, + .format_connection = format_udp_session_ip6, + .format_listener = format_udp_listener_session_ip6 +}; +/* *INDENT-ON* */ + +static clib_error_t * +udp_init (vlib_main_t * vm) +{ + udp_uri_main_t *um = vnet_get_udp_main (); + ip_main_t *im = &ip_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + u32 num_threads; + clib_error_t *error = 0; + ip_protocol_info_t *pi; + + um->vlib_main = vm; + um->vnet_main = vnet_get_main (); + + if ((error = vlib_call_init_function (vm, ip_main_init))) + return error; + if ((error = vlib_call_init_function (vm, ip4_lookup_init))) + return error; + if ((error = vlib_call_init_function (vm, ip6_lookup_init))) + return error; + + /* + * Registrations + */ + + /* IP registration */ + pi = ip_get_protocol_info (im, IP_PROTOCOL_UDP); + if (pi == 0) + return clib_error_return (0, "UDP protocol info AWOL"); + pi->format_header = format_udp_header; + pi->unformat_pg_edit = unformat_pg_udp_header; + + + /* Register as transport with URI */ + session_register_transport (SESSION_TYPE_IP4_UDP, &udp4_proto); + session_register_transport (SESSION_TYPE_IP6_UDP, &udp6_proto); + + /* + * Initialize data structures + */ + + num_threads = 1 /* main thread */ + tm->n_threads; + vec_validate (um->udp_sessions, num_threads - 1); + + return error; +} + +VLIB_INIT_FUNCTION (udp_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/udp/udp.h b/src/vnet/udp/udp.h new file mode 100644 index 00000000..7ab26ce9 --- /dev/null +++ b/src/vnet/udp/udp.h @@ -0,0 +1,362 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __included_udp_h__ +#define __included_udp_h__ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +typedef struct +{ + transport_connection_t connection; /** must be first */ + + /** ersatz MTU to limit fifo pushes to test data size */ + u32 mtu; +} udp_connection_t; + +typedef struct _udp_uri_main +{ + /* Per-worker thread udp connection pools */ + udp_connection_t **udp_sessions; + udp_connection_t *udp_listeners; + + /* convenience */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; + ip4_main_t *ip4_main; + ip6_main_t *ip6_main; +} udp_uri_main_t; + +extern udp_uri_main_t udp_uri_main; +extern vlib_node_registration_t udp4_uri_input_node; + +always_inline udp_uri_main_t * +vnet_get_udp_main () +{ + return &udp_uri_main; +} + +always_inline udp_connection_t * +udp_connection_get (u32 conn_index, u32 thread_index) +{ + return pool_elt_at_index (udp_uri_main.udp_sessions[thread_index], + conn_index); +} + +always_inline udp_connection_t * +udp_listener_get (u32 conn_index) +{ + return pool_elt_at_index (udp_uri_main.udp_listeners, conn_index); +} + +typedef enum +{ +#define udp_error(n,s) UDP_ERROR_##n, +#include +#undef udp_error + UDP_N_ERROR, +} udp_error_t; + +#define foreach_udp4_dst_port \ +_ (67, dhcp_to_server) \ +_ (68, dhcp_to_client) \ +_ (500, ikev2) \ +_ (3784, bfd4) \ +_ (3785, bfd_echo4) \ +_ (4341, lisp_gpe) \ +_ (4342, lisp_cp) \ +_ (4739, ipfix) \ +_ (4789, vxlan) \ +_ (4789, vxlan6) \ +_ (4790, vxlan_gpe) \ +_ (6633, vpath_3) + + +#define foreach_udp6_dst_port \ +_ (547, dhcpv6_to_server) \ +_ (546, dhcpv6_to_client) \ +_ (3784, bfd6) \ +_ (3785, bfd_echo6) \ +_ (4341, lisp_gpe6) \ +_ (4342, lisp_cp6) \ +_ (4790, vxlan6_gpe) \ +_ (6633, vpath6_3) + +typedef enum +{ +#define _(n,f) UDP_DST_PORT_##f = n, + foreach_udp4_dst_port foreach_udp6_dst_port +#undef _ +} udp_dst_port_t; + +typedef enum +{ +#define _(n,f) UDP6_DST_PORT_##f = n, + foreach_udp6_dst_port +#undef _ +} udp6_dst_port_t; + +typedef struct +{ + /* Name (a c string). */ + char *name; + + /* GRE protocol type in host byte order. */ + udp_dst_port_t dst_port; + + /* Node which handles this type. */ + u32 node_index; + + /* Next index for this type. */ + u32 next_index; +} udp_dst_port_info_t; + +typedef enum +{ + UDP_IP6 = 0, + UDP_IP4, /* the code is full of is_ip4... */ + N_UDP_AF, +} udp_af_t; + +typedef struct +{ + udp_dst_port_info_t *dst_port_infos[N_UDP_AF]; + + /* Hash tables mapping name/protocol to protocol info index. */ + uword *dst_port_info_by_name[N_UDP_AF]; + uword *dst_port_info_by_dst_port[N_UDP_AF]; + + /* convenience */ + vlib_main_t *vlib_main; +} udp_main_t; + +always_inline udp_dst_port_info_t * +udp_get_dst_port_info (udp_main_t * um, udp_dst_port_t dst_port, u8 is_ip4) +{ + uword *p = hash_get (um->dst_port_info_by_dst_port[is_ip4], dst_port); + return p ? 
vec_elt_at_index (um->dst_port_infos[is_ip4], p[0]) : 0; +} + +format_function_t format_udp_header; +format_function_t format_udp_rx_trace; + +unformat_function_t unformat_udp_header; + +void udp_register_dst_port (vlib_main_t * vm, + udp_dst_port_t dst_port, + u32 node_index, u8 is_ip4); + +void +udp_unregister_dst_port (vlib_main_t * vm, + udp_dst_port_t dst_port, u8 is_ip4); + +void udp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add); + +always_inline void +ip_udp_fixup_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 is_ip4) +{ + u16 new_l0; + udp_header_t *udp0; + + if (is_ip4) + { + ip4_header_t *ip0; + ip_csum_t sum0; + u16 old_l0 = 0; + + ip0 = vlib_buffer_get_current (b0); + + /* fix the outer-IP checksum */ + sum0 = ip0->checksum; + /* old_l0 always 0, see the rewrite setup */ + new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); + + sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, + length /* changed member */ ); + ip0->checksum = ip_csum_fold (sum0); + ip0->length = new_l0; + + /* Fix UDP length */ + udp0 = (udp_header_t *) (ip0 + 1); + new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) + - sizeof (*ip0)); + udp0->length = new_l0; + } + else + { + ip6_header_t *ip0; + int bogus0; + + ip0 = vlib_buffer_get_current (b0); + + new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) + - sizeof (*ip0)); + ip0->payload_length = new_l0; + + /* Fix UDP length */ + udp0 = (udp_header_t *) (ip0 + 1); + udp0->length = new_l0; + + udp0->checksum = + ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip0, &bogus0); + ASSERT (bogus0 == 0); + + if (udp0->checksum == 0) + udp0->checksum = 0xffff; + } +} + +always_inline void +ip_udp_encap_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 * ec0, word ec_len, + u8 is_ip4) +{ + vlib_buffer_advance (b0, -ec_len); + + if (is_ip4) + { + ip4_header_t *ip0; + + ip0 = vlib_buffer_get_current (b0); + + /* Apply the encap string. */ + clib_memcpy (ip0, ec0, ec_len); + ip_udp_fixup_one (vm, b0, 1); + } + else + { + ip6_header_t *ip0; + + ip0 = vlib_buffer_get_current (b0); + + /* Apply the encap string. 
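+ ec0 is a caller-built ip6 + udp rewrite of ec_len bytes; lengths and
+ the v6 UDP checksum are repaired afterwards by ip_udp_fixup_one.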
*/ + clib_memcpy (ip0, ec0, ec_len); + ip_udp_fixup_one (vm, b0, 0); + } +} + +always_inline void +ip_udp_encap_two (vlib_main_t * vm, vlib_buffer_t * b0, vlib_buffer_t * b1, + u8 * ec0, u8 * ec1, word ec_len, u8 is_v4) +{ + u16 new_l0, new_l1; + udp_header_t *udp0, *udp1; + + ASSERT (_vec_len (ec0) == _vec_len (ec1)); + + vlib_buffer_advance (b0, -ec_len); + vlib_buffer_advance (b1, -ec_len); + + if (is_v4) + { + ip4_header_t *ip0, *ip1; + ip_csum_t sum0, sum1; + u16 old_l0 = 0, old_l1 = 0; + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + + /* Apply the encap string */ + clib_memcpy (ip0, ec0, ec_len); + clib_memcpy (ip1, ec1, ec_len); + + /* fix the outer-IP checksum */ + sum0 = ip0->checksum; + sum1 = ip1->checksum; + + /* old_l0 always 0, see the rewrite setup */ + new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); + new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1)); + + sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, + length /* changed member */ ); + sum1 = ip_csum_update (sum1, old_l1, new_l1, ip4_header_t, + length /* changed member */ ); + + ip0->checksum = ip_csum_fold (sum0); + ip1->checksum = ip_csum_fold (sum1); + + ip0->length = new_l0; + ip1->length = new_l1; + + /* Fix UDP length */ + udp0 = (udp_header_t *) (ip0 + 1); + udp1 = (udp_header_t *) (ip1 + 1); + + new_l0 = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - + sizeof (*ip0)); + new_l1 = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1) - + sizeof (*ip1)); + udp0->length = new_l0; + udp1->length = new_l1; + } + else + { + ip6_header_t *ip0, *ip1; + int bogus0, bogus1; + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + + /* Apply the encap string. */ + clib_memcpy (ip0, ec0, ec_len); + clib_memcpy (ip1, ec1, ec_len); + + new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) + - sizeof (*ip0)); + new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1) + - sizeof (*ip1)); + ip0->payload_length = new_l0; + ip1->payload_length = new_l1; + + /* Fix UDP length */ + udp0 = (udp_header_t *) (ip0 + 1); + udp1 = (udp_header_t *) (ip1 + 1); + + udp0->length = new_l0; + udp1->length = new_l1; + + udp0->checksum = + ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip0, &bogus0); + udp1->checksum = + ip6_tcp_udp_icmp_compute_checksum (vm, b1, ip1, &bogus1); + ASSERT (bogus0 == 0); + ASSERT (bogus1 == 0); + + if (udp0->checksum == 0) + udp0->checksum = 0xffff; + if (udp1->checksum == 0) + udp1->checksum = 0xffff; + } +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ + +#endif /* __included_udp_h__ */ diff --git a/src/vnet/udp/udp_error.def b/src/vnet/udp/udp_error.def new file mode 100644 index 00000000..bfdae0ac --- /dev/null +++ b/src/vnet/udp/udp_error.def @@ -0,0 +1,21 @@ +/* + * udp_error.def: udp errors + * + * Copyright (c) 2013-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +udp_error (NONE, "no error") +udp_error (NO_LISTENER, "no listener for dst port") +udp_error (LENGTH_ERROR, "UDP packets with length errors") +udp_error (PUNT, "no listener punt") diff --git a/src/vnet/udp/udp_format.c b/src/vnet/udp/udp_format.c new file mode 100644 index 00000000..abdf561e --- /dev/null +++ b/src/vnet/udp/udp_format.c @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/udp_format.c: udp formatting + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include + +/* Format UDP header. */ +u8 * +format_udp_header (u8 * s, va_list * args) +{ + udp_header_t *udp = va_arg (*args, udp_header_t *); + u32 max_header_bytes = va_arg (*args, u32); + uword indent; + u32 header_bytes = sizeof (udp[0]); + + /* Nothing to do. */ + if (max_header_bytes < sizeof (udp[0])) + return format (s, "UDP header truncated"); + + indent = format_get_indent (s); + indent += 2; + + s = format (s, "UDP: %d -> %d", + clib_net_to_host_u16 (udp->src_port), + clib_net_to_host_u16 (udp->dst_port)); + + s = format (s, "\n%Ulength %d, checksum 0x%04x", + format_white_space, indent, + clib_net_to_host_u16 (udp->length), + clib_net_to_host_u16 (udp->checksum)); + + /* Recurse into next protocol layer. 
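+ Uses whatever formatter, if any, is registered for the destination
+ port via ip_get_tcp_udp_port_info below.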
*/ + if (max_header_bytes != 0 && header_bytes < max_header_bytes) + { + ip_main_t *im = &ip_main; + tcp_udp_port_info_t *pi; + + pi = ip_get_tcp_udp_port_info (im, udp->dst_port); + + if (pi && pi->format_header) + s = format (s, "\n%U%U", + format_white_space, indent - 2, pi->format_header, + /* next protocol header */ (udp + 1), + max_header_bytes - sizeof (udp[0])); + } + + return s; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/udp/udp_input.c b/src/vnet/udp/udp_input.c new file mode 100644 index 00000000..4d509335 --- /dev/null +++ b/src/vnet/udp/udp_input.c @@ -0,0 +1,314 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include "../session/application_interface.h" + +vlib_node_registration_t udp4_uri_input_node; + +typedef struct +{ + u32 session; + u32 disposition; + u32 thread_index; +} udp4_uri_input_trace_t; + +/* packet trace format function */ +static u8 * +format_udp4_uri_input_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + udp4_uri_input_trace_t *t = va_arg (*args, udp4_uri_input_trace_t *); + + s = format (s, "UDP4_URI_INPUT: session %d, disposition %d, thread %d", + t->session, t->disposition, t->thread_index); + return s; +} + +typedef enum +{ + UDP4_URI_INPUT_NEXT_DROP, + UDP4_URI_INPUT_N_NEXT, +} udp4_uri_input_next_t; + +static char *udp4_uri_input_error_strings[] = { +#define _(sym,string) string, + foreach_session_input_error +#undef _ +}; + +static uword +udp4_uri_input_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, *from, *to_next; + udp4_uri_input_next_t next_index; + udp_uri_main_t *um = vnet_get_udp_main (); + session_manager_main_t *smm = vnet_get_session_manager_main (); + u32 my_thread_index = vm->cpu_index; + u8 my_enqueue_epoch; + u32 *session_indices_to_enqueue; + static u32 serial_number; + int i; + + my_enqueue_epoch = ++smm->current_enqueue_epoch[my_thread_index]; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0 = UDP4_URI_INPUT_NEXT_DROP; + u32 error0 = SESSION_ERROR_ENQUEUED; + udp_header_t *udp0; + ip4_header_t *ip0; + stream_session_t *s0; + svm_fifo_t *f0; + u16 udp_len0; + u8 *data0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + /* udp_local hands us a 
pointer to the udp data */ + + data0 = vlib_buffer_get_current (b0); + udp0 = (udp_header_t *) (data0 - sizeof (*udp0)); + + /* $$$$ fixme: udp_local doesn't do ip options correctly anyhow */ + ip0 = (ip4_header_t *) (((u8 *) udp0) - sizeof (*ip0)); + s0 = 0; + + /* lookup session */ + s0 = stream_session_lookup4 (&ip0->dst_address, &ip0->src_address, + udp0->dst_port, udp0->src_port, + SESSION_TYPE_IP4_UDP, my_thread_index); + + /* no listener */ + if (PREDICT_FALSE (s0 == 0)) + { + error0 = SESSION_ERROR_NO_LISTENER; + goto trace0; + } + + f0 = s0->server_rx_fifo; + + /* established hit */ + if (PREDICT_TRUE (s0->session_state == SESSION_STATE_READY)) + { + udp_len0 = clib_net_to_host_u16 (udp0->length); + + if (PREDICT_FALSE (udp_len0 > svm_fifo_max_enqueue (f0))) + { + error0 = SESSION_ERROR_FIFO_FULL; + goto trace0; + } + + svm_fifo_enqueue_nowait (f0, 0 /* pid */ , + udp_len0 - sizeof (*udp0), + (u8 *) (udp0 + 1)); + + b0->error = node->errors[SESSION_ERROR_ENQUEUED]; + + /* We need to send an RX event on this fifo */ + if (s0->enqueue_epoch != my_enqueue_epoch) + { + s0->enqueue_epoch = my_enqueue_epoch; + + vec_add1 (smm->session_indices_to_enqueue_by_thread + [my_thread_index], + s0 - smm->sessions[my_thread_index]); + } + } + /* listener hit */ + else if (s0->session_state == SESSION_STATE_LISTENING) + { + udp_connection_t *us; + int rv; + + error0 = SESSION_ERROR_NOT_READY; + + /* + * create udp transport session + */ + pool_get (um->udp_sessions[my_thread_index], us); + + us->mtu = 1024; /* $$$$ policy */ + + us->c_lcl_ip4.as_u32 = ip0->dst_address.as_u32; + us->c_rmt_ip4.as_u32 = ip0->src_address.as_u32; + us->c_lcl_port = udp0->dst_port; + us->c_rmt_port = udp0->src_port; + us->c_proto = SESSION_TYPE_IP4_UDP; + us->c_c_index = us - um->udp_sessions[my_thread_index]; + + /* + * create stream session and attach the udp session to it + */ + rv = stream_session_accept (&us->connection, s0->session_index, + SESSION_TYPE_IP4_UDP, + 1 /*notify */ ); + if (rv) + error0 = rv; + + } + else + { + + error0 = SESSION_ERROR_NOT_READY; + goto trace0; + } + + trace0: + b0->error = node->errors[error0]; + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + udp4_uri_input_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + + t->session = ~0; + if (s0) + t->session = s0 - smm->sessions[my_thread_index]; + t->disposition = error0; + t->thread_index = my_thread_index; + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + /* Send enqueue events */ + + session_indices_to_enqueue = + smm->session_indices_to_enqueue_by_thread[my_thread_index]; + + for (i = 0; i < vec_len (session_indices_to_enqueue); i++) + { + session_fifo_event_t evt; + unix_shared_memory_queue_t *q; + stream_session_t *s0; + application_t *server0; + + /* Get session */ + s0 = pool_elt_at_index (smm->sessions[my_thread_index], + session_indices_to_enqueue[i]); + + /* Get session's server */ + server0 = application_get (s0->app_index); + + /* Built-in server? Deliver the goods... 
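+ by calling the rx callback in-line rather than posting an event to
+ the application's event queue.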
*/ + if (server0->cb_fns.builtin_server_rx_callback) + { + server0->cb_fns.builtin_server_rx_callback (s0); + continue; + } + + /* Fabricate event */ + evt.fifo = s0->server_rx_fifo; + evt.event_type = FIFO_EVENT_SERVER_RX; + evt.event_id = serial_number++; + evt.enqueue_length = svm_fifo_max_dequeue (s0->server_rx_fifo); + + /* Add event to server's event queue */ + q = server0->event_queue; + + /* Don't block for lack of space */ + if (PREDICT_TRUE (q->cursize < q->maxsize)) + unix_shared_memory_queue_add (server0->event_queue, (u8 *) & evt, + 0 /* do wait for mutex */ ); + else + { + vlib_node_increment_counter (vm, udp4_uri_input_node.index, + SESSION_ERROR_FIFO_FULL, 1); + } + if (1) + { + ELOG_TYPE_DECLARE (e) = + { + .format = "evt-enqueue: id %d length %d",.format_args = "i4i4",}; + struct + { + u32 data[2]; + } *ed; + ed = ELOG_DATA (&vlib_global_main.elog_main, e); + ed->data[0] = evt.event_id; + ed->data[1] = evt.enqueue_length; + } + } + + vec_reset_length (session_indices_to_enqueue); + + smm->session_indices_to_enqueue_by_thread[my_thread_index] = + session_indices_to_enqueue; + + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (udp4_uri_input_node) = +{ + .function = udp4_uri_input_node_fn,.name = "udp4-uri-input",.vector_size = + sizeof (u32),.format_trace = format_udp4_uri_input_trace,.type = + VLIB_NODE_TYPE_INTERNAL,.n_errors = + ARRAY_LEN (udp4_uri_input_error_strings),.error_strings = + udp4_uri_input_error_strings,.n_next_nodes = UDP4_URI_INPUT_N_NEXT, + /* edit / add dispositions here */ + .next_nodes = + { + [UDP4_URI_INPUT_NEXT_DROP] = "error-drop",} +,}; + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/udp/udp_local.c b/src/vnet/udp/udp_local.c new file mode 100644 index 00000000..6b239f73 --- /dev/null +++ b/src/vnet/udp/udp_local.c @@ -0,0 +1,666 @@ +/* + * node.c: udp packet processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +udp_main_t udp_main; + +#define foreach_udp_input_next \ + _ (PUNT, "error-punt") \ + _ (DROP, "error-drop") \ + _ (ICMP4_ERROR, "ip4-icmp-error") \ + _ (ICMP6_ERROR, "ip6-icmp-error") + +typedef enum +{ +#define _(s,n) UDP_INPUT_NEXT_##s, + foreach_udp_input_next +#undef _ + UDP_INPUT_N_NEXT, +} udp_input_next_t; + +typedef struct +{ + u16 src_port; + u16 dst_port; + u8 bound; +} udp_rx_trace_t; + +u8 * +format_udp_rx_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + udp_rx_trace_t *t = va_arg (*args, udp_rx_trace_t *); + + s = format (s, "UDP: src-port %d dst-port %d%s", + clib_net_to_host_u16 (t->src_port), + clib_net_to_host_u16 (t->dst_port), + t->bound ? "" : " (no listener)"); + return s; +} + +typedef struct +{ + /* Sparse vector mapping udp dst_port in network byte order + to next index. 
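+ Unregistered ports resolve to SPARSE_VEC_INVALID_INDEX, which the
+ input nodes below turn into a punt or an ICMP port-unreachable.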
*/ + u16 *next_by_dst_port; + u8 punt_unknown; +} udp_input_runtime_t; + +vlib_node_registration_t udp4_input_node; +vlib_node_registration_t udp6_input_node; + +always_inline uword +udp46_input_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_ip4) +{ + udp_input_runtime_t *rt = is_ip4 ? + (void *) vlib_node_get_runtime_data (vm, udp4_input_node.index) + : (void *) vlib_node_get_runtime_data (vm, udp6_input_node.index); + __attribute__ ((unused)) u32 n_left_from, next_index, *from, *to_next; + word n_no_listener = 0; + u8 punt_unknown = rt->punt_unknown; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t *b0, *b1; + udp_header_t *h0 = 0, *h1 = 0; + u32 i0, i1, dst_port0, dst_port1; + u32 advance0, advance1; + u32 error0, next0, error1, next1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t *p2, *p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, sizeof (h0[0]), LOAD); + CLIB_PREFETCH (p3->data, sizeof (h1[0]), LOAD); + } + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* ip4/6_local hands us the ip header, not the udp header */ + if (is_ip4) + { + advance0 = sizeof (ip4_header_t); + advance1 = sizeof (ip4_header_t); + } + else + { + advance0 = sizeof (ip6_header_t); + advance1 = sizeof (ip6_header_t); + } + + if (PREDICT_FALSE (b0->current_length < advance0 + sizeof (*h0))) + { + error0 = UDP_ERROR_LENGTH_ERROR; + next0 = UDP_INPUT_NEXT_DROP; + } + else + { + vlib_buffer_advance (b0, advance0); + h0 = vlib_buffer_get_current (b0); + error0 = next0 = 0; + if (PREDICT_FALSE (clib_net_to_host_u16 (h0->length) > + vlib_buffer_length_in_chain (vm, b0))) + { + error0 = UDP_ERROR_LENGTH_ERROR; + next0 = UDP_INPUT_NEXT_DROP; + } + } + + if (PREDICT_FALSE (b1->current_length < advance1 + sizeof (*h1))) + { + error1 = UDP_ERROR_LENGTH_ERROR; + next1 = UDP_INPUT_NEXT_DROP; + } + else + { + vlib_buffer_advance (b1, advance1); + h1 = vlib_buffer_get_current (b1); + error1 = next1 = 0; + if (PREDICT_FALSE (clib_net_to_host_u16 (h1->length) > + vlib_buffer_length_in_chain (vm, b1))) + { + error1 = UDP_ERROR_LENGTH_ERROR; + next1 = UDP_INPUT_NEXT_DROP; + } + } + + /* Index sparse array with network byte order. */ + dst_port0 = (error0 == 0) ? h0->dst_port : 0; + dst_port1 = (error1 == 0) ? h1->dst_port : 0; + sparse_vec_index2 (rt->next_by_dst_port, dst_port0, dst_port1, + &i0, &i1); + next0 = (error0 == 0) ? vec_elt (rt->next_by_dst_port, i0) : next0; + next1 = (error1 == 0) ? 
vec_elt (rt->next_by_dst_port, i1) : next1; + + if (PREDICT_FALSE (i0 == SPARSE_VEC_INVALID_INDEX)) + { + // move the pointer back so icmp-error can find the + // ip packet header + vlib_buffer_advance (b0, -(word) advance0); + + if (PREDICT_FALSE (punt_unknown)) + { + b0->error = node->errors[UDP_ERROR_PUNT]; + next0 = UDP_INPUT_NEXT_PUNT; + } + else if (is_ip4) + { + icmp4_error_set_vnet_buffer (b0, + ICMP4_destination_unreachable, + ICMP4_destination_unreachable_port_unreachable, + 0); + next0 = UDP_INPUT_NEXT_ICMP4_ERROR; + n_no_listener++; + } + else + { + icmp6_error_set_vnet_buffer (b0, + ICMP6_destination_unreachable, + ICMP6_destination_unreachable_port_unreachable, + 0); + next0 = UDP_INPUT_NEXT_ICMP6_ERROR; + n_no_listener++; + } + } + else + { + b0->error = node->errors[UDP_ERROR_NONE]; + // advance to the payload + vlib_buffer_advance (b0, sizeof (*h0)); + } + + if (PREDICT_FALSE (i1 == SPARSE_VEC_INVALID_INDEX)) + { + // move the pointer back so icmp-error can find the + // ip packet header + vlib_buffer_advance (b1, -(word) advance1); + + if (PREDICT_FALSE (punt_unknown)) + { + b1->error = node->errors[UDP_ERROR_PUNT]; + next1 = UDP_INPUT_NEXT_PUNT; + } + else if (is_ip4) + { + icmp4_error_set_vnet_buffer (b1, + ICMP4_destination_unreachable, + ICMP4_destination_unreachable_port_unreachable, + 0); + next1 = UDP_INPUT_NEXT_ICMP4_ERROR; + n_no_listener++; + } + else + { + icmp6_error_set_vnet_buffer (b1, + ICMP6_destination_unreachable, + ICMP6_destination_unreachable_port_unreachable, + 0); + next1 = UDP_INPUT_NEXT_ICMP6_ERROR; + n_no_listener++; + } + } + else + { + b1->error = node->errors[UDP_ERROR_NONE]; + // advance to the payload + vlib_buffer_advance (b1, sizeof (*h1)); + } + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + udp_rx_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + if (b0->error != node->errors[UDP_ERROR_LENGTH_ERROR]) + { + tr->src_port = h0 ? h0->src_port : 0; + tr->dst_port = h0 ? h0->dst_port : 0; + tr->bound = (next0 != UDP_INPUT_NEXT_ICMP4_ERROR && + next0 != UDP_INPUT_NEXT_ICMP6_ERROR); + } + } + if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) + { + udp_rx_trace_t *tr = vlib_add_trace (vm, node, + b1, sizeof (*tr)); + if (b1->error != node->errors[UDP_ERROR_LENGTH_ERROR]) + { + tr->src_port = h1 ? h1->src_port : 0; + tr->dst_port = h1 ? 
h1->dst_port : 0; + tr->bound = (next1 != UDP_INPUT_NEXT_ICMP4_ERROR && + next1 != UDP_INPUT_NEXT_ICMP6_ERROR); + } + } + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + udp_header_t *h0 = 0; + u32 i0, next0; + u32 advance0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + /* ip4/6_local hands us the ip header, not the udp header */ + if (is_ip4) + advance0 = sizeof (ip4_header_t); + else + advance0 = sizeof (ip6_header_t); + + if (PREDICT_FALSE (b0->current_length < advance0 + sizeof (*h0))) + { + b0->error = node->errors[UDP_ERROR_LENGTH_ERROR]; + next0 = UDP_INPUT_NEXT_DROP; + goto trace_x1; + } + + vlib_buffer_advance (b0, advance0); + + h0 = vlib_buffer_get_current (b0); + + if (PREDICT_TRUE (clib_net_to_host_u16 (h0->length) <= + vlib_buffer_length_in_chain (vm, b0))) + { + i0 = sparse_vec_index (rt->next_by_dst_port, h0->dst_port); + next0 = vec_elt (rt->next_by_dst_port, i0); + + if (PREDICT_FALSE (i0 == SPARSE_VEC_INVALID_INDEX)) + { + // move the pointer back so icmp-error can find the + // ip packet header + vlib_buffer_advance (b0, -(word) advance0); + + if (PREDICT_FALSE (punt_unknown)) + { + b0->error = node->errors[UDP_ERROR_PUNT]; + next0 = UDP_INPUT_NEXT_PUNT; + } + else if (is_ip4) + { + icmp4_error_set_vnet_buffer (b0, + ICMP4_destination_unreachable, + ICMP4_destination_unreachable_port_unreachable, + 0); + next0 = UDP_INPUT_NEXT_ICMP4_ERROR; + n_no_listener++; + } + else + { + icmp6_error_set_vnet_buffer (b0, + ICMP6_destination_unreachable, + ICMP6_destination_unreachable_port_unreachable, + 0); + next0 = UDP_INPUT_NEXT_ICMP6_ERROR; + n_no_listener++; + } + } + else + { + b0->error = node->errors[UDP_ERROR_NONE]; + // advance to the payload + vlib_buffer_advance (b0, sizeof (*h0)); + } + } + else + { + b0->error = node->errors[UDP_ERROR_LENGTH_ERROR]; + next0 = UDP_INPUT_NEXT_DROP; + } + + trace_x1: + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + udp_rx_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + if (b0->error != node->errors[UDP_ERROR_LENGTH_ERROR]) + { + tr->src_port = h0->src_port; + tr->dst_port = h0->dst_port; + tr->bound = (next0 != UDP_INPUT_NEXT_ICMP4_ERROR && + next0 != UDP_INPUT_NEXT_ICMP6_ERROR); + } + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_error_count (vm, node->node_index, UDP_ERROR_NO_LISTENER, + n_no_listener); + return from_frame->n_vectors; +} + +static char *udp_error_strings[] = { +#define udp_error(n,s) s, +#include "udp_error.def" +#undef udp_error +}; + +static uword +udp4_input (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * from_frame) +{ + return udp46_input_inline (vm, node, from_frame, 1 /* is_ip4 */ ); +} + +static uword +udp6_input (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * from_frame) +{ + return udp46_input_inline (vm, node, from_frame, 0 /* is_ip4 */ ); +} + + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (udp4_input_node) = { + .function = udp4_input, + .name = "ip4-udp-lookup", + /* Takes a vector of packets. 
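+ Each element is a 32-bit buffer index, hence the u32 vector_size below.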
*/ + .vector_size = sizeof (u32), + + .runtime_data_bytes = sizeof (udp_input_runtime_t), + + .n_errors = UDP_N_ERROR, + .error_strings = udp_error_strings, + + .n_next_nodes = UDP_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [UDP_INPUT_NEXT_##s] = n, + foreach_udp_input_next +#undef _ + }, + + .format_buffer = format_udp_header, + .format_trace = format_udp_rx_trace, + .unformat_buffer = unformat_udp_header, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (udp4_input_node, udp4_input); + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (udp6_input_node) = { + .function = udp6_input, + .name = "ip6-udp-lookup", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + + .runtime_data_bytes = sizeof (udp_input_runtime_t), + + .n_errors = UDP_N_ERROR, + .error_strings = udp_error_strings, + + .n_next_nodes = UDP_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [UDP_INPUT_NEXT_##s] = n, + foreach_udp_input_next +#undef _ + }, + + .format_buffer = format_udp_header, + .format_trace = format_udp_rx_trace, + .unformat_buffer = unformat_udp_header, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (udp6_input_node, udp6_input); + +static void +add_dst_port (udp_main_t * um, + udp_dst_port_t dst_port, char *dst_port_name, u8 is_ip4) +{ + udp_dst_port_info_t *pi; + u32 i; + + vec_add2 (um->dst_port_infos[is_ip4], pi, 1); + i = pi - um->dst_port_infos[is_ip4]; + + pi->name = dst_port_name; + pi->dst_port = dst_port; + pi->next_index = pi->node_index = ~0; + + hash_set (um->dst_port_info_by_dst_port[is_ip4], dst_port, i); + + if (pi->name) + hash_set_mem (um->dst_port_info_by_name[is_ip4], pi->name, i); +} + +void +udp_register_dst_port (vlib_main_t * vm, + udp_dst_port_t dst_port, u32 node_index, u8 is_ip4) +{ + udp_main_t *um = &udp_main; + udp_dst_port_info_t *pi; + udp_input_runtime_t *rt; + u16 *n; + + { + clib_error_t *error = vlib_call_init_function (vm, udp_local_init); + if (error) + clib_error_report (error); + } + + pi = udp_get_dst_port_info (um, dst_port, is_ip4); + if (!pi) + { + add_dst_port (um, dst_port, 0, is_ip4); + pi = udp_get_dst_port_info (um, dst_port, is_ip4); + ASSERT (pi); + } + + pi->node_index = node_index; + pi->next_index = vlib_node_add_next (vm, + is_ip4 ? udp4_input_node.index + : udp6_input_node.index, node_index); + + /* Setup udp protocol -> next index sparse vector mapping. */ + rt = vlib_node_get_runtime_data + (vm, is_ip4 ? udp4_input_node.index : udp6_input_node.index); + n = sparse_vec_validate (rt->next_by_dst_port, + clib_host_to_net_u16 (dst_port)); + n[0] = pi->next_index; +} + +void +udp_unregister_dst_port (vlib_main_t * vm, udp_dst_port_t dst_port, u8 is_ip4) +{ + udp_main_t *um = &udp_main; + udp_dst_port_info_t *pi; + udp_input_runtime_t *rt; + u16 *n; + + pi = udp_get_dst_port_info (um, dst_port, is_ip4); + /* Not registered? Fagedaboudit */ + if (!pi) + return; + + /* Kill the mapping. Don't bother killing the pi, it may be back. */ + rt = vlib_node_get_runtime_data + (vm, is_ip4 ? udp4_input_node.index : udp6_input_node.index); + n = sparse_vec_validate (rt->next_by_dst_port, + clib_host_to_net_u16 (dst_port)); + n[0] = SPARSE_VEC_INVALID_INDEX; +} + +void +udp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add) +{ + udp_input_runtime_t *rt; + + { + clib_error_t *error = vlib_call_init_function (vm, udp_local_init); + if (error) + clib_error_report (error); + } + + rt = vlib_node_get_runtime_data + (vm, is_ip4 ? udp4_input_node.index : udp6_input_node.index); + + rt->punt_unknown = is_add; +} + +/* Parse a UDP header. 
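+ Accepts "src-port %d dst-port %d"; wired up as the udp input nodes'
+ unformat_buffer hook by udp_setup_node below.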
*/ +uword +unformat_udp_header (unformat_input_t * input, va_list * args) +{ + u8 **result = va_arg (*args, u8 **); + udp_header_t *udp; + __attribute__ ((unused)) int old_length; + u16 src_port, dst_port; + + /* Allocate space for IP header. */ + { + void *p; + + old_length = vec_len (*result); + vec_add2 (*result, p, sizeof (ip4_header_t)); + udp = p; + } + + memset (udp, 0, sizeof (udp[0])); + if (unformat (input, "src-port %d dst-port %d", &src_port, &dst_port)) + { + udp->src_port = clib_host_to_net_u16 (src_port); + udp->dst_port = clib_host_to_net_u16 (dst_port); + return 1; + } + return 0; +} + +static void +udp_setup_node (vlib_main_t * vm, u32 node_index) +{ + vlib_node_t *n = vlib_get_node (vm, node_index); + pg_node_t *pn = pg_get_node (node_index); + + n->format_buffer = format_udp_header; + n->unformat_buffer = unformat_udp_header; + pn->unformat_edit = unformat_pg_udp_header; +} + +clib_error_t * +udp_local_init (vlib_main_t * vm) +{ + udp_input_runtime_t *rt; + udp_main_t *um = &udp_main; + int i; + + { + clib_error_t *error; + error = vlib_call_init_function (vm, udp_init); + if (error) + clib_error_report (error); + } + + + for (i = 0; i < 2; i++) + { + um->dst_port_info_by_name[i] = hash_create_string (0, sizeof (uword)); + um->dst_port_info_by_dst_port[i] = hash_create (0, sizeof (uword)); + } + + udp_setup_node (vm, udp4_input_node.index); + udp_setup_node (vm, udp6_input_node.index); + + rt = vlib_node_get_runtime_data (vm, udp4_input_node.index); + + rt->next_by_dst_port = sparse_vec_new + ( /* elt bytes */ sizeof (rt->next_by_dst_port[0]), + /* bits in index */ BITS (((udp_header_t *) 0)->dst_port)); + + rt->punt_unknown = 0; + +#define _(n,s) add_dst_port (um, UDP_DST_PORT_##s, #s, 1 /* is_ip4 */); + foreach_udp4_dst_port +#undef _ + rt = vlib_node_get_runtime_data (vm, udp6_input_node.index); + + rt->next_by_dst_port = sparse_vec_new + ( /* elt bytes */ sizeof (rt->next_by_dst_port[0]), + /* bits in index */ BITS (((udp_header_t *) 0)->dst_port)); + + rt->punt_unknown = 0; + +#define _(n,s) add_dst_port (um, UDP_DST_PORT_##s, #s, 0 /* is_ip4 */); + foreach_udp6_dst_port +#undef _ + ip4_register_protocol (IP_PROTOCOL_UDP, udp4_input_node.index); + /* Note: ip6 differs from ip4, UDP is hotwired to ip6-udp-lookup */ + return 0; +} + +VLIB_INIT_FUNCTION (udp_local_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/udp/udp_packet.h b/src/vnet/udp/udp_packet.h new file mode 100644 index 00000000..beea3059 --- /dev/null +++ b/src/vnet/udp/udp_packet.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * ip4/udp_packet.h: UDP packet format + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_udp_packet_h +#define included_udp_packet_h + +typedef struct +{ + /* Source and destination port. */ + u16 src_port, dst_port; + + /* Length of UDP header plus payload. */ + u16 length; + + /* Checksum of UDP pseudo-header and data or + zero if checksum is disabled. */ + u16 checksum; +} udp_header_t; + +#endif /* included_udp_packet_h */ + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/udp/udp_pg.c b/src/vnet/udp/udp_pg.c new file mode 100644 index 00000000..c9d8d38c --- /dev/null +++ b/src/vnet/udp/udp_pg.c @@ -0,0 +1,237 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/udp_pg: UDP packet-generator interface + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include /* for unformat_udp_udp_port */ + +#define UDP_PG_EDIT_LENGTH (1 << 0) +#define UDP_PG_EDIT_CHECKSUM (1 << 1) + +always_inline void +udp_pg_edit_function_inline (pg_main_t * pg, + pg_stream_t * s, + pg_edit_group_t * g, + u32 * packets, u32 n_packets, u32 flags) +{ + vlib_main_t *vm = vlib_get_main (); + u32 ip_offset, udp_offset; + + udp_offset = g->start_byte_offset; + ip_offset = (g - 1)->start_byte_offset; + + while (n_packets >= 1) + { + vlib_buffer_t *p0; + ip4_header_t *ip0; + udp_header_t *udp0; + u32 udp_len0; + + p0 = vlib_get_buffer (vm, packets[0]); + n_packets -= 1; + packets += 1; + + ip0 = (void *) (p0->data + ip_offset); + udp0 = (void *) (p0->data + udp_offset); + udp_len0 = clib_net_to_host_u16 (ip0->length) - sizeof (ip0[0]); + + if (flags & UDP_PG_EDIT_LENGTH) + udp0->length = + clib_net_to_host_u16 (vlib_buffer_length_in_chain (vm, p0) + - ip_offset); + + /* Initialize checksum with header. */ + if (flags & UDP_PG_EDIT_CHECKSUM) + { + ip_csum_t sum0; + + sum0 = clib_mem_unaligned (&ip0->src_address, u64); + + sum0 = ip_csum_with_carry + (sum0, clib_host_to_net_u32 (udp_len0 + (ip0->protocol << 16))); + + /* Invalidate possibly old checksum. */ + udp0->checksum = 0; + + sum0 = + ip_incremental_checksum_buffer (vm, p0, udp_offset, udp_len0, + sum0); + + sum0 = ~ip_csum_fold (sum0); + + /* Zero checksum means checksumming disabled. */ + sum0 = sum0 != 0 ? sum0 : 0xffff; + + udp0->checksum = sum0; + } + } +} + +static void +udp_pg_edit_function (pg_main_t * pg, + pg_stream_t * s, + pg_edit_group_t * g, u32 * packets, u32 n_packets) +{ + switch (g->edit_function_opaque) + { + case UDP_PG_EDIT_LENGTH: + udp_pg_edit_function_inline (pg, s, g, packets, n_packets, + UDP_PG_EDIT_LENGTH); + break; + + case UDP_PG_EDIT_CHECKSUM: + udp_pg_edit_function_inline (pg, s, g, packets, n_packets, + UDP_PG_EDIT_CHECKSUM); + break; + + case UDP_PG_EDIT_CHECKSUM | UDP_PG_EDIT_LENGTH: + udp_pg_edit_function_inline (pg, s, g, packets, n_packets, + UDP_PG_EDIT_CHECKSUM | UDP_PG_EDIT_LENGTH); + break; + + default: + ASSERT (0); + break; + } +} + +typedef struct +{ + pg_edit_t src_port, dst_port; + pg_edit_t length; + pg_edit_t checksum; +} pg_udp_header_t; + +static inline void +pg_udp_header_init (pg_udp_header_t * p) +{ + /* Initialize fields that are not bit fields in the IP header. */ +#define _(f) pg_edit_init (&p->f, udp_header_t, f); + _(src_port); + _(dst_port); + _(length); + _(checksum); +#undef _ +} + +uword +unformat_pg_udp_header (unformat_input_t * input, va_list * args) +{ + pg_stream_t *s = va_arg (*args, pg_stream_t *); + pg_udp_header_t *p; + u32 group_index; + + p = pg_create_edit_group (s, sizeof (p[0]), sizeof (udp_header_t), + &group_index); + pg_udp_header_init (p); + + /* Defaults. */ + p->checksum.type = PG_EDIT_UNSPECIFIED; + p->length.type = PG_EDIT_UNSPECIFIED; + + if (!unformat (input, "UDP: %U -> %U", + unformat_pg_edit, + unformat_tcp_udp_port, &p->src_port, + unformat_pg_edit, unformat_tcp_udp_port, &p->dst_port)) + goto error; + + /* Parse options. 
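/*
 * Editorial example, not part of this patch: input accepted by
 * unformat_pg_udp_header() above.  Within a packet-generator stream
 * definition, a UDP layer with fixed ports is written as, e.g.:
 *
 *   UDP: 1234 -> 5678
 *
 * The port numbers are made up.  If length and checksum are left
 * unspecified, the edit function selected below recomputes both for
 * every generated packet.
 */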
*/ + while (1) + { + if (unformat (input, "length %U", + unformat_pg_edit, unformat_pg_number, &p->length)) + ; + + else if (unformat (input, "checksum %U", + unformat_pg_edit, unformat_pg_number, &p->checksum)) + ; + + /* Can't parse input: try next protocol level. */ + else + break; + } + + { + ip_main_t *im = &ip_main; + u16 dst_port; + tcp_udp_port_info_t *pi; + + pi = 0; + if (p->dst_port.type == PG_EDIT_FIXED) + { + dst_port = pg_edit_get_value (&p->dst_port, PG_EDIT_LO); + pi = ip_get_tcp_udp_port_info (im, dst_port); + } + + if (pi && pi->unformat_pg_edit + && unformat_user (input, pi->unformat_pg_edit, s)) + ; + + else if (!unformat_user (input, unformat_pg_payload, s)) + goto error; + + p = pg_get_edit_group (s, group_index); + if (p->checksum.type == PG_EDIT_UNSPECIFIED + || p->length.type == PG_EDIT_UNSPECIFIED) + { + pg_edit_group_t *g = pg_stream_get_group (s, group_index); + g->edit_function = udp_pg_edit_function; + g->edit_function_opaque = 0; + if (p->checksum.type == PG_EDIT_UNSPECIFIED) + g->edit_function_opaque |= UDP_PG_EDIT_CHECKSUM; + if (p->length.type == PG_EDIT_UNSPECIFIED) + g->edit_function_opaque |= UDP_PG_EDIT_LENGTH; + } + + return 1; + } + +error: + /* Free up any edits we may have added. */ + pg_free_edit_group (s); + return 0; +} + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/vnet_all_api_h.h b/src/vnet/vnet_all_api_h.h index 142acedc..c4075db6 100644 --- a/src/vnet/vnet_all_api_h.h +++ b/src/vnet/vnet_all_api_h.h @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include diff --git a/src/vnet/vxlan-gpe/vxlan_gpe.h b/src/vnet/vxlan-gpe/vxlan_gpe.h index 1b4bc44e..e768d230 100644 --- a/src/vnet/vxlan-gpe/vxlan_gpe.h +++ b/src/vnet/vxlan-gpe/vxlan_gpe.h @@ -29,7 +29,7 @@ #include #include #include -#include +#include /** * @brief VXLAN GPE header struct diff --git a/src/vnet/vxlan/vxlan.h b/src/vnet/vxlan/vxlan.h index adfa3a8e..dca1cd12 100644 --- a/src/vnet/vxlan/vxlan.h +++ b/src/vnet/vxlan/vxlan.h @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/vpp/api/vpe.api b/src/vpp/api/vpe.api index 24f48293..2d6e4f37 100644 --- a/src/vpp/api/vpe.api +++ b/src/vpp/api/vpe.api @@ -38,6 +38,7 @@ * IPSEC-GRE APIs: see .../src/vnet/ipsec-gre/{ipsec_gre.api, ipsec_gre_api.c} * LISP APIs: see .../src/vnet/lisp/{lisp.api, lisp_api.c} * LISP-GPE APIs: see .../src/vnet/lisp-gpe/{lisp_gpe.api, lisp_gpe_api.c} + * SESSION APIs: .../vnet/session/{session.api session_api.c} * MPLS APIs: see .../src/vnet/mpls/{mpls.api, mpls_api.c} * SR APIs: see .../src/vnet/sr/{sr.api, sr_api.c} * DPDK APIs: see ... 
/src/vnet/devices/dpdk/{dpdk.api, dpdk_api.c} diff --git a/src/vppinfra.am b/src/vppinfra.am index 8d375958..4b9f0c29 100644 --- a/src/vppinfra.am +++ b/src/vppinfra.am @@ -157,7 +157,9 @@ nobase_include_HEADERS = \ vppinfra/asm_mips.h \ vppinfra/asm_x86.h \ vppinfra/bihash_8_8.h \ + vppinfra/bihash_16_8.h \ vppinfra/bihash_24_8.h \ + vppinfra/bihash_48_8.h \ vppinfra/bihash_template.h \ vppinfra/bihash_template.c \ vppinfra/bitmap.h \ @@ -206,6 +208,7 @@ nobase_include_HEADERS = \ vppinfra/timer.h \ vppinfra/tw_timer_2t_1w_2048sl.h \ vppinfra/tw_timer_16t_2w_512sl.h \ + vppinfra/tw_timer_16t_1w_2048sl.h \ vppinfra/tw_timer_template.h \ vppinfra/tw_timer_template.c \ vppinfra/types.h \ @@ -261,6 +264,8 @@ CLIB_CORE = \ vppinfra/tw_timer_2t_1w_2048sl.c \ vppinfra/tw_timer_16t_2w_512sl.h \ vppinfra/tw_timer_16t_2w_512sl.c \ + vppinfra/tw_timer_16t_1w_2048sl.h \ + vppinfra/tw_timer_16t_1w_2048sl.c \ vppinfra/unformat.c \ vppinfra/vec.c \ vppinfra/vector.c \ diff --git a/src/vppinfra/bihash_16_8.h b/src/vppinfra/bihash_16_8.h new file mode 100644 index 00000000..ce80f70e --- /dev/null +++ b/src/vppinfra/bihash_16_8.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#undef BIHASH_TYPE + +#define BIHASH_TYPE _16_8 +#define BIHASH_KVP_PER_PAGE 4 + +#ifndef __included_bihash_16_8_h__ +#define __included_bihash_16_8_h__ + +#include +#include +#include +#include + +typedef struct +{ + u64 key[2]; + u64 value; +} clib_bihash_kv_16_8_t; + +static inline int +clib_bihash_is_free_16_8 (clib_bihash_kv_16_8_t * v) +{ + /* Free values are memset to 0xff, check a bit... 
*/ + if (v->key[0] == ~0ULL && v->value == ~0ULL) + return 1; + return 0; +} + +#if __SSE4_2__ +#ifndef __defined_crc_u32__ +#define __defined_crc_u32__ +static inline u32 +crc_u32 (u32 data, u32 value) +{ + __asm__ volatile ("crc32l %[data], %[value];":[value] "+r" (value):[data] + "rm" (data)); + return value; +} +#endif /* __defined_crc_u32__ */ + +static inline u64 +clib_bihash_hash_16_8 (clib_bihash_kv_16_8_t * v) +{ + u32 *dp = (u32 *) & v->key[0]; + u32 value = 0; + + value = crc_u32 (dp[0], value); + value = crc_u32 (dp[1], value); + value = crc_u32 (dp[2], value); + value = crc_u32 (dp[3], value); + + return value; +} +#else +static inline u64 +clib_bihash_hash_16_8 (clib_bihash_kv_16_8_t * v) +{ + u64 tmp = v->key[0] ^ v->key[1]; + return clib_xxhash (tmp); +} +#endif + +static inline u8 * +format_bihash_kvp_16_8 (u8 * s, va_list * args) +{ + clib_bihash_kv_16_8_t *v = va_arg (*args, clib_bihash_kv_16_8_t *); + + s = format (s, "key %llu %llu value %llu", v->key[0], v->key[1], v->value); + return s; +} + +static inline int +clib_bihash_key_compare_16_8 (u64 * a, u64 * b) +{ + return ((a[0] ^ b[0]) | (a[1] ^ b[1])) == 0; +} + +#undef __included_bihash_template_h__ +#include + +#endif /* __included_bihash_16_8_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vppinfra/bihash_48_8.h b/src/vppinfra/bihash_48_8.h new file mode 100644 index 00000000..1a6e7691 --- /dev/null +++ b/src/vppinfra/bihash_48_8.h @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#undef BIHASH_TYPE + +#define BIHASH_TYPE _48_8 +#define BIHASH_KVP_PER_PAGE 4 + +#ifndef __included_bihash_48_8_h__ +#define __included_bihash_48_8_h__ + +#include +#include +#include +#include + +typedef struct +{ + u64 key[6]; + u64 value; +} clib_bihash_kv_48_8_t; + +static inline int +clib_bihash_is_free_48_8 (const clib_bihash_kv_48_8_t * v) +{ + /* Free values are memset to 0xff, check a bit... 
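/*
 * Editorial sketch, not part of this patch: minimal use of the 16_8
 * variant declared above (the 48_8 variant that follows is used the
 * same way).  The clib_bihash_init/add_del/search_16_8 names are the
 * ones the bihash template is expected to instantiate; table sizes and
 * key values are illustrative.
 */
static void
bihash_16_8_example (void)
{
  clib_bihash_16_8_t h;
  clib_bihash_kv_16_8_t kv, result;

  clib_bihash_init_16_8 (&h, "example", 1024 /* buckets */,
                         64 << 20 /* heap bytes */);

  kv.key[0] = 0x0123456789abcdefULL;
  kv.key[1] = 0xfedcba9876543210ULL;
  kv.value = 42;
  clib_bihash_add_del_16_8 (&h, &kv, 1 /* is_add */);

  if (clib_bihash_search_16_8 (&h, &kv, &result) == 0)
    ASSERT (result.value == 42);
}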
*/ + if (v->key[0] == ~0ULL && v->value == ~0ULL) + return 1; + return 0; +} + +#if __SSE4_2__ +#ifndef __defined_crc_u32__ +#define __defined_crc_u32__ +static inline u32 +crc_u32 (u32 data, u32 value) +{ + __asm__ volatile ("crc32l %[data], %[value];":[value] "+r" (value):[data] + "rm" (data)); + return value; +} +#endif /* __defined_crc_u32__ */ + +static inline u64 +clib_bihash_hash_48_8 (const clib_bihash_kv_48_8_t * v) +{ + const u32 *dp = (const u32 *) &v->key[0]; + u32 value = 0; + + value = crc_u32 (dp[0], value); + value = crc_u32 (dp[1], value); + value = crc_u32 (dp[2], value); + value = crc_u32 (dp[3], value); + value = crc_u32 (dp[4], value); + value = crc_u32 (dp[5], value); + value = crc_u32 (dp[6], value); + value = crc_u32 (dp[7], value); + value = crc_u32 (dp[8], value); + value = crc_u32 (dp[9], value); + value = crc_u32 (dp[10], value); + value = crc_u32 (dp[11], value); + + return value; +} +#else +static inline u64 +clib_bihash_hash_48_8 (const clib_bihash_kv_48_8_t * v) +{ + u64 tmp = v->key[0] ^ v->key[1] ^ v->key[2] ^ v->key[3] ^ v->key[4] + ^ v->key[5]; + return clib_xxhash (tmp); +} +#endif + +static inline u8 * +format_bihash_kvp_48_8 (u8 * s, va_list * args) +{ + clib_bihash_kv_48_8_t *v = va_arg (*args, clib_bihash_kv_48_8_t *); + + s = format (s, "key %llu %llu %llu %llu %llu %llu value %llu", v->key[0], + v->key[1], v->key[2], v->key[3], v->key[4], v->key[5], + v->value); + return s; +} + +static inline int +clib_bihash_key_compare_48_8 (const u64 * a, const u64 * b) +{ + return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) + | (a[4] ^ b[4]) | (a[5] ^ b[5])) == 0; +} + +#undef __included_bihash_template_h__ +#include + +#endif /* __included_bihash_48_8_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vppinfra/tw_timer_16t_1w_2048sl.c b/src/vppinfra/tw_timer_16t_1w_2048sl.c new file mode 100644 index 00000000..3f342045 --- /dev/null +++ b/src/vppinfra/tw_timer_16t_1w_2048sl.c @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "tw_timer_16t_1w_2048sl.h" +#include "tw_timer_template.c" + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vppinfra/tw_timer_16t_1w_2048sl.h b/src/vppinfra/tw_timer_16t_1w_2048sl.h new file mode 100644 index 00000000..685ac31e --- /dev/null +++ b/src/vppinfra/tw_timer_16t_1w_2048sl.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
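/*
 * Editorial sketch, not part of this patch: expected use of the
 * 16-timer / 1-wheel / 2048-slot geometry instantiated by
 * tw_timer_16t_1w_2048sl.c above.  The _16t_1w_2048sl function names
 * follow the template's TW_SUFFIX convention; the pool index, timer id
 * and tick counts are illustrative, and the wheel is assumed to have
 * been set up beforehand with tw_timer_wheel_init_16t_1w_2048sl().
 */
static tw_timer_wheel_16t_1w_2048sl_t example_wheel;

static void
tw_timer_example (vlib_main_t * vm)
{
  /* Arm timer 3 of pool element 0, expiring 10 ticks from now */
  u32 handle = tw_timer_start_16t_1w_2048sl (&example_wheel,
                                             0 /* pool index */,
                                             3 /* timer id */,
                                             10 /* ticks */);

  /* A periodic process advances the wheel; expired handles go to the
     callback supplied at wheel-init time */
  tw_timer_expire_timers_16t_1w_2048sl (&example_wheel, vlib_time_now (vm));

  /* A still-pending timer can be cancelled via its handle */
  tw_timer_stop_16t_1w_2048sl (&example_wheel, handle);
}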
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __included_tw_timer_16t_2w_512sl_h__ +#define __included_tw_timer_16t_2w_512sl_h__ + +/* ... So that a client app can create multiple wheel geometries */ +#undef TW_TIMER_WHEELS +#undef TW_SLOTS_PER_RING +#undef TW_RING_SHIFT +#undef TW_RING_MASK +#undef TW_TIMERS_PER_OBJECT +#undef LOG2_TW_TIMERS_PER_OBJECT +#undef TW_SUFFIX + +#define TW_TIMER_WHEELS 1 +#define TW_SLOTS_PER_RING 2048 +#define TW_RING_SHIFT 11 +#define TW_RING_MASK (TW_SLOTS_PER_RING -1) +#define TW_TIMERS_PER_OBJECT 16 +#define LOG2_TW_TIMERS_PER_OBJECT 4 +#define TW_SUFFIX _16t_1w_2048sl + +#include + +#endif /* __included_tw_timer_16t_2w_512sl_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ -- cgit 1.2.3-korg From c3a814be9dc769be942ff8029c7b6eccd4b3af05 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Tue, 28 Feb 2017 19:22:22 +0100 Subject: dpdk: be a plugin Change-Id: I238258cdeb77035adc5e88903d824593d0a1da90 Signed-off-by: Damjan Marion --- src/Makefile.am | 21 - src/plugins/Makefile.am | 5 + src/plugins/dpdk.am | 50 + src/plugins/dpdk/api/dpdk.api | 103 + src/plugins/dpdk/api/dpdk_all_api_h.h | 19 + src/plugins/dpdk/api/dpdk_msg_enum.h | 31 + src/plugins/dpdk/api/dpdk_test.c | 397 ++++ src/plugins/dpdk/buffer.c | 588 ++++++ src/plugins/dpdk/device/cli.c | 2079 ++++++++++++++++++++ src/plugins/dpdk/device/device.c | 852 ++++++++ src/plugins/dpdk/device/dpdk.h | 490 +++++ src/plugins/dpdk/device/dpdk_priv.h | 135 ++ src/plugins/dpdk/device/format.c | 754 +++++++ src/plugins/dpdk/device/node.c | 674 +++++++ src/plugins/dpdk/dir.dox | 27 + src/plugins/dpdk/hqos/hqos.c | 775 ++++++++ src/plugins/dpdk/hqos/qos_doc.md | 411 ++++ src/plugins/dpdk/init.c | 2074 +++++++++++++++++++ src/plugins/dpdk/ipsec/cli.c | 154 ++ src/plugins/dpdk/ipsec/crypto_node.c | 215 ++ src/plugins/dpdk/ipsec/dir.dox | 18 + src/plugins/dpdk/ipsec/dpdk_crypto_ipsec_doc.md | 86 + src/plugins/dpdk/ipsec/esp.h | 249 +++ src/plugins/dpdk/ipsec/esp_decrypt.c | 594 ++++++ src/plugins/dpdk/ipsec/esp_encrypt.c | 609 ++++++ src/plugins/dpdk/ipsec/ipsec.c | 430 ++++ src/plugins/dpdk/ipsec/ipsec.h | 227 +++ src/plugins/dpdk/main.c | 95 + src/plugins/dpdk/thread.c | 85 + src/vat/api_format.c | 320 --- src/vnet.am | 41 +- src/vnet/devices/dpdk/buffer.c | 588 ------ src/vnet/devices/dpdk/cli.c | 2079 -------------------- src/vnet/devices/dpdk/device.c | 852 -------- src/vnet/devices/dpdk/dir.dox | 27 - src/vnet/devices/dpdk/dpdk.api | 103 - src/vnet/devices/dpdk/dpdk.h | 487 ----- src/vnet/devices/dpdk/dpdk_api.c | 246 --- src/vnet/devices/dpdk/dpdk_priv.h | 135 -- src/vnet/devices/dpdk/format.c | 754 ------- src/vnet/devices/dpdk/hqos.c | 775 -------- src/vnet/devices/dpdk/init.c | 1801 ----------------- src/vnet/devices/dpdk/ipsec/cli.c | 154 -- src/vnet/devices/dpdk/ipsec/crypto_node.c | 215 -- src/vnet/devices/dpdk/ipsec/dir.dox | 18 - .../devices/dpdk/ipsec/dpdk_crypto_ipsec_doc.md | 86 - src/vnet/devices/dpdk/ipsec/esp.h | 249 --- src/vnet/devices/dpdk/ipsec/esp_decrypt.c | 594 ------ src/vnet/devices/dpdk/ipsec/esp_encrypt.c | 609 ------ 
src/vnet/devices/dpdk/ipsec/ipsec.c | 430 ---- src/vnet/devices/dpdk/ipsec/ipsec.h | 227 --- src/vnet/devices/dpdk/main.c | 85 - src/vnet/devices/dpdk/node.c | 674 ------- src/vnet/devices/dpdk/qos_doc.md | 411 ---- src/vnet/devices/dpdk/thread.c | 85 - src/vnet/devices/virtio/vhost-user.h | 11 - src/vnet/ipsec/ipsec_api.c | 4 - src/vnet/pg/input.c | 11 +- src/vnet/pg/stream.c | 5 +- src/vnet/replication.c | 6 +- src/vnet/vnet_all_api_h.h | 3 - src/vpp/api/custom_dump.c | 64 - src/vpp/api/gmon.c | 3 +- src/vpp/api/vpe.api | 1 - src/vpp/app/l2t.c | 562 ------ src/vpp/app/l2t_l2.c | 267 --- 66 files changed, 12241 insertions(+), 12988 deletions(-) create mode 100644 src/plugins/dpdk.am create mode 100644 src/plugins/dpdk/api/dpdk.api create mode 100644 src/plugins/dpdk/api/dpdk_all_api_h.h create mode 100644 src/plugins/dpdk/api/dpdk_msg_enum.h create mode 100644 src/plugins/dpdk/api/dpdk_test.c create mode 100644 src/plugins/dpdk/buffer.c create mode 100644 src/plugins/dpdk/device/cli.c create mode 100644 src/plugins/dpdk/device/device.c create mode 100644 src/plugins/dpdk/device/dpdk.h create mode 100644 src/plugins/dpdk/device/dpdk_priv.h create mode 100644 src/plugins/dpdk/device/format.c create mode 100644 src/plugins/dpdk/device/node.c create mode 100644 src/plugins/dpdk/dir.dox create mode 100644 src/plugins/dpdk/hqos/hqos.c create mode 100644 src/plugins/dpdk/hqos/qos_doc.md create mode 100755 src/plugins/dpdk/init.c create mode 100644 src/plugins/dpdk/ipsec/cli.c create mode 100644 src/plugins/dpdk/ipsec/crypto_node.c create mode 100644 src/plugins/dpdk/ipsec/dir.dox create mode 100644 src/plugins/dpdk/ipsec/dpdk_crypto_ipsec_doc.md create mode 100644 src/plugins/dpdk/ipsec/esp.h create mode 100644 src/plugins/dpdk/ipsec/esp_decrypt.c create mode 100644 src/plugins/dpdk/ipsec/esp_encrypt.c create mode 100644 src/plugins/dpdk/ipsec/ipsec.c create mode 100644 src/plugins/dpdk/ipsec/ipsec.h create mode 100644 src/plugins/dpdk/main.c create mode 100644 src/plugins/dpdk/thread.c delete mode 100644 src/vnet/devices/dpdk/buffer.c delete mode 100644 src/vnet/devices/dpdk/cli.c delete mode 100644 src/vnet/devices/dpdk/device.c delete mode 100644 src/vnet/devices/dpdk/dir.dox delete mode 100644 src/vnet/devices/dpdk/dpdk.api delete mode 100644 src/vnet/devices/dpdk/dpdk.h delete mode 100644 src/vnet/devices/dpdk/dpdk_api.c delete mode 100644 src/vnet/devices/dpdk/dpdk_priv.h delete mode 100644 src/vnet/devices/dpdk/format.c delete mode 100644 src/vnet/devices/dpdk/hqos.c delete mode 100755 src/vnet/devices/dpdk/init.c delete mode 100644 src/vnet/devices/dpdk/ipsec/cli.c delete mode 100644 src/vnet/devices/dpdk/ipsec/crypto_node.c delete mode 100644 src/vnet/devices/dpdk/ipsec/dir.dox delete mode 100644 src/vnet/devices/dpdk/ipsec/dpdk_crypto_ipsec_doc.md delete mode 100644 src/vnet/devices/dpdk/ipsec/esp.h delete mode 100644 src/vnet/devices/dpdk/ipsec/esp_decrypt.c delete mode 100644 src/vnet/devices/dpdk/ipsec/esp_encrypt.c delete mode 100644 src/vnet/devices/dpdk/ipsec/ipsec.c delete mode 100644 src/vnet/devices/dpdk/ipsec/ipsec.h delete mode 100644 src/vnet/devices/dpdk/main.c delete mode 100644 src/vnet/devices/dpdk/node.c delete mode 100644 src/vnet/devices/dpdk/qos_doc.md delete mode 100644 src/vnet/devices/dpdk/thread.c delete mode 100644 src/vpp/app/l2t.c delete mode 100644 src/vpp/app/l2t_l2.c (limited to 'src/vpp/api') diff --git a/src/Makefile.am b/src/Makefile.am index 641707ed..5daaa48e 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -38,27 +38,6 @@ install-data-local: 
GREP_TIME=`echo $$GREP_TIME | awk '{print $$2}'` ; \ echo "Command list built, Time taken: $$GREP_TIME" -############################################################################### -# DPDK -############################################################################### - -if WITH_DPDK -if ENABLE_DPDK_SHARED -DPDK_LD_FLAGS = -Wl,--whole-archive,-ldpdk,--no-whole-archive -else -DPDK_LD_FLAGS = -Wl,--whole-archive,-l:libdpdk.a,--no-whole-archive,-lm,-ldl -endif -if WITH_DPDK_CRYPTO_SW -DPDK_LD_ADD = -lIPSec_MB -lisal_crypto -endif -if WITH_DPDK_MLX5_PMD -DPDK_LD_FLAGS += -libverbs -lmlx5 -lnuma -endif -else -DPDK_LD_FLAGS = -DPDK_LD_ADD = -endif - ############################################################################### # Components ############################################################################### diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am index 06b575d1..c8877899 100644 --- a/src/plugins/Makefile.am +++ b/src/plugins/Makefile.am @@ -24,6 +24,7 @@ vppplugins_LTLIBRARIES = vppapitestplugins_LTLIBRARIES = noinst_HEADERS = nobase_apiinclude_HEADERS = +nobase_include_HEADERS = vppapitestpluginsdir = ${libdir}/vpp_api_test_plugins vpppluginsdir = ${libdir}/vpp_plugins @@ -32,6 +33,10 @@ if ENABLE_ACL_PLUGIN include acl.am endif +if WITH_DPDK +include dpdk.am +endif + if ENABLE_FLOWPERPKT_PLUGIN include flowperpkt.am endif diff --git a/src/plugins/dpdk.am b/src/plugins/dpdk.am new file mode 100644 index 00000000..212bbb73 --- /dev/null +++ b/src/plugins/dpdk.am @@ -0,0 +1,50 @@ +# Copyright (c) 2016 Cisco Systems, Inc. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +vppapitestplugins_LTLIBRARIES += dpdk_test_plugin.la +vppplugins_LTLIBRARIES += dpdk_plugin.la + +dpdk_plugin_la_LDFLAGS = $(AM_LDFLAGS) -Wl,--whole-archive,-l:libdpdk.a,--no-whole-archive,-lm,-ldl + +dpdk_plugin_la_SOURCES = \ + dpdk/init.c \ + dpdk/main.c \ + dpdk/buffer.c \ + dpdk/thread.c \ + dpdk/device/cli.c \ + dpdk/device/dpdk_priv.h \ + dpdk/device/device.c \ + dpdk/device/format.c \ + dpdk/device/node.c \ + dpdk/hqos/hqos.c \ + dpdk/ipsec/esp_encrypt.c \ + dpdk/ipsec/esp_decrypt.c \ + dpdk/ipsec/crypto_node.c \ + dpdk/ipsec/cli.c \ + dpdk/ipsec/ipsec.c \ + dpdk/api/dpdk_plugin.api.h + +API_FILES += dpdk/api/dpdk.api + +nobase_include_HEADERS += \ + dpdk/device/dpdk.h \ + dpdk/api/dpdk_all_api_h.h + +nobase_include_HEADERS += \ + dpdk/ipsec/ipsec.h \ + dpdk/ipsec/esp.h + +dpdk_test_plugin_la_SOURCES = \ + dpdk/api/dpdk_test.c dpdk/api/dpdk_plugin.api.h + +# vi:syntax=automake diff --git a/src/plugins/dpdk/api/dpdk.api b/src/plugins/dpdk/api/dpdk.api new file mode 100644 index 00000000..21215d45 --- /dev/null +++ b/src/plugins/dpdk/api/dpdk.api @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2015-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** \brief DPDK interface HQoS pipe profile set request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - the interface + @param subport - subport ID + @param pipe - pipe ID within its subport + @param profile - pipe profile ID +*/ +define sw_interface_set_dpdk_hqos_pipe { + u32 client_index; + u32 context; + u32 sw_if_index; + u32 subport; + u32 pipe; + u32 profile; +}; + +/** \brief DPDK interface HQoS pipe profile set reply + @param context - sender context, to match reply w/ request + @param retval - request return code +*/ +define sw_interface_set_dpdk_hqos_pipe_reply { + u32 context; + i32 retval; +}; + +/** \brief DPDK interface HQoS subport parameters set request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - the interface + @param subport - subport ID + @param tb_rate - subport token bucket rate (measured in bytes/second) + @param tb_size - subport token bucket size (measured in credits) + @param tc_rate - subport traffic class 0 .. 3 rates (measured in bytes/second) + @param tc_period - enforcement period for rates (measured in milliseconds) +*/ +define sw_interface_set_dpdk_hqos_subport { + u32 client_index; + u32 context; + u32 sw_if_index; + u32 subport; + u32 tb_rate; + u32 tb_size; + u32 tc_rate[4]; + u32 tc_period; +}; + +/** \brief DPDK interface HQoS subport parameters set reply + @param context - sender context, to match reply w/ request + @param retval - request return code +*/ +define sw_interface_set_dpdk_hqos_subport_reply { + u32 context; + i32 retval; +}; + +/** \brief DPDK interface HQoS tctbl entry set request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - the interface + @param entry - entry index ID + @param tc - traffic class (0 .. 3) + @param queue - traffic class queue (0 .. 3) +*/ +define sw_interface_set_dpdk_hqos_tctbl { + u32 client_index; + u32 context; + u32 sw_if_index; + u32 entry; + u32 tc; + u32 queue; +}; + +/** \brief DPDK interface HQoS tctbl entry set reply + @param context - sender context, to match reply w/ request + @param retval - request return code +*/ +define sw_interface_set_dpdk_hqos_tctbl_reply { + u32 context; + i32 retval; +}; + +/* + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ + \ No newline at end of file diff --git a/src/plugins/dpdk/api/dpdk_all_api_h.h b/src/plugins/dpdk/api/dpdk_all_api_h.h new file mode 100644 index 00000000..15eb98d6 --- /dev/null +++ b/src/plugins/dpdk/api/dpdk_all_api_h.h @@ -0,0 +1,19 @@ + +/* + * dpdk_all_api_h.h - skeleton vpp engine plug-in api #include file + * + * Copyright (c) + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
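/*
 * Editorial note, not part of this patch: the three request/reply pairs
 * above are driven from vpp_api_test by dpdk/api/dpdk_test.c later in
 * this patch.  Illustrative invocations (the sw_if_index and HQoS ids
 * are made up):
 *
 *   sw_interface_set_dpdk_hqos_pipe rx sw_if_index 5 subport 0 pipe 2 profile 1
 *   sw_interface_set_dpdk_hqos_subport rx sw_if_index 5 subport 0 rate 1250000000
 *   sw_interface_set_dpdk_hqos_tctbl rx sw_if_index 5 entry 0 tc 3 queue 3
 */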
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* Include the generated file, see BUILT_SOURCES in Makefile.am */ +#include diff --git a/src/plugins/dpdk/api/dpdk_msg_enum.h b/src/plugins/dpdk/api/dpdk_msg_enum.h new file mode 100644 index 00000000..952ce6ad --- /dev/null +++ b/src/plugins/dpdk/api/dpdk_msg_enum.h @@ -0,0 +1,31 @@ + +/* + * dpdk_msg_enum.h - skeleton vpp engine plug-in message enumeration + * + * Copyright (c) + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef included_dpdk_msg_enum_h +#define included_dpdk_msg_enum_h + +#include + +#define vl_msg_id(n,h) n, +typedef enum { +#include + /* We'll want to know how many messages IDs we need... */ + VL_MSG_FIRST_AVAILABLE, +} vl_msg_id_t; +#undef vl_msg_id + +#endif /* included_dpdk_msg_enum_h */ diff --git a/src/plugins/dpdk/api/dpdk_test.c b/src/plugins/dpdk/api/dpdk_test.c new file mode 100644 index 00000000..9fe0f934 --- /dev/null +++ b/src/plugins/dpdk/api/dpdk_test.c @@ -0,0 +1,397 @@ + +/* + * dpdk_test.c - skeleton vpp-api-test plug-in + * + * Copyright (c) + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include +#include +#include + +uword unformat_sw_if_index (unformat_input_t * input, va_list * args); + +/* Declare message IDs */ +#include + +/* define message structures */ +#define vl_typedefs +#include +#undef vl_typedefs + +/* declare message handlers for each api */ + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) +#define vl_printfun +#include +#undef vl_printfun + +/* Get the API version number. 
*/ +#define vl_api_version(n,v) static u32 api_version=(v); +#include +#undef vl_api_version + +typedef struct { + /* API message ID base */ + u16 msg_id_base; + vat_main_t *vat_main; +} dpdk_test_main_t; + +dpdk_test_main_t dpdk_test_main; + +#define foreach_standard_reply_retval_handler \ +_(sw_interface_set_dpdk_hqos_pipe_reply) \ +_(sw_interface_set_dpdk_hqos_subport_reply) \ +_(sw_interface_set_dpdk_hqos_tctbl_reply) + +#define _(n) \ + static void vl_api_##n##_t_handler \ + (vl_api_##n##_t * mp) \ + { \ + vat_main_t * vam = dpdk_test_main.vat_main; \ + i32 retval = ntohl(mp->retval); \ + if (vam->async_mode) { \ + vam->async_errors += (retval < 0); \ + } else { \ + vam->retval = retval; \ + vam->result_ready = 1; \ + } \ + } +foreach_standard_reply_retval_handler; +#undef _ + +/* + * Table of message reply handlers, must include boilerplate handlers + * we just generated + */ +#define foreach_vpe_api_reply_msg \ +_(SW_INTERFACE_SET_DPDK_HQOS_PIPE_REPLY, \ + sw_interface_set_dpdk_hqos_pipe_reply) \ +_(SW_INTERFACE_SET_DPDK_HQOS_SUBPORT_REPLY, \ + sw_interface_set_dpdk_hqos_subport_reply) \ +_(SW_INTERFACE_SET_DPDK_HQOS_TCTBL_REPLY, \ + sw_interface_set_dpdk_hqos_tctbl_reply) + +/* M: construct, but don't yet send a message */ +#define M(T,t) \ +do { \ + vam->result_ready = 0; \ + mp = vl_msg_api_alloc(sizeof(*mp)); \ + memset (mp, 0, sizeof (*mp)); \ + mp->_vl_msg_id = ntohs (VL_API_##T + dm->msg_id_base); \ + mp->client_index = vam->my_client_index; \ +} while(0); + +#define M2(T,t,n) \ +do { \ + vam->result_ready = 0; \ + mp = vl_msg_api_alloc(sizeof(*mp)+(n)); \ + memset (mp, 0, sizeof (*mp)); \ + mp->_vl_msg_id = ntohs (VL_API_##T + dm->msg_id_base); \ + mp->client_index = vam->my_client_index; \ +} while(0); + +/* S: send a message */ +#define S (vl_msg_api_send_shmem (vam->vl_input_queue, (u8 *)&mp)) + +/* W: wait for results, with timeout */ +#define W \ +do { \ + timeout = vat_time_now (vam) + 1.0; \ + \ + while (vat_time_now (vam) < timeout) { \ + if (vam->result_ready == 1) { \ + return (vam->retval); \ + } \ + } \ + return -99; \ +} while(0); + +static int +api_sw_interface_set_dpdk_hqos_pipe (vat_main_t * vam) +{ + dpdk_test_main_t * dm = &dpdk_test_main; + unformat_input_t *i = vam->input; + vl_api_sw_interface_set_dpdk_hqos_pipe_t *mp; + f64 timeout; + u32 sw_if_index; + u8 sw_if_index_set = 0; + u32 subport; + u8 subport_set = 0; + u32 pipe; + u8 pipe_set = 0; + u32 profile; + u8 profile_set = 0; + + /* Parse args required to build the message */ + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "rx sw_if_index %u", &sw_if_index)) + sw_if_index_set = 1; + else if (unformat (i, "subport %u", &subport)) + subport_set = 1; + else if (unformat (i, "pipe %u", &pipe)) + pipe_set = 1; + else if (unformat (i, "profile %u", &profile)) + profile_set = 1; + else + break; + } + + if (sw_if_index_set == 0) + { + errmsg ("missing interface name or sw_if_index"); + return -99; + } + + if (subport_set == 0) + { + errmsg ("missing subport "); + return -99; + } + + if (pipe_set == 0) + { + errmsg ("missing pipe"); + return -99; + } + + if (profile_set == 0) + { + errmsg ("missing profile"); + return -99; + } + + M (SW_INTERFACE_SET_DPDK_HQOS_PIPE, sw_interface_set_dpdk_hqos_pipe); + + mp->sw_if_index = ntohl (sw_if_index); + mp->subport = ntohl (subport); + mp->pipe = ntohl (pipe); + mp->profile = ntohl (profile); + + + S; + W; + /* NOTREACHED */ + return 0; +} + +static int +api_sw_interface_set_dpdk_hqos_subport (vat_main_t * vam) +{ + dpdk_test_main_t 
* dm = &dpdk_test_main; + unformat_input_t *i = vam->input; + vl_api_sw_interface_set_dpdk_hqos_subport_t *mp; + f64 timeout; + u32 sw_if_index; + u8 sw_if_index_set = 0; + u32 subport; + u8 subport_set = 0; + u32 tb_rate = 1250000000; /* 10GbE */ + u32 tb_size = 1000000; + u32 tc_rate[] = { 1250000000, 1250000000, 1250000000, 1250000000 }; + u32 tc_period = 10; + + /* Parse args required to build the message */ + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "rx sw_if_index %u", &sw_if_index)) + sw_if_index_set = 1; + else if (unformat (i, "subport %u", &subport)) + subport_set = 1; + else if (unformat (i, "rate %u", &tb_rate)) + { + u32 tc_id; + + for (tc_id = 0; tc_id < (sizeof (tc_rate) / sizeof (tc_rate[0])); + tc_id++) + tc_rate[tc_id] = tb_rate; + } + else if (unformat (i, "bktsize %u", &tb_size)) + ; + else if (unformat (i, "tc0 %u", &tc_rate[0])) + ; + else if (unformat (i, "tc1 %u", &tc_rate[1])) + ; + else if (unformat (i, "tc2 %u", &tc_rate[2])) + ; + else if (unformat (i, "tc3 %u", &tc_rate[3])) + ; + else if (unformat (i, "period %u", &tc_period)) + ; + else + break; + } + + if (sw_if_index_set == 0) + { + errmsg ("missing interface name or sw_if_index"); + return -99; + } + + if (subport_set == 0) + { + errmsg ("missing subport "); + return -99; + } + + M (SW_INTERFACE_SET_DPDK_HQOS_SUBPORT, sw_interface_set_dpdk_hqos_subport); + + mp->sw_if_index = ntohl (sw_if_index); + mp->subport = ntohl (subport); + mp->tb_rate = ntohl (tb_rate); + mp->tb_size = ntohl (tb_size); + mp->tc_rate[0] = ntohl (tc_rate[0]); + mp->tc_rate[1] = ntohl (tc_rate[1]); + mp->tc_rate[2] = ntohl (tc_rate[2]); + mp->tc_rate[3] = ntohl (tc_rate[3]); + mp->tc_period = ntohl (tc_period); + + S; + W; + /* NOTREACHED */ + return 0; +} + +static int +api_sw_interface_set_dpdk_hqos_tctbl (vat_main_t * vam) +{ + dpdk_test_main_t * dm = &dpdk_test_main; + unformat_input_t *i = vam->input; + vl_api_sw_interface_set_dpdk_hqos_tctbl_t *mp; + f64 timeout; + u32 sw_if_index; + u8 sw_if_index_set = 0; + u8 entry_set = 0; + u8 tc_set = 0; + u8 queue_set = 0; + u32 entry, tc, queue; + + /* Parse args required to build the message */ + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "rx sw_if_index %u", &sw_if_index)) + sw_if_index_set = 1; + else if (unformat (i, "entry %d", &entry)) + entry_set = 1; + else if (unformat (i, "tc %d", &tc)) + tc_set = 1; + else if (unformat (i, "queue %d", &queue)) + queue_set = 1; + else + break; + } + + if (sw_if_index_set == 0) + { + errmsg ("missing interface name or sw_if_index"); + return -99; + } + + if (entry_set == 0) + { + errmsg ("missing entry "); + return -99; + } + + if (tc_set == 0) + { + errmsg ("missing traffic class "); + return -99; + } + + if (queue_set == 0) + { + errmsg ("missing queue "); + return -99; + } + + M (SW_INTERFACE_SET_DPDK_HQOS_TCTBL, sw_interface_set_dpdk_hqos_tctbl); + + mp->sw_if_index = ntohl (sw_if_index); + mp->entry = ntohl (entry); + mp->tc = ntohl (tc); + mp->queue = ntohl (queue); + + S; + W; + /* NOTREACHED */ + return 0; +} + +/* + * List of messages that the api test plugin sends, + * and that the data plane plugin processes + */ +#define foreach_vpe_api_msg \ +_(sw_interface_set_dpdk_hqos_pipe, \ + "rx sw_if_index subport pipe \n" \ + "profile \n") \ +_(sw_interface_set_dpdk_hqos_subport, \ + "rx sw_if_index subport [rate ]\n" \ + "[bktsize ] [tc0 ] [tc1 ] [tc2 ] [tc3 ] [period ]\n") \ +_(sw_interface_set_dpdk_hqos_tctbl, \ + "rx sw_if_index entry tc queue \n") + +void 
vat_api_hookup (vat_main_t *vam) +{ + dpdk_test_main_t * dm __attribute__((unused)) = &dpdk_test_main; + /* Hook up handlers for replies from the data plane plug-in */ +#define _(N,n) \ + vl_msg_api_set_handlers((VL_API_##N + dm->msg_id_base), \ + #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_vpe_api_reply_msg; +#undef _ + + /* API messages we can send */ +#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n); + foreach_vpe_api_msg; +#undef _ + + /* Help strings */ +#define _(n,h) hash_set_mem (vam->help_by_name, #n, h); + foreach_vpe_api_msg; +#undef _ +} + +clib_error_t * vat_plugin_register (vat_main_t *vam) +{ + dpdk_test_main_t * dm = &dpdk_test_main; + u8 * name; + + dm->vat_main = vam; + + /* Ask the vpp engine for the first assigned message-id */ + name = format (0, "dpdk_%08x%c", api_version, 0); + dm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name); + + if (dm->msg_id_base != (u16) ~0) + vat_api_hookup (vam); + + vec_free(name); + + return 0; +} diff --git a/src/plugins/dpdk/buffer.c b/src/plugins/dpdk/buffer.c new file mode 100644 index 00000000..2765c292 --- /dev/null +++ b/src/plugins/dpdk/buffer.c @@ -0,0 +1,588 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * buffer.c: allocate/free network buffers. + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * @file + * + * Allocate/free network buffers. 
+ */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + + +STATIC_ASSERT (VLIB_BUFFER_PRE_DATA_SIZE == RTE_PKTMBUF_HEADROOM, + "VLIB_BUFFER_PRE_DATA_SIZE must be equal to RTE_PKTMBUF_HEADROOM"); + +static_always_inline void +dpdk_rte_pktmbuf_free (vlib_main_t * vm, vlib_buffer_t * b) +{ + vlib_buffer_t *hb = b; + struct rte_mbuf *mb; + u32 next, flags; + mb = rte_mbuf_from_vlib_buffer (hb); + +next: + flags = b->flags; + next = b->next_buffer; + mb = rte_mbuf_from_vlib_buffer (b); + + if (PREDICT_FALSE (b->n_add_refs)) + { + rte_mbuf_refcnt_update (mb, b->n_add_refs); + b->n_add_refs = 0; + } + + rte_pktmbuf_free_seg (mb); + + if (flags & VLIB_BUFFER_NEXT_PRESENT) + { + b = vlib_get_buffer (vm, next); + goto next; + } +} + +static void +del_free_list (vlib_main_t * vm, vlib_buffer_free_list_t * f) +{ + u32 i; + vlib_buffer_t *b; + + for (i = 0; i < vec_len (f->buffers); i++) + { + b = vlib_get_buffer (vm, f->buffers[i]); + dpdk_rte_pktmbuf_free (vm, b); + } + + vec_free (f->name); + vec_free (f->buffers); +} + +/* Add buffer free list. */ +static void +dpdk_buffer_delete_free_list (vlib_main_t * vm, u32 free_list_index) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + vlib_buffer_free_list_t *f; + u32 merge_index; + int i; + + ASSERT (os_get_cpu_number () == 0); + + f = vlib_buffer_get_free_list (vm, free_list_index); + + merge_index = vlib_buffer_get_free_list_with_size (vm, f->n_data_bytes); + if (merge_index != ~0 && merge_index != free_list_index) + { + vlib_buffer_merge_free_lists (pool_elt_at_index + (bm->buffer_free_list_pool, merge_index), + f); + } + + del_free_list (vm, f); + + /* Poison it. */ + memset (f, 0xab, sizeof (f[0])); + + pool_put (bm->buffer_free_list_pool, f); + + for (i = 1; i < vec_len (vlib_mains); i++) + { + bm = vlib_mains[i]->buffer_main; + f = vlib_buffer_get_free_list (vlib_mains[i], free_list_index);; + memset (f, 0xab, sizeof (f[0])); + pool_put (bm->buffer_free_list_pool, f); + } +} + +/* Make sure free list has at least given number of free buffers. */ +static uword +fill_free_list (vlib_main_t * vm, + vlib_buffer_free_list_t * fl, uword min_free_buffers) +{ + dpdk_main_t *dm = &dpdk_main; + vlib_buffer_t *b0, *b1, *b2, *b3; + int n, i; + u32 bi0, bi1, bi2, bi3; + unsigned socket_id = rte_socket_id (); + struct rte_mempool *rmp = dm->pktmbuf_pools[socket_id]; + struct rte_mbuf *mb0, *mb1, *mb2, *mb3; + + /* Too early? */ + if (PREDICT_FALSE (rmp == 0)) + return 0; + + /* Already have enough free buffers on free list? */ + n = min_free_buffers - vec_len (fl->buffers); + if (n <= 0) + return min_free_buffers; + + /* Always allocate round number of buffers. */ + n = round_pow2 (n, CLIB_CACHE_LINE_BYTES / sizeof (u32)); + + /* Always allocate new buffers in reasonably large sized chunks. 
*/ + n = clib_max (n, fl->min_n_buffers_each_physmem_alloc); + + vec_validate (vm->mbuf_alloc_list, n - 1); + + if (rte_mempool_get_bulk (rmp, vm->mbuf_alloc_list, n) < 0) + return 0; + + _vec_len (vm->mbuf_alloc_list) = n; + + i = 0; + + while (i < (n - 7)) + { + vlib_prefetch_buffer_header (vlib_buffer_from_rte_mbuf + (vm->mbuf_alloc_list[i + 4]), STORE); + vlib_prefetch_buffer_header (vlib_buffer_from_rte_mbuf + (vm->mbuf_alloc_list[i + 5]), STORE); + vlib_prefetch_buffer_header (vlib_buffer_from_rte_mbuf + (vm->mbuf_alloc_list[i + 6]), STORE); + vlib_prefetch_buffer_header (vlib_buffer_from_rte_mbuf + (vm->mbuf_alloc_list[i + 7]), STORE); + + mb0 = vm->mbuf_alloc_list[i]; + mb1 = vm->mbuf_alloc_list[i + 1]; + mb2 = vm->mbuf_alloc_list[i + 2]; + mb3 = vm->mbuf_alloc_list[i + 3]; + + ASSERT (rte_mbuf_refcnt_read (mb0) == 0); + ASSERT (rte_mbuf_refcnt_read (mb1) == 0); + ASSERT (rte_mbuf_refcnt_read (mb2) == 0); + ASSERT (rte_mbuf_refcnt_read (mb3) == 0); + + rte_mbuf_refcnt_set (mb0, 1); + rte_mbuf_refcnt_set (mb1, 1); + rte_mbuf_refcnt_set (mb2, 1); + rte_mbuf_refcnt_set (mb3, 1); + + b0 = vlib_buffer_from_rte_mbuf (mb0); + b1 = vlib_buffer_from_rte_mbuf (mb1); + b2 = vlib_buffer_from_rte_mbuf (mb2); + b3 = vlib_buffer_from_rte_mbuf (mb3); + + bi0 = vlib_get_buffer_index (vm, b0); + bi1 = vlib_get_buffer_index (vm, b1); + bi2 = vlib_get_buffer_index (vm, b2); + bi3 = vlib_get_buffer_index (vm, b3); + + vec_add1_aligned (fl->buffers, bi0, CLIB_CACHE_LINE_BYTES); + vec_add1_aligned (fl->buffers, bi1, CLIB_CACHE_LINE_BYTES); + vec_add1_aligned (fl->buffers, bi2, CLIB_CACHE_LINE_BYTES); + vec_add1_aligned (fl->buffers, bi3, CLIB_CACHE_LINE_BYTES); + + vlib_buffer_init_for_free_list (b0, fl); + vlib_buffer_init_for_free_list (b1, fl); + vlib_buffer_init_for_free_list (b2, fl); + vlib_buffer_init_for_free_list (b3, fl); + + if (fl->buffer_init_function) + { + fl->buffer_init_function (vm, fl, &bi0, 1); + fl->buffer_init_function (vm, fl, &bi1, 1); + fl->buffer_init_function (vm, fl, &bi2, 1); + fl->buffer_init_function (vm, fl, &bi3, 1); + } + i += 4; + } + + while (i < n) + { + mb0 = vm->mbuf_alloc_list[i]; + + ASSERT (rte_mbuf_refcnt_read (mb0) == 0); + rte_mbuf_refcnt_set (mb0, 1); + + b0 = vlib_buffer_from_rte_mbuf (mb0); + bi0 = vlib_get_buffer_index (vm, b0); + + vec_add1_aligned (fl->buffers, bi0, CLIB_CACHE_LINE_BYTES); + + vlib_buffer_init_for_free_list (b0, fl); + + if (fl->buffer_init_function) + fl->buffer_init_function (vm, fl, &bi0, 1); + i++; + } + + fl->n_alloc += n; + + return n; +} + +static u32 +alloc_from_free_list (vlib_main_t * vm, + vlib_buffer_free_list_t * free_list, + u32 * alloc_buffers, u32 n_alloc_buffers) +{ + u32 *dst, *src; + uword len, n_filled; + + dst = alloc_buffers; + + n_filled = fill_free_list (vm, free_list, n_alloc_buffers); + if (n_filled == 0) + return 0; + + len = vec_len (free_list->buffers); + ASSERT (len >= n_alloc_buffers); + + src = free_list->buffers + len - n_alloc_buffers; + clib_memcpy (dst, src, n_alloc_buffers * sizeof (u32)); + + _vec_len (free_list->buffers) -= n_alloc_buffers; + + return n_alloc_buffers; +} + +/* Allocate a given number of buffers into given array. + Returns number actually allocated which will be either zero or + number requested. 
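/*
 * Editorial sketch, not part of this patch: graph nodes keep using the
 * generic vlib buffer API; once the callbacks at the end of this file
 * are registered, vlib_buffer_alloc() resolves to dpdk_buffer_alloc()
 * below.  The count of 32 is illustrative.
 */
static void
buffer_alloc_example (vlib_main_t * vm)
{
  u32 buffers[32];
  u32 n = vlib_buffer_alloc (vm, buffers, 32);

  /* Per the comment above, n is either the full request or zero */
  if (n)
    vlib_buffer_free (vm, buffers, n);
}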
*/ +u32 +dpdk_buffer_alloc (vlib_main_t * vm, u32 * buffers, u32 n_buffers) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + + return alloc_from_free_list + (vm, + pool_elt_at_index (bm->buffer_free_list_pool, + VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX), + buffers, n_buffers); +} + + +u32 +dpdk_buffer_alloc_from_free_list (vlib_main_t * vm, + u32 * buffers, + u32 n_buffers, u32 free_list_index) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + vlib_buffer_free_list_t *f; + f = pool_elt_at_index (bm->buffer_free_list_pool, free_list_index); + return alloc_from_free_list (vm, f, buffers, n_buffers); +} + +static_always_inline void +vlib_buffer_free_inline (vlib_main_t * vm, + u32 * buffers, u32 n_buffers, u32 follow_buffer_next) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + vlib_buffer_free_list_t *fl; + u32 fi; + int i; + u32 (*cb) (vlib_main_t * vm, u32 * buffers, u32 n_buffers, + u32 follow_buffer_next); + + cb = bm->buffer_free_callback; + + if (PREDICT_FALSE (cb != 0)) + n_buffers = (*cb) (vm, buffers, n_buffers, follow_buffer_next); + + if (!n_buffers) + return; + + for (i = 0; i < n_buffers; i++) + { + vlib_buffer_t *b; + + b = vlib_get_buffer (vm, buffers[i]); + + fl = vlib_buffer_get_buffer_free_list (vm, b, &fi); + + /* The only current use of this callback: multicast recycle */ + if (PREDICT_FALSE (fl->buffers_added_to_freelist_function != 0)) + { + int j; + + vlib_buffer_add_to_free_list + (vm, fl, buffers[i], (b->flags & VLIB_BUFFER_RECYCLE) == 0); + + for (j = 0; j < vec_len (bm->announce_list); j++) + { + if (fl == bm->announce_list[j]) + goto already_announced; + } + vec_add1 (bm->announce_list, fl); + already_announced: + ; + } + else + { + if (PREDICT_TRUE ((b->flags & VLIB_BUFFER_RECYCLE) == 0)) + dpdk_rte_pktmbuf_free (vm, b); + } + } + if (vec_len (bm->announce_list)) + { + vlib_buffer_free_list_t *fl; + for (i = 0; i < vec_len (bm->announce_list); i++) + { + fl = bm->announce_list[i]; + fl->buffers_added_to_freelist_function (vm, fl); + } + _vec_len (bm->announce_list) = 0; + } +} + +static void +dpdk_buffer_free (vlib_main_t * vm, u32 * buffers, u32 n_buffers) +{ + vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */ + 1); +} + +static void +dpdk_buffer_free_no_next (vlib_main_t * vm, u32 * buffers, u32 n_buffers) +{ + vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */ + 0); +} + +static void +dpdk_packet_template_init (vlib_main_t * vm, + void *vt, + void *packet_data, + uword n_packet_data_bytes, + uword min_n_buffers_each_physmem_alloc, u8 * name) +{ + vlib_packet_template_t *t = (vlib_packet_template_t *) vt; + + vlib_worker_thread_barrier_sync (vm); + memset (t, 0, sizeof (t[0])); + + vec_add (t->packet_data, packet_data, n_packet_data_bytes); + + vlib_worker_thread_barrier_release (vm); +} + +clib_error_t * +vlib_buffer_pool_create (vlib_main_t * vm, unsigned num_mbufs, + unsigned socket_id) +{ + dpdk_main_t *dm = &dpdk_main; + vlib_physmem_main_t *vpm = &vm->physmem_main; + struct rte_mempool *rmp; + int i; + + vec_validate_aligned (dm->pktmbuf_pools, socket_id, CLIB_CACHE_LINE_BYTES); + + /* pool already exists, nothing to do */ + if (dm->pktmbuf_pools[socket_id]) + return 0; + + u8 *pool_name = format (0, "mbuf_pool_socket%u%c", socket_id, 0); + + rmp = rte_pktmbuf_pool_create ((char *) pool_name, /* pool name */ + num_mbufs, /* number of mbufs */ + 512, /* cache size */ + VLIB_BUFFER_HDR_SIZE, /* priv size */ + VLIB_BUFFER_PRE_DATA_SIZE + VLIB_BUFFER_DATA_SIZE, /* dataroom size */ + socket_id); /* cpu socket */ + + if 
(rmp) + { + { + uword this_pool_end; + uword this_pool_start; + uword this_pool_size; + uword save_vpm_start, save_vpm_end, save_vpm_size; + struct rte_mempool_memhdr *memhdr; + + this_pool_start = ~0ULL; + this_pool_end = 0LL; + + STAILQ_FOREACH (memhdr, &rmp->mem_list, next) + { + if (((uword) (memhdr->addr + memhdr->len)) > this_pool_end) + this_pool_end = (uword) (memhdr->addr + memhdr->len); + if (((uword) memhdr->addr) < this_pool_start) + this_pool_start = (uword) (memhdr->addr); + } + ASSERT (this_pool_start < ~0ULL && this_pool_end > 0); + this_pool_size = this_pool_end - this_pool_start; + + if (CLIB_DEBUG > 1) + { + clib_warning ("%s: pool start %llx pool end %llx pool size %lld", + pool_name, this_pool_start, this_pool_end, + this_pool_size); + clib_warning + ("before: virtual.start %llx virtual.end %llx virtual.size %lld", + vpm->virtual.start, vpm->virtual.end, vpm->virtual.size); + } + + save_vpm_start = vpm->virtual.start; + save_vpm_end = vpm->virtual.end; + save_vpm_size = vpm->virtual.size; + + if ((this_pool_start < vpm->virtual.start) || vpm->virtual.start == 0) + vpm->virtual.start = this_pool_start; + if (this_pool_end > vpm->virtual.end) + vpm->virtual.end = this_pool_end; + + vpm->virtual.size = vpm->virtual.end - vpm->virtual.start; + + if (CLIB_DEBUG > 1) + { + clib_warning + ("after: virtual.start %llx virtual.end %llx virtual.size %lld", + vpm->virtual.start, vpm->virtual.end, vpm->virtual.size); + } + + /* check if fits into buffer index range */ + if ((u64) vpm->virtual.size > + ((u64) 1 << (32 + CLIB_LOG2_CACHE_LINE_BYTES))) + { + clib_warning ("physmem: virtual size out of range!"); + vpm->virtual.start = save_vpm_start; + vpm->virtual.end = save_vpm_end; + vpm->virtual.size = save_vpm_size; + rmp = 0; + } + } + if (rmp) + { + dm->pktmbuf_pools[socket_id] = rmp; + vec_free (pool_name); + return 0; + } + } + + vec_free (pool_name); + + /* no usable pool for this socket, try to use pool from another one */ + for (i = 0; i < vec_len (dm->pktmbuf_pools); i++) + { + if (dm->pktmbuf_pools[i]) + { + clib_warning + ("WARNING: Failed to allocate mempool for CPU socket %u. 
" + "Threads running on socket %u will use socket %u mempool.", + socket_id, socket_id, i); + dm->pktmbuf_pools[socket_id] = dm->pktmbuf_pools[i]; + return 0; + } + } + + return clib_error_return (0, "failed to allocate mempool on socket %u", + socket_id); +} + +#if CLIB_DEBUG > 0 + +u32 *vlib_buffer_state_validation_lock; +uword *vlib_buffer_state_validation_hash; +void *vlib_buffer_state_heap; + +static clib_error_t * +buffer_state_validation_init (vlib_main_t * vm) +{ + void *oldheap; + + vlib_buffer_state_heap = mheap_alloc (0, 10 << 20); + + oldheap = clib_mem_set_heap (vlib_buffer_state_heap); + + vlib_buffer_state_validation_hash = hash_create (0, sizeof (uword)); + vec_validate_aligned (vlib_buffer_state_validation_lock, 0, + CLIB_CACHE_LINE_BYTES); + clib_mem_set_heap (oldheap); + return 0; +} + +VLIB_INIT_FUNCTION (buffer_state_validation_init); +#endif + +static vlib_buffer_callbacks_t callbacks = { + .vlib_buffer_alloc_cb = &dpdk_buffer_alloc, + .vlib_buffer_alloc_from_free_list_cb = &dpdk_buffer_alloc_from_free_list, + .vlib_buffer_free_cb = &dpdk_buffer_free, + .vlib_buffer_free_no_next_cb = &dpdk_buffer_free_no_next, + .vlib_packet_template_init_cb = &dpdk_packet_template_init, + .vlib_buffer_delete_free_list_cb = &dpdk_buffer_delete_free_list, +}; + +static clib_error_t * +dpdk_buffer_init (vlib_main_t * vm) +{ + vlib_buffer_cb_register (vm, &callbacks); + return 0; +} + +VLIB_INIT_FUNCTION (dpdk_buffer_init); + +/** @endcond */ +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/device/cli.c b/src/plugins/dpdk/device/cli.c new file mode 100644 index 00000000..d2def2fc --- /dev/null +++ b/src/plugins/dpdk/device/cli.c @@ -0,0 +1,2079 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +/** + * @file + * @brief CLI for DPDK Abstraction Layer and pcap Tx Trace. + * + * This file contains the source code for CLI for DPDK + * Abstraction Layer and pcap Tx Trace. 
+ */ + + +static clib_error_t * +get_hqos (u32 hw_if_index, u32 subport_id, dpdk_device_t ** xd, + dpdk_device_config_t ** devconf) +{ + dpdk_main_t *dm = &dpdk_main; + vnet_hw_interface_t *hw; + struct rte_eth_dev_info dev_info; + uword *p = 0; + clib_error_t *error = NULL; + + + if (hw_if_index == (u32) ~ 0) + { + error = clib_error_return (0, "please specify valid interface name"); + goto done; + } + + if (subport_id != 0) + { + error = clib_error_return (0, "Invalid subport"); + goto done; + } + + hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); + *xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + rte_eth_dev_info_get ((*xd)->device_index, &dev_info); + if (dev_info.pci_dev) + { /* bonded interface has no pci info */ + vlib_pci_addr_t pci_addr; + + pci_addr.domain = dev_info.pci_dev->addr.domain; + pci_addr.bus = dev_info.pci_dev->addr.bus; + pci_addr.slot = dev_info.pci_dev->addr.devid; + pci_addr.function = dev_info.pci_dev->addr.function; + + p = + hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32); + } + + if (p) + (*devconf) = pool_elt_at_index (dm->conf->dev_confs, p[0]); + else + (*devconf) = &dm->conf->default_devconf; + +done: + return error; +} + +static clib_error_t * +pcap_trace_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ +#define PCAP_DEF_PKT_TO_CAPTURE (100) + + unformat_input_t _line_input, *line_input = &_line_input; + dpdk_main_t *dm = &dpdk_main; + u8 *filename; + u8 *chroot_filename = 0; + u32 max = 0; + int enabled = 0; + int errorFlag = 0; + clib_error_t *error = 0; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "on")) + { + if (dm->tx_pcap_enable == 0) + { + enabled = 1; + } + else + { + vlib_cli_output (vm, "pcap tx capture already on..."); + errorFlag = 1; + break; + } + } + else if (unformat (line_input, "off")) + { + if (dm->tx_pcap_enable) + { + vlib_cli_output (vm, "captured %d pkts...", + dm->pcap_main.n_packets_captured + 1); + if (dm->pcap_main.n_packets_captured) + { + dm->pcap_main.n_packets_to_capture = + dm->pcap_main.n_packets_captured; + error = pcap_write (&dm->pcap_main); + if (error) + clib_error_report (error); + else + vlib_cli_output (vm, "saved to %s...", dm->pcap_filename); + } + + dm->tx_pcap_enable = 0; + } + else + { + vlib_cli_output (vm, "pcap tx capture already off..."); + errorFlag = 1; + break; + } + } + else if (unformat (line_input, "max %d", &max)) + { + if (dm->tx_pcap_enable) + { + vlib_cli_output (vm, + "can't change max value while pcap tx capture active..."); + errorFlag = 1; + break; + } + } + else if (unformat (line_input, "intfc %U", + unformat_vnet_sw_interface, dm->vnet_main, + &dm->pcap_sw_if_index)) + ; + + else if (unformat (line_input, "intfc any")) + { + dm->pcap_sw_if_index = 0; + } + else if (unformat (line_input, "file %s", &filename)) + { + if (dm->tx_pcap_enable) + { + vlib_cli_output (vm, + "can't change file while pcap tx capture active..."); + errorFlag = 1; + break; + } + + /* Brain-police user path input */ + if (strstr ((char *) filename, "..") + || index ((char *) filename, '/')) + { + vlib_cli_output (vm, "illegal characters in filename '%s'", + filename); + vlib_cli_output (vm, + "Hint: Only filename, do not enter directory structure."); + vec_free (filename); + errorFlag = 1; + break; + } + + chroot_filename = format (0, "/tmp/%s%c", filename, 0); + vec_free 
(filename); + } + else if (unformat (line_input, "status")) + { + if (dm->pcap_sw_if_index == 0) + { + vlib_cli_output (vm, "max is %d for any interface to file %s", + dm-> + pcap_pkts_to_capture ? dm->pcap_pkts_to_capture + : PCAP_DEF_PKT_TO_CAPTURE, + dm-> + pcap_filename ? dm->pcap_filename : (u8 *) + "/tmp/vpe.pcap"); + } + else + { + vlib_cli_output (vm, "max is %d for interface %U to file %s", + dm-> + pcap_pkts_to_capture ? dm->pcap_pkts_to_capture + : PCAP_DEF_PKT_TO_CAPTURE, + format_vnet_sw_if_index_name, dm->vnet_main, + dm->pcap_sw_if_index, + dm-> + pcap_filename ? dm->pcap_filename : (u8 *) + "/tmp/vpe.pcap"); + } + + if (dm->tx_pcap_enable == 0) + { + vlib_cli_output (vm, "pcap tx capture is off..."); + } + else + { + vlib_cli_output (vm, "pcap tx capture is on: %d of %d pkts...", + dm->pcap_main.n_packets_captured, + dm->pcap_main.n_packets_to_capture); + } + break; + } + + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + errorFlag = 1; + break; + } + } + unformat_free (line_input); + + + if (errorFlag == 0) + { + /* Since no error, save configured values. */ + if (chroot_filename) + { + if (dm->pcap_filename) + vec_free (dm->pcap_filename); + vec_add1 (chroot_filename, 0); + dm->pcap_filename = chroot_filename; + } + + if (max) + dm->pcap_pkts_to_capture = max; + + + if (enabled) + { + if (dm->pcap_filename == 0) + dm->pcap_filename = format (0, "/tmp/vpe.pcap%c", 0); + + memset (&dm->pcap_main, 0, sizeof (dm->pcap_main)); + dm->pcap_main.file_name = (char *) dm->pcap_filename; + dm->pcap_main.n_packets_to_capture = PCAP_DEF_PKT_TO_CAPTURE; + if (dm->pcap_pkts_to_capture) + dm->pcap_main.n_packets_to_capture = dm->pcap_pkts_to_capture; + + dm->pcap_main.packet_type = PCAP_PACKET_TYPE_ethernet; + dm->tx_pcap_enable = 1; + vlib_cli_output (vm, "pcap tx capture on..."); + } + } + else if (chroot_filename) + vec_free (chroot_filename); + + + return error; +} + +/*? + * This command is used to start or stop a packet capture, or show + * the status of packet capture. + * + * This command has the following optional parameters: + * + * - on|off - Used to start or stop a packet capture. + * + * - max - Depth of local buffer. Once 'nn' number + * of packets have been received, buffer is flushed to file. Once another + * 'nn' number of packets have been received, buffer is flushed + * to file, overwriting previous write. If not entered, value defaults + * to 100. Can only be updated if packet capture is off. + * + * - intfc |any - Used to specify a given interface, + * or use 'any' to run packet capture on all interfaces. + * 'any' is the default if not provided. Settings from a previous + * packet capture are preserved, so 'any' can be used to reset + * the interface setting. + * + * - file - Used to specify the output filename. The file will + * be placed in the '/tmp' directory, so only the filename is + * supported. Directory should not be entered. If file already exists, file + * will be overwritten. If no filename is provided, '/tmp/vpe.pcap' + * will be used. Can only be updated if packet capture is off. + * + * - status - Displays the current status and configured attributes + * associated with a packet capture. If packet capture is in progress, + * 'status' also will return the number of packets currently in + * the local buffer. All additional attributes entered on command line + * with 'status' will be ingnored and not applied. 
+ * + * @cliexpar + * Example of how to display the status of a tx packet capture when off: + * @cliexstart{pcap tx trace status} + * max is 100, for any interface to file /tmp/vpe.pcap + * pcap tx capture is off... + * @cliexend + * Example of how to start a tx packet capture: + * @cliexstart{pcap tx trace on max 35 intfc GigabitEthernet0/8/0 file vppTest.pcap} + * pcap tx capture on... + * @cliexend + * Example of how to display the status of a tx packet capture in progress: + * @cliexstart{pcap tx trace status} + * max is 35, for interface GigabitEthernet0/8/0 to file /tmp/vppTest.pcap + * pcap tx capture is on: 20 of 35 pkts... + * @cliexend + * Example of how to stop a tx packet capture: + * @cliexstart{vppctl pcap tx trace off} + * captured 21 pkts... + * saved to /tmp/vppTest.pcap... + * @cliexend +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (pcap_trace_command, static) = { + .path = "pcap tx trace", + .short_help = + "pcap tx trace [on|off] [max ] [intfc |any] [file ] [status]", + .function = pcap_trace_command_fn, +}; +/* *INDENT-ON* */ + + +static clib_error_t * +show_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + struct rte_mempool *rmp; + int i; + + for (i = 0; i < vec_len (dpdk_main.pktmbuf_pools); i++) + { + rmp = dpdk_main.pktmbuf_pools[i]; + if (rmp) + { + unsigned count = rte_mempool_avail_count (rmp); + unsigned free_count = rte_mempool_in_use_count (rmp); + + vlib_cli_output (vm, + "name=\"%s\" available = %7d allocated = %7d total = %7d\n", + rmp->name, (u32) count, (u32) free_count, + (u32) (count + free_count)); + } + else + { + vlib_cli_output (vm, "rte_mempool is NULL (!)\n"); + } + } + return 0; +} + +/*? + * This command displays statistics of each DPDK mempool. + * + * @cliexpar + * Example of how to display DPDK buffer data: + * @cliexstart{show dpdk buffer} + * name="mbuf_pool_socket0" available = 15104 allocated = 1280 total = 16384 + * @cliexend +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_show_dpdk_bufferr,static) = { + .path = "show dpdk buffer", + .short_help = "show dpdk buffer", + .function = show_dpdk_buffer, + .is_mp_safe = 1, +}; +/* *INDENT-ON* */ + +static clib_error_t * +test_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + static u32 *allocated_buffers; + u32 n_alloc = 0; + u32 n_free = 0; + u32 first, actual_alloc; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "allocate %d", &n_alloc)) + ; + else if (unformat (input, "free %d", &n_free)) + ; + else + break; + } + + if (n_free) + { + if (vec_len (allocated_buffers) < n_free) + return clib_error_return (0, "Can't free %d, only %d allocated", + n_free, vec_len (allocated_buffers)); + + first = vec_len (allocated_buffers) - n_free; + vlib_buffer_free (vm, allocated_buffers + first, n_free); + _vec_len (allocated_buffers) = first; + } + if (n_alloc) + { + first = vec_len (allocated_buffers); + vec_validate (allocated_buffers, + vec_len (allocated_buffers) + n_alloc - 1); + + actual_alloc = vlib_buffer_alloc (vm, allocated_buffers + first, + n_alloc); + _vec_len (allocated_buffers) = first + actual_alloc; + + if (actual_alloc < n_alloc) + vlib_cli_output (vm, "WARNING: only allocated %d buffers", + actual_alloc); + } + + vlib_cli_output (vm, "Currently %d buffers allocated", + vec_len (allocated_buffers)); + + if (allocated_buffers && vec_len (allocated_buffers) == 0) + vec_free (allocated_buffers); + + return 0; +} + +/*? 
+ * This command tests the allocation and freeing of DPDK buffers. + * If both 'allocate' and 'free' are entered on the + * same command, the 'free' is executed first. If no + * parameters are provided, this command displays how many DPDK buffers + * the test command has allocated. + * + * @cliexpar + * @parblock + * + * Example of how to display how many DPDK buffers the test command has allocated: + * @cliexstart{test dpdk buffer} + * Currently 0 buffers allocated + * @cliexend + * + * Example of how to allocate DPDK buffers using the test command: + * @cliexstart{test dpdk buffer allocate 10} + * Currently 10 buffers allocated + * @cliexend + * + * Example of how to free DPDK buffers allocated by the test command: + * @cliexstart{test dpdk buffer free 10} + * Currently 0 buffers allocated + * @cliexend + * @endparblock +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_test_dpdk_buffer,static) = { + .path = "test dpdk buffer", + .short_help = "test dpdk buffer [allocate ] [free ]", + .function = test_dpdk_buffer, + .is_mp_safe = 1, +}; +/* *INDENT-ON* */ + +static clib_error_t * +set_dpdk_if_desc (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + dpdk_main_t *dm = &dpdk_main; + vnet_hw_interface_t *hw; + dpdk_device_t *xd; + u32 hw_if_index = (u32) ~ 0; + u32 nb_rx_desc = (u32) ~ 0; + u32 nb_tx_desc = (u32) ~ 0; + clib_error_t *error = NULL; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, + &hw_if_index)) + ; + else if (unformat (line_input, "tx %d", &nb_tx_desc)) + ; + else if (unformat (line_input, "rx %d", &nb_rx_desc)) + ; + else + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (hw_if_index == (u32) ~ 0) + { + error = clib_error_return (0, "please specify valid interface name"); + goto done; + } + + hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0) + { + error = + clib_error_return (0, + "number of descriptors can be set only for " + "physical devices"); + goto done; + } + + if ((nb_rx_desc == (u32) ~ 0 || nb_rx_desc == xd->nb_rx_desc) && + (nb_tx_desc == (u32) ~ 0 || nb_tx_desc == xd->nb_tx_desc)) + { + error = clib_error_return (0, "nothing changed"); + goto done; + } + + if (nb_rx_desc != (u32) ~ 0) + xd->nb_rx_desc = nb_rx_desc; + + if (nb_tx_desc != (u32) ~ 0) + xd->nb_tx_desc = nb_tx_desc; + + error = dpdk_port_setup (dm, xd); + +done: + unformat_free (line_input); + + return error; +} + +/*? + * This command sets the number of DPDK 'rx' and + * 'tx' descriptors for the given physical interface. Use + * the command 'show hardware-interfaces' to display the + * current descriptor allocation. 
+ * + * @cliexpar + * Example of how to set the DPDK interface descriptors: + * @cliexcmd{set dpdk interface descriptors GigabitEthernet0/8/0 rx 512 tx 512} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_set_dpdk_if_desc,static) = { + .path = "set dpdk interface descriptors", + .short_help = "set dpdk interface descriptors [rx ] [tx ]", + .function = set_dpdk_if_desc, +}; +/* *INDENT-ON* */ + +static clib_error_t * +show_dpdk_if_placement (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vlib_thread_main_t *tm = vlib_get_thread_main (); + dpdk_main_t *dm = &dpdk_main; + dpdk_device_and_queue_t *dq; + int cpu; + + if (tm->n_vlib_mains == 1) + vlib_cli_output (vm, "All interfaces are handled by main thread"); + + for (cpu = 0; cpu < vec_len (dm->devices_by_cpu); cpu++) + { + if (cpu >= dm->input_cpu_first_index && + cpu < (dm->input_cpu_first_index + dm->input_cpu_count)) + vlib_cli_output (vm, "Thread %u (%s at lcore %u):", cpu, + vlib_worker_threads[cpu].name, + vlib_worker_threads[cpu].lcore_id); + + /* *INDENT-OFF* */ + vec_foreach(dq, dm->devices_by_cpu[cpu]) + { + u32 hw_if_index = dm->devices[dq->device].vlib_hw_if_index; + vnet_hw_interface_t * hi = vnet_get_hw_interface(dm->vnet_main, hw_if_index); + vlib_cli_output(vm, " %v queue %u", hi->name, dq->queue_id); + } + /* *INDENT-ON* */ + } + return 0; +} + +/*? + * This command is used to display the thread and core each + * DPDK interface and queue is assigned too. + * + * @cliexpar + * Example of how to display the DPDK interface placement: + * @cliexstart{show dpdk interface placement} + * Thread 1 (vpp_wk_0 at lcore 1): + * GigabitEthernet0/8/0 queue 0 + * GigabitEthernet0/9/0 queue 0 + * Thread 2 (vpp_wk_1 at lcore 2): + * GigabitEthernet0/8/0 queue 1 + * GigabitEthernet0/9/0 queue 1 + * @cliexend +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_show_dpdk_if_placement,static) = { + .path = "show dpdk interface placement", + .short_help = "show dpdk interface placement", + .function = show_dpdk_if_placement, +}; +/* *INDENT-ON* */ + +static int +dpdk_device_queue_sort (void *a1, void *a2) +{ + dpdk_device_and_queue_t *dq1 = a1; + dpdk_device_and_queue_t *dq2 = a2; + + if (dq1->device > dq2->device) + return 1; + else if (dq1->device < dq2->device) + return -1; + else if (dq1->queue_id > dq2->queue_id) + return 1; + else if (dq1->queue_id < dq2->queue_id) + return -1; + else + return 0; +} + +static clib_error_t * +set_dpdk_if_placement (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + dpdk_main_t *dm = &dpdk_main; + dpdk_device_and_queue_t *dq; + vnet_hw_interface_t *hw; + dpdk_device_t *xd; + u32 hw_if_index = (u32) ~ 0; + u32 queue = (u32) 0; + u32 cpu = (u32) ~ 0; + int i; + clib_error_t *error = NULL; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, + &hw_if_index)) + ; + else if (unformat (line_input, "queue %d", &queue)) + ; + else if (unformat (line_input, "thread %d", &cpu)) + ; + else + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (hw_if_index == (u32) ~ 0) + { + error = clib_error_return (0, "please specify valid interface name"); + goto done; + } + + if (cpu < dm->input_cpu_first_index || + cpu >= (dm->input_cpu_first_index + dm->input_cpu_count)) + { + 
error = clib_error_return (0, "please specify valid thread id"); + goto done; + } + + hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + for (i = 0; i < vec_len (dm->devices_by_cpu); i++) + { + /* *INDENT-OFF* */ + vec_foreach(dq, dm->devices_by_cpu[i]) + { + if (hw_if_index == dm->devices[dq->device].vlib_hw_if_index && + queue == dq->queue_id) + { + if (cpu == i) /* nothing to do */ + goto done; + + vec_del1(dm->devices_by_cpu[i], dq - dm->devices_by_cpu[i]); + vec_add2(dm->devices_by_cpu[cpu], dq, 1); + dq->queue_id = queue; + dq->device = xd->device_index; + xd->cpu_socket_id_by_queue[queue] = + rte_lcore_to_socket_id(vlib_worker_threads[cpu].lcore_id); + + vec_sort_with_function(dm->devices_by_cpu[i], + dpdk_device_queue_sort); + + vec_sort_with_function(dm->devices_by_cpu[cpu], + dpdk_device_queue_sort); + + if (vec_len(dm->devices_by_cpu[i]) == 0) + vlib_node_set_state (vlib_mains[i], dpdk_input_node.index, + VLIB_NODE_STATE_DISABLED); + + if (vec_len(dm->devices_by_cpu[cpu]) == 1) + vlib_node_set_state (vlib_mains[cpu], dpdk_input_node.index, + VLIB_NODE_STATE_POLLING); + + goto done; + } + } + /* *INDENT-ON* */ + } + + error = clib_error_return (0, "not found"); + +done: + unformat_free (line_input); + + return error; +} + +/*? + * This command is used to assign a given interface, and optionally a + * given queue, to a different thread. This will not create a thread, + * so the thread must already exist. Use '/etc/vpp/startup.conf' + * for the initial thread creation. If the 'queue' is not provided, + * it defaults to 0. + * + * @cliexpar + * Example of how to display the DPDK interface placement: + * @cliexstart{show dpdk interface placement} + * Thread 1 (vpp_wk_0 at lcore 1): + * GigabitEthernet0/8/0 queue 0 + * GigabitEthernet0/9/0 queue 0 + * Thread 2 (vpp_wk_1 at lcore 2): + * GigabitEthernet0/8/0 queue 1 + * GigabitEthernet0/9/0 queue 1 + * @cliexend + * Example of how to assign a DPDK interface and queue to a thread: + * @cliexcmd{set dpdk interface placement GigabitEthernet0/8/0 queue 1 thread 1} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_set_dpdk_if_placement,static) = { + .path = "set dpdk interface placement", + .short_help = "set dpdk interface placement [queue ] thread ", + .function = set_dpdk_if_placement, +}; +/* *INDENT-ON* */ + +static clib_error_t * +show_dpdk_if_hqos_placement (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vlib_thread_main_t *tm = vlib_get_thread_main (); + dpdk_main_t *dm = &dpdk_main; + dpdk_device_and_queue_t *dq; + int cpu; + + if (tm->n_vlib_mains == 1) + vlib_cli_output (vm, "All interfaces are handled by main thread"); + + for (cpu = 0; cpu < vec_len (dm->devices_by_hqos_cpu); cpu++) + { + if (cpu >= dm->hqos_cpu_first_index && + cpu < (dm->hqos_cpu_first_index + dm->hqos_cpu_count)) + vlib_cli_output (vm, "Thread %u (%s at lcore %u):", cpu, + vlib_worker_threads[cpu].name, + vlib_worker_threads[cpu].lcore_id); + + vec_foreach (dq, dm->devices_by_hqos_cpu[cpu]) + { + u32 hw_if_index = dm->devices[dq->device].vlib_hw_if_index; + vnet_hw_interface_t *hi = + vnet_get_hw_interface (dm->vnet_main, hw_if_index); + vlib_cli_output (vm, " %v queue %u", hi->name, dq->queue_id); + } + } + return 0; +} + +/*? + * This command is used to display the thread and core each + * DPDK output interface and HQoS queue is assigned too. 
+ * + * @cliexpar + * Example of how to display the DPDK output interface and HQoS queue placement: + * @cliexstart{show dpdk interface hqos placement} + * Thread 1 (vpp_hqos-threads_0 at lcore 3): + * GigabitEthernet0/8/0 queue 0 + * Thread 2 (vpp_hqos-threads_1 at lcore 4): + * GigabitEthernet0/9/0 queue 0 + * @cliexend +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_show_dpdk_if_hqos_placement, static) = { + .path = "show dpdk interface hqos placement", + .short_help = "show dpdk interface hqos placement", + .function = show_dpdk_if_hqos_placement, +}; +/* *INDENT-ON* */ + +static clib_error_t * +set_dpdk_if_hqos_placement (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + dpdk_main_t *dm = &dpdk_main; + dpdk_device_and_queue_t *dq; + vnet_hw_interface_t *hw; + dpdk_device_t *xd; + u32 hw_if_index = (u32) ~ 0; + u32 cpu = (u32) ~ 0; + int i; + clib_error_t *error = NULL; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, + &hw_if_index)) + ; + else if (unformat (line_input, "thread %d", &cpu)) + ; + else + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (hw_if_index == (u32) ~ 0) + return clib_error_return (0, "please specify valid interface name"); + + if (cpu < dm->hqos_cpu_first_index || + cpu >= (dm->hqos_cpu_first_index + dm->hqos_cpu_count)) + { + error = clib_error_return (0, "please specify valid thread id"); + goto done; + } + + hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + for (i = 0; i < vec_len (dm->devices_by_hqos_cpu); i++) + { + vec_foreach (dq, dm->devices_by_hqos_cpu[i]) + { + if (hw_if_index == dm->devices[dq->device].vlib_hw_if_index) + { + if (cpu == i) /* nothing to do */ + goto done; + + vec_del1 (dm->devices_by_hqos_cpu[i], + dq - dm->devices_by_hqos_cpu[i]); + vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1); + dq->queue_id = 0; + dq->device = xd->device_index; + + vec_sort_with_function (dm->devices_by_hqos_cpu[i], + dpdk_device_queue_sort); + + vec_sort_with_function (dm->devices_by_hqos_cpu[cpu], + dpdk_device_queue_sort); + + goto done; + } + } + } + + error = clib_error_return (0, "not found"); + +done: + unformat_free (line_input); + + return error; +} + +/*? + * This command is used to assign a given DPDK output interface and + * HQoS queue to a different thread. This will not create a thread, + * so the thread must already exist. Use '/etc/vpp/startup.conf' + * for the initial thread creation. See @ref qos_doc for more details. 
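+ * As an illustration only (the option names below are taken from the HQoS
+ * startup configuration described in @ref qos_doc and are not validated by
+ * this command), HQoS threads are typically created with a 'cpu' section
+ * similar to:
+ *   cpu {
+ *     main-core 0
+ *     corelist-workers 1-2
+ *     corelist-hqos-threads 3-4
+ *   }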
+ * + * @cliexpar + * Example of how to display the DPDK output interface and HQoS queue placement: + * @cliexstart{show dpdk interface hqos placement} + * Thread 1 (vpp_hqos-threads_0 at lcore 3): + * GigabitEthernet0/8/0 queue 0 + * Thread 2 (vpp_hqos-threads_1 at lcore 4): + * GigabitEthernet0/9/0 queue 0 + * @cliexend + * Example of how to assign a DPDK output interface and HQoS queue to a thread: + * @cliexcmd{set dpdk interface hqos placement GigabitEthernet0/8/0 thread 2} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_placement, static) = { + .path = "set dpdk interface hqos placement", + .short_help = "set dpdk interface hqos placement thread ", + .function = set_dpdk_if_hqos_placement, +}; +/* *INDENT-ON* */ + +static clib_error_t * +set_dpdk_if_hqos_pipe (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + dpdk_main_t *dm = &dpdk_main; + vnet_hw_interface_t *hw; + dpdk_device_t *xd; + u32 hw_if_index = (u32) ~ 0; + u32 subport_id = (u32) ~ 0; + u32 pipe_id = (u32) ~ 0; + u32 profile_id = (u32) ~ 0; + int rv; + clib_error_t *error = NULL; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, + &hw_if_index)) + ; + else if (unformat (line_input, "subport %d", &subport_id)) + ; + else if (unformat (line_input, "pipe %d", &pipe_id)) + ; + else if (unformat (line_input, "profile %d", &profile_id)) + ; + else + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (hw_if_index == (u32) ~ 0) + { + error = clib_error_return (0, "please specify valid interface name"); + goto done; + } + + hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + rv = + rte_sched_pipe_config (xd->hqos_ht->hqos, subport_id, pipe_id, + profile_id); + if (rv) + { + error = clib_error_return (0, "pipe configuration failed"); + goto done; + } + +done: + unformat_free (line_input); + + return error; +} + +/*? + * This command is used to change the profile associate with a HQoS pipe. The + * '' is zero based. Use the command + * 'show dpdk interface hqos' to display the content of each profile. + * See @ref qos_doc for more details. + * + * @note + * Currently there is not an API to create a new HQoS pipe profile. One is + * created by default in the code (search for 'hqos_pipe_params_default''). + * Additional profiles can be created in code and code recompiled. Then use this + * command to assign it. 
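+ * As a rough sketch of such an addition (the variable name below is made up
+ * for illustration; the fields mirror the rte_sched pipe parameters already
+ * used by this plugin, with values copied from the default profile), an
+ * extra profile could look like:
+ *   static struct rte_sched_pipe_params hqos_pipe_params_example = {
+ *     .tb_rate = 305175, .tb_size = 1000000,
+ *     .tc_rate = { 305175, 305175, 305175, 305175 }, .tc_period = 40,
+ *     .wrr_weights = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+ *   };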
+ * + * @cliexpar + * Example of how to assign a new profile to a HQoS pipe: + * @cliexcmd{set dpdk interface hqos pipe GigabitEthernet0/8/0 subport 0 pipe 2 profile 1} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_pipe, static) = +{ + .path = "set dpdk interface hqos pipe", + .short_help = "set dpdk interface hqos pipe subport pipe " + "profile ", + .function = set_dpdk_if_hqos_pipe, +}; +/* *INDENT-ON* */ + +static clib_error_t * +set_dpdk_if_hqos_subport (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = NULL; + u32 hw_if_index = (u32) ~ 0; + u32 subport_id = (u32) ~ 0; + struct rte_sched_subport_params p; + int rv; + clib_error_t *error = NULL; + u32 tb_rate = (u32) ~ 0; + u32 tb_size = (u32) ~ 0; + u32 tc_rate[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE] = + { (u32) ~ 0, (u32) ~ 0, (u32) ~ 0, (u32) ~ 0 }; + u32 tc_period = (u32) ~ 0; + dpdk_device_config_t *devconf = NULL; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, + &hw_if_index)) + ; + else if (unformat (line_input, "subport %d", &subport_id)) + ; + else if (unformat (line_input, "rate %d", &tb_rate)) + ; + else if (unformat (line_input, "bktsize %d", &tb_size)) + ; + else if (unformat (line_input, "tc0 %d", &tc_rate[0])) + ; + else if (unformat (line_input, "tc1 %d", &tc_rate[1])) + ; + else if (unformat (line_input, "tc2 %d", &tc_rate[2])) + ; + else if (unformat (line_input, "tc3 %d", &tc_rate[3])) + ; + else if (unformat (line_input, "period %d", &tc_period)) + ; + else + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } + } + + error = get_hqos (hw_if_index, subport_id, &xd, &devconf); + + if (error == NULL) + { + /* Copy the current values over to local structure. */ + memcpy (&p, &devconf->hqos.subport[subport_id], sizeof (p)); + + /* Update local structure with input values. */ + if (tb_rate != (u32) ~ 0) + { + p.tb_rate = tb_rate; + p.tc_rate[0] = tb_rate; + p.tc_rate[1] = tb_rate; + p.tc_rate[2] = tb_rate; + p.tc_rate[3] = tb_rate; + } + if (tb_size != (u32) ~ 0) + { + p.tb_size = tb_size; + } + if (tc_rate[0] != (u32) ~ 0) + { + p.tc_rate[0] = tc_rate[0]; + } + if (tc_rate[1] != (u32) ~ 0) + { + p.tc_rate[1] = tc_rate[1]; + } + if (tc_rate[2] != (u32) ~ 0) + { + p.tc_rate[2] = tc_rate[2]; + } + if (tc_rate[3] != (u32) ~ 0) + { + p.tc_rate[3] = tc_rate[3]; + } + if (tc_period != (u32) ~ 0) + { + p.tc_period = tc_period; + } + + /* Apply changes. */ + rv = rte_sched_subport_config (xd->hqos_ht->hqos, subport_id, &p); + if (rv) + { + error = clib_error_return (0, "subport configuration failed"); + goto done; + } + else + { + /* Successfully applied, so save of the input values. */ + memcpy (&devconf->hqos.subport[subport_id], &p, sizeof (p)); + } + } + +done: + unformat_free (line_input); + + return error; +} + +/*? + * This command is used to set the subport level parameters such as token + * bucket rate (bytes per seconds), token bucket size (bytes), traffic class + * rates (bytes per seconds) and token update period (Milliseconds). + * + * By default, the 'rate' is set to 1250000000 bytes/second (10GbE + * rate) and each of the four traffic classes is set to 100% of the port rate. 
+ * If the 'rate' is updated by this command, all four traffic classes + * are assigned the same value. Each of the four traffic classes can be updated + * individually. + * + * @cliexpar + * Example of how modify the subport attributes for a 1GbE link: + * @cliexcmd{set dpdk interface hqos subport GigabitEthernet0/8/0 subport 0 rate 125000000} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_subport, static) = { + .path = "set dpdk interface hqos subport", + .short_help = "set dpdk interface hqos subport subport " + "[rate ] [bktsize ] [tc0 ] [tc1 ] [tc2 ] [tc3 ] " + "[period ]", + .function = set_dpdk_if_hqos_subport, +}; +/* *INDENT-ON* */ + +static clib_error_t * +set_dpdk_if_hqos_tctbl (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + vlib_thread_main_t *tm = vlib_get_thread_main (); + dpdk_main_t *dm = &dpdk_main; + vnet_hw_interface_t *hw; + dpdk_device_t *xd; + u32 hw_if_index = (u32) ~ 0; + u32 tc = (u32) ~ 0; + u32 queue = (u32) ~ 0; + u32 entry = (u32) ~ 0; + u32 val, i; + clib_error_t *error = NULL; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, + &hw_if_index)) + ; + else if (unformat (line_input, "entry %d", &entry)) + ; + else if (unformat (line_input, "tc %d", &tc)) + ; + else if (unformat (line_input, "queue %d", &queue)) + ; + else + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (hw_if_index == (u32) ~ 0) + { + error = clib_error_return (0, "please specify valid interface name"); + goto done; + } + if (entry >= 64) + { + error = clib_error_return (0, "invalid entry"); + goto done; + } + if (tc >= RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE) + { + error = clib_error_return (0, "invalid traffic class"); + goto done; + } + if (queue >= RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS) + { + error = clib_error_return (0, "invalid traffic class queue"); + goto done; + } + + hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + /* Detect the set of worker threads */ + uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + /* Should never happen, shut up Coverity warning */ + if (p == 0) + { + error = clib_error_return (0, "no worker registrations?"); + goto done; + } + + vlib_thread_registration_t *tr = (vlib_thread_registration_t *) p[0]; + int worker_thread_first = tr->first_index; + int worker_thread_count = tr->count; + + val = tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + queue; + for (i = 0; i < worker_thread_count; i++) + xd->hqos_wt[worker_thread_first + i].hqos_tc_table[entry] = val; + +done: + unformat_free (line_input); + + return error; +} + +/*? + * This command is used to set the traffic class translation table. The + * traffic class translation table is used to map 64 values (0-63) to one of + * four traffic class and one of four HQoS input queue. Use the 'show + * dpdk interface hqos' command to display the traffic class translation + * table. See @ref qos_doc for more details. + * + * This command has the following parameters: + * + * - - Used to specify the output interface. + * + * - entry - Mapped value (0-63) to assign traffic class and queue to. + * + * - tc - Traffic class (0-3) to be used by the provided mapped value. 
+ * + * - queue - HQoS input queue (0-3) to be used by the provided mapped value. + * + * @cliexpar + * Example of how modify the traffic class translation table: + * @cliexcmd{set dpdk interface hqos tctbl GigabitEthernet0/8/0 entry 16 tc 2 queue 2} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_tctbl, static) = { + .path = "set dpdk interface hqos tctbl", + .short_help = "set dpdk interface hqos tctbl entry tc queue ", + .function = set_dpdk_if_hqos_tctbl, +}; +/* *INDENT-ON* */ + +static clib_error_t * +set_dpdk_if_hqos_pktfield (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + vlib_thread_main_t *tm = vlib_get_thread_main (); + dpdk_main_t *dm = &dpdk_main; + clib_error_t *error = NULL; + + /* Device specific data */ + struct rte_eth_dev_info dev_info; + dpdk_device_config_t *devconf = 0; + vnet_hw_interface_t *hw; + dpdk_device_t *xd; + u32 hw_if_index = (u32) ~ 0; + + /* Detect the set of worker threads */ + uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + /* Should never happen, shut up Coverity warning */ + if (p == 0) + return clib_error_return (0, "no worker registrations?"); + + vlib_thread_registration_t *tr = (vlib_thread_registration_t *) p[0]; + int worker_thread_first = tr->first_index; + int worker_thread_count = tr->count; + + /* Packet field configuration */ + u64 mask = (u64) ~ 0; + u32 id = (u32) ~ 0; + u32 offset = (u32) ~ 0; + + /* HQoS params */ + u32 n_subports_per_port, n_pipes_per_subport, tctbl_size; + + u32 i; + + /* Parse input arguments */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, + &hw_if_index)) + ; + else if (unformat (line_input, "id subport")) + id = 0; + else if (unformat (line_input, "id pipe")) + id = 1; + else if (unformat (line_input, "id tc")) + id = 2; + else if (unformat (line_input, "id %d", &id)) + ; + else if (unformat (line_input, "offset %d", &offset)) + ; + else if (unformat (line_input, "mask %llx", &mask)) + ; + else + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } + } + + /* Get interface */ + if (hw_if_index == (u32) ~ 0) + { + error = clib_error_return (0, "please specify valid interface name"); + goto done; + } + + hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + rte_eth_dev_info_get (xd->device_index, &dev_info); + if (dev_info.pci_dev) + { /* bonded interface has no pci info */ + vlib_pci_addr_t pci_addr; + + pci_addr.domain = dev_info.pci_dev->addr.domain; + pci_addr.bus = dev_info.pci_dev->addr.bus; + pci_addr.slot = dev_info.pci_dev->addr.devid; + pci_addr.function = dev_info.pci_dev->addr.function; + + p = + hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32); + } + + if (p) + devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]); + else + devconf = &dm->conf->default_devconf; + + if (devconf->hqos_enabled == 0) + { + vlib_cli_output (vm, "HQoS disabled for this interface"); + goto done; + } + + n_subports_per_port = devconf->hqos.port.n_subports_per_port; + n_pipes_per_subport = devconf->hqos.port.n_pipes_per_subport; + tctbl_size = RTE_DIM (devconf->hqos.tc_table); + + /* Validate packet field configuration: id, offset and mask */ + if (id >= 3) + { + error = 
clib_error_return (0, "invalid packet field id"); + goto done; + } + + switch (id) + { + case 0: + if (dpdk_hqos_validate_mask (mask, n_subports_per_port) != 0) + { + error = clib_error_return (0, "invalid subport ID mask " + "(n_subports_per_port = %u)", + n_subports_per_port); + goto done; + } + break; + case 1: + if (dpdk_hqos_validate_mask (mask, n_pipes_per_subport) != 0) + { + error = clib_error_return (0, "invalid pipe ID mask " + "(n_pipes_per_subport = %u)", + n_pipes_per_subport); + goto done; + } + break; + case 2: + default: + if (dpdk_hqos_validate_mask (mask, tctbl_size) != 0) + { + error = clib_error_return (0, "invalid TC table index mask " + "(TC table size = %u)", tctbl_size); + goto done; + } + } + + /* Propagate packet field configuration to all workers */ + for (i = 0; i < worker_thread_count; i++) + switch (id) + { + case 0: + xd->hqos_wt[worker_thread_first + i].hqos_field0_slabpos = offset; + xd->hqos_wt[worker_thread_first + i].hqos_field0_slabmask = mask; + xd->hqos_wt[worker_thread_first + i].hqos_field0_slabshr = + __builtin_ctzll (mask); + break; + case 1: + xd->hqos_wt[worker_thread_first + i].hqos_field1_slabpos = offset; + xd->hqos_wt[worker_thread_first + i].hqos_field1_slabmask = mask; + xd->hqos_wt[worker_thread_first + i].hqos_field1_slabshr = + __builtin_ctzll (mask); + break; + case 2: + default: + xd->hqos_wt[worker_thread_first + i].hqos_field2_slabpos = offset; + xd->hqos_wt[worker_thread_first + i].hqos_field2_slabmask = mask; + xd->hqos_wt[worker_thread_first + i].hqos_field2_slabshr = + __builtin_ctzll (mask); + } + +done: + unformat_free (line_input); + + return error; +} + +/*? + * This command is used to set the packet fields required for classifying the + * incoming packet. As a result of the classification process, packet field + * information will be mapped to a 5-tuple (subport, pipe, traffic class, queue, + * color) and stored in the packet mbuf. + * + * This command has the following parameters: + * + * - - Used to specify the output interface. + * + * - id subport|pipe|tc - Classification occurs across three fields. + * This parameter indicates which of the three masks are being configured. Legacy + * code used 0-2 to represent these three fields, so 0-2 is still accepted. + * - subport|0 - Currently only one subport is supported, so only + * an empty mask is supported for the subport classification. + * - pipe|1 - Currently, 4096 pipes per subport are supported, so a + * 12-bit mask should be configured to map to the 0-4095 pipes. + * - tc|2 - The translation table (see 'set dpdk interface hqos + * tctbl' command) maps each value (0-63) into one of the 4 traffic classes + * per pipe. A 6-bit mask should be configured to map this field to a traffic class. + * + * - offset - Offset in the packet to apply the 64-bit mask for classification. + * The offset should be on an 8-byte boundary (0,8,16,24..). + * + * - mask - 64-bit mask to apply to packet at the given 'offset'. + * Bits must be contiguous and should not include '0x'. + * + * The default values for the 'pktfield' assume Ethernet/IPv4/UDP packets with + * no VLAN. Adjust based on expected packet format and desired classification field. + * - 'subport' is always empty (offset 0 mask 0000000000000000) + * - By default, 'pipe' maps to the UDP payload bits 12 .. 
23 (offset 40 + * mask 0000000fff000000) + * - By default, 'tc' maps to the DSCP field in IP header (offset 48 mask + * 00000000000000fc) + * + * @cliexpar + * Example of how modify the 'pipe' classification filter to match VLAN: + * @cliexcmd{set dpdk interface hqos pktfield GigabitEthernet0/8/0 id pipe offset 8 mask 0000000000000FFF} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_pktfield, static) = { + .path = "set dpdk interface hqos pktfield", + .short_help = "set dpdk interface hqos pktfield id subport|pipe|tc offset " + "mask ", + .function = set_dpdk_if_hqos_pktfield, +}; +/* *INDENT-ON* */ + +static clib_error_t * +show_dpdk_if_hqos (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + vlib_thread_main_t *tm = vlib_get_thread_main (); + dpdk_main_t *dm = &dpdk_main; + vnet_hw_interface_t *hw; + dpdk_device_t *xd; + dpdk_device_config_hqos_t *cfg; + dpdk_device_hqos_per_hqos_thread_t *ht; + dpdk_device_hqos_per_worker_thread_t *wk; + u32 *tctbl; + u32 hw_if_index = (u32) ~ 0; + u32 profile_id, subport_id, i; + struct rte_eth_dev_info dev_info; + dpdk_device_config_t *devconf = 0; + vlib_thread_registration_t *tr; + uword *p = 0; + clib_error_t *error = NULL; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, + &hw_if_index)) + ; + else + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (hw_if_index == (u32) ~ 0) + { + error = clib_error_return (0, "please specify interface name!!"); + goto done; + } + + hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + rte_eth_dev_info_get (xd->device_index, &dev_info); + if (dev_info.pci_dev) + { /* bonded interface has no pci info */ + vlib_pci_addr_t pci_addr; + + pci_addr.domain = dev_info.pci_dev->addr.domain; + pci_addr.bus = dev_info.pci_dev->addr.bus; + pci_addr.slot = dev_info.pci_dev->addr.devid; + pci_addr.function = dev_info.pci_dev->addr.function; + + p = + hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32); + } + + if (p) + devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]); + else + devconf = &dm->conf->default_devconf; + + if (devconf->hqos_enabled == 0) + { + vlib_cli_output (vm, "HQoS disabled for this interface"); + goto done; + } + + /* Detect the set of worker threads */ + p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + + /* Should never happen, shut up Coverity warning */ + if (p == 0) + { + error = clib_error_return (0, "no worker registrations?"); + goto done; + } + + tr = (vlib_thread_registration_t *) p[0]; + + cfg = &devconf->hqos; + ht = xd->hqos_ht; + wk = &xd->hqos_wt[tr->first_index]; + tctbl = wk->hqos_tc_table; + + vlib_cli_output (vm, " Thread:"); + vlib_cli_output (vm, " Input SWQ size = %u packets", cfg->swq_size); + vlib_cli_output (vm, " Enqueue burst size = %u packets", + ht->hqos_burst_enq); + vlib_cli_output (vm, " Dequeue burst size = %u packets", + ht->hqos_burst_deq); + + vlib_cli_output (vm, + " Packet field 0: slab position = %4u, slab bitmask = 0x%016llx (subport)", + wk->hqos_field0_slabpos, wk->hqos_field0_slabmask); + vlib_cli_output (vm, + " Packet field 1: slab position = %4u, slab bitmask = 0x%016llx (pipe)", + wk->hqos_field1_slabpos, 
wk->hqos_field1_slabmask); + vlib_cli_output (vm, + " Packet field 2: slab position = %4u, slab bitmask = 0x%016llx (tc)", + wk->hqos_field2_slabpos, wk->hqos_field2_slabmask); + vlib_cli_output (vm, + " Packet field 2 tc translation table: ([Mapped Value Range]: tc/queue tc/queue ...)"); + vlib_cli_output (vm, + " [ 0 .. 15]: " + "%u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u", + tctbl[0] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[0] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[1] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[1] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[2] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[2] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[3] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[3] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[4] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[4] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[5] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[5] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[6] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[6] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[7] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[7] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[8] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[8] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[9] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[9] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[10] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[10] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[11] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[11] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[12] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[12] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[13] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[13] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[14] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[14] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[15] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[15] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS); + vlib_cli_output (vm, + " [16 .. 
31]: " + "%u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u", + tctbl[16] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[16] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[17] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[17] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[18] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[18] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[19] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[19] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[20] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[20] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[21] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[21] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[22] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[22] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[23] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[23] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[24] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[24] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[25] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[25] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[26] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[26] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[27] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[27] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[28] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[28] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[29] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[29] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[30] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[30] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[31] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[31] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS); + vlib_cli_output (vm, + " [32 .. 47]: " + "%u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u", + tctbl[32] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[32] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[33] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[33] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[34] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[34] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[35] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[35] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[36] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[36] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[37] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[37] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[38] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[38] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[39] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[39] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[40] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[40] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[41] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[41] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[42] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[42] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[43] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[43] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[44] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[44] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[45] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[45] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[46] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[46] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[47] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[47] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS); + vlib_cli_output (vm, + " [48 .. 
63]: " + "%u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u", + tctbl[48] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[48] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[49] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[49] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[50] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[50] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[51] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[51] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[52] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[52] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[53] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[53] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[54] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[54] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[55] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[55] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[56] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[56] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[57] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[57] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[58] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[58] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[59] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[59] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[60] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[60] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[61] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[61] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[62] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[62] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[63] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, + tctbl[63] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS); + vlib_cli_output (vm, " Port:"); + vlib_cli_output (vm, " Rate = %u bytes/second", cfg->port.rate); + vlib_cli_output (vm, " MTU = %u bytes", cfg->port.mtu); + vlib_cli_output (vm, " Frame overhead = %u bytes", + cfg->port.frame_overhead); + vlib_cli_output (vm, " Number of subports = %u", + cfg->port.n_subports_per_port); + vlib_cli_output (vm, " Number of pipes per subport = %u", + cfg->port.n_pipes_per_subport); + vlib_cli_output (vm, + " Packet queue size: TC0 = %u, TC1 = %u, TC2 = %u, TC3 = %u packets", + cfg->port.qsize[0], cfg->port.qsize[1], cfg->port.qsize[2], + cfg->port.qsize[3]); + vlib_cli_output (vm, " Number of pipe profiles = %u", + cfg->port.n_pipe_profiles); + + for (subport_id = 0; subport_id < vec_len (cfg->subport); subport_id++) + { + vlib_cli_output (vm, " Subport %u:", subport_id); + vlib_cli_output (vm, " Rate = %u bytes/second", + cfg->subport[subport_id].tb_rate); + vlib_cli_output (vm, " Token bucket size = %u bytes", + cfg->subport[subport_id].tb_size); + vlib_cli_output (vm, + " Traffic class rate: TC0 = %u, TC1 = %u, TC2 = %u, TC3 = %u bytes/second", + cfg->subport[subport_id].tc_rate[0], + cfg->subport[subport_id].tc_rate[1], + cfg->subport[subport_id].tc_rate[2], + cfg->subport[subport_id].tc_rate[3]); + vlib_cli_output (vm, " TC period = %u milliseconds", + cfg->subport[subport_id].tc_period); + } + + for (profile_id = 0; profile_id < vec_len (cfg->pipe); profile_id++) + { + vlib_cli_output (vm, " Pipe profile %u:", profile_id); + vlib_cli_output (vm, " Rate = %u bytes/second", + cfg->pipe[profile_id].tb_rate); + vlib_cli_output (vm, " Token bucket size = %u bytes", + cfg->pipe[profile_id].tb_size); + vlib_cli_output (vm, + " Traffic class rate: TC0 = %u, TC1 = %u, TC2 = %u, TC3 = %u bytes/second", + cfg->pipe[profile_id].tc_rate[0], + cfg->pipe[profile_id].tc_rate[1], + 
cfg->pipe[profile_id].tc_rate[2], + cfg->pipe[profile_id].tc_rate[3]); + vlib_cli_output (vm, " TC period = %u milliseconds", + cfg->pipe[profile_id].tc_period); +#ifdef RTE_SCHED_SUBPORT_TC_OV + vlib_cli_output (vm, " TC3 oversubscription_weight = %u", + cfg->pipe[profile_id].tc_ov_weight); +#endif + + for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) + { + vlib_cli_output (vm, + " TC%u WRR weights: Q0 = %u, Q1 = %u, Q2 = %u, Q3 = %u", + i, cfg->pipe[profile_id].wrr_weights[i * 4], + cfg->pipe[profile_id].wrr_weights[i * 4 + 1], + cfg->pipe[profile_id].wrr_weights[i * 4 + 2], + cfg->pipe[profile_id].wrr_weights[i * 4 + 3]); + } + } + +#ifdef RTE_SCHED_RED + vlib_cli_output (vm, " Weighted Random Early Detection (WRED):"); + for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) + { + vlib_cli_output (vm, " TC%u min: G = %u, Y = %u, R = %u", i, + cfg->port.red_params[i][e_RTE_METER_GREEN].min_th, + cfg->port.red_params[i][e_RTE_METER_YELLOW].min_th, + cfg->port.red_params[i][e_RTE_METER_RED].min_th); + + vlib_cli_output (vm, " TC%u max: G = %u, Y = %u, R = %u", i, + cfg->port.red_params[i][e_RTE_METER_GREEN].max_th, + cfg->port.red_params[i][e_RTE_METER_YELLOW].max_th, + cfg->port.red_params[i][e_RTE_METER_RED].max_th); + + vlib_cli_output (vm, + " TC%u inverted probability: G = %u, Y = %u, R = %u", + i, cfg->port.red_params[i][e_RTE_METER_GREEN].maxp_inv, + cfg->port.red_params[i][e_RTE_METER_YELLOW].maxp_inv, + cfg->port.red_params[i][e_RTE_METER_RED].maxp_inv); + + vlib_cli_output (vm, " TC%u weight: R = %u, Y = %u, R = %u", i, + cfg->port.red_params[i][e_RTE_METER_GREEN].wq_log2, + cfg->port.red_params[i][e_RTE_METER_YELLOW].wq_log2, + cfg->port.red_params[i][e_RTE_METER_RED].wq_log2); + } +#endif + +done: + unformat_free (line_input); + + return error; +} + +/*? + * This command is used to display details of an output interface's HQoS + * settings. + * + * @cliexpar + * Example of how to display HQoS settings for an interfaces: + * @cliexstart{show dpdk interface hqos GigabitEthernet0/8/0} + * Thread: + * Input SWQ size = 4096 packets + * Enqueue burst size = 256 packets + * Dequeue burst size = 220 packets + * Packet field 0: slab position = 0, slab bitmask = 0x0000000000000000 (subport) + * Packet field 1: slab position = 40, slab bitmask = 0x0000000fff000000 (pipe) + * Packet field 2: slab position = 8, slab bitmask = 0x00000000000000fc (tc) + * Packet field 2 tc translation table: ([Mapped Value Range]: tc/queue tc/queue ...) + * [ 0 .. 15]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3 + * [16 .. 31]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3 + * [32 .. 47]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3 + * [48 .. 
63]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3 + * Port: + * Rate = 1250000000 bytes/second + * MTU = 1514 bytes + * Frame overhead = 24 bytes + * Number of subports = 1 + * Number of pipes per subport = 4096 + * Packet queue size: TC0 = 64, TC1 = 64, TC2 = 64, TC3 = 64 packets + * Number of pipe profiles = 2 + * Subport 0: + * Rate = 1250000000 bytes/second + * Token bucket size = 1000000 bytes + * Traffic class rate: TC0 = 1250000000, TC1 = 1250000000, TC2 = 1250000000, TC3 = 1250000000 bytes/second + * TC period = 10 milliseconds + * Pipe profile 0: + * Rate = 305175 bytes/second + * Token bucket size = 1000000 bytes + * Traffic class rate: TC0 = 305175, TC1 = 305175, TC2 = 305175, TC3 = 305175 bytes/second + * TC period = 40 milliseconds + * TC0 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1 + * TC1 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1 + * TC2 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1 + * TC3 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1 + * @cliexend +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_show_dpdk_if_hqos, static) = { + .path = "show dpdk interface hqos", + .short_help = "show dpdk interface hqos ", + .function = show_dpdk_if_hqos, +}; + +/* *INDENT-ON* */ + +static clib_error_t * +show_dpdk_hqos_queue_stats (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *error = NULL; +#ifdef RTE_SCHED_COLLECT_STATS + dpdk_main_t *dm = &dpdk_main; + u32 hw_if_index = (u32) ~ 0; + u32 subport = (u32) ~ 0; + u32 pipe = (u32) ~ 0; + u32 tc = (u32) ~ 0; + u32 tc_q = (u32) ~ 0; + vnet_hw_interface_t *hw; + dpdk_device_t *xd; + uword *p = 0; + struct rte_eth_dev_info dev_info; + dpdk_device_config_t *devconf = 0; + u32 qindex; + struct rte_sched_queue_stats stats; + u16 qlen; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, + &hw_if_index)) + ; + + else if (unformat (line_input, "subport %d", &subport)) + ; + + else if (unformat (line_input, "pipe %d", &pipe)) + ; + + else if (unformat (line_input, "tc %d", &tc)) + ; + + else if (unformat (line_input, "tc_q %d", &tc_q)) + ; + + else + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (hw_if_index == (u32) ~ 0) + { + error = clib_error_return (0, "please specify interface name!!"); + goto done; + } + + hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + rte_eth_dev_info_get (xd->device_index, &dev_info); + if (dev_info.pci_dev) + { /* bonded interface has no pci info */ + vlib_pci_addr_t pci_addr; + + pci_addr.domain = dev_info.pci_dev->addr.domain; + pci_addr.bus = dev_info.pci_dev->addr.bus; + pci_addr.slot = dev_info.pci_dev->addr.devid; + pci_addr.function = dev_info.pci_dev->addr.function; + + p = + hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32); + } + + if (p) + devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]); + else + devconf = &dm->conf->default_devconf; + + if (devconf->hqos_enabled == 0) + { + vlib_cli_output (vm, "HQoS disabled for this interface"); + goto done; + } + + /* + * Figure out which queue to query. cf rte_sched_port_qindex. (Not sure why + * that method isn't made public by DPDK - how _should_ we get the queue ID?) 
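+ * As a worked example (assuming the default 4096 pipes per subport and the
+ * usual 4 traffic classes x 4 queues per pipe), subport 0, pipe 3181, tc 0,
+ * tc_q 0 maps to qindex = ((0 * 4096 + 3181) * 4 + 0) * 4 + 0 = 50896.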
+ */ + qindex = subport * devconf->hqos.port.n_pipes_per_subport + pipe; + qindex = qindex * RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE + tc; + qindex = qindex * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + tc_q; + + if (rte_sched_queue_read_stats (xd->hqos_ht->hqos, qindex, &stats, &qlen) != + 0) + { + error = clib_error_return (0, "failed to read stats"); + goto done; + } + + vlib_cli_output (vm, "%=24s%=16s", "Stats Parameter", "Value"); + vlib_cli_output (vm, "%=24s%=16d", "Packets", stats.n_pkts); + vlib_cli_output (vm, "%=24s%=16d", "Packets dropped", stats.n_pkts_dropped); +#ifdef RTE_SCHED_RED + vlib_cli_output (vm, "%=24s%=16d", "Packets dropped (RED)", + stats.n_pkts_red_dropped); +#endif + vlib_cli_output (vm, "%=24s%=16d", "Bytes", stats.n_bytes); + vlib_cli_output (vm, "%=24s%=16d", "Bytes dropped", stats.n_bytes_dropped); + +#else + + /* Get a line of input */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + vlib_cli_output (vm, "RTE_SCHED_COLLECT_STATS disabled in DPDK"); + goto done; + +#endif + +done: + unformat_free (line_input); + + return error; +} + +/*? + * This command is used to display statistics associated with a HQoS traffic class + * queue. + * + * @note + * Statistic collection by the scheduler is disabled by default in DPDK. In order to + * turn it on, add the following line to '../vpp/dpdk/Makefile': + * - $(call set,RTE_SCHED_COLLECT_STATS,y) + * + * @cliexpar + * Example of how to display statistics of HQoS a HQoS traffic class queue: + * @cliexstart{show dpdk hqos queue GigabitEthernet0/9/0 subport 0 pipe 3181 tc 0 tc_q 0} + * Stats Parameter Value + * Packets 140 + * Packets dropped 0 + * Bytes 8400 + * Bytes dropped 0 + * @cliexend +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cmd_show_dpdk_hqos_queue_stats, static) = { + .path = "show dpdk hqos queue", + .short_help = "show dpdk hqos queue subport pipe tc tc_q ", + .function = show_dpdk_hqos_queue_stats, +}; +/* *INDENT-ON* */ + +static clib_error_t * +show_dpdk_version_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ +#define _(a,b,c) vlib_cli_output (vm, "%-25s " b, a ":", c); + _("DPDK Version", "%s", rte_version ()); + _("DPDK EAL init args", "%s", dpdk_config_main.eal_init_args_str); +#undef _ + return 0; +} + +/*? + * This command is used to display the current DPDK version and + * the list of arguments passed to DPDK when started. + * + * @cliexpar + * Example of how to display how many DPDK buffer test command has allcoated: + * @cliexstart{show dpdk version} + * DPDK Version: DPDK 16.11.0 + * DPDK EAL init args: -c 1 -n 4 --huge-dir /run/vpp/hugepages --file-prefix vpp -w 0000:00:08.0 -w 0000:00:09.0 --master-lcore 0 --socket-mem 256 + * @cliexend +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_vpe_version_command, static) = { + .path = "show dpdk version", + .short_help = "show dpdk version", + .function = show_dpdk_version_command_fn, +}; +/* *INDENT-ON* */ + +clib_error_t * +dpdk_cli_init (vlib_main_t * vm) +{ + return 0; +} + +VLIB_INIT_FUNCTION (dpdk_cli_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/device/device.c b/src/plugins/dpdk/device/device.c new file mode 100644 index 00000000..50b26689 --- /dev/null +++ b/src/plugins/dpdk/device/device.c @@ -0,0 +1,852 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#define foreach_dpdk_tx_func_error \ + _(BAD_RETVAL, "DPDK tx function returned an error") \ + _(RING_FULL, "Tx packet drops (ring full)") \ + _(PKT_DROP, "Tx packet drops (dpdk tx failure)") \ + _(REPL_FAIL, "Tx packet drops (replication failure)") + +typedef enum +{ +#define _(f,s) DPDK_TX_FUNC_ERROR_##f, + foreach_dpdk_tx_func_error +#undef _ + DPDK_TX_FUNC_N_ERROR, +} dpdk_tx_func_error_t; + +static char *dpdk_tx_func_error_strings[] = { +#define _(n,s) s, + foreach_dpdk_tx_func_error +#undef _ +}; + +clib_error_t * +dpdk_set_mac_address (vnet_hw_interface_t * hi, char *address) +{ + int error; + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance); + + error = rte_eth_dev_default_mac_addr_set (xd->device_index, + (struct ether_addr *) address); + + if (error) + { + return clib_error_return (0, "mac address set failed: %d", error); + } + else + { + vec_reset_length (xd->default_mac_address); + vec_add (xd->default_mac_address, address, sizeof (address)); + return NULL; + } +} + +clib_error_t * +dpdk_set_mc_filter (vnet_hw_interface_t * hi, + struct ether_addr mc_addr_vec[], int naddr) +{ + int error; + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance); + + error = rte_eth_dev_set_mc_addr_list (xd->device_index, mc_addr_vec, naddr); + + if (error) + { + return clib_error_return (0, "mc addr list failed: %d", error); + } + else + { + return NULL; + } +} + +struct rte_mbuf * +dpdk_replicate_packet_mb (vlib_buffer_t * b) +{ + dpdk_main_t *dm = &dpdk_main; + struct rte_mbuf **mbufs = 0, *s, *d; + u8 nb_segs; + unsigned socket_id = rte_socket_id (); + int i; + + ASSERT (dm->pktmbuf_pools[socket_id]); + s = rte_mbuf_from_vlib_buffer (b); + nb_segs = s->nb_segs; + vec_validate (mbufs, nb_segs - 1); + + if (rte_pktmbuf_alloc_bulk (dm->pktmbuf_pools[socket_id], mbufs, nb_segs)) + { + vec_free (mbufs); + return 0; + } + + d = mbufs[0]; + d->nb_segs = s->nb_segs; + d->data_len = s->data_len; + d->pkt_len = s->pkt_len; + d->data_off = s->data_off; + clib_memcpy (d->buf_addr, s->buf_addr, RTE_PKTMBUF_HEADROOM + s->data_len); + + for (i = 1; i < nb_segs; i++) + { + d->next = mbufs[i]; + d = mbufs[i]; + s = s->next; + d->data_len = s->data_len; + clib_memcpy (d->buf_addr, s->buf_addr, + RTE_PKTMBUF_HEADROOM + s->data_len); + } + + d = mbufs[0]; + vec_free (mbufs); + return d; +} + +static void +dpdk_tx_trace_buffer (dpdk_main_t * dm, + vlib_node_runtime_t * node, + dpdk_device_t * xd, + u16 queue_id, u32 buffer_index, vlib_buffer_t * buffer) +{ + vlib_main_t *vm = vlib_get_main (); + dpdk_tx_dma_trace_t *t0; + struct rte_mbuf *mb; + + mb = rte_mbuf_from_vlib_buffer (buffer); + + t0 = vlib_add_trace (vm, node, buffer, sizeof (t0[0])); + t0->queue_index = queue_id; + t0->device_index = xd->device_index; + t0->buffer_index = buffer_index; + clib_memcpy 
(&t0->mb, mb, sizeof (t0->mb)); + clib_memcpy (&t0->buffer, buffer, + sizeof (buffer[0]) - sizeof (buffer->pre_data)); + clib_memcpy (t0->buffer.pre_data, buffer->data + buffer->current_data, + sizeof (t0->buffer.pre_data)); +} + +static_always_inline void +dpdk_validate_rte_mbuf (vlib_main_t * vm, vlib_buffer_t * b, + int maybe_multiseg) +{ + struct rte_mbuf *mb, *first_mb, *last_mb; + + /* buffer is coming from non-dpdk source so we need to init + rte_mbuf header */ + if (PREDICT_FALSE ((b->flags & VLIB_BUFFER_EXT_HDR_VALID) == 0)) + { + vlib_buffer_t *b2 = b; + last_mb = mb = rte_mbuf_from_vlib_buffer (b2); + rte_pktmbuf_reset (mb); + while (maybe_multiseg && (b2->flags & VLIB_BUFFER_NEXT_PRESENT)) + { + b2 = vlib_get_buffer (vm, b2->next_buffer); + mb = rte_mbuf_from_vlib_buffer (b2); + rte_pktmbuf_reset (mb); + } + } + + last_mb = first_mb = mb = rte_mbuf_from_vlib_buffer (b); + first_mb->nb_segs = 1; + mb->data_len = b->current_length; + mb->pkt_len = maybe_multiseg ? vlib_buffer_length_in_chain (vm, b) : + b->current_length; + mb->data_off = VLIB_BUFFER_PRE_DATA_SIZE + b->current_data; + + while (maybe_multiseg && (b->flags & VLIB_BUFFER_NEXT_PRESENT)) + { + b = vlib_get_buffer (vm, b->next_buffer); + mb = rte_mbuf_from_vlib_buffer (b); + last_mb->next = mb; + last_mb = mb; + mb->data_len = b->current_length; + mb->pkt_len = b->current_length; + mb->data_off = VLIB_BUFFER_PRE_DATA_SIZE + b->current_data; + first_mb->nb_segs++; + if (PREDICT_FALSE (b->n_add_refs)) + { + rte_mbuf_refcnt_update (mb, b->n_add_refs); + b->n_add_refs = 0; + } + } +} + +/* + * This function calls the dpdk's tx_burst function to transmit the packets + * on the tx_vector. It manages a lock per-device if the device does not + * support multiple queues. It returns the number of packets untransmitted + * on the tx_vector. If all packets are transmitted (the normal case), the + * function returns 0. + * + * The function assumes there is at least one packet on the tx_vector. + */ +static_always_inline + u32 tx_burst_vector_internal (vlib_main_t * vm, + dpdk_device_t * xd, + struct rte_mbuf **tx_vector) +{ + dpdk_main_t *dm = &dpdk_main; + u32 n_packets; + u32 tx_head; + u32 tx_tail; + u32 n_retry; + int rv; + int queue_id; + tx_ring_hdr_t *ring; + + ring = vec_header (tx_vector, sizeof (*ring)); + + n_packets = ring->tx_head - ring->tx_tail; + + tx_head = ring->tx_head % xd->nb_tx_desc; + + /* + * Ensure rte_eth_tx_burst is not called with 0 packets, which can lead to + * unpredictable results. + */ + ASSERT (n_packets > 0); + + /* + * Check for tx_vector overflow. If this fails it is a system configuration + * error. The ring should be sized big enough to handle the largest un-flowed + * off burst from a traffic manager. A larger size also helps performance + * a bit because it decreases the probability of having to issue two tx_burst + * calls due to a ring wrap. + */ + ASSERT (n_packets < xd->nb_tx_desc); + ASSERT (ring->tx_tail == 0); + + n_retry = 16; + queue_id = vm->cpu_index; + + do + { + /* start the burst at the tail */ + tx_tail = ring->tx_tail % xd->nb_tx_desc; + + /* + * This device only supports one TX queue, + * and we're running multi-threaded... 
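+       * Worker threads therefore share the configured queues: each thread
+       * folds itself onto a queue with (queue_id % tx_q_used) and takes the
+       * per-queue spinlock; if that lock is already held it simply advances
+       * to the next queue and retries rather than blocking on a single
+       * contended lock.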
+ */ + if (PREDICT_FALSE (xd->lockp != 0)) + { + queue_id = queue_id % xd->tx_q_used; + while (__sync_lock_test_and_set (xd->lockp[queue_id], 1)) + /* zzzz */ + queue_id = (queue_id + 1) % xd->tx_q_used; + } + + if (PREDICT_FALSE (xd->flags & DPDK_DEVICE_FLAG_HQOS)) /* HQoS ON */ + { + /* no wrap, transmit in one burst */ + dpdk_device_hqos_per_worker_thread_t *hqos = + &xd->hqos_wt[vm->cpu_index]; + + ASSERT (hqos->swq != NULL); + + dpdk_hqos_metadata_set (hqos, + &tx_vector[tx_tail], tx_head - tx_tail); + rv = rte_ring_sp_enqueue_burst (hqos->swq, + (void **) &tx_vector[tx_tail], + (uint16_t) (tx_head - tx_tail)); + } + else if (PREDICT_TRUE (xd->flags & DPDK_DEVICE_FLAG_PMD)) + { + /* no wrap, transmit in one burst */ + rv = rte_eth_tx_burst (xd->device_index, + (uint16_t) queue_id, + &tx_vector[tx_tail], + (uint16_t) (tx_head - tx_tail)); + } + else + { + ASSERT (0); + rv = 0; + } + + if (PREDICT_FALSE (xd->lockp != 0)) + *xd->lockp[queue_id] = 0; + + if (PREDICT_FALSE (rv < 0)) + { + // emit non-fatal message, bump counter + vnet_main_t *vnm = dm->vnet_main; + vnet_interface_main_t *im = &vnm->interface_main; + u32 node_index; + + node_index = vec_elt_at_index (im->hw_interfaces, + xd->vlib_hw_if_index)->tx_node_index; + + vlib_error_count (vm, node_index, DPDK_TX_FUNC_ERROR_BAD_RETVAL, 1); + clib_warning ("rte_eth_tx_burst[%d]: error %d", xd->device_index, + rv); + return n_packets; // untransmitted packets + } + ring->tx_tail += (u16) rv; + n_packets -= (uint16_t) rv; + } + while (rv && n_packets && (n_retry > 0)); + + return n_packets; +} + +static_always_inline void +dpdk_prefetch_buffer_by_index (vlib_main_t * vm, u32 bi) +{ + vlib_buffer_t *b; + struct rte_mbuf *mb; + b = vlib_get_buffer (vm, bi); + mb = rte_mbuf_from_vlib_buffer (b); + CLIB_PREFETCH (mb, CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, LOAD); +} + +static_always_inline void +dpdk_buffer_recycle (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_buffer_t * b, u32 bi, struct rte_mbuf **mbp) +{ + dpdk_main_t *dm = &dpdk_main; + u32 my_cpu = vm->cpu_index; + struct rte_mbuf *mb_new; + + if (PREDICT_FALSE (b->flags & VLIB_BUFFER_RECYCLE) == 0) + return; + + mb_new = dpdk_replicate_packet_mb (b); + if (PREDICT_FALSE (mb_new == 0)) + { + vlib_error_count (vm, node->node_index, + DPDK_TX_FUNC_ERROR_REPL_FAIL, 1); + b->flags |= VLIB_BUFFER_REPL_FAIL; + } + else + *mbp = mb_new; + + vec_add1 (dm->recycle[my_cpu], bi); +} + +/* + * Transmits the packets on the frame to the interface associated with the + * node. It first copies packets on the frame to a tx_vector containing the + * rte_mbuf pointers. It then passes this vector to tx_burst_vector_internal + * which calls the dpdk tx_burst function. 
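+ *
+ * In outline, for each buffer on the frame the function:
+ *   - initializes the matching rte_mbuf chain (dpdk_validate_rte_mbuf),
+ *   - replicates buffers marked for recycling (dpdk_buffer_recycle),
+ *   - records a tx trace when tracing is enabled,
+ *   - appends the mbuf to tx_vector,
+ * and finally lets tx_burst_vector_internal transmit the vector; whatever
+ * remains untransmitted after its retries is counted as a drop and freed.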
+ */ +static uword +dpdk_interface_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * f) +{ + dpdk_main_t *dm = &dpdk_main; + vnet_interface_output_runtime_t *rd = (void *) node->runtime_data; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, rd->dev_instance); + u32 n_packets = f->n_vectors; + u32 n_left; + u32 *from; + struct rte_mbuf **tx_vector; + u16 i; + u16 nb_tx_desc = xd->nb_tx_desc; + int queue_id; + u32 my_cpu; + u32 tx_pkts = 0; + tx_ring_hdr_t *ring; + u32 n_on_ring; + + my_cpu = vm->cpu_index; + + queue_id = my_cpu; + + tx_vector = xd->tx_vectors[queue_id]; + ring = vec_header (tx_vector, sizeof (*ring)); + + n_on_ring = ring->tx_head - ring->tx_tail; + from = vlib_frame_vector_args (f); + + ASSERT (n_packets <= VLIB_FRAME_SIZE); + + if (PREDICT_FALSE (n_on_ring + n_packets > nb_tx_desc)) + { + /* + * Overflowing the ring should never happen. + * If it does then drop the whole frame. + */ + vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_RING_FULL, + n_packets); + + while (n_packets--) + { + u32 bi0 = from[n_packets]; + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); + struct rte_mbuf *mb0 = rte_mbuf_from_vlib_buffer (b0); + rte_pktmbuf_free (mb0); + } + return n_on_ring; + } + + if (PREDICT_FALSE (dm->tx_pcap_enable)) + { + n_left = n_packets; + while (n_left > 0) + { + u32 bi0 = from[0]; + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); + if (dm->pcap_sw_if_index == 0 || + dm->pcap_sw_if_index == vnet_buffer (b0)->sw_if_index[VLIB_TX]) + pcap_add_buffer (&dm->pcap_main, vm, bi0, 512); + from++; + n_left--; + } + } + + from = vlib_frame_vector_args (f); + n_left = n_packets; + i = ring->tx_head % nb_tx_desc; + + while (n_left >= 8) + { + u32 bi0, bi1, bi2, bi3; + struct rte_mbuf *mb0, *mb1, *mb2, *mb3; + vlib_buffer_t *b0, *b1, *b2, *b3; + u32 or_flags; + + dpdk_prefetch_buffer_by_index (vm, from[4]); + dpdk_prefetch_buffer_by_index (vm, from[5]); + dpdk_prefetch_buffer_by_index (vm, from[6]); + dpdk_prefetch_buffer_by_index (vm, from[7]); + + bi0 = from[0]; + bi1 = from[1]; + bi2 = from[2]; + bi3 = from[3]; + from += 4; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); + + or_flags = b0->flags | b1->flags | b2->flags | b3->flags; + + if (or_flags & VLIB_BUFFER_NEXT_PRESENT) + { + dpdk_validate_rte_mbuf (vm, b0, 1); + dpdk_validate_rte_mbuf (vm, b1, 1); + dpdk_validate_rte_mbuf (vm, b2, 1); + dpdk_validate_rte_mbuf (vm, b3, 1); + } + else + { + dpdk_validate_rte_mbuf (vm, b0, 0); + dpdk_validate_rte_mbuf (vm, b1, 0); + dpdk_validate_rte_mbuf (vm, b2, 0); + dpdk_validate_rte_mbuf (vm, b3, 0); + } + + mb0 = rte_mbuf_from_vlib_buffer (b0); + mb1 = rte_mbuf_from_vlib_buffer (b1); + mb2 = rte_mbuf_from_vlib_buffer (b2); + mb3 = rte_mbuf_from_vlib_buffer (b3); + + if (PREDICT_FALSE (or_flags & VLIB_BUFFER_RECYCLE)) + { + dpdk_buffer_recycle (vm, node, b0, bi0, &mb0); + dpdk_buffer_recycle (vm, node, b1, bi1, &mb1); + dpdk_buffer_recycle (vm, node, b2, bi2, &mb2); + dpdk_buffer_recycle (vm, node, b3, bi3, &mb3); + + /* dont enqueue packets if replication failed as they must + be sent back to recycle */ + if (PREDICT_TRUE ((b0->flags & VLIB_BUFFER_REPL_FAIL) == 0)) + tx_vector[i++ % nb_tx_desc] = mb0; + if (PREDICT_TRUE ((b1->flags & VLIB_BUFFER_REPL_FAIL) == 0)) + tx_vector[i++ % nb_tx_desc] = mb1; + if (PREDICT_TRUE ((b2->flags & VLIB_BUFFER_REPL_FAIL) == 0)) + tx_vector[i++ % nb_tx_desc] = mb2; + if (PREDICT_TRUE ((b3->flags & VLIB_BUFFER_REPL_FAIL) == 0)) + 
tx_vector[i++ % nb_tx_desc] = mb3; + } + else + { + if (PREDICT_FALSE (i + 3 >= nb_tx_desc)) + { + tx_vector[i++ % nb_tx_desc] = mb0; + tx_vector[i++ % nb_tx_desc] = mb1; + tx_vector[i++ % nb_tx_desc] = mb2; + tx_vector[i++ % nb_tx_desc] = mb3; + i %= nb_tx_desc; + } + else + { + tx_vector[i++] = mb0; + tx_vector[i++] = mb1; + tx_vector[i++] = mb2; + tx_vector[i++] = mb3; + } + } + + + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE)) + { + if (b0->flags & VLIB_BUFFER_IS_TRACED) + dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi0, b0); + if (b1->flags & VLIB_BUFFER_IS_TRACED) + dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi1, b1); + if (b2->flags & VLIB_BUFFER_IS_TRACED) + dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi2, b2); + if (b3->flags & VLIB_BUFFER_IS_TRACED) + dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi3, b3); + } + + n_left -= 4; + } + while (n_left > 0) + { + u32 bi0; + struct rte_mbuf *mb0; + vlib_buffer_t *b0; + + bi0 = from[0]; + from++; + + b0 = vlib_get_buffer (vm, bi0); + + dpdk_validate_rte_mbuf (vm, b0, 1); + + mb0 = rte_mbuf_from_vlib_buffer (b0); + dpdk_buffer_recycle (vm, node, b0, bi0, &mb0); + + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE)) + if (b0->flags & VLIB_BUFFER_IS_TRACED) + dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi0, b0); + + if (PREDICT_TRUE ((b0->flags & VLIB_BUFFER_REPL_FAIL) == 0)) + { + tx_vector[i % nb_tx_desc] = mb0; + i++; + } + n_left--; + } + + /* account for additional packets in the ring */ + ring->tx_head += n_packets; + n_on_ring = ring->tx_head - ring->tx_tail; + + /* transmit as many packets as possible */ + n_packets = tx_burst_vector_internal (vm, xd, tx_vector); + + /* + * tx_pkts is the number of packets successfully transmitted + * This is the number originally on ring minus the number remaining on ring + */ + tx_pkts = n_on_ring - n_packets; + + { + /* If there is no callback then drop any non-transmitted packets */ + if (PREDICT_FALSE (n_packets)) + { + vlib_simple_counter_main_t *cm; + vnet_main_t *vnm = vnet_get_main (); + + cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, + VNET_INTERFACE_COUNTER_TX_ERROR); + + vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, + n_packets); + + vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_PKT_DROP, + n_packets); + + while (n_packets--) + rte_pktmbuf_free (tx_vector[ring->tx_tail + n_packets]); + } + + /* Reset head/tail to avoid unnecessary wrap */ + ring->tx_head = 0; + ring->tx_tail = 0; + } + + /* Recycle replicated buffers */ + if (PREDICT_FALSE (vec_len (dm->recycle[my_cpu]))) + { + vlib_buffer_free (vm, dm->recycle[my_cpu], + vec_len (dm->recycle[my_cpu])); + _vec_len (dm->recycle[my_cpu]) = 0; + } + + ASSERT (ring->tx_head >= ring->tx_tail); + + return tx_pkts; +} + +static void +dpdk_clear_hw_interface_counters (u32 instance) +{ + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, instance); + + /* + * Set the "last_cleared_stats" to the current stats, so that + * things appear to clear from a display perspective. 
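+   * The hardware counters themselves are never reset here; the display code
+   * (format_dpdk_device) always prints stats minus last_cleared_stats, so
+   * snapshotting the current values makes every shown delta read as zero.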
+ */ + dpdk_update_counters (xd, vlib_time_now (dm->vlib_main)); + + clib_memcpy (&xd->last_cleared_stats, &xd->stats, sizeof (xd->stats)); + clib_memcpy (xd->last_cleared_xstats, xd->xstats, + vec_len (xd->last_cleared_xstats) * + sizeof (xd->last_cleared_xstats[0])); + +} + +static clib_error_t * +dpdk_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) +{ + vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index); + uword is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, hif->dev_instance); + int rv = 0; + + if (is_up) + { + f64 now = vlib_time_now (dm->vlib_main); + + if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) == 0) + { + rv = rte_eth_dev_start (xd->device_index); + if (!rv && xd->default_mac_address) + rv = rte_eth_dev_default_mac_addr_set (xd->device_index, + (struct ether_addr *) + xd->default_mac_address); + } + + if (xd->flags & DPDK_DEVICE_FLAG_PROMISC) + rte_eth_promiscuous_enable (xd->device_index); + else + rte_eth_promiscuous_disable (xd->device_index); + + rte_eth_allmulticast_enable (xd->device_index); + xd->flags |= DPDK_DEVICE_FLAG_ADMIN_UP; + dpdk_update_counters (xd, now); + dpdk_update_link_state (xd, now); + } + else + { + xd->flags &= ~DPDK_DEVICE_FLAG_ADMIN_UP; + + rte_eth_allmulticast_disable (xd->device_index); + vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, 0); + rte_eth_dev_stop (xd->device_index); + + /* For bonded interface, stop slave links */ + if (xd->pmd == VNET_DPDK_PMD_BOND) + { + u8 slink[16]; + int nlink = rte_eth_bond_slaves_get (xd->device_index, slink, 16); + while (nlink >= 1) + { + u8 dpdk_port = slink[--nlink]; + rte_eth_dev_stop (dpdk_port); + } + } + } + + if (rv < 0) + clib_warning ("rte_eth_dev_%s error: %d", is_up ? 
"start" : "stop", rv); + + return /* no error */ 0; +} + +/* + * Dynamically redirect all pkts from a specific interface + * to the specified node + */ +static void +dpdk_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index, + u32 node_index) +{ + dpdk_main_t *xm = &dpdk_main; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + dpdk_device_t *xd = vec_elt_at_index (xm->devices, hw->dev_instance); + + /* Shut off redirection */ + if (node_index == ~0) + { + xd->per_interface_next_index = node_index; + return; + } + + xd->per_interface_next_index = + vlib_node_add_next (xm->vlib_main, dpdk_input_node.index, node_index); +} + + +static clib_error_t * +dpdk_subif_add_del_function (vnet_main_t * vnm, + u32 hw_if_index, + struct vnet_sw_interface_t *st, int is_add) +{ + dpdk_main_t *xm = &dpdk_main; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + dpdk_device_t *xd = vec_elt_at_index (xm->devices, hw->dev_instance); + vnet_sw_interface_t *t = (vnet_sw_interface_t *) st; + int r, vlan_offload; + u32 prev_subifs = xd->num_subifs; + clib_error_t *err = 0; + + if (is_add) + xd->num_subifs++; + else if (xd->num_subifs) + xd->num_subifs--; + + if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0) + goto done; + + /* currently we program VLANS only for IXGBE VF and I40E VF */ + if ((xd->pmd != VNET_DPDK_PMD_IXGBEVF) && (xd->pmd != VNET_DPDK_PMD_I40EVF)) + goto done; + + if (t->sub.eth.flags.no_tags == 1) + goto done; + + if ((t->sub.eth.flags.one_tag != 1) || (t->sub.eth.flags.exact_match != 1)) + { + xd->num_subifs = prev_subifs; + err = clib_error_return (0, "unsupported VLAN setup"); + goto done; + } + + vlan_offload = rte_eth_dev_get_vlan_offload (xd->device_index); + vlan_offload |= ETH_VLAN_FILTER_OFFLOAD; + + if ((r = rte_eth_dev_set_vlan_offload (xd->device_index, vlan_offload))) + { + xd->num_subifs = prev_subifs; + err = clib_error_return (0, "rte_eth_dev_set_vlan_offload[%d]: err %d", + xd->device_index, r); + goto done; + } + + + if ((r = + rte_eth_dev_vlan_filter (xd->device_index, t->sub.eth.outer_vlan_id, + is_add))) + { + xd->num_subifs = prev_subifs; + err = clib_error_return (0, "rte_eth_dev_vlan_filter[%d]: err %d", + xd->device_index, r); + goto done; + } + +done: + if (xd->num_subifs) + xd->flags |= DPDK_DEVICE_FLAG_HAVE_SUBIF; + else + xd->flags &= ~DPDK_DEVICE_FLAG_HAVE_SUBIF; + + return err; +} + +/* *INDENT-OFF* */ +VNET_DEVICE_CLASS (dpdk_device_class) = { + .name = "dpdk", + .tx_function = dpdk_interface_tx, + .tx_function_n_errors = DPDK_TX_FUNC_N_ERROR, + .tx_function_error_strings = dpdk_tx_func_error_strings, + .format_device_name = format_dpdk_device_name, + .format_device = format_dpdk_device, + .format_tx_trace = format_dpdk_tx_dma_trace, + .clear_counters = dpdk_clear_hw_interface_counters, + .admin_up_down_function = dpdk_interface_admin_up_down, + .subif_add_del_function = dpdk_subif_add_del_function, + .rx_redirect_to_node = dpdk_set_interface_next_node, + .mac_addr_change_function = dpdk_set_mac_address, +}; + +VLIB_DEVICE_TX_FUNCTION_MULTIARCH (dpdk_device_class, dpdk_interface_tx) +/* *INDENT-ON* */ + +#define UP_DOWN_FLAG_EVENT 1 + +uword +admin_up_down_process (vlib_main_t * vm, + vlib_node_runtime_t * rt, vlib_frame_t * f) +{ + clib_error_t *error = 0; + uword event_type; + uword *event_data = 0; + u32 sw_if_index; + u32 flags; + + while (1) + { + vlib_process_wait_for_event (vm); + + event_type = vlib_process_get_events (vm, &event_data); + + dpdk_main.admin_up_down_in_progress = 1; + + switch (event_type) + { + case 
UP_DOWN_FLAG_EVENT: + { + if (vec_len (event_data) == 2) + { + sw_if_index = event_data[0]; + flags = event_data[1]; + error = + vnet_sw_interface_set_flags (vnet_get_main (), sw_if_index, + flags); + clib_error_report (error); + } + } + break; + } + + vec_reset_length (event_data); + + dpdk_main.admin_up_down_in_progress = 0; + + } + return 0; /* or not */ +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (admin_up_down_process_node,static) = { + .function = admin_up_down_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "admin-up-down-process", + .process_log2_n_stack_bytes = 17, // 256KB +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/device/dpdk.h b/src/plugins/dpdk/device/dpdk.h new file mode 100644 index 00000000..2a1a6205 --- /dev/null +++ b/src/plugins/dpdk/device/dpdk.h @@ -0,0 +1,490 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_dpdk_h__ +#define __included_dpdk_h__ + +/* $$$$ We should rename always_inline -> clib_always_inline */ +#undef always_inline + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#if CLIB_DEBUG > 0 +#define always_inline static inline +#else +#define always_inline static inline __attribute__ ((__always_inline__)) +#endif + +#include + +#define NB_MBUF (16<<10) + +extern vnet_device_class_t dpdk_device_class; +extern vlib_node_registration_t dpdk_input_node; +extern vlib_node_registration_t handoff_dispatch_node; + +#define foreach_dpdk_pmd \ + _ ("net_thunderx", THUNDERX) \ + _ ("net_e1000_em", E1000EM) \ + _ ("net_e1000_igb", IGB) \ + _ ("net_e1000_igb_vf", IGBVF) \ + _ ("net_ixgbe", IXGBE) \ + _ ("net_ixgbe_vf", IXGBEVF) \ + _ ("net_i40e", I40E) \ + _ ("net_i40e_vf", I40EVF) \ + _ ("net_virtio", VIRTIO) \ + _ ("net_enic", ENIC) \ + _ ("net_vmxnet3", VMXNET3) \ + _ ("AF_PACKET PMD", AF_PACKET) \ + _ ("rte_bond_pmd", BOND) \ + _ ("net_fm10k", FM10K) \ + _ ("net_cxgbe", CXGBE) \ + _ ("net_mlx5", MLX5) \ + _ ("net_dpaa2", DPAA2) + +typedef enum +{ + VNET_DPDK_PMD_NONE, +#define _(s,f) VNET_DPDK_PMD_##f, + foreach_dpdk_pmd +#undef _ + VNET_DPDK_PMD_UNKNOWN, /* must be last */ +} dpdk_pmd_t; + +typedef enum +{ + VNET_DPDK_PORT_TYPE_ETH_1G, + VNET_DPDK_PORT_TYPE_ETH_10G, + VNET_DPDK_PORT_TYPE_ETH_40G, + VNET_DPDK_PORT_TYPE_ETH_100G, + VNET_DPDK_PORT_TYPE_ETH_BOND, + VNET_DPDK_PORT_TYPE_ETH_SWITCH, + VNET_DPDK_PORT_TYPE_AF_PACKET, + VNET_DPDK_PORT_TYPE_UNKNOWN, +} dpdk_port_type_t; + +/* + * The header for the tx_vector in dpdk_device_t. + * Head and tail are indexes into the tx_vector and are of type + * u64 so they never overflow. 
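+ * Because both counters only ever increase, the ring occupancy is simply
+ * (tx_head - tx_tail) and a physical slot is obtained with
+ * (index % nb_tx_desc); the tx path resets head and tail to zero at the end
+ * of each send, so the modulo wrap is rarely taken in practice.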
+ */ +typedef struct +{ + u64 tx_head; + u64 tx_tail; +} tx_ring_hdr_t; + +typedef struct +{ + struct rte_ring *swq; + + u64 hqos_field0_slabmask; + u32 hqos_field0_slabpos; + u32 hqos_field0_slabshr; + u64 hqos_field1_slabmask; + u32 hqos_field1_slabpos; + u32 hqos_field1_slabshr; + u64 hqos_field2_slabmask; + u32 hqos_field2_slabpos; + u32 hqos_field2_slabshr; + u32 hqos_tc_table[64]; +} dpdk_device_hqos_per_worker_thread_t; + +typedef struct +{ + struct rte_ring **swq; + struct rte_mbuf **pkts_enq; + struct rte_mbuf **pkts_deq; + struct rte_sched_port *hqos; + u32 hqos_burst_enq; + u32 hqos_burst_deq; + u32 pkts_enq_len; + u32 swq_pos; + u32 flush_count; +} dpdk_device_hqos_per_hqos_thread_t; + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + volatile u32 **lockp; + + /* Instance ID */ + u32 device_index; + + u32 vlib_hw_if_index; + u32 vlib_sw_if_index; + + /* next node index if we decide to steal the rx graph arc */ + u32 per_interface_next_index; + + /* dpdk rte_mbuf rx and tx vectors, VLIB_FRAME_SIZE */ + struct rte_mbuf ***tx_vectors; /* one per worker thread */ + struct rte_mbuf ***rx_vectors; + + /* vector of traced contexts, per device */ + u32 **d_trace_buffers; + + dpdk_pmd_t pmd:8; + i8 cpu_socket; + + u16 flags; +#define DPDK_DEVICE_FLAG_ADMIN_UP (1 << 0) +#define DPDK_DEVICE_FLAG_PROMISC (1 << 1) +#define DPDK_DEVICE_FLAG_PMD (1 << 2) +#define DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE (1 << 3) +#define DPDK_DEVICE_FLAG_MAYBE_MULTISEG (1 << 4) +#define DPDK_DEVICE_FLAG_HAVE_SUBIF (1 << 5) +#define DPDK_DEVICE_FLAG_HQOS (1 << 6) + + u16 nb_tx_desc; + CLIB_CACHE_LINE_ALIGN_MARK (cacheline1); + + u8 *interface_name_suffix; + + /* number of sub-interfaces */ + u16 num_subifs; + + /* PMD related */ + u16 tx_q_used; + u16 rx_q_used; + u16 nb_rx_desc; + u16 *cpu_socket_id_by_queue; + struct rte_eth_conf port_conf; + struct rte_eth_txconf tx_conf; + + /* HQoS related */ + dpdk_device_hqos_per_worker_thread_t *hqos_wt; + dpdk_device_hqos_per_hqos_thread_t *hqos_ht; + + /* af_packet */ + u8 af_packet_port_id; + + struct rte_eth_link link; + f64 time_last_link_update; + + struct rte_eth_stats stats; + struct rte_eth_stats last_stats; + struct rte_eth_stats last_cleared_stats; + struct rte_eth_xstat *xstats; + struct rte_eth_xstat *last_cleared_xstats; + f64 time_last_stats_update; + dpdk_port_type_t port_type; + + /* mac address */ + u8 *default_mac_address; +} dpdk_device_t; + +#define DPDK_STATS_POLL_INTERVAL (10.0) +#define DPDK_MIN_STATS_POLL_INTERVAL (0.001) /* 1msec */ + +#define DPDK_LINK_POLL_INTERVAL (3.0) +#define DPDK_MIN_LINK_POLL_INTERVAL (0.001) /* 1msec */ + +typedef struct +{ + u32 device; + u16 queue_id; +} dpdk_device_and_queue_t; + +#ifndef DPDK_HQOS_DBG_BYPASS +#define DPDK_HQOS_DBG_BYPASS 0 +#endif + +#ifndef HQOS_FLUSH_COUNT_THRESHOLD +#define HQOS_FLUSH_COUNT_THRESHOLD 100000 +#endif + +typedef struct dpdk_device_config_hqos_t +{ + u32 hqos_thread; + u32 hqos_thread_valid; + + u32 swq_size; + u32 burst_enq; + u32 burst_deq; + + u32 pktfield0_slabpos; + u32 pktfield1_slabpos; + u32 pktfield2_slabpos; + u64 pktfield0_slabmask; + u64 pktfield1_slabmask; + u64 pktfield2_slabmask; + u32 tc_table[64]; + + struct rte_sched_port_params port; + struct rte_sched_subport_params *subport; + struct rte_sched_pipe_params *pipe; + uint32_t *pipe_map; +} dpdk_device_config_hqos_t; + +int dpdk_hqos_validate_mask (u64 mask, u32 n); +void dpdk_device_config_hqos_pipe_profile_default (dpdk_device_config_hqos_t * + hqos, u32 pipe_profile_id); +void 
dpdk_device_config_hqos_default (dpdk_device_config_hqos_t * hqos); +clib_error_t *dpdk_port_setup_hqos (dpdk_device_t * xd, + dpdk_device_config_hqos_t * hqos); +void dpdk_hqos_metadata_set (dpdk_device_hqos_per_worker_thread_t * hqos, + struct rte_mbuf **pkts, u32 n_pkts); + +#define foreach_dpdk_device_config_item \ + _ (num_rx_queues) \ + _ (num_tx_queues) \ + _ (num_rx_desc) \ + _ (num_tx_desc) \ + _ (rss_fn) + +typedef struct +{ + vlib_pci_addr_t pci_addr; + u8 is_blacklisted; + u8 vlan_strip_offload; +#define DPDK_DEVICE_VLAN_STRIP_DEFAULT 0 +#define DPDK_DEVICE_VLAN_STRIP_OFF 1 +#define DPDK_DEVICE_VLAN_STRIP_ON 2 + +#define _(x) uword x; + foreach_dpdk_device_config_item +#undef _ + clib_bitmap_t * workers; + u32 hqos_enabled; + dpdk_device_config_hqos_t hqos; +} dpdk_device_config_t; + +typedef struct +{ + + /* Config stuff */ + u8 **eal_init_args; + u8 *eal_init_args_str; + u8 *uio_driver_name; + u8 no_multi_seg; + u8 enable_tcp_udp_checksum; + u8 cryptodev; + + /* Required config parameters */ + u8 coremask_set_manually; + u8 nchannels_set_manually; + u32 coremask; + u32 nchannels; + u32 num_mbufs; + u8 num_kni; /* while kni_init allows u32, port_id in callback fn is only u8 */ + + /* + * format interface names ala xxxEthernet%d/%d/%d instead of + * xxxEthernet%x/%x/%x. + */ + u8 interface_name_format_decimal; + + /* per-device config */ + dpdk_device_config_t default_devconf; + dpdk_device_config_t *dev_confs; + uword *device_config_index_by_pci_addr; + +} dpdk_config_main_t; + +dpdk_config_main_t dpdk_config_main; + +typedef struct +{ + + /* Devices */ + dpdk_device_t *devices; + dpdk_device_and_queue_t **devices_by_cpu; + dpdk_device_and_queue_t **devices_by_hqos_cpu; + + /* per-thread recycle lists */ + u32 **recycle; + + /* buffer flags template, configurable to enable/disable tcp / udp cksum */ + u32 buffer_flags_template; + + /* vlib buffer free list, must be same size as an rte_mbuf */ + u32 vlib_buffer_free_list_index; + + /* Ethernet input node index */ + u32 ethernet_input_node_index; + + /* pcap tracing [only works if (CLIB_DEBUG > 0)] */ + int tx_pcap_enable; + pcap_main_t pcap_main; + u8 *pcap_filename; + u32 pcap_sw_if_index; + u32 pcap_pkts_to_capture; + + /* hashes */ + uword *dpdk_device_by_kni_port_id; + uword *vu_sw_if_index_by_listener_fd; + uword *vu_sw_if_index_by_sock_fd; + u32 *vu_inactive_interfaces_device_index; + + /* + * flag indicating that a posted admin up/down + * (via post_sw_interface_set_flags) is in progress + */ + u8 admin_up_down_in_progress; + + u8 use_rss; + + /* which cpus are running dpdk-input */ + int input_cpu_first_index; + int input_cpu_count; + + /* which cpus are running I/O TX */ + int hqos_cpu_first_index; + int hqos_cpu_count; + + /* control interval of dpdk link state and stat polling */ + f64 link_state_poll_interval; + f64 stat_poll_interval; + + /* Sleep for this many MS after each device poll */ + u32 poll_sleep; + + /* convenience */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; + dpdk_config_main_t *conf; + + /* mempool */ + struct rte_mempool **pktmbuf_pools; + + /* API message ID base */ + u16 msg_id_base; +} dpdk_main_t; + +dpdk_main_t dpdk_main; + +typedef struct +{ + u32 buffer_index; + u16 device_index; + u8 queue_index; + struct rte_mbuf mb; + /* Copy of VLIB buffer; packet data stored in pre_data. 
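+     The tx trace handler copies the first bytes of packet data into
+     pre_data so the trace formatter can decode the Ethernet header without
+     keeping a reference to the original buffer.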
*/ + vlib_buffer_t buffer; +} dpdk_tx_dma_trace_t; + +typedef struct +{ + u32 buffer_index; + u16 device_index; + u16 queue_index; + struct rte_mbuf mb; + vlib_buffer_t buffer; /* Copy of VLIB buffer; pkt data stored in pre_data. */ + u8 data[256]; /* First 256 data bytes, used for hexdump */ +} dpdk_rx_dma_trace_t; + +void vnet_buffer_needs_dpdk_mb (vlib_buffer_t * b); + +clib_error_t *dpdk_set_mac_address (vnet_hw_interface_t * hi, char *address); + +clib_error_t *dpdk_set_mc_filter (vnet_hw_interface_t * hi, + struct ether_addr mc_addr_vec[], int naddr); + +void dpdk_thread_input (dpdk_main_t * dm, dpdk_device_t * xd); + +clib_error_t *dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd); + +u32 dpdk_interface_tx_vector (vlib_main_t * vm, u32 dev_instance); + +struct rte_mbuf *dpdk_replicate_packet_mb (vlib_buffer_t * b); +struct rte_mbuf *dpdk_zerocopy_replicate_packet_mb (vlib_buffer_t * b); + +#define foreach_dpdk_error \ + _(NONE, "no error") \ + _(RX_PACKET_ERROR, "Rx packet errors") \ + _(RX_BAD_FCS, "Rx bad fcs") \ + _(IP_CHECKSUM_ERROR, "Rx ip checksum errors") \ + _(RX_ALLOC_FAIL, "rx buf alloc from free list failed") \ + _(RX_ALLOC_NO_PHYSMEM, "rx buf alloc failed no physmem") \ + _(RX_ALLOC_DROP_PKTS, "rx packets dropped due to alloc error") + +typedef enum +{ +#define _(f,s) DPDK_ERROR_##f, + foreach_dpdk_error +#undef _ + DPDK_N_ERROR, +} dpdk_error_t; + +int dpdk_set_stat_poll_interval (f64 interval); +int dpdk_set_link_state_poll_interval (f64 interval); +void dpdk_update_link_state (dpdk_device_t * xd, f64 now); +void dpdk_device_lock_init (dpdk_device_t * xd); +void dpdk_device_lock_free (dpdk_device_t * xd); + +void dpdk_rx_trace (dpdk_main_t * dm, + vlib_node_runtime_t * node, + dpdk_device_t * xd, + u16 queue_id, u32 * buffers, uword n_buffers); + +#define EFD_OPERATION_LESS_THAN 0 +#define EFD_OPERATION_GREATER_OR_EQUAL 1 + +format_function_t format_dpdk_device_name; +format_function_t format_dpdk_device; +format_function_t format_dpdk_tx_dma_trace; +format_function_t format_dpdk_rx_dma_trace; +format_function_t format_dpdk_rte_mbuf; +format_function_t format_dpdk_rx_rte_mbuf; +unformat_function_t unformat_socket_mem; +clib_error_t *unformat_rss_fn (unformat_input_t * input, uword * rss_fn); +clib_error_t *unformat_hqos (unformat_input_t * input, + dpdk_device_config_hqos_t * hqos); + +uword +admin_up_down_process (vlib_main_t * vm, + vlib_node_runtime_t * rt, vlib_frame_t * f); + +#endif /* __included_dpdk_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/device/dpdk_priv.h b/src/plugins/dpdk/device/dpdk_priv.h new file mode 100644 index 00000000..dd40ff48 --- /dev/null +++ b/src/plugins/dpdk/device/dpdk_priv.h @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#define rte_mbuf_from_vlib_buffer(x) (((struct rte_mbuf *)x) - 1) +#define vlib_buffer_from_rte_mbuf(x) ((vlib_buffer_t *)(x+1)) + +#define DPDK_NB_RX_DESC_DEFAULT 1024 +#define DPDK_NB_TX_DESC_DEFAULT 1024 +#define DPDK_NB_RX_DESC_VIRTIO 256 +#define DPDK_NB_TX_DESC_VIRTIO 256 + +#define I40E_DEV_ID_SFP_XL710 0x1572 +#define I40E_DEV_ID_QSFP_A 0x1583 +#define I40E_DEV_ID_QSFP_B 0x1584 +#define I40E_DEV_ID_QSFP_C 0x1585 +#define I40E_DEV_ID_10G_BASE_T 0x1586 +#define I40E_DEV_ID_VF 0x154C + +/* These args appear by themselves */ +#define foreach_eal_double_hyphen_predicate_arg \ +_(no-shconf) \ +_(no-hpet) \ +_(no-huge) \ +_(vmware-tsc-map) + +#define foreach_eal_single_hyphen_mandatory_arg \ +_(coremask, c) \ +_(nchannels, n) \ + +#define foreach_eal_single_hyphen_arg \ +_(blacklist, b) \ +_(mem-alloc-request, m) \ +_(force-ranks, r) + +/* These args are preceeded by "--" and followed by a single string */ +#define foreach_eal_double_hyphen_arg \ +_(huge-dir) \ +_(proc-type) \ +_(file-prefix) \ +_(vdev) + +static inline void +dpdk_get_xstats (dpdk_device_t * xd) +{ + int len; + if ((len = rte_eth_xstats_get (xd->device_index, NULL, 0)) > 0) + { + vec_validate (xd->xstats, len - 1); + vec_validate (xd->last_cleared_xstats, len - 1); + + len = + rte_eth_xstats_get (xd->device_index, xd->xstats, + vec_len (xd->xstats)); + + ASSERT (vec_len (xd->xstats) == len); + ASSERT (vec_len (xd->last_cleared_xstats) == len); + + _vec_len (xd->xstats) = len; + _vec_len (xd->last_cleared_xstats) = len; + + } +} + + +static inline void +dpdk_update_counters (dpdk_device_t * xd, f64 now) +{ + vlib_simple_counter_main_t *cm; + vnet_main_t *vnm = vnet_get_main (); + u32 my_cpu = os_get_cpu_number (); + u64 rxerrors, last_rxerrors; + + /* only update counters for PMD interfaces */ + if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0) + return; + + xd->time_last_stats_update = now ? now : xd->time_last_stats_update; + clib_memcpy (&xd->last_stats, &xd->stats, sizeof (xd->last_stats)); + rte_eth_stats_get (xd->device_index, &xd->stats); + + /* maybe bump interface rx no buffer counter */ + if (PREDICT_FALSE (xd->stats.rx_nombuf != xd->last_stats.rx_nombuf)) + { + cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, + VNET_INTERFACE_COUNTER_RX_NO_BUF); + + vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, + xd->stats.rx_nombuf - + xd->last_stats.rx_nombuf); + } + + /* missed pkt counter */ + if (PREDICT_FALSE (xd->stats.imissed != xd->last_stats.imissed)) + { + cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, + VNET_INTERFACE_COUNTER_RX_MISS); + + vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, + xd->stats.imissed - + xd->last_stats.imissed); + } + rxerrors = xd->stats.ierrors; + last_rxerrors = xd->last_stats.ierrors; + + if (PREDICT_FALSE (rxerrors != last_rxerrors)) + { + cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, + VNET_INTERFACE_COUNTER_RX_ERROR); + + vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, + rxerrors - last_rxerrors); + } + + dpdk_get_xstats (xd); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/device/format.c b/src/plugins/dpdk/device/format.c new file mode 100644 index 00000000..25a8c5cb --- /dev/null +++ b/src/plugins/dpdk/device/format.c @@ -0,0 +1,754 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#define foreach_dpdk_counter \ + _ (tx_frames_ok, opackets) \ + _ (tx_bytes_ok, obytes) \ + _ (tx_errors, oerrors) \ + _ (rx_frames_ok, ipackets) \ + _ (rx_bytes_ok, ibytes) \ + _ (rx_errors, ierrors) \ + _ (rx_missed, imissed) \ + _ (rx_no_bufs, rx_nombuf) + +#define foreach_dpdk_q_counter \ + _ (rx_frames_ok, q_ipackets) \ + _ (tx_frames_ok, q_opackets) \ + _ (rx_bytes_ok, q_ibytes) \ + _ (tx_bytes_ok, q_obytes) \ + _ (rx_errors, q_errors) + +#define foreach_dpdk_rss_hf \ + _(ETH_RSS_FRAG_IPV4, "ipv4-frag") \ + _(ETH_RSS_NONFRAG_IPV4_TCP, "ipv4-tcp") \ + _(ETH_RSS_NONFRAG_IPV4_UDP, "ipv4-udp") \ + _(ETH_RSS_NONFRAG_IPV4_SCTP, "ipv4-sctp") \ + _(ETH_RSS_NONFRAG_IPV4_OTHER, "ipv4-other") \ + _(ETH_RSS_IPV4, "ipv4") \ + _(ETH_RSS_IPV6_TCP_EX, "ipv6-tcp-ex") \ + _(ETH_RSS_IPV6_UDP_EX, "ipv6-udp-ex") \ + _(ETH_RSS_FRAG_IPV6, "ipv6-frag") \ + _(ETH_RSS_NONFRAG_IPV6_TCP, "ipv6-tcp") \ + _(ETH_RSS_NONFRAG_IPV6_UDP, "ipv6-udp") \ + _(ETH_RSS_NONFRAG_IPV6_SCTP, "ipv6-sctp") \ + _(ETH_RSS_NONFRAG_IPV6_OTHER, "ipv6-other") \ + _(ETH_RSS_L2_PAYLOAD, "l2-payload") \ + _(ETH_RSS_IPV6_EX, "ipv6-ex") \ + _(ETH_RSS_IPV6, "ipv6") + + +#define foreach_dpdk_rx_offload_caps \ + _(DEV_RX_OFFLOAD_VLAN_STRIP, "vlan-strip") \ + _(DEV_RX_OFFLOAD_IPV4_CKSUM, "ipv4-cksum") \ + _(DEV_RX_OFFLOAD_UDP_CKSUM , "udp-cksum") \ + _(DEV_RX_OFFLOAD_TCP_CKSUM , "tcp-cksum") \ + _(DEV_RX_OFFLOAD_TCP_LRO , "rcp-lro") \ + _(DEV_RX_OFFLOAD_QINQ_STRIP, "qinq-strip") + +#define foreach_dpdk_tx_offload_caps \ + _(DEV_TX_OFFLOAD_VLAN_INSERT, "vlan-insert") \ + _(DEV_TX_OFFLOAD_IPV4_CKSUM, "ipv4-cksum") \ + _(DEV_TX_OFFLOAD_UDP_CKSUM , "udp-cksum") \ + _(DEV_TX_OFFLOAD_TCP_CKSUM , "tcp-cksum") \ + _(DEV_TX_OFFLOAD_SCTP_CKSUM , "sctp-cksum") \ + _(DEV_TX_OFFLOAD_TCP_TSO , "tcp-tso") \ + _(DEV_TX_OFFLOAD_UDP_TSO , "udp-tso") \ + _(DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM, "outer-ipv4-cksum") \ + _(DEV_TX_OFFLOAD_QINQ_INSERT, "qinq-insert") + +#define foreach_dpdk_pkt_rx_offload_flag \ + _ (PKT_RX_VLAN_PKT, "RX packet is a 802.1q VLAN packet") \ + _ (PKT_RX_RSS_HASH, "RX packet with RSS hash result") \ + _ (PKT_RX_FDIR, "RX packet with FDIR infos") \ + _ (PKT_RX_L4_CKSUM_BAD, "L4 cksum of RX pkt. is not OK") \ + _ (PKT_RX_IP_CKSUM_BAD, "IP cksum of RX pkt. is not OK") \ + _ (PKT_RX_VLAN_STRIPPED, "RX packet VLAN tag stripped") \ + _ (PKT_RX_IP_CKSUM_GOOD, "IP cksum of RX pkt. is valid") \ + _ (PKT_RX_L4_CKSUM_GOOD, "L4 cksum of RX pkt. 
is valid") \ + _ (PKT_RX_IEEE1588_PTP, "RX IEEE1588 L2 Ethernet PT Packet") \ + _ (PKT_RX_IEEE1588_TMST, "RX IEEE1588 L2/L4 timestamped packet") \ + _ (PKT_RX_QINQ_STRIPPED, "RX packet QinQ tags stripped") + +#define foreach_dpdk_pkt_type \ + _ (L2, ETHER, "Ethernet packet") \ + _ (L2, ETHER_TIMESYNC, "Ethernet packet for time sync") \ + _ (L2, ETHER_ARP, "ARP packet") \ + _ (L2, ETHER_LLDP, "LLDP (Link Layer Discovery Protocol) packet") \ + _ (L2, ETHER_NSH, "NSH (Network Service Header) packet") \ + _ (L2, ETHER_VLAN, "VLAN packet") \ + _ (L2, ETHER_QINQ, "QinQ packet") \ + _ (L3, IPV4, "IPv4 packet without extension headers") \ + _ (L3, IPV4_EXT, "IPv4 packet with extension headers") \ + _ (L3, IPV4_EXT_UNKNOWN, "IPv4 packet with or without extension headers") \ + _ (L3, IPV6, "IPv6 packet without extension headers") \ + _ (L3, IPV6_EXT, "IPv6 packet with extension headers") \ + _ (L3, IPV6_EXT_UNKNOWN, "IPv6 packet with or without extension headers") \ + _ (L4, TCP, "TCP packet") \ + _ (L4, UDP, "UDP packet") \ + _ (L4, FRAG, "Fragmented IP packet") \ + _ (L4, SCTP, "SCTP (Stream Control Transmission Protocol) packet") \ + _ (L4, ICMP, "ICMP packet") \ + _ (L4, NONFRAG, "Non-fragmented IP packet") \ + _ (TUNNEL, GRE, "GRE tunneling packet") \ + _ (TUNNEL, VXLAN, "VXLAN tunneling packet") \ + _ (TUNNEL, NVGRE, "NVGRE Tunneling packet") \ + _ (TUNNEL, GENEVE, "GENEVE Tunneling packet") \ + _ (TUNNEL, GRENAT, "Teredo, VXLAN or GRE Tunneling packet") \ + _ (INNER_L2, ETHER, "Inner Ethernet packet") \ + _ (INNER_L2, ETHER_VLAN, "Inner Ethernet packet with VLAN") \ + _ (INNER_L3, IPV4, "Inner IPv4 packet without extension headers") \ + _ (INNER_L3, IPV4_EXT, "Inner IPv4 packet with extension headers") \ + _ (INNER_L3, IPV4_EXT_UNKNOWN, "Inner IPv4 packet with or without extension headers") \ + _ (INNER_L3, IPV6, "Inner IPv6 packet without extension headers") \ + _ (INNER_L3, IPV6_EXT, "Inner IPv6 packet with extension headers") \ + _ (INNER_L3, IPV6_EXT_UNKNOWN, "Inner IPv6 packet with or without extension headers") \ + _ (INNER_L4, TCP, "Inner TCP packet") \ + _ (INNER_L4, UDP, "Inner UDP packet") \ + _ (INNER_L4, FRAG, "Inner fagmented IP packet") \ + _ (INNER_L4, SCTP, "Inner SCTP (Stream Control Transmission Protocol) packet") \ + _ (INNER_L4, ICMP, "Inner ICMP packet") \ + _ (INNER_L4, NONFRAG, "Inner non-fragmented IP packet") + +#define foreach_dpdk_pkt_tx_offload_flag \ + _ (PKT_TX_VLAN_PKT, "TX packet is a 802.1q VLAN packet") \ + _ (PKT_TX_IP_CKSUM, "IP cksum of TX pkt. computed by NIC") \ + _ (PKT_TX_TCP_CKSUM, "TCP cksum of TX pkt. computed by NIC") \ + _ (PKT_TX_SCTP_CKSUM, "SCTP cksum of TX pkt. 
computed by NIC") \ + _ (PKT_TX_IEEE1588_TMST, "TX IEEE1588 packet to timestamp") + +#define foreach_dpdk_pkt_offload_flag \ + foreach_dpdk_pkt_rx_offload_flag \ + foreach_dpdk_pkt_tx_offload_flag + +u8 * +format_dpdk_device_name (u8 * s, va_list * args) +{ + dpdk_main_t *dm = &dpdk_main; + char *devname_format; + char *device_name; + u32 i = va_arg (*args, u32); + struct rte_eth_dev_info dev_info; + u8 *ret; + + if (dm->conf->interface_name_format_decimal) + devname_format = "%s%d/%d/%d"; + else + devname_format = "%s%x/%x/%x"; + + switch (dm->devices[i].port_type) + { + case VNET_DPDK_PORT_TYPE_ETH_1G: + device_name = "GigabitEthernet"; + break; + + case VNET_DPDK_PORT_TYPE_ETH_10G: + device_name = "TenGigabitEthernet"; + break; + + case VNET_DPDK_PORT_TYPE_ETH_40G: + device_name = "FortyGigabitEthernet"; + break; + + case VNET_DPDK_PORT_TYPE_ETH_100G: + device_name = "HundredGigabitEthernet"; + break; + + case VNET_DPDK_PORT_TYPE_ETH_BOND: + return format (s, "BondEthernet%d", dm->devices[i].device_index); + + case VNET_DPDK_PORT_TYPE_ETH_SWITCH: + device_name = "EthernetSwitch"; + break; + + case VNET_DPDK_PORT_TYPE_AF_PACKET: + rte_eth_dev_info_get (i, &dev_info); + return format (s, "af_packet%d", dm->devices[i].af_packet_port_id); + + default: + case VNET_DPDK_PORT_TYPE_UNKNOWN: + device_name = "UnknownEthernet"; + break; + } + + rte_eth_dev_info_get (i, &dev_info); + + if (dev_info.pci_dev) + ret = format (s, devname_format, device_name, dev_info.pci_dev->addr.bus, + dev_info.pci_dev->addr.devid, + dev_info.pci_dev->addr.function); + else + ret = format (s, "%s%d", device_name, dm->devices[i].device_index); + + if (dm->devices[i].interface_name_suffix) + return format (ret, "/%s", dm->devices[i].interface_name_suffix); + return ret; +} + +static u8 * +format_dpdk_device_type (u8 * s, va_list * args) +{ + dpdk_main_t *dm = &dpdk_main; + char *dev_type; + u32 i = va_arg (*args, u32); + + switch (dm->devices[i].pmd) + { + case VNET_DPDK_PMD_E1000EM: + dev_type = "Intel 82540EM (e1000)"; + break; + + case VNET_DPDK_PMD_IGB: + dev_type = "Intel e1000"; + break; + + case VNET_DPDK_PMD_I40E: + dev_type = "Intel X710/XL710 Family"; + break; + + case VNET_DPDK_PMD_I40EVF: + dev_type = "Intel X710/XL710 Family VF"; + break; + + case VNET_DPDK_PMD_FM10K: + dev_type = "Intel FM10000 Family Ethernet Switch"; + break; + + case VNET_DPDK_PMD_IGBVF: + dev_type = "Intel e1000 VF"; + break; + + case VNET_DPDK_PMD_VIRTIO: + dev_type = "Red Hat Virtio"; + break; + + case VNET_DPDK_PMD_IXGBEVF: + dev_type = "Intel 82599 VF"; + break; + + case VNET_DPDK_PMD_IXGBE: + dev_type = "Intel 82599"; + break; + + case VNET_DPDK_PMD_ENIC: + dev_type = "Cisco VIC"; + break; + + case VNET_DPDK_PMD_CXGBE: + dev_type = "Chelsio T4/T5"; + break; + + case VNET_DPDK_PMD_MLX5: + dev_type = "Mellanox ConnectX-4 Family"; + break; + + case VNET_DPDK_PMD_VMXNET3: + dev_type = "VMware VMXNET3"; + break; + + case VNET_DPDK_PMD_AF_PACKET: + dev_type = "af_packet"; + break; + + case VNET_DPDK_PMD_BOND: + dev_type = "Ethernet Bonding"; + break; + + case VNET_DPDK_PMD_DPAA2: + dev_type = "NXP DPAA2 Mac"; + break; + + default: + case VNET_DPDK_PMD_UNKNOWN: + dev_type = "### UNKNOWN ###"; + break; + } + + return format (s, dev_type); +} + +static u8 * +format_dpdk_link_status (u8 * s, va_list * args) +{ + dpdk_device_t *xd = va_arg (*args, dpdk_device_t *); + struct rte_eth_link *l = &xd->link; + vnet_main_t *vnm = vnet_get_main (); + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, xd->vlib_hw_if_index); + + s = format (s, "%s 
", l->link_status ? "up" : "down"); + if (l->link_status) + { + u32 promisc = rte_eth_promiscuous_get (xd->device_index); + + s = format (s, "%s duplex ", (l->link_duplex == ETH_LINK_FULL_DUPLEX) ? + "full" : "half"); + s = format (s, "speed %u mtu %d %s\n", l->link_speed, + hi->max_packet_bytes, promisc ? " promisc" : ""); + } + else + s = format (s, "\n"); + + return s; +} + +#define _line_len 72 +#define _(v, str) \ +if (bitmap & v) { \ + if (format_get_indent (s) > next_split ) { \ + next_split += _line_len; \ + s = format(s,"\n%U", format_white_space, indent); \ + } \ + s = format(s, "%s ", str); \ +} + +static u8 * +format_dpdk_rss_hf_name (u8 * s, va_list * args) +{ + u64 bitmap = va_arg (*args, u64); + int next_split = _line_len; + int indent = format_get_indent (s); + + if (!bitmap) + return format (s, "none"); + + foreach_dpdk_rss_hf return s; +} + +static u8 * +format_dpdk_rx_offload_caps (u8 * s, va_list * args) +{ + u32 bitmap = va_arg (*args, u32); + int next_split = _line_len; + int indent = format_get_indent (s); + + if (!bitmap) + return format (s, "none"); + + foreach_dpdk_rx_offload_caps return s; +} + +static u8 * +format_dpdk_tx_offload_caps (u8 * s, va_list * args) +{ + u32 bitmap = va_arg (*args, u32); + int next_split = _line_len; + int indent = format_get_indent (s); + if (!bitmap) + return format (s, "none"); + + foreach_dpdk_tx_offload_caps return s; +} + +#undef _line_len +#undef _ + +u8 * +format_dpdk_device (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + int verbose = va_arg (*args, int); + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, dev_instance); + uword indent = format_get_indent (s); + f64 now = vlib_time_now (dm->vlib_main); + struct rte_eth_dev_info di; + + dpdk_update_counters (xd, now); + dpdk_update_link_state (xd, now); + + s = format (s, "%U\n%Ucarrier %U", + format_dpdk_device_type, xd->device_index, + format_white_space, indent + 2, format_dpdk_link_status, xd); + + rte_eth_dev_info_get (xd->device_index, &di); + + if (verbose > 1 && xd->flags & DPDK_DEVICE_FLAG_PMD) + { + struct rte_pci_device *pci; + struct rte_eth_rss_conf rss_conf; + int vlan_off; + int retval; + + rss_conf.rss_key = 0; + retval = rte_eth_dev_rss_hash_conf_get (xd->device_index, &rss_conf); + if (retval < 0) + clib_warning ("rte_eth_dev_rss_hash_conf_get returned %d", retval); + pci = di.pci_dev; + + if (pci) + s = + format (s, + "%Upci id: device %04x:%04x subsystem %04x:%04x\n" + "%Upci address: %04x:%02x:%02x.%02x\n", + format_white_space, indent + 2, pci->id.vendor_id, + pci->id.device_id, pci->id.subsystem_vendor_id, + pci->id.subsystem_device_id, format_white_space, indent + 2, + pci->addr.domain, pci->addr.bus, pci->addr.devid, + pci->addr.function); + s = + format (s, "%Umax rx packet len: %d\n", format_white_space, + indent + 2, di.max_rx_pktlen); + s = + format (s, "%Umax num of queues: rx %d tx %d\n", format_white_space, + indent + 2, di.max_rx_queues, di.max_tx_queues); + s = + format (s, "%Upromiscuous: unicast %s all-multicast %s\n", + format_white_space, indent + 2, + rte_eth_promiscuous_get (xd->device_index) ? "on" : "off", + rte_eth_promiscuous_get (xd->device_index) ? "on" : "off"); + vlan_off = rte_eth_dev_get_vlan_offload (xd->device_index); + s = format (s, "%Uvlan offload: strip %s filter %s qinq %s\n", + format_white_space, indent + 2, + vlan_off & ETH_VLAN_STRIP_OFFLOAD ? "on" : "off", + vlan_off & ETH_VLAN_FILTER_OFFLOAD ? "on" : "off", + vlan_off & ETH_VLAN_EXTEND_OFFLOAD ? 
"on" : "off"); + s = format (s, "%Urx offload caps: %U\n", + format_white_space, indent + 2, + format_dpdk_rx_offload_caps, di.rx_offload_capa); + s = format (s, "%Utx offload caps: %U\n", + format_white_space, indent + 2, + format_dpdk_tx_offload_caps, di.tx_offload_capa); + s = format (s, "%Urss active: %U\n" + "%Urss supported: %U\n", + format_white_space, indent + 2, + format_dpdk_rss_hf_name, rss_conf.rss_hf, + format_white_space, indent + 2, + format_dpdk_rss_hf_name, di.flow_type_rss_offloads); + } + + s = format (s, "%Urx queues %d, rx desc %d, tx queues %d, tx desc %d\n", + format_white_space, indent + 2, + xd->rx_q_used, xd->nb_rx_desc, xd->tx_q_used, xd->nb_tx_desc); + + if (xd->cpu_socket > -1) + s = format (s, "%Ucpu socket %d\n", + format_white_space, indent + 2, xd->cpu_socket); + + /* $$$ MIB counters */ + { +#define _(N, V) \ + if ((xd->stats.V - xd->last_cleared_stats.V) != 0) { \ + s = format (s, "\n%U%-40U%16Ld", \ + format_white_space, indent + 2, \ + format_c_identifier, #N, \ + xd->stats.V - xd->last_cleared_stats.V); \ + } \ + + foreach_dpdk_counter +#undef _ + } + + u8 *xs = 0; + u32 i = 0; + struct rte_eth_xstat *xstat, *last_xstat; + struct rte_eth_xstat_name *xstat_names = 0; + int len = rte_eth_xstats_get_names (xd->device_index, NULL, 0); + vec_validate (xstat_names, len - 1); + rte_eth_xstats_get_names (xd->device_index, xstat_names, len); + + ASSERT (vec_len (xd->xstats) == vec_len (xd->last_cleared_xstats)); + + /* *INDENT-OFF* */ + vec_foreach_index(i, xd->xstats) + { + u64 delta = 0; + xstat = vec_elt_at_index(xd->xstats, i); + last_xstat = vec_elt_at_index(xd->last_cleared_xstats, i); + + delta = xstat->value - last_xstat->value; + if (verbose == 2 || (verbose && delta)) + { + /* format_c_identifier doesn't like c strings inside vector */ + u8 * name = format(0,"%s", xstat_names[i].name); + xs = format(xs, "\n%U%-38U%16Ld", + format_white_space, indent + 4, + format_c_identifier, name, delta); + vec_free(name); + } + } + /* *INDENT-ON* */ + + vec_free (xstat_names); + + if (xs) + { + s = format (s, "\n%Uextended stats:%v", + format_white_space, indent + 2, xs); + vec_free (xs); + } + + return s; +} + +u8 * +format_dpdk_tx_dma_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + CLIB_UNUSED (vnet_main_t * vnm) = vnet_get_main (); + dpdk_tx_dma_trace_t *t = va_arg (*va, dpdk_tx_dma_trace_t *); + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, t->device_index); + uword indent = format_get_indent (s); + vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, xd->vlib_sw_if_index); + + s = format (s, "%U tx queue %d", + format_vnet_sw_interface_name, vnm, sw, t->queue_index); + + s = format (s, "\n%Ubuffer 0x%x: %U", + format_white_space, indent, + t->buffer_index, format_vlib_buffer, &t->buffer); + + s = format (s, "\n%U%U", format_white_space, indent, + format_ethernet_header_with_length, t->buffer.pre_data, + sizeof (t->buffer.pre_data)); + + return s; +} + +u8 * +format_dpdk_rx_dma_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + CLIB_UNUSED (vnet_main_t * vnm) = vnet_get_main (); + dpdk_rx_dma_trace_t *t = va_arg (*va, dpdk_rx_dma_trace_t *); + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, t->device_index); + format_function_t *f; + uword indent = 
format_get_indent (s); + vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, xd->vlib_sw_if_index); + + s = format (s, "%U rx queue %d", + format_vnet_sw_interface_name, vnm, sw, t->queue_index); + + s = format (s, "\n%Ubuffer 0x%x: %U", + format_white_space, indent, + t->buffer_index, format_vlib_buffer, &t->buffer); + + s = format (s, "\n%U%U", + format_white_space, indent, + format_dpdk_rte_mbuf, &t->mb, &t->data); + + if (vm->trace_main.verbose) + { + s = format (s, "\n%UPacket Dump%s", format_white_space, indent + 2, + t->mb.data_len > sizeof (t->data) ? " (truncated)" : ""); + s = format (s, "\n%U%U", format_white_space, indent + 4, + format_hexdump, &t->data, + t->mb.data_len > + sizeof (t->data) ? sizeof (t->data) : t->mb.data_len); + } + f = node->format_buffer; + if (!f) + f = format_hex_bytes; + s = format (s, "\n%U%U", format_white_space, indent, + f, t->buffer.pre_data, sizeof (t->buffer.pre_data)); + + return s; +} + + +static inline u8 * +format_dpdk_pkt_types (u8 * s, va_list * va) +{ + u32 *pkt_types = va_arg (*va, u32 *); + uword indent __attribute__ ((unused)) = format_get_indent (s) + 2; + + if (!*pkt_types) + return s; + + s = format (s, "Packet Types"); + +#define _(L, F, S) \ + if ((*pkt_types & RTE_PTYPE_##L##_MASK) == RTE_PTYPE_##L##_##F) \ + { \ + s = format (s, "\n%U%s (0x%04x) %s", format_white_space, indent, \ + "RTE_PTYPE_" #L "_" #F, RTE_PTYPE_##L##_##F, S); \ + } + + foreach_dpdk_pkt_type +#undef _ + return s; +} + +static inline u8 * +format_dpdk_pkt_offload_flags (u8 * s, va_list * va) +{ + u64 *ol_flags = va_arg (*va, u64 *); + uword indent = format_get_indent (s) + 2; + + if (!*ol_flags) + return s; + + s = format (s, "Packet Offload Flags"); + +#define _(F, S) \ + if (*ol_flags & F) \ + { \ + s = format (s, "\n%U%s (0x%04x) %s", \ + format_white_space, indent, #F, F, S); \ + } + + foreach_dpdk_pkt_offload_flag +#undef _ + return s; +} + +u8 * +format_dpdk_rte_mbuf_vlan (u8 * s, va_list * va) +{ + ethernet_vlan_header_tv_t *vlan_hdr = + va_arg (*va, ethernet_vlan_header_tv_t *); + + if (clib_net_to_host_u16 (vlan_hdr->type) == ETHERNET_TYPE_DOT1AD) + { + s = format (s, "%U 802.1q vlan ", + format_ethernet_vlan_tci, + clib_net_to_host_u16 (vlan_hdr->priority_cfi_and_id)); + vlan_hdr++; + } + + s = format (s, "%U", + format_ethernet_vlan_tci, + clib_net_to_host_u16 (vlan_hdr->priority_cfi_and_id)); + + return s; +} + +u8 * +format_dpdk_rte_mbuf (u8 * s, va_list * va) +{ + struct rte_mbuf *mb = va_arg (*va, struct rte_mbuf *); + ethernet_header_t *eth_hdr = va_arg (*va, ethernet_header_t *); + uword indent = format_get_indent (s) + 2; + + s = format (s, "PKT MBUF: port %d, nb_segs %d, pkt_len %d" + "\n%Ubuf_len %d, data_len %d, ol_flags 0x%x, data_off %d, phys_addr 0x%x" + "\n%Upacket_type 0x%x", + mb->port, mb->nb_segs, mb->pkt_len, + format_white_space, indent, + mb->buf_len, mb->data_len, mb->ol_flags, mb->data_off, + mb->buf_physaddr, format_white_space, indent, mb->packet_type); + + if (mb->ol_flags) + s = format (s, "\n%U%U", format_white_space, indent, + format_dpdk_pkt_offload_flags, &mb->ol_flags); + + if ((mb->ol_flags & PKT_RX_VLAN_PKT) && + ((mb->ol_flags & (PKT_RX_VLAN_STRIPPED | PKT_RX_QINQ_STRIPPED)) == 0)) + { + ethernet_vlan_header_tv_t *vlan_hdr = + ((ethernet_vlan_header_tv_t *) & (eth_hdr->type)); + s = format (s, " %U", format_dpdk_rte_mbuf_vlan, vlan_hdr); + } + + if (mb->packet_type) + s = format (s, "\n%U%U", format_white_space, indent, + format_dpdk_pkt_types, &mb->packet_type); + + return s; +} + +/* FIXME is this function used? 
*/ +#if 0 +uword +unformat_socket_mem (unformat_input_t * input, va_list * va) +{ + uword **r = va_arg (*va, uword **); + int i = 0; + u32 mem; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, ",")) + hash_set (*r, i, 1024); + else if (unformat (input, "%u,", &mem)) + hash_set (*r, i, mem); + else if (unformat (input, "%u", &mem)) + hash_set (*r, i, mem); + else + { + unformat_put_input (input); + goto done; + } + i++; + } + +done: + return 1; +} +#endif + +clib_error_t * +unformat_rss_fn (unformat_input_t * input, uword * rss_fn) +{ + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (0) + ; +#undef _ +#define _(f, s) \ + else if (unformat (input, s)) \ + *rss_fn |= f; + + foreach_dpdk_rss_hf +#undef _ + else + { + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + } + return 0; +} + +clib_error_t * +unformat_hqos (unformat_input_t * input, dpdk_device_config_hqos_t * hqos) +{ + clib_error_t *error = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "hqos-thread %u", &hqos->hqos_thread)) + hqos->hqos_thread_valid = 1; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + break; + } + } + + return error; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/device/node.c b/src/plugins/dpdk/device/node.c new file mode 100644 index 00000000..8824d789 --- /dev/null +++ b/src/plugins/dpdk/device/node.c @@ -0,0 +1,674 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +static char *dpdk_error_strings[] = { +#define _(n,s) s, + foreach_dpdk_error +#undef _ +}; + +always_inline int +vlib_buffer_is_ip4 (vlib_buffer_t * b) +{ + ethernet_header_t *h = (ethernet_header_t *) b->data; + return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP4)); +} + +always_inline int +vlib_buffer_is_ip6 (vlib_buffer_t * b) +{ + ethernet_header_t *h = (ethernet_header_t *) b->data; + return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP6)); +} + +always_inline int +vlib_buffer_is_mpls (vlib_buffer_t * b) +{ + ethernet_header_t *h = (ethernet_header_t *) b->data; + return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS_UNICAST)); +} + +always_inline u32 +dpdk_rx_next_from_etype (struct rte_mbuf * mb, vlib_buffer_t * b0) +{ + if (PREDICT_TRUE (vlib_buffer_is_ip4 (b0))) + if (PREDICT_TRUE ((mb->ol_flags & PKT_RX_IP_CKSUM_GOOD) != 0)) + return VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT; + else + return VNET_DEVICE_INPUT_NEXT_IP4_INPUT; + else if (PREDICT_TRUE (vlib_buffer_is_ip6 (b0))) + return VNET_DEVICE_INPUT_NEXT_IP6_INPUT; + else if (PREDICT_TRUE (vlib_buffer_is_mpls (b0))) + return VNET_DEVICE_INPUT_NEXT_MPLS_INPUT; + else + return VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; +} + +always_inline int +dpdk_mbuf_is_vlan (struct rte_mbuf *mb) +{ + return (mb->packet_type & RTE_PTYPE_L2_ETHER_VLAN) == + RTE_PTYPE_L2_ETHER_VLAN; +} + +always_inline int +dpdk_mbuf_is_ip4 (struct rte_mbuf *mb) +{ + return RTE_ETH_IS_IPV4_HDR (mb->packet_type) != 0; +} + +always_inline int +dpdk_mbuf_is_ip6 (struct rte_mbuf *mb) +{ + return RTE_ETH_IS_IPV6_HDR (mb->packet_type) != 0; +} + +always_inline u32 +dpdk_rx_next_from_mb (struct rte_mbuf * mb, vlib_buffer_t * b0) +{ + if (PREDICT_FALSE (dpdk_mbuf_is_vlan (mb))) + return VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; + else if (PREDICT_TRUE (dpdk_mbuf_is_ip4 (mb))) + return VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT; + else if (PREDICT_TRUE (dpdk_mbuf_is_ip6 (mb))) + return VNET_DEVICE_INPUT_NEXT_IP6_INPUT; + else if (PREDICT_TRUE (vlib_buffer_is_mpls (b0))) + return VNET_DEVICE_INPUT_NEXT_MPLS_INPUT; + else + return dpdk_rx_next_from_etype (mb, b0); +} + +always_inline void +dpdk_rx_error_from_mb (struct rte_mbuf *mb, u32 * next, u8 * error) +{ + if (mb->ol_flags & PKT_RX_IP_CKSUM_BAD) + { + *error = DPDK_ERROR_IP_CHECKSUM_ERROR; + *next = VNET_DEVICE_INPUT_NEXT_DROP; + } + else + *error = DPDK_ERROR_NONE; +} + +void +dpdk_rx_trace (dpdk_main_t * dm, + vlib_node_runtime_t * node, + dpdk_device_t * xd, + u16 queue_id, u32 * buffers, uword n_buffers) +{ + vlib_main_t *vm = vlib_get_main (); + u32 *b, n_left; + u32 next0; + + n_left = n_buffers; + b = buffers; + + while (n_left >= 1) + { + u32 bi0; + vlib_buffer_t *b0; + dpdk_rx_dma_trace_t *t0; + struct rte_mbuf *mb; + u8 error0; + + bi0 = b[0]; + n_left -= 1; + + b0 = vlib_get_buffer (vm, bi0); + mb = rte_mbuf_from_vlib_buffer (b0); + + if (PREDICT_FALSE (xd->per_interface_next_index != ~0)) + next0 = xd->per_interface_next_index; + else if (PREDICT_TRUE + ((xd->flags & DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE) != 0)) + next0 = dpdk_rx_next_from_mb (mb, b0); + else + next0 = dpdk_rx_next_from_etype (mb, b0); + + dpdk_rx_error_from_mb (mb, &next0, &error0); + + vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0); + t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); + t0->queue_index = queue_id; + t0->device_index = xd->device_index; + t0->buffer_index = bi0; + + 
clib_memcpy (&t0->mb, mb, sizeof (t0->mb)); + clib_memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data)); + clib_memcpy (t0->buffer.pre_data, b0->data, + sizeof (t0->buffer.pre_data)); + clib_memcpy (&t0->data, mb->buf_addr + mb->data_off, sizeof (t0->data)); + + b += 1; + } +} + +static inline u32 +dpdk_rx_burst (dpdk_main_t * dm, dpdk_device_t * xd, u16 queue_id) +{ + u32 n_buffers; + u32 n_left; + u32 n_this_chunk; + + n_left = VLIB_FRAME_SIZE; + n_buffers = 0; + + if (PREDICT_TRUE (xd->flags & DPDK_DEVICE_FLAG_PMD)) + { + while (n_left) + { + n_this_chunk = rte_eth_rx_burst (xd->device_index, queue_id, + xd->rx_vectors[queue_id] + + n_buffers, n_left); + n_buffers += n_this_chunk; + n_left -= n_this_chunk; + + /* Empirically, DPDK r1.8 produces vectors w/ 32 or fewer elts */ + if (n_this_chunk < 32) + break; + } + } + else + { + ASSERT (0); + } + + return n_buffers; +} + + +static_always_inline void +dpdk_process_subseq_segs (vlib_main_t * vm, vlib_buffer_t * b, + struct rte_mbuf *mb, vlib_buffer_free_list_t * fl) +{ + u8 nb_seg = 1; + struct rte_mbuf *mb_seg = 0; + vlib_buffer_t *b_seg, *b_chain = 0; + mb_seg = mb->next; + b_chain = b; + + while ((mb->nb_segs > 1) && (nb_seg < mb->nb_segs)) + { + ASSERT (mb_seg != 0); + + b_seg = vlib_buffer_from_rte_mbuf (mb_seg); + vlib_buffer_init_for_free_list (b_seg, fl); + + ASSERT ((b_seg->flags & VLIB_BUFFER_NEXT_PRESENT) == 0); + ASSERT (b_seg->current_data == 0); + + /* + * The driver (e.g. virtio) may not put the packet data at the start + * of the segment, so don't assume b_seg->current_data == 0 is correct. + */ + b_seg->current_data = + (mb_seg->buf_addr + mb_seg->data_off) - (void *) b_seg->data; + + b_seg->current_length = mb_seg->data_len; + b->total_length_not_including_first_buffer += mb_seg->data_len; + + b_chain->flags |= VLIB_BUFFER_NEXT_PRESENT; + b_chain->next_buffer = vlib_get_buffer_index (vm, b_seg); + + b_chain = b_seg; + mb_seg = mb_seg->next; + nb_seg++; + } +} + +static_always_inline void +dpdk_prefetch_buffer (struct rte_mbuf *mb) +{ + vlib_buffer_t *b = vlib_buffer_from_rte_mbuf (mb); + CLIB_PREFETCH (mb, CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, STORE); +} + +/* + * This function is used when there are no worker threads. + * The main thread performs IO and forwards the packets. 
+ */ +static_always_inline u32 +dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, + vlib_node_runtime_t * node, u32 cpu_index, u16 queue_id) +{ + u32 n_buffers; + u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; + u32 n_left_to_next, *to_next; + u32 mb_index; + vlib_main_t *vm = vlib_get_main (); + uword n_rx_bytes = 0; + u32 n_trace, trace_cnt __attribute__ ((unused)); + vlib_buffer_free_list_t *fl; + u32 buffer_flags_template; + + if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) == 0) + return 0; + + n_buffers = dpdk_rx_burst (dm, xd, queue_id); + + if (n_buffers == 0) + { + return 0; + } + + buffer_flags_template = dm->buffer_flags_template; + + vec_reset_length (xd->d_trace_buffers[cpu_index]); + trace_cnt = n_trace = vlib_get_trace_count (vm, node); + + if (n_trace > 0) + { + u32 n = clib_min (n_trace, n_buffers); + mb_index = 0; + + while (n--) + { + struct rte_mbuf *mb = xd->rx_vectors[queue_id][mb_index++]; + vlib_buffer_t *b = vlib_buffer_from_rte_mbuf (mb); + vec_add1 (xd->d_trace_buffers[cpu_index], + vlib_get_buffer_index (vm, b)); + } + } + + fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); + + mb_index = 0; + + while (n_buffers > 0) + { + vlib_buffer_t *b0, *b1, *b2, *b3; + u32 bi0, next0, l3_offset0; + u32 bi1, next1, l3_offset1; + u32 bi2, next2, l3_offset2; + u32 bi3, next3, l3_offset3; + u8 error0, error1, error2, error3; + u64 or_ol_flags; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_buffers > 8 && n_left_to_next > 4) + { + struct rte_mbuf *mb0 = xd->rx_vectors[queue_id][mb_index]; + struct rte_mbuf *mb1 = xd->rx_vectors[queue_id][mb_index + 1]; + struct rte_mbuf *mb2 = xd->rx_vectors[queue_id][mb_index + 2]; + struct rte_mbuf *mb3 = xd->rx_vectors[queue_id][mb_index + 3]; + + dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 4]); + dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 5]); + dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 6]); + dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 7]); + + if (xd->flags & DPDK_DEVICE_FLAG_MAYBE_MULTISEG) + { + if (PREDICT_FALSE (mb0->nb_segs > 1)) + dpdk_prefetch_buffer (mb0->next); + if (PREDICT_FALSE (mb1->nb_segs > 1)) + dpdk_prefetch_buffer (mb1->next); + if (PREDICT_FALSE (mb2->nb_segs > 1)) + dpdk_prefetch_buffer (mb2->next); + if (PREDICT_FALSE (mb3->nb_segs > 1)) + dpdk_prefetch_buffer (mb3->next); + } + + ASSERT (mb0); + ASSERT (mb1); + ASSERT (mb2); + ASSERT (mb3); + + or_ol_flags = (mb0->ol_flags | mb1->ol_flags | + mb2->ol_flags | mb3->ol_flags); + b0 = vlib_buffer_from_rte_mbuf (mb0); + b1 = vlib_buffer_from_rte_mbuf (mb1); + b2 = vlib_buffer_from_rte_mbuf (mb2); + b3 = vlib_buffer_from_rte_mbuf (mb3); + + vlib_buffer_init_for_free_list (b0, fl); + vlib_buffer_init_for_free_list (b1, fl); + vlib_buffer_init_for_free_list (b2, fl); + vlib_buffer_init_for_free_list (b3, fl); + + bi0 = vlib_get_buffer_index (vm, b0); + bi1 = vlib_get_buffer_index (vm, b1); + bi2 = vlib_get_buffer_index (vm, b2); + bi3 = vlib_get_buffer_index (vm, b3); + + to_next[0] = bi0; + to_next[1] = bi1; + to_next[2] = bi2; + to_next[3] = bi3; + to_next += 4; + n_left_to_next -= 4; + + if (PREDICT_FALSE (xd->per_interface_next_index != ~0)) + { + next0 = next1 = next2 = next3 = xd->per_interface_next_index; + } + else if (PREDICT_TRUE + ((xd->flags & DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE) != 0)) + { + next0 = dpdk_rx_next_from_mb (mb0, b0); + next1 = dpdk_rx_next_from_mb (mb1, b1); + next2 = dpdk_rx_next_from_mb (mb2, b2); + next3 = 
dpdk_rx_next_from_mb (mb3, b3); + } + else + { + next0 = dpdk_rx_next_from_etype (mb0, b0); + next1 = dpdk_rx_next_from_etype (mb1, b1); + next2 = dpdk_rx_next_from_etype (mb2, b2); + next3 = dpdk_rx_next_from_etype (mb3, b3); + } + + if (PREDICT_FALSE (or_ol_flags & PKT_RX_IP_CKSUM_BAD)) + { + dpdk_rx_error_from_mb (mb0, &next0, &error0); + dpdk_rx_error_from_mb (mb1, &next1, &error1); + dpdk_rx_error_from_mb (mb2, &next2, &error2); + dpdk_rx_error_from_mb (mb3, &next3, &error3); + b0->error = node->errors[error0]; + b1->error = node->errors[error1]; + b2->error = node->errors[error2]; + b3->error = node->errors[error3]; + } + else + { + b0->error = b1->error = node->errors[DPDK_ERROR_NONE]; + b2->error = b3->error = node->errors[DPDK_ERROR_NONE]; + } + + l3_offset0 = device_input_next_node_advance[next0]; + l3_offset1 = device_input_next_node_advance[next1]; + l3_offset2 = device_input_next_node_advance[next2]; + l3_offset3 = device_input_next_node_advance[next3]; + + b0->current_data = l3_offset0 + mb0->data_off; + b1->current_data = l3_offset1 + mb1->data_off; + b2->current_data = l3_offset2 + mb2->data_off; + b3->current_data = l3_offset3 + mb3->data_off; + + b0->current_data -= RTE_PKTMBUF_HEADROOM; + b1->current_data -= RTE_PKTMBUF_HEADROOM; + b2->current_data -= RTE_PKTMBUF_HEADROOM; + b3->current_data -= RTE_PKTMBUF_HEADROOM; + + b0->current_length = mb0->data_len - l3_offset0; + b1->current_length = mb1->data_len - l3_offset1; + b2->current_length = mb2->data_len - l3_offset2; + b3->current_length = mb3->data_len - l3_offset3; + + b0->flags = buffer_flags_template; + b1->flags = buffer_flags_template; + b2->flags = buffer_flags_template; + b3->flags = buffer_flags_template; + + vnet_buffer (b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; + vnet_buffer (b1)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; + vnet_buffer (b2)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; + vnet_buffer (b3)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; + + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0; + vnet_buffer (b2)->sw_if_index[VLIB_TX] = (u32) ~ 0; + vnet_buffer (b3)->sw_if_index[VLIB_TX] = (u32) ~ 0; + + n_rx_bytes += mb0->pkt_len; + n_rx_bytes += mb1->pkt_len; + n_rx_bytes += mb2->pkt_len; + n_rx_bytes += mb3->pkt_len; + + /* Process subsequent segments of multi-segment packets */ + if (xd->flags & DPDK_DEVICE_FLAG_MAYBE_MULTISEG) + { + dpdk_process_subseq_segs (vm, b0, mb0, fl); + dpdk_process_subseq_segs (vm, b1, mb1, fl); + dpdk_process_subseq_segs (vm, b2, mb2, fl); + dpdk_process_subseq_segs (vm, b3, mb3, fl); + } + + /* + * Turn this on if you run into + * "bad monkey" contexts, and you want to know exactly + * which nodes they've visited... See main.c... + */ + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b1); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b2); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b3); + + /* Do we have any driver RX features configured on the interface? 
*/ + vnet_feature_start_device_input_x4 (xd->vlib_sw_if_index, + &next0, &next1, &next2, &next3, + b0, b1, b2, b3, + l3_offset0, l3_offset1, + l3_offset2, l3_offset3); + + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, bi2, bi3, + next0, next1, next2, next3); + n_buffers -= 4; + mb_index += 4; + } + while (n_buffers > 0 && n_left_to_next > 0) + { + struct rte_mbuf *mb0 = xd->rx_vectors[queue_id][mb_index]; + + ASSERT (mb0); + + b0 = vlib_buffer_from_rte_mbuf (mb0); + + /* Prefetch one next segment if it exists. */ + if (PREDICT_FALSE (mb0->nb_segs > 1)) + dpdk_prefetch_buffer (mb0->next); + + vlib_buffer_init_for_free_list (b0, fl); + + bi0 = vlib_get_buffer_index (vm, b0); + + to_next[0] = bi0; + to_next++; + n_left_to_next--; + + if (PREDICT_FALSE (xd->per_interface_next_index != ~0)) + next0 = xd->per_interface_next_index; + else if (PREDICT_TRUE + ((xd->flags & DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE) != 0)) + next0 = dpdk_rx_next_from_mb (mb0, b0); + else + next0 = dpdk_rx_next_from_etype (mb0, b0); + + dpdk_rx_error_from_mb (mb0, &next0, &error0); + b0->error = node->errors[error0]; + + l3_offset0 = device_input_next_node_advance[next0]; + + b0->current_data = l3_offset0; + b0->current_data += mb0->data_off - RTE_PKTMBUF_HEADROOM; + b0->current_length = mb0->data_len - l3_offset0; + + b0->flags = buffer_flags_template; + + vnet_buffer (b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + n_rx_bytes += mb0->pkt_len; + + /* Process subsequent segments of multi-segment packets */ + dpdk_process_subseq_segs (vm, b0, mb0, fl); + + /* + * Turn this on if you run into + * "bad monkey" contexts, and you want to know exactly + * which nodes they've visited... See main.c... + */ + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); + + /* Do we have any driver RX features configured on the interface? */ + vnet_feature_start_device_input_x1 (xd->vlib_sw_if_index, &next0, + b0, l3_offset0); + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + n_buffers--; + mb_index++; + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + if (PREDICT_FALSE (vec_len (xd->d_trace_buffers[cpu_index]) > 0)) + { + dpdk_rx_trace (dm, node, xd, queue_id, xd->d_trace_buffers[cpu_index], + vec_len (xd->d_trace_buffers[cpu_index])); + vlib_set_trace_count (vm, node, n_trace - + vec_len (xd->d_trace_buffers[cpu_index])); + } + + vlib_increment_combined_counter + (vnet_get_main ()->interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + cpu_index, xd->vlib_sw_if_index, mb_index, n_rx_bytes); + + vnet_device_increment_rx_packets (cpu_index, mb_index); + + return mb_index; +} + +static inline void +poll_rate_limit (dpdk_main_t * dm) +{ + /* Limit the poll rate by sleeping for N msec between polls */ + if (PREDICT_FALSE (dm->poll_sleep != 0)) + { + struct timespec ts, tsrem; + + ts.tv_sec = 0; + ts.tv_nsec = 1000 * 1000 * dm->poll_sleep; /* 1ms */ + + while (nanosleep (&ts, &tsrem) < 0) + { + ts = tsrem; + } + } +} + +/** \brief Main DPDK input node + @node dpdk-input + + This is the main DPDK input node: across each assigned interface, + call rte_eth_rx_burst(...) or similar to obtain a vector of + packets to process. Handle early packet discard. Derive @c + vlib_buffer_t metadata from struct rte_mbuf metadata, + Depending on the resulting metadata: adjust b->current_data, + b->current_length and dispatch directly to + ip4-input-no-checksum, or ip6-input. 
Trace the packet if required. + + @param vm vlib_main_t corresponding to the current thread + @param node vlib_node_runtime_t + @param f vlib_frame_t input-node, not used. + + @par Graph mechanics: buffer metadata, next index usage + + @em Uses: + - struct rte_mbuf mb->ol_flags + - PKT_RX_IP_CKSUM_BAD + - RTE_ETH_IS_xxx_HDR(mb->packet_type) + - packet classification result + + @em Sets: + - b->error if the packet is to be dropped immediately + - b->current_data, b->current_length + - adjusted as needed to skip the L2 header in direct-dispatch cases + - vnet_buffer(b)->sw_if_index[VLIB_RX] + - rx interface sw_if_index + - vnet_buffer(b)->sw_if_index[VLIB_TX] = ~0 + - required by ipX-lookup + - b->flags + - to indicate multi-segment pkts (VLIB_BUFFER_NEXT_PRESENT), etc. + + Next Nodes: + - Static arcs to: error-drop, ethernet-input, + ip4-input-no-checksum, ip6-input, mpls-input + - per-interface redirection, controlled by + xd->per_interface_next_index +*/ + +static uword +dpdk_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f) +{ + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd; + uword n_rx_packets = 0; + dpdk_device_and_queue_t *dq; + u32 cpu_index = os_get_cpu_number (); + + /* + * Poll all devices on this cpu for input/interrupts. + */ + /* *INDENT-OFF* */ + vec_foreach (dq, dm->devices_by_cpu[cpu_index]) + { + xd = vec_elt_at_index(dm->devices, dq->device); + n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id); + } + /* *INDENT-ON* */ + + poll_rate_limit (dm); + + return n_rx_packets; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (dpdk_input_node) = { + .function = dpdk_input, + .type = VLIB_NODE_TYPE_INPUT, + .name = "dpdk-input", + .sibling_of = "device-input", + + /* Will be enabled if/when hardware is detected. */ + .state = VLIB_NODE_STATE_DISABLED, + + .format_buffer = format_ethernet_header_with_length, + .format_trace = format_dpdk_rx_dma_trace, + + .n_errors = DPDK_N_ERROR, + .error_strings = dpdk_error_strings, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (dpdk_input_node, dpdk_input); +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/dir.dox b/src/plugins/dpdk/dir.dox new file mode 100644 index 00000000..43e36753 --- /dev/null +++ b/src/plugins/dpdk/dir.dox @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Doxygen directory documentation */ + +/** +@dir +@brief DPDK Abstraction Layer. + +This directory contains the source code for the DPDK abstraction layer. + +*/ +/*? %%clicmd:group_label DPDK and pcap tx %% ?*/ +/*? %%syscfg:group_label DPDK and pcap tx %% ?*/ diff --git a/src/plugins/dpdk/hqos/hqos.c b/src/plugins/dpdk/hqos/hqos.c new file mode 100644 index 00000000..a288fca7 --- /dev/null +++ b/src/plugins/dpdk/hqos/hqos.c @@ -0,0 +1,775 @@ +/* + * Copyright(c) 2016 Intel Corporation. All rights reserved. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include /* enumerate all vlib messages */ + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include +#undef vl_printfun + +#include + +dpdk_main_t dpdk_main; + +/*** + * + * HQoS default configuration values + * + ***/ + +static dpdk_device_config_hqos_t hqos_params_default = { + .hqos_thread_valid = 0, + + .swq_size = 4096, + .burst_enq = 256, + .burst_deq = 220, + + /* + * Packet field to identify the subport. + * + * Default value: Since only one subport is defined by default (see below: + * n_subports_per_port = 1), the subport ID is hardcoded to 0. + */ + .pktfield0_slabpos = 0, + .pktfield0_slabmask = 0, + + /* + * Packet field to identify the pipe. + * + * Default value: Assuming Ethernet/IPv4/UDP packets, UDP payload bits 12 .. 23 + */ + .pktfield1_slabpos = 40, + .pktfield1_slabmask = 0x0000000FFF000000LLU, + + /* Packet field used as index into TC translation table to identify the traffic + * class and queue. 
+ * + * Default value: Assuming Ethernet/IPv4 packets, IPv4 DSCP field + */ + .pktfield2_slabpos = 8, + .pktfield2_slabmask = 0x00000000000000FCLLU, + .tc_table = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + }, + + /* port */ + .port = { + .name = NULL, /* Set at init */ + .socket = 0, /* Set at init */ + .rate = 1250000000, /* Assuming 10GbE port */ + .mtu = 14 + 1500, /* Assuming Ethernet/IPv4 pkt (Ethernet FCS not included) */ + .frame_overhead = RTE_SCHED_FRAME_OVERHEAD_DEFAULT, + .n_subports_per_port = 1, + .n_pipes_per_subport = 4096, + .qsize = {64, 64, 64, 64}, + .pipe_profiles = NULL, /* Set at config */ + .n_pipe_profiles = 1, + +#ifdef RTE_SCHED_RED + .red_params = { + /* Traffic Class 0 Colors Green / Yellow / Red */ + [0][0] = {.min_th = 48,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + [0][1] = {.min_th = 40,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + [0][2] = {.min_th = 32,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + + /* Traffic Class 1 - Colors Green / Yellow / Red */ + [1][0] = {.min_th = 48,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + [1][1] = {.min_th = 40,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + [1][2] = {.min_th = 32,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + + /* Traffic Class 2 - Colors Green / Yellow / Red */ + [2][0] = {.min_th = 48,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + [2][1] = {.min_th = 40,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + [2][2] = {.min_th = 32,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + + /* Traffic Class 3 - Colors Green / Yellow / Red */ + [3][0] = {.min_th = 48,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + [3][1] = {.min_th = 40,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9}, + [3][2] = {.min_th = 32,.max_th = 64,.maxp_inv = + 10,.wq_log2 = 9} + }, +#endif /* RTE_SCHED_RED */ + }, +}; + +static struct rte_sched_subport_params hqos_subport_params_default = { + .tb_rate = 1250000000, /* 10GbE line rate (measured in bytes/second) */ + .tb_size = 1000000, + .tc_rate = {1250000000, 1250000000, 1250000000, 1250000000}, + .tc_period = 10, +}; + +static struct rte_sched_pipe_params hqos_pipe_params_default = { + .tb_rate = 305175, /* 10GbE line rate divided by 4K pipes */ + .tb_size = 1000000, + .tc_rate = {305175, 305175, 305175, 305175}, + .tc_period = 40, +#ifdef RTE_SCHED_SUBPORT_TC_OV + .tc_ov_weight = 1, +#endif + .wrr_weights = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, +}; + +/*** + * + * HQoS configuration + * + ***/ + +int +dpdk_hqos_validate_mask (u64 mask, u32 n) +{ + int count = __builtin_popcountll (mask); + int pos_lead = sizeof (u64) * 8 - __builtin_clzll (mask); + int pos_trail = __builtin_ctzll (mask); + int count_expected = __builtin_popcount (n - 1); + + /* Handle the exceptions */ + if (n == 0) + return -1; /* Error */ + + if ((mask == 0) && (n == 1)) + return 0; /* OK */ + + if (((mask == 0) && (n != 1)) || ((mask != 0) && (n == 1))) + return -2; /* Error */ + + /* Check that mask is contiguous */ + if ((pos_lead - pos_trail) != count) + return -3; /* Error */ + + /* Check that mask contains the expected number of bits set */ + if (count != count_expected) + return -4; /* Error */ + + return 0; /* OK */ +} + +void +dpdk_device_config_hqos_pipe_profile_default (dpdk_device_config_hqos_t * + hqos, u32 pipe_profile_id) +{ + memcpy (&hqos->pipe[pipe_profile_id], &hqos_pipe_params_default, + sizeof 
(hqos_pipe_params_default)); +} + +void +dpdk_device_config_hqos_default (dpdk_device_config_hqos_t * hqos) +{ + struct rte_sched_subport_params *subport_params; + struct rte_sched_pipe_params *pipe_params; + u32 *pipe_map; + u32 i; + + memcpy (hqos, &hqos_params_default, sizeof (hqos_params_default)); + + /* pipe */ + vec_add2 (hqos->pipe, pipe_params, hqos->port.n_pipe_profiles); + + for (i = 0; i < vec_len (hqos->pipe); i++) + memcpy (&pipe_params[i], + &hqos_pipe_params_default, sizeof (hqos_pipe_params_default)); + + hqos->port.pipe_profiles = hqos->pipe; + + /* subport */ + vec_add2 (hqos->subport, subport_params, hqos->port.n_subports_per_port); + + for (i = 0; i < vec_len (hqos->subport); i++) + memcpy (&subport_params[i], + &hqos_subport_params_default, + sizeof (hqos_subport_params_default)); + + /* pipe profile */ + vec_add2 (hqos->pipe_map, + pipe_map, + hqos->port.n_subports_per_port * hqos->port.n_pipes_per_subport); + + for (i = 0; i < vec_len (hqos->pipe_map); i++) + pipe_map[i] = 0; +} + +/*** + * + * HQoS init + * + ***/ + +clib_error_t * +dpdk_port_setup_hqos (dpdk_device_t * xd, dpdk_device_config_hqos_t * hqos) +{ + vlib_thread_main_t *tm = vlib_get_thread_main (); + char name[32]; + u32 subport_id, i; + int rv; + + /* Detect the set of worker threads */ + int worker_thread_first = 0; + int worker_thread_count = 0; + + uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + vlib_thread_registration_t *tr = + p ? (vlib_thread_registration_t *) p[0] : 0; + + if (tr && tr->count > 0) + { + worker_thread_first = tr->first_index; + worker_thread_count = tr->count; + } + + /* Allocate the per-thread device data array */ + vec_validate_aligned (xd->hqos_wt, tm->n_vlib_mains - 1, + CLIB_CACHE_LINE_BYTES); + memset (xd->hqos_wt, 0, tm->n_vlib_mains * sizeof (xd->hqos_wt[0])); + + vec_validate_aligned (xd->hqos_ht, 0, CLIB_CACHE_LINE_BYTES); + memset (xd->hqos_ht, 0, sizeof (xd->hqos_ht[0])); + + /* Allocate space for one SWQ per worker thread in the I/O TX thread data structure */ + vec_validate (xd->hqos_ht->swq, worker_thread_count); + + /* SWQ */ + for (i = 0; i < worker_thread_count + 1; i++) + { + u32 swq_flags = RING_F_SP_ENQ | RING_F_SC_DEQ; + + snprintf (name, sizeof (name), "SWQ-worker%u-to-device%u", i, + xd->device_index); + xd->hqos_ht->swq[i] = + rte_ring_create (name, hqos->swq_size, xd->cpu_socket, swq_flags); + if (xd->hqos_ht->swq[i] == NULL) + return clib_error_return (0, + "SWQ-worker%u-to-device%u: rte_ring_create err", + i, xd->device_index); + } + + /* + * HQoS + */ + + /* HQoS port */ + snprintf (name, sizeof (name), "HQoS%u", xd->device_index); + hqos->port.name = strdup (name); + if (hqos->port.name == NULL) + return clib_error_return (0, "HQoS%u: strdup err", xd->device_index); + + hqos->port.socket = rte_eth_dev_socket_id (xd->device_index); + if (hqos->port.socket == SOCKET_ID_ANY) + hqos->port.socket = 0; + + xd->hqos_ht->hqos = rte_sched_port_config (&hqos->port); + if (xd->hqos_ht->hqos == NULL) + return clib_error_return (0, "HQoS%u: rte_sched_port_config err", + xd->device_index); + + /* HQoS subport */ + for (subport_id = 0; subport_id < hqos->port.n_subports_per_port; + subport_id++) + { + u32 pipe_id; + + rv = + rte_sched_subport_config (xd->hqos_ht->hqos, subport_id, + &hqos->subport[subport_id]); + if (rv) + return clib_error_return (0, + "HQoS%u subport %u: rte_sched_subport_config err (%d)", + xd->device_index, subport_id, rv); + + /* HQoS pipe */ + for (pipe_id = 0; pipe_id < hqos->port.n_pipes_per_subport; pipe_id++) + 
{ + u32 pos = subport_id * hqos->port.n_pipes_per_subport + pipe_id; + u32 profile_id = hqos->pipe_map[pos]; + + rv = + rte_sched_pipe_config (xd->hqos_ht->hqos, subport_id, pipe_id, + profile_id); + if (rv) + return clib_error_return (0, + "HQoS%u subport %u pipe %u: rte_sched_pipe_config err (%d)", + xd->device_index, subport_id, pipe_id, + rv); + } + } + + /* Set up per-thread device data for the I/O TX thread */ + xd->hqos_ht->hqos_burst_enq = hqos->burst_enq; + xd->hqos_ht->hqos_burst_deq = hqos->burst_deq; + vec_validate (xd->hqos_ht->pkts_enq, 2 * hqos->burst_enq - 1); + vec_validate (xd->hqos_ht->pkts_deq, hqos->burst_deq - 1); + xd->hqos_ht->pkts_enq_len = 0; + xd->hqos_ht->swq_pos = 0; + xd->hqos_ht->flush_count = 0; + + /* Set up per-thread device data for each worker thread */ + for (i = 0; i < worker_thread_count + 1; i++) + { + u32 tid; + if (i) + tid = worker_thread_first + (i - 1); + else + tid = i; + + xd->hqos_wt[tid].swq = xd->hqos_ht->swq[i]; + xd->hqos_wt[tid].hqos_field0_slabpos = hqos->pktfield0_slabpos; + xd->hqos_wt[tid].hqos_field0_slabmask = hqos->pktfield0_slabmask; + xd->hqos_wt[tid].hqos_field0_slabshr = + __builtin_ctzll (hqos->pktfield0_slabmask); + xd->hqos_wt[tid].hqos_field1_slabpos = hqos->pktfield1_slabpos; + xd->hqos_wt[tid].hqos_field1_slabmask = hqos->pktfield1_slabmask; + xd->hqos_wt[tid].hqos_field1_slabshr = + __builtin_ctzll (hqos->pktfield1_slabmask); + xd->hqos_wt[tid].hqos_field2_slabpos = hqos->pktfield2_slabpos; + xd->hqos_wt[tid].hqos_field2_slabmask = hqos->pktfield2_slabmask; + xd->hqos_wt[tid].hqos_field2_slabshr = + __builtin_ctzll (hqos->pktfield2_slabmask); + memcpy (xd->hqos_wt[tid].hqos_tc_table, hqos->tc_table, + sizeof (hqos->tc_table)); + } + + return 0; +} + +/*** + * + * HQoS run-time + * + ***/ +/* + * dpdk_hqos_thread - Contains the main loop of an HQoS thread. 
+ * + * w + * Information for the current thread + */ +static_always_inline void +dpdk_hqos_thread_internal_hqos_dbg_bypass (vlib_main_t * vm) +{ + dpdk_main_t *dm = &dpdk_main; + u32 cpu_index = vm->cpu_index; + u32 dev_pos; + + dev_pos = 0; + while (1) + { + vlib_worker_thread_barrier_check (); + + u32 n_devs = vec_len (dm->devices_by_hqos_cpu[cpu_index]); + if (dev_pos >= n_devs) + dev_pos = 0; + + dpdk_device_and_queue_t *dq = + vec_elt_at_index (dm->devices_by_hqos_cpu[cpu_index], dev_pos); + dpdk_device_t *xd = vec_elt_at_index (dm->devices, dq->device); + + dpdk_device_hqos_per_hqos_thread_t *hqos = xd->hqos_ht; + u32 device_index = xd->device_index; + u16 queue_id = dq->queue_id; + + struct rte_mbuf **pkts_enq = hqos->pkts_enq; + u32 pkts_enq_len = hqos->pkts_enq_len; + u32 swq_pos = hqos->swq_pos; + u32 n_swq = vec_len (hqos->swq), i; + u32 flush_count = hqos->flush_count; + + for (i = 0; i < n_swq; i++) + { + /* Get current SWQ for this device */ + struct rte_ring *swq = hqos->swq[swq_pos]; + + /* Read SWQ burst to packet buffer of this device */ + pkts_enq_len += rte_ring_sc_dequeue_burst (swq, + (void **) + &pkts_enq[pkts_enq_len], + hqos->hqos_burst_enq); + + /* Get next SWQ for this device */ + swq_pos++; + if (swq_pos >= n_swq) + swq_pos = 0; + hqos->swq_pos = swq_pos; + + /* HWQ TX enqueue when burst available */ + if (pkts_enq_len >= hqos->hqos_burst_enq) + { + u32 n_pkts = rte_eth_tx_burst (device_index, + (uint16_t) queue_id, + pkts_enq, + (uint16_t) pkts_enq_len); + + for (; n_pkts < pkts_enq_len; n_pkts++) + rte_pktmbuf_free (pkts_enq[n_pkts]); + + pkts_enq_len = 0; + flush_count = 0; + break; + } + } + if (pkts_enq_len) + { + flush_count++; + if (PREDICT_FALSE (flush_count == HQOS_FLUSH_COUNT_THRESHOLD)) + { + rte_sched_port_enqueue (hqos->hqos, pkts_enq, pkts_enq_len); + + pkts_enq_len = 0; + flush_count = 0; + } + } + hqos->pkts_enq_len = pkts_enq_len; + hqos->flush_count = flush_count; + + /* Advance to next device */ + dev_pos++; + } +} + +static_always_inline void +dpdk_hqos_thread_internal (vlib_main_t * vm) +{ + dpdk_main_t *dm = &dpdk_main; + u32 cpu_index = vm->cpu_index; + u32 dev_pos; + + dev_pos = 0; + while (1) + { + vlib_worker_thread_barrier_check (); + + u32 n_devs = vec_len (dm->devices_by_hqos_cpu[cpu_index]); + if (PREDICT_FALSE (n_devs == 0)) + { + dev_pos = 0; + continue; + } + if (dev_pos >= n_devs) + dev_pos = 0; + + dpdk_device_and_queue_t *dq = + vec_elt_at_index (dm->devices_by_hqos_cpu[cpu_index], dev_pos); + dpdk_device_t *xd = vec_elt_at_index (dm->devices, dq->device); + + dpdk_device_hqos_per_hqos_thread_t *hqos = xd->hqos_ht; + u32 device_index = xd->device_index; + u16 queue_id = dq->queue_id; + + struct rte_mbuf **pkts_enq = hqos->pkts_enq; + struct rte_mbuf **pkts_deq = hqos->pkts_deq; + u32 pkts_enq_len = hqos->pkts_enq_len; + u32 swq_pos = hqos->swq_pos; + u32 n_swq = vec_len (hqos->swq), i; + u32 flush_count = hqos->flush_count; + + /* + * SWQ dequeue and HQoS enqueue for current device + */ + for (i = 0; i < n_swq; i++) + { + /* Get current SWQ for this device */ + struct rte_ring *swq = hqos->swq[swq_pos]; + + /* Read SWQ burst to packet buffer of this device */ + pkts_enq_len += rte_ring_sc_dequeue_burst (swq, + (void **) + &pkts_enq[pkts_enq_len], + hqos->hqos_burst_enq); + + /* Get next SWQ for this device */ + swq_pos++; + if (swq_pos >= n_swq) + swq_pos = 0; + hqos->swq_pos = swq_pos; + + /* HQoS enqueue when burst available */ + if (pkts_enq_len >= hqos->hqos_burst_enq) + { + rte_sched_port_enqueue (hqos->hqos, pkts_enq, 
pkts_enq_len); + + pkts_enq_len = 0; + flush_count = 0; + break; + } + } + if (pkts_enq_len) + { + flush_count++; + if (PREDICT_FALSE (flush_count == HQOS_FLUSH_COUNT_THRESHOLD)) + { + rte_sched_port_enqueue (hqos->hqos, pkts_enq, pkts_enq_len); + + pkts_enq_len = 0; + flush_count = 0; + } + } + hqos->pkts_enq_len = pkts_enq_len; + hqos->flush_count = flush_count; + + /* + * HQoS dequeue and HWQ TX enqueue for current device + */ + { + u32 pkts_deq_len, n_pkts; + + pkts_deq_len = rte_sched_port_dequeue (hqos->hqos, + pkts_deq, + hqos->hqos_burst_deq); + + for (n_pkts = 0; n_pkts < pkts_deq_len;) + n_pkts += rte_eth_tx_burst (device_index, + (uint16_t) queue_id, + &pkts_deq[n_pkts], + (uint16_t) (pkts_deq_len - n_pkts)); + } + + /* Advance to next device */ + dev_pos++; + } +} + +void +dpdk_hqos_thread (vlib_worker_thread_t * w) +{ + vlib_main_t *vm; + vlib_thread_main_t *tm = vlib_get_thread_main (); + dpdk_main_t *dm = &dpdk_main; + + vm = vlib_get_main (); + + ASSERT (vm->cpu_index == os_get_cpu_number ()); + + clib_time_init (&vm->clib_time); + clib_mem_set_heap (w->thread_mheap); + + /* Wait until the dpdk init sequence is complete */ + while (tm->worker_thread_release == 0) + vlib_worker_thread_barrier_check (); + + if (vec_len (dm->devices_by_hqos_cpu[vm->cpu_index]) == 0) + return + clib_error + ("current I/O TX thread does not have any devices assigned to it"); + + if (DPDK_HQOS_DBG_BYPASS) + dpdk_hqos_thread_internal_hqos_dbg_bypass (vm); + else + dpdk_hqos_thread_internal (vm); +} + +void +dpdk_hqos_thread_fn (void *arg) +{ + vlib_worker_thread_t *w = (vlib_worker_thread_t *) arg; + vlib_worker_thread_init (w); + dpdk_hqos_thread (w); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_THREAD (hqos_thread_reg, static) = +{ + .name = "hqos-threads", + .short_name = "hqos-threads", + .function = dpdk_hqos_thread_fn, +}; +/* *INDENT-ON* */ + +/* + * HQoS run-time code to be called by the worker threads + */ +#define BITFIELD(byte_array, slab_pos, slab_mask, slab_shr) \ +({ \ + u64 slab = *((u64 *) &byte_array[slab_pos]); \ + u64 val = (rte_be_to_cpu_64(slab) & slab_mask) >> slab_shr; \ + val; \ +}) + +#define RTE_SCHED_PORT_HIERARCHY(subport, pipe, traffic_class, queue, color) \ + ((((u64) (queue)) & 0x3) | \ + ((((u64) (traffic_class)) & 0x3) << 2) | \ + ((((u64) (color)) & 0x3) << 4) | \ + ((((u64) (subport)) & 0xFFFF) << 16) | \ + ((((u64) (pipe)) & 0xFFFFFFFF) << 32)) + +void +dpdk_hqos_metadata_set (dpdk_device_hqos_per_worker_thread_t * hqos, + struct rte_mbuf **pkts, u32 n_pkts) +{ + u32 i; + + for (i = 0; i < (n_pkts & (~0x3)); i += 4) + { + struct rte_mbuf *pkt0 = pkts[i]; + struct rte_mbuf *pkt1 = pkts[i + 1]; + struct rte_mbuf *pkt2 = pkts[i + 2]; + struct rte_mbuf *pkt3 = pkts[i + 3]; + + u8 *pkt0_data = rte_pktmbuf_mtod (pkt0, u8 *); + u8 *pkt1_data = rte_pktmbuf_mtod (pkt1, u8 *); + u8 *pkt2_data = rte_pktmbuf_mtod (pkt2, u8 *); + u8 *pkt3_data = rte_pktmbuf_mtod (pkt3, u8 *); + + u64 pkt0_subport = BITFIELD (pkt0_data, hqos->hqos_field0_slabpos, + hqos->hqos_field0_slabmask, + hqos->hqos_field0_slabshr); + u64 pkt0_pipe = BITFIELD (pkt0_data, hqos->hqos_field1_slabpos, + hqos->hqos_field1_slabmask, + hqos->hqos_field1_slabshr); + u64 pkt0_dscp = BITFIELD (pkt0_data, hqos->hqos_field2_slabpos, + hqos->hqos_field2_slabmask, + hqos->hqos_field2_slabshr); + u32 pkt0_tc = hqos->hqos_tc_table[pkt0_dscp & 0x3F] >> 2; + u32 pkt0_tc_q = hqos->hqos_tc_table[pkt0_dscp & 0x3F] & 0x3; + + u64 pkt1_subport = BITFIELD (pkt1_data, hqos->hqos_field0_slabpos, + hqos->hqos_field0_slabmask, + 
hqos->hqos_field0_slabshr); + u64 pkt1_pipe = BITFIELD (pkt1_data, hqos->hqos_field1_slabpos, + hqos->hqos_field1_slabmask, + hqos->hqos_field1_slabshr); + u64 pkt1_dscp = BITFIELD (pkt1_data, hqos->hqos_field2_slabpos, + hqos->hqos_field2_slabmask, + hqos->hqos_field2_slabshr); + u32 pkt1_tc = hqos->hqos_tc_table[pkt1_dscp & 0x3F] >> 2; + u32 pkt1_tc_q = hqos->hqos_tc_table[pkt1_dscp & 0x3F] & 0x3; + + u64 pkt2_subport = BITFIELD (pkt2_data, hqos->hqos_field0_slabpos, + hqos->hqos_field0_slabmask, + hqos->hqos_field0_slabshr); + u64 pkt2_pipe = BITFIELD (pkt2_data, hqos->hqos_field1_slabpos, + hqos->hqos_field1_slabmask, + hqos->hqos_field1_slabshr); + u64 pkt2_dscp = BITFIELD (pkt2_data, hqos->hqos_field2_slabpos, + hqos->hqos_field2_slabmask, + hqos->hqos_field2_slabshr); + u32 pkt2_tc = hqos->hqos_tc_table[pkt2_dscp & 0x3F] >> 2; + u32 pkt2_tc_q = hqos->hqos_tc_table[pkt2_dscp & 0x3F] & 0x3; + + u64 pkt3_subport = BITFIELD (pkt3_data, hqos->hqos_field0_slabpos, + hqos->hqos_field0_slabmask, + hqos->hqos_field0_slabshr); + u64 pkt3_pipe = BITFIELD (pkt3_data, hqos->hqos_field1_slabpos, + hqos->hqos_field1_slabmask, + hqos->hqos_field1_slabshr); + u64 pkt3_dscp = BITFIELD (pkt3_data, hqos->hqos_field2_slabpos, + hqos->hqos_field2_slabmask, + hqos->hqos_field2_slabshr); + u32 pkt3_tc = hqos->hqos_tc_table[pkt3_dscp & 0x3F] >> 2; + u32 pkt3_tc_q = hqos->hqos_tc_table[pkt3_dscp & 0x3F] & 0x3; + + u64 pkt0_sched = RTE_SCHED_PORT_HIERARCHY (pkt0_subport, + pkt0_pipe, + pkt0_tc, + pkt0_tc_q, + 0); + u64 pkt1_sched = RTE_SCHED_PORT_HIERARCHY (pkt1_subport, + pkt1_pipe, + pkt1_tc, + pkt1_tc_q, + 0); + u64 pkt2_sched = RTE_SCHED_PORT_HIERARCHY (pkt2_subport, + pkt2_pipe, + pkt2_tc, + pkt2_tc_q, + 0); + u64 pkt3_sched = RTE_SCHED_PORT_HIERARCHY (pkt3_subport, + pkt3_pipe, + pkt3_tc, + pkt3_tc_q, + 0); + + pkt0->hash.sched.lo = pkt0_sched & 0xFFFFFFFF; + pkt0->hash.sched.hi = pkt0_sched >> 32; + pkt1->hash.sched.lo = pkt1_sched & 0xFFFFFFFF; + pkt1->hash.sched.hi = pkt1_sched >> 32; + pkt2->hash.sched.lo = pkt2_sched & 0xFFFFFFFF; + pkt2->hash.sched.hi = pkt2_sched >> 32; + pkt3->hash.sched.lo = pkt3_sched & 0xFFFFFFFF; + pkt3->hash.sched.hi = pkt3_sched >> 32; + } + + for (; i < n_pkts; i++) + { + struct rte_mbuf *pkt = pkts[i]; + + u8 *pkt_data = rte_pktmbuf_mtod (pkt, u8 *); + + u64 pkt_subport = BITFIELD (pkt_data, hqos->hqos_field0_slabpos, + hqos->hqos_field0_slabmask, + hqos->hqos_field0_slabshr); + u64 pkt_pipe = BITFIELD (pkt_data, hqos->hqos_field1_slabpos, + hqos->hqos_field1_slabmask, + hqos->hqos_field1_slabshr); + u64 pkt_dscp = BITFIELD (pkt_data, hqos->hqos_field2_slabpos, + hqos->hqos_field2_slabmask, + hqos->hqos_field2_slabshr); + u32 pkt_tc = hqos->hqos_tc_table[pkt_dscp & 0x3F] >> 2; + u32 pkt_tc_q = hqos->hqos_tc_table[pkt_dscp & 0x3F] & 0x3; + + u64 pkt_sched = RTE_SCHED_PORT_HIERARCHY (pkt_subport, + pkt_pipe, + pkt_tc, + pkt_tc_q, + 0); + + pkt->hash.sched.lo = pkt_sched & 0xFFFFFFFF; + pkt->hash.sched.hi = pkt_sched >> 32; + } +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/hqos/qos_doc.md b/src/plugins/dpdk/hqos/qos_doc.md new file mode 100644 index 00000000..7c064246 --- /dev/null +++ b/src/plugins/dpdk/hqos/qos_doc.md @@ -0,0 +1,411 @@ +# QoS Hierarchical Scheduler {#qos_doc} + +The Quality-of-Service (QoS) scheduler performs egress-traffic management by +prioritizing the transmission of the packets of different type services and +subcribers based on the Service 
Level Agreements (SLAs). The QoS scheduler can
+be enabled on one or more NIC output interfaces depending upon the
+requirement.
+
+
+## Overview
+
+The QoS scheduler supports a number of scheduling and shaping levels which
+construct a hierarchical tree. The first level in the hierarchy is the port
+(i.e. the physical interface), which constitutes the root node of the tree.
+The next level is the subport, which represents a group of users/subscribers.
+The individual user/subscriber is represented by the pipe at the next level.
+Each user can have different traffic types based on criteria such as loss
+rate, jitter, and latency. These traffic types are represented at the
+traffic-class level in the form of different traffic classes. The last level
+contains a number of queues, which are grouped together to host the packets
+of a specific traffic class.
+
+The QoS scheduler implementation requires flow classification, enqueue and
+dequeue operations. The flow classification is a mandatory stage for HQoS,
+where incoming packets are classified by mapping the packet field information
+to a 5-tuple (HQoS subport, pipe, traffic class, queue within traffic class,
+and color) and storing that information in the mbuf sched field. The enqueue
+operation uses this information to determine the queue for storing the
+packet; at this stage, if the specific queue is full, QoS drops the packet.
+The dequeue operation consists of scheduling the packet based on its length
+and available credits, and handing the scheduled packet over to the output
+interface.
+
+For more information on the QoS scheduler, please refer to the DPDK
+Programmer's Guide:
+http://dpdk.org/doc/guides/prog_guide/qos_framework.html
+
+
+### QoS Scheduler Parameters
+
+The following illustrates the default HQoS configuration for each 10GbE
+output port:
+
+Single subport (subport 0):
+ - Subport rate set to 100% of port rate
+ - Each of the 4 traffic classes has rate set to 100% of port rate
+
+4K pipes per subport 0 (pipes 0 .. 4095) with identical configuration:
+ - Pipe rate set to 1/4K of port rate
+ - Each of the 4 traffic classes has rate set to 100% of pipe rate
+ - Within each traffic class, the byte-level WRR weights for the 4 queues are set to 1:1:1:1
+
+
+#### Port configuration
+
+```
+port {
+  rate 1250000000           /* Assuming 10GbE port */
+  frame_overhead 24         /* Overhead fields per Ethernet frame:
+                             * 7B (Preamble) +
+                             * 1B (Start of Frame Delimiter (SFD)) +
+                             * 4B (Frame Check Sequence (FCS)) +
+                             * 12B (Inter Frame Gap (IFG))
+                             */
+  mtu 1522                  /* Assuming Ethernet/IPv4 pkt (FCS not included) */
+  n_subports_per_port 1     /* Number of subports per output interface */
+  n_pipes_per_subport 4096  /* Number of pipes (users/subscribers) */
+  queue_sizes 64 64 64 64   /* Packet queue size for each traffic class.
+                             * All queues within the same pipe traffic class
+                             * have the same size. Queues from different
+                             * pipes serving the same traffic class have
+                             * the same size. 
*/ +} +``` + + +#### Subport configuration + +``` +subport 0 { + tb_rate 1250000000 /* Subport level token bucket rate (bytes per second) */ + tb_size 1000000 /* Subport level token bucket size (bytes) */ + tc0_rate 1250000000 /* Subport level token bucket rate for traffic class 0 (bytes per second) */ + tc1_rate 1250000000 /* Subport level token bucket rate for traffic class 1 (bytes per second) */ + tc2_rate 1250000000 /* Subport level token bucket rate for traffic class 2 (bytes per second) */ + tc3_rate 1250000000 /* Subport level token bucket rate for traffic class 3 (bytes per second) */ + tc_period 10 /* Time interval for refilling the token bucket associated with traffic class (Milliseconds) */ + pipe 0 4095 profile 0 /* pipes (users/subscribers) configured with pipe profile 0 */ +} +``` + + +#### Pipe configuration + +``` +pipe_profile 0 { + tb_rate 305175 /* Pipe level token bucket rate (bytes per second) */ + tb_size 1000000 /* Pipe level token bucket size (bytes) */ + tc0_rate 305175 /* Pipe level token bucket rate for traffic class 0 (bytes per second) */ + tc1_rate 305175 /* Pipe level token bucket rate for traffic class 1 (bytes per second) */ + tc2_rate 305175 /* Pipe level token bucket rate for traffic class 2 (bytes per second) */ + tc3_rate 305175 /* Pipe level token bucket rate for traffic class 3 (bytes per second) */ + tc_period 40 /* Time interval for refilling the token bucket associated with traffic class at pipe level (Milliseconds) */ + tc3_oversubscription_weight 1 /* Weight traffic class 3 oversubscription */ + tc0_wrr_weights 1 1 1 1 /* Pipe queues WRR weights for traffic class 0 */ + tc1_wrr_weights 1 1 1 1 /* Pipe queues WRR weights for traffic class 1 */ + tc2_wrr_weights 1 1 1 1 /* Pipe queues WRR weights for traffic class 2 */ + tc3_wrr_weights 1 1 1 1 /* Pipe queues WRR weights for traffic class 3 */ +} +``` + + +#### Random Early Detection (RED) parameters per traffic class and color (Green / Yellow / Red) + +``` +red { + tc0_wred_min 48 40 32 /* Minimum threshold for traffic class 0 queue (min_th) in number of packets */ + tc0_wred_max 64 64 64 /* Maximum threshold for traffic class 0 queue (max_th) in number of packets */ + tc0_wred_inv_prob 10 10 10 /* Inverse of packet marking probability for traffic class 0 queue (maxp = 1 / maxp_inv) */ + tc0_wred_weight 9 9 9 /* Traffic Class 0 queue weight */ + tc1_wred_min 48 40 32 /* Minimum threshold for traffic class 1 queue (min_th) in number of packets */ + tc1_wred_max 64 64 64 /* Maximum threshold for traffic class 1 queue (max_th) in number of packets */ + tc1_wred_inv_prob 10 10 10 /* Inverse of packet marking probability for traffic class 1 queue (maxp = 1 / maxp_inv) */ + tc1_wred_weight 9 9 9 /* Traffic Class 1 queue weight */ + tc2_wred_min 48 40 32 /* Minimum threshold for traffic class 2 queue (min_th) in number of packets */ + tc2_wred_max 64 64 64 /* Maximum threshold for traffic class 2 queue (max_th) in number of packets */ + tc2_wred_inv_prob 10 10 10 /* Inverse of packet marking probability for traffic class 2 queue (maxp = 1 / maxp_inv) */ + tc2_wred_weight 9 9 9 /* Traffic Class 2 queue weight */ + tc3_wred_min 48 40 32 /* Minimum threshold for traffic class 3 queue (min_th) in number of packets */ + tc3_wred_max 64 64 64 /* Maximum threshold for traffic class 3 queue (max_th) in number of packets */ + tc3_wred_inv_prob 10 10 10 /* Inverse of packet marking probability for traffic class 3 queue (maxp = 1 / maxp_inv) */ + tc3_wred_weight 9 9 9 /* Traffic Class 3 queue weight */ +} +``` + + 
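+#### Default packet classification (illustrative)
+
+To make the defaults above concrete, the stand-alone C sketch below shows how
+a packet is mapped to (subport, pipe, traffic class, queue) using the plugin's
+default packet-field positions and masks (subport slab at byte 0 with an
+all-zero mask, pipe slab at byte 40 with mask 0x0000000FFF000000, DSCP slab at
+byte 8 with mask 0xFC, also visible in the `show dpdk interface hqos` output
+later in this document) together with the default TC translation table. It is
+an illustration only, assuming Ethernet/IPv4/UDP packets at least 48 bytes
+long; the `slab_get` and `classify_pkt` helpers are local to this sketch and
+are not part of the VPP or DPDK APIs.
+
+```
+#include <stdint.h>
+
+/* Big-endian 64-bit slab read, mask and shift, mirroring what the plugin's
+ * BITFIELD macro computes with rte_be_to_cpu_64().  Requires at least
+ * pos + 8 bytes of packet data. */
+uint64_t
+slab_get (const uint8_t * pkt, int pos, uint64_t mask, int shift)
+{
+  uint64_t slab = 0;
+  int i;
+  for (i = 0; i < 8; i++)
+    slab = (slab << 8) | pkt[pos + i];
+  return (slab & mask) >> shift;
+}
+
+void
+classify_pkt (const uint8_t * pkt, uint32_t * subport, uint32_t * pipe,
+              uint32_t * tc, uint32_t * queue)
+{
+  /* Default DSCP -> (tc, queue) table: entry = tc_table[dscp & 0x3F],
+   * tc = entry >> 2, queue = entry & 3. */
+  static const uint8_t tc_table[64] = {
+    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+  };
+  uint32_t dscp;
+
+  *subport = 0;                                          /* single subport, all-zero mask */
+  *pipe = slab_get (pkt, 40, 0x0000000FFF000000ULL, 24); /* 12 bits of UDP payload */
+  dscp = slab_get (pkt, 8, 0xFCULL, 2);                  /* IPv4 DSCP field */
+  *tc = tc_table[dscp & 0x3F] >> 2;
+  *queue = tc_table[dscp & 0x3F] & 0x3;
+}
+```
+
+For example, with these defaults a packet carrying DSCP value 5 maps to
+traffic class 1, queue 1, which matches the tc translation table printed by
+`show dpdk interface hqos` below.
+
+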
+### DPDK QoS Scheduler Integration in VPP
+
+The Hierarchical Quality-of-Service (HQoS) scheduler object can be seen as
+part of the logical NIC output interface. To enable HQoS on a specific output
+interface, the vpp startup.conf file has to be configured accordingly. The
+output interface that requires HQoS should have the "hqos" parameter
+specified in the dpdk section. Another optional parameter, "hqos-thread", can
+be used to associate the output interface with a specific hqos thread. In the
+cpu section of the config file, "corelist-hqos-threads" is introduced to
+assign logical cpu cores to run the HQoS threads. An HQoS thread can run
+multiple HQoS objects, each associated with a different output interface.
+Instead of writing packets directly to the NIC TX queue, all worker threads
+write the packets to software queues. The hqos threads read the software
+queues and enqueue the packets to HQoS objects, as well as dequeue packets
+from HQoS objects and write them to NIC output interfaces. The worker threads
+need to be able to send packets to any output interface; therefore, each HQoS
+object associated with a NIC output interface should have as many software
+queues as there are worker threads.
+
+The following illustrates a sample startup configuration file with 4 worker
+threads feeding 2 hqos threads, each handling the QoS scheduler of one output
+interface.
+
+```
+dpdk {
+  socket-mem 16384,16384
+
+  dev 0000:02:00.0 {
+    num-rx-queues 2
+    hqos
+  }
+  dev 0000:06:00.0 {
+    num-rx-queues 2
+    hqos
+  }
+
+  num-mbufs 1000000
+}
+
+cpu {
+  main-core 0
+  corelist-workers 1, 2, 3, 4
+  corelist-hqos-threads 5, 6
+}
+```
+
+
+### QoS scheduler CLI Commands
+
+Each QoS scheduler instance is initialised with the default parameters
+required to configure the hqos port, subport, pipe and queues. Some of the
+parameters can be re-configured at run-time through CLI commands.
+
+
+#### Configuration
+
+The following commands can be used to configure QoS scheduler parameters.
+
+The command below can be used to set the subport level parameters such as
+token bucket rate (bytes per second), token bucket size (bytes), traffic
+class rates (bytes per second) and token update period (milliseconds).
+
+```
+set dpdk interface hqos subport <interface> subport <subport_id> [rate <n>]
+  [bktsize <n>] [tc0 <n>] [tc1 <n>] [tc2 <n>] [tc3 <n>] [period <n>]
+```
+
+For setting the pipe profile, the following command can be used.
+
+```
+set dpdk interface hqos pipe <interface> subport <subport_id> pipe <pipe_id>
+  profile <profile_id>
+```
+
+To assign a QoS scheduler instance to a specific thread, the following
+command can be used.
+
+```
+set dpdk interface hqos placement <interface> thread <n>
+```
+
+The command below is used to set the packet fields required for classifying
+the incoming packet. As a result of the classification process, the packet
+field information will be mapped to a 5-tuple (subport, pipe, traffic class,
+queue within traffic class, color) and stored in the packet mbuf.
+
+```
+set dpdk interface hqos pktfield <interface> id subport|pipe|tc offset <n>
+  mask <hex-mask>
+```
+
+The DSCP table entries used for identifying the traffic class and queue can
+be set using the command below:
+
+```
+set dpdk interface hqos tctbl <interface> entry <map_val> tc <tc_id> queue <queue_id>
+```
+
+
+#### Show Command
+
+The QoS scheduler configuration can be displayed using the command below.
+
+```
+   vpp# show dpdk interface hqos TenGigabitEthernet2/0/0
+   Thread:
+     Input SWQ size = 4096 packets
+     Enqueue burst size = 256 packets
+     Dequeue burst size = 220 packets
+     Packet field 0: slab position =    0, slab bitmask = 0x0000000000000000   (subport)
+     Packet field 1: slab position =   40, slab bitmask = 0x0000000fff000000   (pipe)
+     Packet field 2: slab position =    8, slab bitmask = 0x00000000000000fc   (tc)
+     Packet field 2  tc translation table: ([Mapped Value Range]: tc/queue tc/queue ...)
+     [ 0 .. 15]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3
+     [16 .. 31]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3
+     [32 .. 47]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3
+     [48 .. 63]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3
+   Port:
+     Rate = 1250000000 bytes/second
+     MTU = 1514 bytes
+     Frame overhead = 24 bytes
+     Number of subports = 1
+     Number of pipes per subport = 4096
+     Packet queue size: TC0 = 64, TC1 = 64, TC2 = 64, TC3 = 64 packets
+     Number of pipe profiles = 1
+   Subport 0:
+     Rate = 120000000 bytes/second
+     Token bucket size = 1000000 bytes
+     Traffic class rate: TC0 = 120000000, TC1 = 120000000, TC2 = 120000000, TC3 = 120000000 bytes/second
+     TC period = 10 milliseconds
+   Pipe profile 0:
+     Rate = 305175 bytes/second
+     Token bucket size = 1000000 bytes
+     Traffic class rate: TC0 = 305175, TC1 = 305175, TC2 = 305175, TC3 = 305175 bytes/second
+     TC period = 40 milliseconds
+     TC0 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1
+     TC1 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1
+     TC2 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1
+     TC3 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1
+```
+
+The QoS scheduler placement over the logical cpu cores can be displayed using
+the command below.
+
+```
+    vpp# show dpdk interface hqos placement
+    Thread 5 (vpp_hqos-threads_0 at lcore 5):
+      TenGigabitEthernet2/0/0 queue 0
+    Thread 6 (vpp_hqos-threads_1 at lcore 6):
+      TenGigabitEthernet4/0/1 queue 0
+```
+
+
+### QoS Scheduler Binary APIs
+
+This section explains the binary APIs available for configuring QoS scheduler
+parameters at run-time.
+
+The following API can be used to set the pipe profile of a pipe that belongs
+to a given subport:
+
+```
+sw_interface_set_dpdk_hqos_pipe rx <intfc> | sw_if_index <id>
+  subport <subport-id> pipe <pipe-id> profile <profile-id>
+```
+
+The data structures used to set the pipe profile parameters are as follows:
+
+```
+    /** \\brief DPDK interface HQoS pipe profile set request
+        @param client_index - opaque cookie to identify the sender
+        @param context - sender context, to match reply w/ request
+        @param sw_if_index - the interface
+        @param subport - subport ID
+        @param pipe - pipe ID within its subport
+        @param profile - pipe profile ID
+    */
+    define sw_interface_set_dpdk_hqos_pipe {
+        u32 client_index;
+        u32 context;
+        u32 sw_if_index;
+        u32 subport;
+        u32 pipe;
+        u32 profile;
+    };
+
+    /** \\brief DPDK interface HQoS pipe profile set reply
+        @param context - sender context, to match reply w/ request
+        @param retval - request return code
+    */
+    define sw_interface_set_dpdk_hqos_pipe_reply {
+        u32 context;
+        i32 retval;
+    };
+```
+
+The following API can be used to set the subport level parameters, for
+example token bucket rate (bytes per second), token bucket size (bytes),
+traffic class rates (bytes per second) and token update period.
+
+```
+sw_interface_set_dpdk_hqos_subport rx <interface> | sw_if_index <id>
+    subport <subport-id> [rate <n>] [bktsize <n>]
+    [tc0 <n>] [tc1 <n>] [tc2 <n>] [tc3 <n>] [period <n>]
+```
+
+The data structures used to set the subport level parameters are as follows:
+
+```
+ /** \brief DPDK interface HQoS subport parameters set request
+     @param client_index - opaque cookie to identify the sender
+     @param context - sender context, to match reply w/ request
+     @param sw_if_index - the interface
+     @param subport - subport ID
+     @param tb_rate - subport token bucket rate (measured in bytes/second)
+     @param tb_size - subport token bucket size (measured in credits)
+     @param tc_rate - subport traffic class 0 .. 3 rates (measured in bytes/second)
+     @param tc_period - enforcement period for rates (measured in milliseconds)
+ */
+ define sw_interface_set_dpdk_hqos_subport {
+   u32 client_index;
+   u32 context;
+   u32 sw_if_index;
+   u32 subport;
+   u32 tb_rate;
+   u32 tb_size;
+   u32 tc_rate[4];
+   u32 tc_period;
+ };
+
+ /** \brief DPDK interface HQoS subport parameters set reply
+     @param context - sender context, to match reply w/ request
+     @param retval - request return code
+ */
+ define sw_interface_set_dpdk_hqos_subport_reply {
+   u32 context;
+   i32 retval;
+ };
+```
+
+The following API can be used to set a DSCP table entry. The DSCP table has
+64 entries that map the packet DSCP field onto a traffic class and HQoS input
+queue.
+
+```
+sw_interface_set_dpdk_hqos_tctbl rx <interface> | sw_if_index <id>
+    entry <map-val> tc <tc-id> queue <queue-id>
+```
+
+The data structures used for setting DSCP table entries are given below.
+
+```
+ /** \brief DPDK interface HQoS tctbl entry set request
+     @param client_index - opaque cookie to identify the sender
+     @param context - sender context, to match reply w/ request
+     @param sw_if_index - the interface
+     @param entry - entry index ID
+     @param tc - traffic class (0 .. 3)
+     @param queue - traffic class queue (0 .. 3)
+ */
+ define sw_interface_set_dpdk_hqos_tctbl {
+   u32 client_index;
+   u32 context;
+   u32 sw_if_index;
+   u32 entry;
+   u32 tc;
+   u32 queue;
+ };
+
+ /** \brief DPDK interface HQoS tctbl entry set reply
+     @param context - sender context, to match reply w/ request
+     @param retval - request return code
+ */
+ define sw_interface_set_dpdk_hqos_tctbl_reply {
+   u32 context;
+   i32 retval;
+ };
+```
diff --git a/src/plugins/dpdk/init.c b/src/plugins/dpdk/init.c
new file mode 100755
index 00000000..e009ef3e
--- /dev/null
+++ b/src/plugins/dpdk/init.c
@@ -0,0 +1,2074 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+dpdk_main_t dpdk_main;
+
+#include
+#include
+
+/* define message IDs */
+#include
+
+#define vl_typedefs /* define message structures */
+#include
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include
+#undef vl_endianfun
+
+#define vl_print(handle, ...) 
vlib_cli_output (handle, __VA_ARGS__) + +/* Get the API version number. */ +#define vl_api_version(n,v) static u32 api_version=(v); +#include +#undef vl_api_version + +/* Macro to finish up custom dump fns */ +#define FINISH \ + vec_add1 (s, 0); \ + vl_print (handle, (char *)s); \ + vec_free (s); \ + return handle; + +#include + +static void + vl_api_sw_interface_set_dpdk_hqos_pipe_t_handler + (vl_api_sw_interface_set_dpdk_hqos_pipe_t * mp) +{ + vl_api_sw_interface_set_dpdk_hqos_pipe_reply_t *rmp; + int rv = 0; + + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd; + + u32 sw_if_index = ntohl (mp->sw_if_index); + u32 subport = ntohl (mp->subport); + u32 pipe = ntohl (mp->pipe); + u32 profile = ntohl (mp->profile); + vnet_hw_interface_t *hw; + + VALIDATE_SW_IF_INDEX (mp); + + /* hw_if & dpdk device */ + hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); + + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + rv = rte_sched_pipe_config (xd->hqos_ht->hqos, subport, pipe, profile); + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_PIPE_REPLY); +} + +static void *vl_api_sw_interface_set_dpdk_hqos_pipe_t_print + (vl_api_sw_interface_set_dpdk_hqos_pipe_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: sw_interface_set_dpdk_hqos_pipe "); + + s = format (s, "sw_if_index %u ", ntohl (mp->sw_if_index)); + + s = format (s, "subport %u pipe %u profile %u ", + ntohl (mp->subport), ntohl (mp->pipe), ntohl (mp->profile)); + + FINISH; +} + +static void + vl_api_sw_interface_set_dpdk_hqos_subport_t_handler + (vl_api_sw_interface_set_dpdk_hqos_subport_t * mp) +{ + vl_api_sw_interface_set_dpdk_hqos_subport_reply_t *rmp; + int rv = 0; + + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd; + struct rte_sched_subport_params p; + + u32 sw_if_index = ntohl (mp->sw_if_index); + u32 subport = ntohl (mp->subport); + p.tb_rate = ntohl (mp->tb_rate); + p.tb_size = ntohl (mp->tb_size); + p.tc_rate[0] = ntohl (mp->tc_rate[0]); + p.tc_rate[1] = ntohl (mp->tc_rate[1]); + p.tc_rate[2] = ntohl (mp->tc_rate[2]); + p.tc_rate[3] = ntohl (mp->tc_rate[3]); + p.tc_period = ntohl (mp->tc_period); + + vnet_hw_interface_t *hw; + + VALIDATE_SW_IF_INDEX (mp); + + /* hw_if & dpdk device */ + hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); + + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + rv = rte_sched_subport_config (xd->hqos_ht->hqos, subport, &p); + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_SUBPORT_REPLY); +} + +static void *vl_api_sw_interface_set_dpdk_hqos_subport_t_print + (vl_api_sw_interface_set_dpdk_hqos_subport_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: sw_interface_set_dpdk_hqos_subport "); + + s = format (s, "sw_if_index %u ", ntohl (mp->sw_if_index)); + + s = + format (s, + "subport %u rate %u bkt_size %u tc0 %u tc1 %u tc2 %u tc3 %u period %u", + ntohl (mp->subport), ntohl (mp->tb_rate), ntohl (mp->tb_size), + ntohl (mp->tc_rate[0]), ntohl (mp->tc_rate[1]), + ntohl (mp->tc_rate[2]), ntohl (mp->tc_rate[3]), + ntohl (mp->tc_period)); + + FINISH; +} + +static void + vl_api_sw_interface_set_dpdk_hqos_tctbl_t_handler + (vl_api_sw_interface_set_dpdk_hqos_tctbl_t * mp) +{ + vl_api_sw_interface_set_dpdk_hqos_tctbl_reply_t *rmp; + int rv = 0; + + dpdk_main_t *dm = &dpdk_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + dpdk_device_t *xd; + + u32 sw_if_index = ntohl (mp->sw_if_index); + u32 entry = ntohl (mp->entry); + u32 tc = ntohl (mp->tc); + u32 queue = ntohl (mp->queue); + u32 val, i; 
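+  /*
+   * The code below range-checks tc and queue against the rte_sched limits
+   * and then writes the flattened value
+   * tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + queue into the hqos_tc_table
+   * of every worker thread, since each worker keeps its own copy of the
+   * translation table.
+   */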
+ + vnet_hw_interface_t *hw; + + VALIDATE_SW_IF_INDEX (mp); + + /* hw_if & dpdk device */ + hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); + + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + if (tc >= RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE) + { + clib_warning ("invalid traffic class !!"); + rv = VNET_API_ERROR_INVALID_VALUE; + goto done; + } + if (queue >= RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS) + { + clib_warning ("invalid queue !!"); + rv = VNET_API_ERROR_INVALID_VALUE; + goto done; + } + + /* Detect the set of worker threads */ + uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + + if (p == 0) + { + clib_warning ("worker thread registration AWOL !!"); + rv = VNET_API_ERROR_INVALID_VALUE_2; + goto done; + } + + vlib_thread_registration_t *tr = (vlib_thread_registration_t *) p[0]; + int worker_thread_first = tr->first_index; + int worker_thread_count = tr->count; + + val = tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + queue; + for (i = 0; i < worker_thread_count; i++) + xd->hqos_wt[worker_thread_first + i].hqos_tc_table[entry] = val; + + BAD_SW_IF_INDEX_LABEL; +done: + + REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_TCTBL_REPLY); +} + +static void *vl_api_sw_interface_set_dpdk_hqos_tctbl_t_print + (vl_api_sw_interface_set_dpdk_hqos_tctbl_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: sw_interface_set_dpdk_hqos_tctbl "); + + s = format (s, "sw_if_index %u ", ntohl (mp->sw_if_index)); + + s = format (s, "entry %u tc %u queue %u", + ntohl (mp->entry), ntohl (mp->tc), ntohl (mp->queue)); + + FINISH; +} + +#define foreach_dpdk_plugin_api_msg \ +_(SW_INTERFACE_SET_DPDK_HQOS_PIPE, sw_interface_set_dpdk_hqos_pipe) \ +_(SW_INTERFACE_SET_DPDK_HQOS_SUBPORT, sw_interface_set_dpdk_hqos_subport) \ +_(SW_INTERFACE_SET_DPDK_HQOS_TCTBL, sw_interface_set_dpdk_hqos_tctbl) + +/* Set up the API message handling tables */ +static clib_error_t * +dpdk_plugin_api_hookup (vlib_main_t * vm) +{ + dpdk_main_t *dm __attribute__ ((unused)) = &dpdk_main; +#define _(N,n) \ + vl_msg_api_set_handlers((VL_API_##N + dm->msg_id_base), \ + #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_dpdk_plugin_api_msg; +#undef _ + return 0; +} + +#define vl_msg_name_crc_list +#include +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (dpdk_main_t * dm, api_main_t * am) +{ +#define _(id,n,crc) \ + vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + dm->msg_id_base); + foreach_vl_msg_name_crc_dpdk; +#undef _ +} + +// TODO +/* +static void plugin_custom_dump_configure (dpdk_main_t * dm) +{ +#define _(n,f) dm->api_main->msg_print_handlers \ + [VL_API_##n + dm->msg_id_base] \ + = (void *) vl_api_##f##_t_print; + foreach_dpdk_plugin_api_msg; +#undef _ +} +*/ +/* force linker to link functions used by vlib and declared weak */ +void *vlib_weakly_linked_functions[] = { + &rte_pktmbuf_init, + &rte_pktmbuf_pool_init, +}; + +#define LINK_STATE_ELOGS 0 + +#define DEFAULT_HUGE_DIR "/run/vpp/hugepages" +#define VPP_RUN_DIR "/run/vpp" + +/* Port configuration, mildly modified Intel app values */ + +static struct rte_eth_conf port_conf_template = { + .rxmode = { + .split_hdr_size = 0, + .header_split = 0, /**< Header Split disabled */ + .hw_ip_checksum = 0, /**< IP checksum offload disabled */ + .hw_vlan_filter = 0, /**< VLAN filtering disabled */ + .hw_strip_crc = 0, /**< CRC stripped by hardware */ + }, + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + }, +}; + +clib_error_t * 
+dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd) +{ + int rv; + int j; + + ASSERT (os_get_cpu_number () == 0); + + if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) + { + vnet_hw_interface_set_flags (dm->vnet_main, xd->vlib_hw_if_index, 0); + rte_eth_dev_stop (xd->device_index); + } + + rv = rte_eth_dev_configure (xd->device_index, xd->rx_q_used, + xd->tx_q_used, &xd->port_conf); + + if (rv < 0) + return clib_error_return (0, "rte_eth_dev_configure[%d]: err %d", + xd->device_index, rv); + + /* Set up one TX-queue per worker thread */ + for (j = 0; j < xd->tx_q_used; j++) + { + rv = rte_eth_tx_queue_setup (xd->device_index, j, xd->nb_tx_desc, + xd->cpu_socket, &xd->tx_conf); + + /* retry with any other CPU socket */ + if (rv < 0) + rv = rte_eth_tx_queue_setup (xd->device_index, j, xd->nb_tx_desc, + SOCKET_ID_ANY, &xd->tx_conf); + if (rv < 0) + break; + } + + if (rv < 0) + return clib_error_return (0, "rte_eth_tx_queue_setup[%d]: err %d", + xd->device_index, rv); + + for (j = 0; j < xd->rx_q_used; j++) + { + + rv = rte_eth_rx_queue_setup (xd->device_index, j, xd->nb_rx_desc, + xd->cpu_socket, 0, + dm-> + pktmbuf_pools[xd->cpu_socket_id_by_queue + [j]]); + + /* retry with any other CPU socket */ + if (rv < 0) + rv = rte_eth_rx_queue_setup (xd->device_index, j, xd->nb_rx_desc, + SOCKET_ID_ANY, 0, + dm-> + pktmbuf_pools[xd->cpu_socket_id_by_queue + [j]]); + if (rv < 0) + return clib_error_return (0, "rte_eth_rx_queue_setup[%d]: err %d", + xd->device_index, rv); + } + + if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) + { + int rv; + rv = rte_eth_dev_start (xd->device_index); + if (!rv && xd->default_mac_address) + rv = rte_eth_dev_default_mac_addr_set (xd->device_index, + (struct ether_addr *) + xd->default_mac_address); + if (rv < 0) + clib_warning ("rte_eth_dev_start %d returned %d", + xd->device_index, rv); + } + return 0; +} + +static u32 +dpdk_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags) +{ + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance); + u32 old = 0; + + if (ETHERNET_INTERFACE_FLAG_CONFIG_PROMISC (flags)) + { + old = (xd->flags & DPDK_DEVICE_FLAG_PROMISC) != 0; + + if (flags & ETHERNET_INTERFACE_FLAG_ACCEPT_ALL) + xd->flags |= DPDK_DEVICE_FLAG_PROMISC; + else + xd->flags &= ~DPDK_DEVICE_FLAG_PROMISC; + + if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) + { + if (xd->flags & DPDK_DEVICE_FLAG_PROMISC) + rte_eth_promiscuous_enable (xd->device_index); + else + rte_eth_promiscuous_disable (xd->device_index); + } + } + else if (ETHERNET_INTERFACE_FLAG_CONFIG_MTU (flags)) + { + /* + * DAW-FIXME: The Cisco VIC firmware does not provide an api for a + * driver to dynamically change the mtu. If/when the + * VIC firmware gets fixed, then this should be removed. + */ + if (xd->pmd == VNET_DPDK_PMD_ENIC) + { + struct rte_eth_dev_info dev_info; + + /* + * Restore mtu to what has been set by CIMC in the firmware cfg. 
+ */ + rte_eth_dev_info_get (xd->device_index, &dev_info); + hi->max_packet_bytes = dev_info.max_rx_pktlen; + + vlib_cli_output (vlib_get_main (), + "Cisco VIC mtu can only be changed " + "using CIMC then rebooting the server!"); + } + else + { + int rv; + + xd->port_conf.rxmode.max_rx_pkt_len = hi->max_packet_bytes; + + if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) + rte_eth_dev_stop (xd->device_index); + + rv = rte_eth_dev_configure + (xd->device_index, xd->rx_q_used, xd->tx_q_used, &xd->port_conf); + + if (rv < 0) + vlib_cli_output (vlib_get_main (), + "rte_eth_dev_configure[%d]: err %d", + xd->device_index, rv); + + rte_eth_dev_set_mtu (xd->device_index, hi->max_packet_bytes); + + if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) + { + int rv = rte_eth_dev_start (xd->device_index); + if (!rv && xd->default_mac_address) + rv = rte_eth_dev_default_mac_addr_set (xd->device_index, + (struct ether_addr *) + xd->default_mac_address); + if (rv < 0) + clib_warning ("rte_eth_dev_start %d returned %d", + xd->device_index, rv); + } + } + } + return old; +} + +void +dpdk_device_lock_init (dpdk_device_t * xd) +{ + int q; + vec_validate (xd->lockp, xd->tx_q_used - 1); + for (q = 0; q < xd->tx_q_used; q++) + { + xd->lockp[q] = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, + CLIB_CACHE_LINE_BYTES); + memset ((void *) xd->lockp[q], 0, CLIB_CACHE_LINE_BYTES); + } +} + +void +dpdk_device_lock_free (dpdk_device_t * xd) +{ + int q; + + for (q = 0; q < vec_len (xd->lockp); q++) + clib_mem_free ((void *) xd->lockp[q]); + vec_free (xd->lockp); + xd->lockp = 0; +} + +static clib_error_t * +dpdk_lib_init (dpdk_main_t * dm) +{ + u32 nports; + u32 nb_desc = 0; + int i; + clib_error_t *error; + vlib_main_t *vm = vlib_get_main (); + vlib_thread_main_t *tm = vlib_get_thread_main (); + vnet_sw_interface_t *sw; + vnet_hw_interface_t *hi; + dpdk_device_t *xd; + vlib_pci_addr_t last_pci_addr; + u32 last_pci_addr_port = 0; + vlib_thread_registration_t *tr, *tr_hqos; + uword *p, *p_hqos; + + u32 next_cpu = 0, next_hqos_cpu = 0; + u8 af_packet_port_id = 0; + last_pci_addr.as_u32 = ~0; + + dm->input_cpu_first_index = 0; + dm->input_cpu_count = 1; + + /* find out which cpus will be used for input */ + p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + tr = p ? (vlib_thread_registration_t *) p[0] : 0; + + if (tr && tr->count > 0) + { + dm->input_cpu_first_index = tr->first_index; + dm->input_cpu_count = tr->count; + } + + vec_validate_aligned (dm->devices_by_cpu, tm->n_vlib_mains - 1, + CLIB_CACHE_LINE_BYTES); + + dm->hqos_cpu_first_index = 0; + dm->hqos_cpu_count = 0; + + /* find out which cpus will be used for I/O TX */ + p_hqos = hash_get_mem (tm->thread_registrations_by_name, "hqos-threads"); + tr_hqos = p_hqos ? (vlib_thread_registration_t *) p_hqos[0] : 0; + + if (tr_hqos && tr_hqos->count > 0) + { + dm->hqos_cpu_first_index = tr_hqos->first_index; + dm->hqos_cpu_count = tr_hqos->count; + } + + vec_validate_aligned (dm->devices_by_hqos_cpu, tm->n_vlib_mains - 1, + CLIB_CACHE_LINE_BYTES); + + nports = rte_eth_dev_count (); + if (nports < 1) + { + clib_warning ("DPDK drivers found no ports..."); + } + + if (CLIB_DEBUG > 0) + clib_warning ("DPDK drivers found %d ports...", nports); + + /* + * All buffers are all allocated from the same rte_mempool. + * Thus they all have the same number of data bytes. 
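+   * The free list created below ("dpdk rx") provides
+   * VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES of data space per buffer.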
+ */ + dm->vlib_buffer_free_list_index = + vlib_buffer_get_or_create_free_list (vm, + VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES, + "dpdk rx"); + + if (dm->conf->enable_tcp_udp_checksum) + dm->buffer_flags_template &= ~(IP_BUFFER_L4_CHECKSUM_CORRECT + | IP_BUFFER_L4_CHECKSUM_COMPUTED); + + for (i = 0; i < nports; i++) + { + u8 addr[6]; + u8 vlan_strip = 0; + int j; + struct rte_eth_dev_info dev_info; + clib_error_t *rv; + struct rte_eth_link l; + dpdk_device_config_t *devconf = 0; + vlib_pci_addr_t pci_addr; + uword *p = 0; + + rte_eth_dev_info_get (i, &dev_info); + if (dev_info.pci_dev) /* bonded interface has no pci info */ + { + pci_addr.domain = dev_info.pci_dev->addr.domain; + pci_addr.bus = dev_info.pci_dev->addr.bus; + pci_addr.slot = dev_info.pci_dev->addr.devid; + pci_addr.function = dev_info.pci_dev->addr.function; + p = + hash_get (dm->conf->device_config_index_by_pci_addr, + pci_addr.as_u32); + } + + if (p) + devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]); + else + devconf = &dm->conf->default_devconf; + + /* Create vnet interface */ + vec_add2_aligned (dm->devices, xd, 1, CLIB_CACHE_LINE_BYTES); + xd->nb_rx_desc = DPDK_NB_RX_DESC_DEFAULT; + xd->nb_tx_desc = DPDK_NB_TX_DESC_DEFAULT; + xd->cpu_socket = (i8) rte_eth_dev_socket_id (i); + + /* Handle interface naming for devices with multiple ports sharing same PCI ID */ + if (dev_info.pci_dev) + { + struct rte_eth_dev_info di = { 0 }; + rte_eth_dev_info_get (i + 1, &di); + if (di.pci_dev && pci_addr.as_u32 != last_pci_addr.as_u32 && + memcmp (&dev_info.pci_dev->addr, &di.pci_dev->addr, + sizeof (struct rte_pci_addr)) == 0) + { + xd->interface_name_suffix = format (0, "0"); + last_pci_addr.as_u32 = pci_addr.as_u32; + last_pci_addr_port = i; + } + else if (pci_addr.as_u32 == last_pci_addr.as_u32) + { + xd->interface_name_suffix = + format (0, "%u", i - last_pci_addr_port); + } + else + { + last_pci_addr.as_u32 = ~0; + } + } + else + last_pci_addr.as_u32 = ~0; + + clib_memcpy (&xd->tx_conf, &dev_info.default_txconf, + sizeof (struct rte_eth_txconf)); + if (dm->conf->no_multi_seg) + { + xd->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS; + port_conf_template.rxmode.jumbo_frame = 0; + } + else + { + xd->tx_conf.txq_flags &= ~ETH_TXQ_FLAGS_NOMULTSEGS; + port_conf_template.rxmode.jumbo_frame = 1; + xd->flags |= DPDK_DEVICE_FLAG_MAYBE_MULTISEG; + } + + clib_memcpy (&xd->port_conf, &port_conf_template, + sizeof (struct rte_eth_conf)); + + xd->tx_q_used = clib_min (dev_info.max_tx_queues, tm->n_vlib_mains); + + if (devconf->num_tx_queues > 0 + && devconf->num_tx_queues < xd->tx_q_used) + xd->tx_q_used = clib_min (xd->tx_q_used, devconf->num_tx_queues); + + if (devconf->num_rx_queues > 1 && dm->use_rss == 0) + { + dm->use_rss = 1; + } + + if (devconf->num_rx_queues > 1 + && dev_info.max_rx_queues >= devconf->num_rx_queues) + { + xd->rx_q_used = devconf->num_rx_queues; + xd->port_conf.rxmode.mq_mode = ETH_MQ_RX_RSS; + if (devconf->rss_fn == 0) + xd->port_conf.rx_adv_conf.rss_conf.rss_hf = + ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP; + else + xd->port_conf.rx_adv_conf.rss_conf.rss_hf = devconf->rss_fn; + } + else + xd->rx_q_used = 1; + + xd->flags |= DPDK_DEVICE_FLAG_PMD; + + /* workaround for drivers not setting driver_name */ + if ((!dev_info.driver_name) && (dev_info.pci_dev)) + dev_info.driver_name = dev_info.pci_dev->driver->driver.name; + + ASSERT (dev_info.driver_name); + + if (!xd->pmd) + { + + +#define _(s,f) else if (dev_info.driver_name && \ + !strcmp(dev_info.driver_name, s)) \ + xd->pmd = VNET_DPDK_PMD_##f; + if (0) + ; + 
foreach_dpdk_pmd +#undef _ + else + xd->pmd = VNET_DPDK_PMD_UNKNOWN; + + xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; + xd->nb_rx_desc = DPDK_NB_RX_DESC_DEFAULT; + xd->nb_tx_desc = DPDK_NB_TX_DESC_DEFAULT; + + switch (xd->pmd) + { + /* 1G adapters */ + case VNET_DPDK_PMD_E1000EM: + case VNET_DPDK_PMD_IGB: + case VNET_DPDK_PMD_IGBVF: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G; + break; + + /* 10G adapters */ + case VNET_DPDK_PMD_IXGBE: + case VNET_DPDK_PMD_IXGBEVF: + case VNET_DPDK_PMD_THUNDERX: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; + break; + case VNET_DPDK_PMD_DPAA2: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; + break; + + /* Cisco VIC */ + case VNET_DPDK_PMD_ENIC: + rte_eth_link_get_nowait (i, &l); + xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE; + if (l.link_speed == 40000) + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; + else + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; + break; + + /* Intel Fortville */ + case VNET_DPDK_PMD_I40E: + case VNET_DPDK_PMD_I40EVF: + xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE; + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; + + switch (dev_info.pci_dev->id.device_id) + { + case I40E_DEV_ID_10G_BASE_T: + case I40E_DEV_ID_SFP_XL710: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; + break; + case I40E_DEV_ID_QSFP_A: + case I40E_DEV_ID_QSFP_B: + case I40E_DEV_ID_QSFP_C: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; + break; + case I40E_DEV_ID_VF: + rte_eth_link_get_nowait (i, &l); + xd->port_type = l.link_speed == 10000 ? + VNET_DPDK_PORT_TYPE_ETH_10G : VNET_DPDK_PORT_TYPE_ETH_40G; + break; + default: + xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; + } + break; + + case VNET_DPDK_PMD_CXGBE: + switch (dev_info.pci_dev->id.device_id) + { + case 0x540d: /* T580-CR */ + case 0x5410: /* T580-LP-cr */ + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; + break; + case 0x5403: /* T540-CR */ + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; + break; + default: + xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; + } + break; + + case VNET_DPDK_PMD_MLX5: + { + char *pn_100g[] = { "MCX415A-CCAT", "MCX416A-CCAT", 0 }; + char *pn_40g[] = { "MCX413A-BCAT", "MCX414A-BCAT", + "MCX415A-BCAT", "MCX416A-BCAT", "MCX4131A-BCAT", 0 + }; + char *pn_10g[] = { "MCX4111A-XCAT", "MCX4121A-XCAT", 0 }; + + vlib_pci_device_t *pd = vlib_get_pci_device (&pci_addr); + u8 *pn = 0; + char **c; + int found = 0; + pn = format (0, "%U%c", + format_vlib_pci_vpd, pd->vpd_r, "PN", 0); + + if (!pn) + break; + + c = pn_100g; + while (!found && c[0]) + { + if (strncmp ((char *) pn, c[0], strlen (c[0])) == 0) + { + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_100G; + break; + } + c++; + } + + c = pn_40g; + while (!found && c[0]) + { + if (strncmp ((char *) pn, c[0], strlen (c[0])) == 0) + { + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; + break; + } + c++; + } + + c = pn_10g; + while (!found && c[0]) + { + if (strncmp ((char *) pn, c[0], strlen (c[0])) == 0) + { + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; + break; + } + c++; + } + + vec_free (pn); + } + + break; + /* Intel Red Rock Canyon */ + case VNET_DPDK_PMD_FM10K: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_SWITCH; + break; + + /* virtio */ + case VNET_DPDK_PMD_VIRTIO: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G; + xd->nb_rx_desc = DPDK_NB_RX_DESC_VIRTIO; + xd->nb_tx_desc = DPDK_NB_TX_DESC_VIRTIO; + break; + + /* vmxnet3 */ + case VNET_DPDK_PMD_VMXNET3: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G; + xd->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS; + break; + + case VNET_DPDK_PMD_AF_PACKET: + xd->port_type = VNET_DPDK_PORT_TYPE_AF_PACKET; + 
xd->af_packet_port_id = af_packet_port_id++; + break; + + case VNET_DPDK_PMD_BOND: + xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE; + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_BOND; + break; + + default: + xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; + } + + if (devconf->num_rx_desc) + xd->nb_rx_desc = devconf->num_rx_desc; + + if (devconf->num_tx_desc) + xd->nb_tx_desc = devconf->num_tx_desc; + } + + /* + * Ensure default mtu is not > the mtu read from the hardware. + * Otherwise rte_eth_dev_configure() will fail and the port will + * not be available. + */ + if (ETHERNET_MAX_PACKET_BYTES > dev_info.max_rx_pktlen) + { + /* + * This device does not support the platforms's max frame + * size. Use it's advertised mru instead. + */ + xd->port_conf.rxmode.max_rx_pkt_len = dev_info.max_rx_pktlen; + } + else + { + xd->port_conf.rxmode.max_rx_pkt_len = ETHERNET_MAX_PACKET_BYTES; + + /* + * Some platforms do not account for Ethernet FCS (4 bytes) in + * MTU calculations. To interop with them increase mru but only + * if the device's settings can support it. + */ + if ((dev_info.max_rx_pktlen >= (ETHERNET_MAX_PACKET_BYTES + 4)) && + xd->port_conf.rxmode.hw_strip_crc) + { + /* + * Allow additional 4 bytes (for Ethernet FCS). These bytes are + * stripped by h/w and so will not consume any buffer memory. + */ + xd->port_conf.rxmode.max_rx_pkt_len += 4; + } + } + + if (xd->pmd == VNET_DPDK_PMD_AF_PACKET) + { + f64 now = vlib_time_now (vm); + u32 rnd; + rnd = (u32) (now * 1e6); + rnd = random_u32 (&rnd); + clib_memcpy (addr + 2, &rnd, sizeof (rnd)); + addr[0] = 2; + addr[1] = 0xfe; + } + else + rte_eth_macaddr_get (i, (struct ether_addr *) addr); + + if (xd->tx_q_used < tm->n_vlib_mains) + dpdk_device_lock_init (xd); + + xd->device_index = xd - dm->devices; + ASSERT (i == xd->device_index); + xd->per_interface_next_index = ~0; + + /* assign interface to input thread */ + dpdk_device_and_queue_t *dq; + int q; + + if (devconf->workers) + { + int i; + q = 0; + /* *INDENT-OFF* */ + clib_bitmap_foreach (i, devconf->workers, ({ + int cpu = dm->input_cpu_first_index + i; + unsigned lcore = vlib_worker_threads[cpu].lcore_id; + vec_validate(xd->cpu_socket_id_by_queue, q); + xd->cpu_socket_id_by_queue[q] = rte_lcore_to_socket_id(lcore); + vec_add2(dm->devices_by_cpu[cpu], dq, 1); + dq->device = xd->device_index; + dq->queue_id = q++; + })); + /* *INDENT-ON* */ + } + else + for (q = 0; q < xd->rx_q_used; q++) + { + int cpu = dm->input_cpu_first_index + next_cpu; + unsigned lcore = vlib_worker_threads[cpu].lcore_id; + + /* + * numa node for worker thread handling this queue + * needed for taking buffers from the right mempool + */ + vec_validate (xd->cpu_socket_id_by_queue, q); + xd->cpu_socket_id_by_queue[q] = rte_lcore_to_socket_id (lcore); + + /* + * construct vector of (device,queue) pairs for each worker thread + */ + vec_add2 (dm->devices_by_cpu[cpu], dq, 1); + dq->device = xd->device_index; + dq->queue_id = q; + + next_cpu++; + if (next_cpu == dm->input_cpu_count) + next_cpu = 0; + } + + + if (devconf->hqos_enabled) + { + xd->flags |= DPDK_DEVICE_FLAG_HQOS; + + if (devconf->hqos.hqos_thread_valid) + { + int cpu = dm->hqos_cpu_first_index + devconf->hqos.hqos_thread; + + if (devconf->hqos.hqos_thread >= dm->hqos_cpu_count) + return clib_error_return (0, "invalid HQoS thread index"); + + vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1); + dq->device = xd->device_index; + dq->queue_id = 0; + } + else + { + int cpu = dm->hqos_cpu_first_index + next_hqos_cpu; + + if (dm->hqos_cpu_count == 0) + return 
clib_error_return (0, "no HQoS threads available"); + + vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1); + dq->device = xd->device_index; + dq->queue_id = 0; + + next_hqos_cpu++; + if (next_hqos_cpu == dm->hqos_cpu_count) + next_hqos_cpu = 0; + + devconf->hqos.hqos_thread_valid = 1; + devconf->hqos.hqos_thread = cpu; + } + } + + vec_validate_aligned (xd->tx_vectors, tm->n_vlib_mains, + CLIB_CACHE_LINE_BYTES); + for (j = 0; j < tm->n_vlib_mains; j++) + { + vec_validate_ha (xd->tx_vectors[j], xd->nb_tx_desc, + sizeof (tx_ring_hdr_t), CLIB_CACHE_LINE_BYTES); + vec_reset_length (xd->tx_vectors[j]); + } + + vec_validate_aligned (xd->rx_vectors, xd->rx_q_used, + CLIB_CACHE_LINE_BYTES); + for (j = 0; j < xd->rx_q_used; j++) + { + vec_validate_aligned (xd->rx_vectors[j], VLIB_FRAME_SIZE - 1, + CLIB_CACHE_LINE_BYTES); + vec_reset_length (xd->rx_vectors[j]); + } + + vec_validate_aligned (xd->d_trace_buffers, tm->n_vlib_mains, + CLIB_CACHE_LINE_BYTES); + + rv = dpdk_port_setup (dm, xd); + + if (rv) + return rv; + + if (devconf->hqos_enabled) + { + rv = dpdk_port_setup_hqos (xd, &devconf->hqos); + if (rv) + return rv; + } + + /* count the number of descriptors used for this device */ + nb_desc += xd->nb_rx_desc + xd->nb_tx_desc * xd->tx_q_used; + + error = ethernet_register_interface + (dm->vnet_main, dpdk_device_class.index, xd->device_index, + /* ethernet address */ addr, + &xd->vlib_hw_if_index, dpdk_flag_change); + if (error) + return error; + + sw = vnet_get_hw_sw_interface (dm->vnet_main, xd->vlib_hw_if_index); + xd->vlib_sw_if_index = sw->sw_if_index; + hi = vnet_get_hw_interface (dm->vnet_main, xd->vlib_hw_if_index); + + /* + * DAW-FIXME: The Cisco VIC firmware does not provide an api for a + * driver to dynamically change the mtu. If/when the + * VIC firmware gets fixed, then this should be removed. + */ + if (xd->pmd == VNET_DPDK_PMD_ENIC) + { + /* + * Initialize mtu to what has been set by CIMC in the firmware cfg. 
+ */ + hi->max_packet_bytes = dev_info.max_rx_pktlen; + if (devconf->vlan_strip_offload != DPDK_DEVICE_VLAN_STRIP_OFF) + vlan_strip = 1; /* remove vlan tag from VIC port by default */ + else + clib_warning ("VLAN strip disabled for interface\n"); + } + else if (devconf->vlan_strip_offload == DPDK_DEVICE_VLAN_STRIP_ON) + vlan_strip = 1; + + if (vlan_strip) + { + int vlan_off; + vlan_off = rte_eth_dev_get_vlan_offload (xd->device_index); + vlan_off |= ETH_VLAN_STRIP_OFFLOAD; + xd->port_conf.rxmode.hw_vlan_strip = vlan_off; + if (rte_eth_dev_set_vlan_offload (xd->device_index, vlan_off) == 0) + clib_warning ("VLAN strip enabled for interface\n"); + else + clib_warning ("VLAN strip cannot be supported by interface\n"); + } + + hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = + xd->port_conf.rxmode.max_rx_pkt_len - sizeof (ethernet_header_t); + + rte_eth_dev_set_mtu (xd->device_index, hi->max_packet_bytes); + } + + if (nb_desc > dm->conf->num_mbufs) + clib_warning ("%d mbufs allocated but total rx/tx ring size is %d\n", + dm->conf->num_mbufs, nb_desc); + + return 0; +} + +static void +dpdk_bind_devices_to_uio (dpdk_config_main_t * conf) +{ + vlib_pci_main_t *pm = &pci_main; + clib_error_t *error; + vlib_pci_device_t *d; + u8 *pci_addr = 0; + int num_whitelisted = vec_len (conf->dev_confs); + + /* *INDENT-OFF* */ + pool_foreach (d, pm->pci_devs, ({ + dpdk_device_config_t * devconf = 0; + vec_reset_length (pci_addr); + pci_addr = format (pci_addr, "%U%c", format_vlib_pci_addr, &d->bus_address, 0); + + if (d->device_class != PCI_CLASS_NETWORK_ETHERNET && d->device_class != PCI_CLASS_PROCESSOR_CO) + continue; + + if (num_whitelisted) + { + uword * p = hash_get (conf->device_config_index_by_pci_addr, d->bus_address.as_u32); + + if (!p) + continue; + + devconf = pool_elt_at_index (conf->dev_confs, p[0]); + } + + /* virtio */ + if (d->vendor_id == 0x1af4 && d->device_id == 0x1000) + ; + /* vmxnet3 */ + else if (d->vendor_id == 0x15ad && d->device_id == 0x07b0) + ; + /* all Intel devices */ + else if (d->vendor_id == 0x8086) + ; + /* Cisco VIC */ + else if (d->vendor_id == 0x1137 && d->device_id == 0x0043) + ; + /* Chelsio T4/T5 */ + else if (d->vendor_id == 0x1425 && (d->device_id & 0xe000) == 0x4000) + ; + else + { + clib_warning ("Unsupported Ethernet PCI device 0x%04x:0x%04x found " + "at PCI address %s\n", (u16) d->vendor_id, (u16) d->device_id, + pci_addr); + continue; + } + + error = vlib_pci_bind_to_uio (d, (char *) conf->uio_driver_name); + + if (error) + { + if (devconf == 0) + { + pool_get (conf->dev_confs, devconf); + hash_set (conf->device_config_index_by_pci_addr, d->bus_address.as_u32, + devconf - conf->dev_confs); + devconf->pci_addr.as_u32 = d->bus_address.as_u32; + } + devconf->is_blacklisted = 1; + clib_error_report (error); + } + })); + /* *INDENT-ON* */ + vec_free (pci_addr); +} + +static clib_error_t * +dpdk_device_config (dpdk_config_main_t * conf, vlib_pci_addr_t pci_addr, + unformat_input_t * input, u8 is_default) +{ + clib_error_t *error = 0; + uword *p; + dpdk_device_config_t *devconf; + unformat_input_t sub_input; + + if (is_default) + { + devconf = &conf->default_devconf; + } + else + { + p = hash_get (conf->device_config_index_by_pci_addr, pci_addr.as_u32); + + if (!p) + { + pool_get (conf->dev_confs, devconf); + hash_set (conf->device_config_index_by_pci_addr, pci_addr.as_u32, + devconf - conf->dev_confs); + } + else + return clib_error_return (0, + "duplicate configuration for PCI address %U", + format_vlib_pci_addr, &pci_addr); + } + + 
devconf->pci_addr.as_u32 = pci_addr.as_u32; + devconf->hqos_enabled = 0; + dpdk_device_config_hqos_default (&devconf->hqos); + + if (!input) + return 0; + + unformat_skip_white_space (input); + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "num-rx-queues %u", &devconf->num_rx_queues)) + ; + else if (unformat (input, "num-tx-queues %u", &devconf->num_tx_queues)) + ; + else if (unformat (input, "num-rx-desc %u", &devconf->num_rx_desc)) + ; + else if (unformat (input, "num-tx-desc %u", &devconf->num_tx_desc)) + ; + else if (unformat (input, "workers %U", unformat_bitmap_list, + &devconf->workers)) + ; + else + if (unformat + (input, "rss %U", unformat_vlib_cli_sub_input, &sub_input)) + { + error = unformat_rss_fn (&sub_input, &devconf->rss_fn); + if (error) + break; + } + else if (unformat (input, "vlan-strip-offload off")) + devconf->vlan_strip_offload = DPDK_DEVICE_VLAN_STRIP_OFF; + else if (unformat (input, "vlan-strip-offload on")) + devconf->vlan_strip_offload = DPDK_DEVICE_VLAN_STRIP_ON; + else + if (unformat + (input, "hqos %U", unformat_vlib_cli_sub_input, &sub_input)) + { + devconf->hqos_enabled = 1; + error = unformat_hqos (&sub_input, &devconf->hqos); + if (error) + break; + } + else if (unformat (input, "hqos")) + { + devconf->hqos_enabled = 1; + } + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + break; + } + } + + if (error) + return error; + + if (devconf->workers && devconf->num_rx_queues == 0) + devconf->num_rx_queues = clib_bitmap_count_set_bits (devconf->workers); + else if (devconf->workers && + clib_bitmap_count_set_bits (devconf->workers) != + devconf->num_rx_queues) + error = + clib_error_return (0, + "%U: number of worker threadds must be " + "equal to number of rx queues", format_vlib_pci_addr, + &pci_addr); + + return error; +} + +static clib_error_t * +dpdk_config (vlib_main_t * vm, unformat_input_t * input) +{ + clib_error_t *error = 0; + dpdk_main_t *dm = &dpdk_main; + dpdk_config_main_t *conf = &dpdk_config_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + dpdk_device_config_t *devconf; + vlib_pci_addr_t pci_addr; + unformat_input_t sub_input; + u8 *s, *tmp = 0; + u8 *rte_cmd = 0, *ethname = 0; + u32 log_level; + int ret, i; + int num_whitelisted = 0; + u8 no_pci = 0; + u8 no_huge = 0; + u8 huge_dir = 0; + u8 file_prefix = 0; + u8 *socket_mem = 0; + + conf->device_config_index_by_pci_addr = hash_create (0, sizeof (uword)); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + /* Prime the pump */ + if (unformat (input, "no-hugetlb")) + { + vec_add1 (conf->eal_init_args, (u8 *) "no-huge"); + no_huge = 1; + } + + else if (unformat (input, "enable-tcp-udp-checksum")) + conf->enable_tcp_udp_checksum = 1; + + else if (unformat (input, "decimal-interface-names")) + conf->interface_name_format_decimal = 1; + + else if (unformat (input, "no-multi-seg")) + conf->no_multi_seg = 1; + + else if (unformat (input, "enable-cryptodev")) + conf->cryptodev = 1; + + else if (unformat (input, "dev default %U", unformat_vlib_cli_sub_input, + &sub_input)) + { + error = + dpdk_device_config (conf, (vlib_pci_addr_t) (u32) ~ 1, &sub_input, + 1); + + if (error) + return error; + } + else + if (unformat + (input, "dev %U %U", unformat_vlib_pci_addr, &pci_addr, + unformat_vlib_cli_sub_input, &sub_input)) + { + error = dpdk_device_config (conf, pci_addr, &sub_input, 0); + + if (error) + return error; + + num_whitelisted++; + } + else if (unformat (input, "dev %U", 
unformat_vlib_pci_addr, &pci_addr)) + { + error = dpdk_device_config (conf, pci_addr, 0, 0); + + if (error) + return error; + + num_whitelisted++; + } + else if (unformat (input, "num-mbufs %d", &conf->num_mbufs)) + ; + else if (unformat (input, "kni %d", &conf->num_kni)) + ; + else if (unformat (input, "uio-driver %s", &conf->uio_driver_name)) + ; + else if (unformat (input, "socket-mem %s", &socket_mem)) + ; + else if (unformat (input, "no-pci")) + { + no_pci = 1; + tmp = format (0, "--no-pci%c", 0); + vec_add1 (conf->eal_init_args, tmp); + } + else if (unformat (input, "poll-sleep %d", &dm->poll_sleep)) + ; + +#define _(a) \ + else if (unformat(input, #a)) \ + { \ + tmp = format (0, "--%s%c", #a, 0); \ + vec_add1 (conf->eal_init_args, tmp); \ + } + foreach_eal_double_hyphen_predicate_arg +#undef _ +#define _(a) \ + else if (unformat(input, #a " %s", &s)) \ + { \ + if (!strncmp(#a, "huge-dir", 8)) \ + huge_dir = 1; \ + else if (!strncmp(#a, "file-prefix", 11)) \ + file_prefix = 1; \ + tmp = format (0, "--%s%c", #a, 0); \ + vec_add1 (conf->eal_init_args, tmp); \ + vec_add1 (s, 0); \ + if (!strncmp(#a, "vdev", 4)) \ + if (strstr((char*)s, "af_packet")) \ + clib_warning ("af_packet obsoleted. Use CLI 'create host-interface'."); \ + vec_add1 (conf->eal_init_args, s); \ + } + foreach_eal_double_hyphen_arg +#undef _ +#define _(a,b) \ + else if (unformat(input, #a " %s", &s)) \ + { \ + tmp = format (0, "-%s%c", #b, 0); \ + vec_add1 (conf->eal_init_args, tmp); \ + vec_add1 (s, 0); \ + vec_add1 (conf->eal_init_args, s); \ + } + foreach_eal_single_hyphen_arg +#undef _ +#define _(a,b) \ + else if (unformat(input, #a " %s", &s)) \ + { \ + tmp = format (0, "-%s%c", #b, 0); \ + vec_add1 (conf->eal_init_args, tmp); \ + vec_add1 (s, 0); \ + vec_add1 (conf->eal_init_args, s); \ + conf->a##_set_manually = 1; \ + } + foreach_eal_single_hyphen_mandatory_arg +#undef _ + else if (unformat (input, "default")) + ; + + else if (unformat_skip_white_space (input)) + ; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + goto done; + } + } + + if (!conf->uio_driver_name) + conf->uio_driver_name = format (0, "uio_pci_generic%c", 0); + + /* + * Use 1G huge pages if available. + */ + if (!no_huge && !huge_dir) + { + u32 x, *mem_by_socket = 0; + uword c = 0; + u8 use_1g = 1; + u8 use_2m = 1; + u8 less_than_1g = 1; + int rv; + + umount (DEFAULT_HUGE_DIR); + + /* Process "socket-mem" parameter value */ + if (vec_len (socket_mem)) + { + unformat_input_t in; + unformat_init_vector (&in, socket_mem); + while (unformat_check_input (&in) != UNFORMAT_END_OF_INPUT) + { + if (unformat (&in, "%u,", &x)) + ; + else if (unformat (&in, "%u", &x)) + ; + else if (unformat (&in, ",")) + x = 0; + else + break; + + vec_add1 (mem_by_socket, x); + + if (x > 1023) + less_than_1g = 0; + } + /* Note: unformat_free vec_frees(in.buffer), aka socket_mem... 
*/ + unformat_free (&in); + socket_mem = 0; + } + else + { + /* *INDENT-OFF* */ + clib_bitmap_foreach (c, tm->cpu_socket_bitmap, ( + { + vec_validate(mem_by_socket, c); + mem_by_socket[c] = 256; /* default per-socket mem */ + } + )); + /* *INDENT-ON* */ + } + + /* check if available enough 1GB pages for each socket */ + /* *INDENT-OFF* */ + clib_bitmap_foreach (c, tm->cpu_socket_bitmap, ( + { + int pages_avail, page_size, mem; + + vec_validate(mem_by_socket, c); + mem = mem_by_socket[c]; + + page_size = 1024; + pages_avail = vlib_sysfs_get_free_hugepages(c, page_size * 1024); + + if (pages_avail < 0 || page_size * pages_avail < mem) + use_1g = 0; + + page_size = 2; + pages_avail = vlib_sysfs_get_free_hugepages(c, page_size * 1024); + + if (pages_avail < 0 || page_size * pages_avail < mem) + use_2m = 0; + })); + /* *INDENT-ON* */ + + if (mem_by_socket == 0) + { + error = clib_error_return (0, "mem_by_socket NULL"); + goto done; + } + _vec_len (mem_by_socket) = c + 1; + + /* regenerate socket_mem string */ + vec_foreach_index (x, mem_by_socket) + socket_mem = format (socket_mem, "%s%u", + socket_mem ? "," : "", mem_by_socket[x]); + socket_mem = format (socket_mem, "%c", 0); + + vec_free (mem_by_socket); + + rv = mkdir (VPP_RUN_DIR, 0755); + if (rv && errno != EEXIST) + { + error = clib_error_return (0, "mkdir '%s' failed errno %d", + VPP_RUN_DIR, errno); + goto done; + } + + rv = mkdir (DEFAULT_HUGE_DIR, 0755); + if (rv && errno != EEXIST) + { + error = clib_error_return (0, "mkdir '%s' failed errno %d", + DEFAULT_HUGE_DIR, errno); + goto done; + } + + if (use_1g && !(less_than_1g && use_2m)) + { + rv = + mount ("none", DEFAULT_HUGE_DIR, "hugetlbfs", 0, "pagesize=1G"); + } + else if (use_2m) + { + rv = mount ("none", DEFAULT_HUGE_DIR, "hugetlbfs", 0, NULL); + } + else + { + return clib_error_return (0, "not enough free huge pages"); + } + + if (rv) + { + error = clib_error_return (0, "mount failed %d", errno); + goto done; + } + + tmp = format (0, "--huge-dir%c", 0); + vec_add1 (conf->eal_init_args, tmp); + tmp = format (0, "%s%c", DEFAULT_HUGE_DIR, 0); + vec_add1 (conf->eal_init_args, tmp); + if (!file_prefix) + { + tmp = format (0, "--file-prefix%c", 0); + vec_add1 (conf->eal_init_args, tmp); + tmp = format (0, "vpp%c", 0); + vec_add1 (conf->eal_init_args, tmp); + } + } + + vec_free (rte_cmd); + vec_free (ethname); + + if (error) + return error; + + /* I'll bet that -c and -n must be the first and second args... 
*/ + if (!conf->coremask_set_manually) + { + vlib_thread_registration_t *tr; + uword *coremask = 0; + int i; + + /* main thread core */ + coremask = clib_bitmap_set (coremask, tm->main_lcore, 1); + + for (i = 0; i < vec_len (tm->registrations); i++) + { + tr = tm->registrations[i]; + coremask = clib_bitmap_or (coremask, tr->coremask); + } + + vec_insert (conf->eal_init_args, 2, 1); + conf->eal_init_args[1] = (u8 *) "-c"; + tmp = format (0, "%U%c", format_bitmap_hex, coremask, 0); + conf->eal_init_args[2] = tmp; + clib_bitmap_free (coremask); + } + + if (!conf->nchannels_set_manually) + { + vec_insert (conf->eal_init_args, 2, 3); + conf->eal_init_args[3] = (u8 *) "-n"; + tmp = format (0, "%d", conf->nchannels); + conf->eal_init_args[4] = tmp; + } + + if (no_pci == 0 && geteuid () == 0) + dpdk_bind_devices_to_uio (conf); + +#define _(x) \ + if (devconf->x == 0 && conf->default_devconf.x > 0) \ + devconf->x = conf->default_devconf.x ; + + /* *INDENT-OFF* */ + pool_foreach (devconf, conf->dev_confs, ({ + + /* default per-device config items */ + foreach_dpdk_device_config_item + + /* add DPDK EAL whitelist/blacklist entry */ + if (num_whitelisted > 0 && devconf->is_blacklisted == 0) + { + tmp = format (0, "-w%c", 0); + vec_add1 (conf->eal_init_args, tmp); + tmp = format (0, "%U%c", format_vlib_pci_addr, &devconf->pci_addr, 0); + vec_add1 (conf->eal_init_args, tmp); + } + else if (num_whitelisted == 0 && devconf->is_blacklisted != 0) + { + tmp = format (0, "-b%c", 0); + vec_add1 (conf->eal_init_args, tmp); + tmp = format (0, "%U%c", format_vlib_pci_addr, &devconf->pci_addr, 0); + vec_add1 (conf->eal_init_args, tmp); + } + })); + /* *INDENT-ON* */ + +#undef _ + + /* set master-lcore */ + tmp = format (0, "--master-lcore%c", 0); + vec_add1 (conf->eal_init_args, tmp); + tmp = format (0, "%u%c", tm->main_lcore, 0); + vec_add1 (conf->eal_init_args, tmp); + + /* set socket-mem */ + tmp = format (0, "--socket-mem%c", 0); + vec_add1 (conf->eal_init_args, tmp); + tmp = format (0, "%s%c", socket_mem, 0); + vec_add1 (conf->eal_init_args, tmp); + + /* NULL terminate the "argv" vector, in case of stupidity */ + vec_add1 (conf->eal_init_args, 0); + _vec_len (conf->eal_init_args) -= 1; + + /* Set up DPDK eal and packet mbuf pool early. */ + + log_level = (CLIB_DEBUG > 0) ? 
RTE_LOG_DEBUG : RTE_LOG_NOTICE; + + rte_set_log_level (log_level); + + vm = vlib_get_main (); + + /* make copy of args as rte_eal_init tends to mess up with arg array */ + for (i = 1; i < vec_len (conf->eal_init_args); i++) + conf->eal_init_args_str = format (conf->eal_init_args_str, "%s ", + conf->eal_init_args[i]); + + ret = + rte_eal_init (vec_len (conf->eal_init_args), + (char **) conf->eal_init_args); + + /* lazy umount hugepages */ + umount2 (DEFAULT_HUGE_DIR, MNT_DETACH); + + if (ret < 0) + return clib_error_return (0, "rte_eal_init returned %d", ret); + + /* Dump the physical memory layout prior to creating the mbuf_pool */ + fprintf (stdout, "DPDK physical memory layout:\n"); + rte_dump_physmem_layout (stdout); + + /* main thread 1st */ + error = vlib_buffer_pool_create (vm, conf->num_mbufs, rte_socket_id ()); + if (error) + return error; + + for (i = 0; i < RTE_MAX_LCORE; i++) + { + error = vlib_buffer_pool_create (vm, conf->num_mbufs, + rte_lcore_to_socket_id (i)); + if (error) + return error; + } + +done: + return error; +} + +VLIB_CONFIG_FUNCTION (dpdk_config, "dpdk"); + +void +dpdk_update_link_state (dpdk_device_t * xd, f64 now) +{ + vnet_main_t *vnm = vnet_get_main (); + struct rte_eth_link prev_link = xd->link; + u32 hw_flags = 0; + u8 hw_flags_chg = 0; + + /* only update link state for PMD interfaces */ + if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0) + return; + + xd->time_last_link_update = now ? now : xd->time_last_link_update; + memset (&xd->link, 0, sizeof (xd->link)); + rte_eth_link_get_nowait (xd->device_index, &xd->link); + + if (LINK_STATE_ELOGS) + { + vlib_main_t *vm = vlib_get_main (); + ELOG_TYPE_DECLARE (e) = + { + .format = + "update-link-state: sw_if_index %d, admin_up %d," + "old link_state %d new link_state %d",.format_args = "i4i1i1i1",}; + + struct + { + u32 sw_if_index; + u8 admin_up; + u8 old_link_state; + u8 new_link_state; + } *ed; + ed = ELOG_DATA (&vm->elog_main, e); + ed->sw_if_index = xd->vlib_sw_if_index; + ed->admin_up = (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) != 0; + ed->old_link_state = (u8) + vnet_hw_interface_is_link_up (vnm, xd->vlib_hw_if_index); + ed->new_link_state = (u8) xd->link.link_status; + } + + if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) && + ((xd->link.link_status != 0) ^ + vnet_hw_interface_is_link_up (vnm, xd->vlib_hw_if_index))) + { + hw_flags_chg = 1; + hw_flags |= (xd->link.link_status ? 
VNET_HW_INTERFACE_FLAG_LINK_UP : 0); + } + + if (hw_flags_chg || (xd->link.link_duplex != prev_link.link_duplex)) + { + hw_flags_chg = 1; + switch (xd->link.link_duplex) + { + case ETH_LINK_HALF_DUPLEX: + hw_flags |= VNET_HW_INTERFACE_FLAG_HALF_DUPLEX; + break; + case ETH_LINK_FULL_DUPLEX: + hw_flags |= VNET_HW_INTERFACE_FLAG_FULL_DUPLEX; + break; + default: + break; + } + } + if (hw_flags_chg || (xd->link.link_speed != prev_link.link_speed)) + { + hw_flags_chg = 1; + switch (xd->link.link_speed) + { + case ETH_SPEED_NUM_10M: + hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_10M; + break; + case ETH_SPEED_NUM_100M: + hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_100M; + break; + case ETH_SPEED_NUM_1G: + hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_1G; + break; + case ETH_SPEED_NUM_10G: + hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_10G; + break; + case ETH_SPEED_NUM_40G: + hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_40G; + break; + case 0: + break; + default: + clib_warning ("unknown link speed %d", xd->link.link_speed); + break; + } + } + if (hw_flags_chg) + { + if (LINK_STATE_ELOGS) + { + vlib_main_t *vm = vlib_get_main (); + + ELOG_TYPE_DECLARE (e) = + { + .format = + "update-link-state: sw_if_index %d, new flags %d",.format_args + = "i4i4",}; + + struct + { + u32 sw_if_index; + u32 flags; + } *ed; + ed = ELOG_DATA (&vm->elog_main, e); + ed->sw_if_index = xd->vlib_sw_if_index; + ed->flags = hw_flags; + } + vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, hw_flags); + } +} + +static uword +dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) +{ + clib_error_t *error; + vnet_main_t *vnm = vnet_get_main (); + dpdk_main_t *dm = &dpdk_main; + ethernet_main_t *em = ðernet_main; + dpdk_device_t *xd; + vlib_thread_main_t *tm = vlib_get_thread_main (); + int i; + + error = dpdk_lib_init (dm); + + /* + * Turn on the input node if we found some devices to drive + * and we're not running worker threads or i/o threads + */ + + if (error == 0 && vec_len (dm->devices) > 0) + { + if (tm->n_vlib_mains == 1) + vlib_node_set_state (vm, dpdk_input_node.index, + VLIB_NODE_STATE_POLLING); + else + for (i = 0; i < tm->n_vlib_mains; i++) + if (vec_len (dm->devices_by_cpu[i]) > 0) + vlib_node_set_state (vlib_mains[i], dpdk_input_node.index, + VLIB_NODE_STATE_POLLING); + } + + if (error) + clib_error_report (error); + + tm->worker_thread_release = 1; + + f64 now = vlib_time_now (vm); + vec_foreach (xd, dm->devices) + { + dpdk_update_link_state (xd, now); + } + + { + /* + * Extra set up for bond interfaces: + * 1. Setup MACs for bond interfaces and their slave links which was set + * in dpdk_port_setup() but needs to be done again here to take effect. + * 2. Set up info for bond interface related CLI support. 
+ */ + int nports = rte_eth_dev_count (); + if (nports > 0) + { + for (i = 0; i < nports; i++) + { + struct rte_eth_dev_info dev_info; + rte_eth_dev_info_get (i, &dev_info); + if (!dev_info.driver_name) + dev_info.driver_name = dev_info.pci_dev->driver->driver.name; + + ASSERT (dev_info.driver_name); + if (strncmp (dev_info.driver_name, "rte_bond_pmd", 12) == 0) + { + u8 addr[6]; + u8 slink[16]; + int nlink = rte_eth_bond_slaves_get (i, slink, 16); + if (nlink > 0) + { + vnet_hw_interface_t *bhi; + ethernet_interface_t *bei; + int rv; + + /* Get MAC of 1st slave link */ + rte_eth_macaddr_get (slink[0], + (struct ether_addr *) addr); + /* Set MAC of bounded interface to that of 1st slave link */ + rv = + rte_eth_bond_mac_address_set (i, + (struct ether_addr *) + addr); + if (rv < 0) + clib_warning ("Failed to set MAC address"); + + /* Populate MAC of bonded interface in VPP hw tables */ + bhi = + vnet_get_hw_interface (vnm, + dm->devices[i].vlib_hw_if_index); + bei = + pool_elt_at_index (em->interfaces, bhi->hw_instance); + clib_memcpy (bhi->hw_address, addr, 6); + clib_memcpy (bei->address, addr, 6); + /* Init l3 packet size allowed on bonded interface */ + bhi->max_packet_bytes = ETHERNET_MAX_PACKET_BYTES; + bhi->max_l3_packet_bytes[VLIB_RX] = + bhi->max_l3_packet_bytes[VLIB_TX] = + ETHERNET_MAX_PACKET_BYTES - sizeof (ethernet_header_t); + while (nlink >= 1) + { /* for all slave links */ + int slave = slink[--nlink]; + dpdk_device_t *sdev = &dm->devices[slave]; + vnet_hw_interface_t *shi; + vnet_sw_interface_t *ssi; + /* Add MAC to all slave links except the first one */ + if (nlink) + rte_eth_dev_mac_addr_add (slave, + (struct ether_addr *) + addr, 0); + /* Set slaves bitmap for bonded interface */ + bhi->bond_info = + clib_bitmap_set (bhi->bond_info, + sdev->vlib_hw_if_index, 1); + /* Set slave link flags on slave interface */ + shi = + vnet_get_hw_interface (vnm, sdev->vlib_hw_if_index); + ssi = + vnet_get_sw_interface (vnm, sdev->vlib_sw_if_index); + shi->bond_info = VNET_HW_INTERFACE_BOND_INFO_SLAVE; + ssi->flags |= VNET_SW_INTERFACE_FLAG_BOND_SLAVE; + + /* Set l3 packet size allowed as the lowest of slave */ + if (bhi->max_l3_packet_bytes[VLIB_RX] > + shi->max_l3_packet_bytes[VLIB_RX]) + bhi->max_l3_packet_bytes[VLIB_RX] = + bhi->max_l3_packet_bytes[VLIB_TX] = + shi->max_l3_packet_bytes[VLIB_RX]; + + /* Set max packet size allowed as the lowest of slave */ + if (bhi->max_packet_bytes > shi->max_packet_bytes) + bhi->max_packet_bytes = shi->max_packet_bytes; + } + } + } + } + } + } + + while (1) + { + /* + * check each time through the loop in case intervals are changed + */ + f64 min_wait = dm->link_state_poll_interval < dm->stat_poll_interval ? 
+ dm->link_state_poll_interval : dm->stat_poll_interval; + + vlib_process_wait_for_event_or_clock (vm, min_wait); + + if (dm->admin_up_down_in_progress) + /* skip the poll if an admin up down is in progress (on any interface) */ + continue; + + vec_foreach (xd, dm->devices) + { + f64 now = vlib_time_now (vm); + if ((now - xd->time_last_stats_update) >= dm->stat_poll_interval) + dpdk_update_counters (xd, now); + if ((now - xd->time_last_link_update) >= dm->link_state_poll_interval) + dpdk_update_link_state (xd, now); + + } + } + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (dpdk_process_node,static) = { + .function = dpdk_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "dpdk-process", + .process_log2_n_stack_bytes = 17, +}; +/* *INDENT-ON* */ + +int +dpdk_set_stat_poll_interval (f64 interval) +{ + if (interval < DPDK_MIN_STATS_POLL_INTERVAL) + return (VNET_API_ERROR_INVALID_VALUE); + + dpdk_main.stat_poll_interval = interval; + + return 0; +} + +int +dpdk_set_link_state_poll_interval (f64 interval) +{ + if (interval < DPDK_MIN_LINK_POLL_INTERVAL) + return (VNET_API_ERROR_INVALID_VALUE); + + dpdk_main.link_state_poll_interval = interval; + + return 0; +} + +clib_error_t * +dpdk_init (vlib_main_t * vm) +{ + dpdk_main_t *dm = &dpdk_main; + vlib_node_t *ei; + clib_error_t *error = 0; + vlib_thread_main_t *tm = vlib_get_thread_main (); + + /* verify that structs are cacheline aligned */ + STATIC_ASSERT (offsetof (dpdk_device_t, cacheline0) == 0, + "Cache line marker must be 1st element in dpdk_device_t"); + STATIC_ASSERT (offsetof (dpdk_device_t, cacheline1) == + CLIB_CACHE_LINE_BYTES, + "Data in cache line 0 is bigger than cache line size"); + STATIC_ASSERT (offsetof (frame_queue_trace_t, cacheline0) == 0, + "Cache line marker must be 1st element in frame_queue_trace_t"); + + u8 *name; + name = format (0, "dpdk_%08x%c", api_version, 0); + + /* Ask for a correctly-sized block of API message decode slots */ + dm->msg_id_base = vl_msg_api_get_msg_ids + ((char *) name, VL_MSG_FIRST_AVAILABLE); + vec_free (name); + + dm->vlib_main = vm; + dm->vnet_main = vnet_get_main (); + dm->conf = &dpdk_config_main; + + error = dpdk_plugin_api_hookup (vm); + + /* Add our API messages to the global name_crc hash table */ + setup_message_id_table (dm, &api_main); + +// TODO +// plugin_custom_dump_configure (dm); + + ei = vlib_get_node_by_name (vm, (u8 *) "ethernet-input"); + if (ei == 0) + return clib_error_return (0, "ethernet-input node AWOL"); + + dm->ethernet_input_node_index = ei->index; + + dm->conf->nchannels = 4; + dm->conf->num_mbufs = dm->conf->num_mbufs ? dm->conf->num_mbufs : NB_MBUF; + vec_add1 (dm->conf->eal_init_args, (u8 *) "vnet"); + + dm->dpdk_device_by_kni_port_id = hash_create (0, sizeof (uword)); + dm->vu_sw_if_index_by_listener_fd = hash_create (0, sizeof (uword)); + dm->vu_sw_if_index_by_sock_fd = hash_create (0, sizeof (uword)); + + /* $$$ use n_thread_stacks since it's known-good at this point */ + vec_validate (dm->recycle, tm->n_thread_stacks - 1); + + /* Default vlib_buffer_t flags, DISABLES tcp/udp checksumming... 
*/ + dm->buffer_flags_template = + (VLIB_BUFFER_TOTAL_LENGTH_VALID | VLIB_BUFFER_EXT_HDR_VALID + | IP_BUFFER_L4_CHECKSUM_COMPUTED | IP_BUFFER_L4_CHECKSUM_CORRECT); + + dm->stat_poll_interval = DPDK_STATS_POLL_INTERVAL; + dm->link_state_poll_interval = DPDK_LINK_POLL_INTERVAL; + + /* init CLI */ + if ((error = vlib_call_init_function (vm, dpdk_cli_init))) + return error; + + return error; +} + +VLIB_INIT_FUNCTION (dpdk_init); + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/ipsec/cli.c b/src/plugins/dpdk/ipsec/cli.c new file mode 100644 index 00000000..40cee39b --- /dev/null +++ b/src/plugins/dpdk/ipsec/cli.c @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2016 Intel and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +static void +dpdk_ipsec_show_mapping (vlib_main_t * vm, u16 detail_display) +{ + dpdk_config_main_t *conf = &dpdk_config_main; + dpdk_crypto_main_t *dcm = &dpdk_crypto_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + u32 i, skip_master; + + if (!conf->cryptodev) + { + vlib_cli_output (vm, "DPDK Cryptodev support is disabled\n"); + return; + } + + if (detail_display) + vlib_cli_output (vm, "worker\t%10s\t%15s\tdir\tdev\tqp\n", + "cipher", "auth"); + else + vlib_cli_output (vm, "worker\tcrypto device id(type)\n"); + + skip_master = vlib_num_workers () > 0; + + for (i = 0; i < tm->n_vlib_mains; i++) + { + uword key, data; + u32 cpu_index = vlib_mains[i]->cpu_index; + crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index]; + u8 *s = 0; + + if (skip_master) + { + skip_master = 0; + continue; + } + + if (!detail_display) + { + i32 last_cdev = -1; + crypto_qp_data_t *qpd; + + s = format (s, "%u\t", cpu_index); + + /* *INDENT-OFF* */ + vec_foreach (qpd, cwm->qp_data) + { + u32 dev_id = qpd->dev_id; + + if ((u16) last_cdev != dev_id) + { + struct rte_cryptodev_info cdev_info; + + rte_cryptodev_info_get (dev_id, &cdev_info); + + s = format(s, "%u(%s)\t", dev_id, cdev_info.feature_flags & + RTE_CRYPTODEV_FF_HW_ACCELERATED ? "HW" : "SW"); + } + last_cdev = dev_id; + } + /* *INDENT-ON* */ + vlib_cli_output (vm, "%s", s); + } + else + { + char cipher_str[15], auth_str[15]; + struct rte_cryptodev_capabilities cap; + crypto_worker_qp_key_t *p_key = (crypto_worker_qp_key_t *) & key; + /* *INDENT-OFF* */ + hash_foreach (key, data, cwm->algo_qp_map, + ({ + cap.op = RTE_CRYPTO_OP_TYPE_SYMMETRIC; + cap.sym.xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER; + cap.sym.cipher.algo = p_key->cipher_algo; + check_algo_is_supported (&cap, cipher_str); + cap.op = RTE_CRYPTO_OP_TYPE_SYMMETRIC; + cap.sym.xform_type = RTE_CRYPTO_SYM_XFORM_AUTH; + cap.sym.auth.algo = p_key->auth_algo; + check_algo_is_supported (&cap, auth_str); + vlib_cli_output (vm, "%u\t%10s\t%15s\t%3s\t%u\t%u\n", + vlib_mains[i]->cpu_index, cipher_str, auth_str, + p_key->is_outbound ? 
"out" : "in", + cwm->qp_data[data].dev_id, + cwm->qp_data[data].qp_id); + })); + /* *INDENT-ON* */ + } + } +} + +static clib_error_t * +lcore_cryptodev_map_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + u16 detail = 0; + clib_error_t *error = NULL; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "verbose")) + detail = 1; + else + { + error = clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + goto done; + } + } + + dpdk_ipsec_show_mapping (vm, detail); + +done: + unformat_free (line_input); + + return error; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (lcore_cryptodev_map, static) = { + .path = "show crypto device mapping", + .short_help = + "show cryptodev device mapping ", + .function = lcore_cryptodev_map_fn, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/ipsec/crypto_node.c b/src/plugins/dpdk/ipsec/crypto_node.c new file mode 100644 index 00000000..dc3452b2 --- /dev/null +++ b/src/plugins/dpdk/ipsec/crypto_node.c @@ -0,0 +1,215 @@ +/* + *------------------------------------------------------------------ + * crypto_node.c - DPDK Cryptodev input node + * + * Copyright (c) 2016 Intel and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#include +#include +#include +#include + +#include +#include +#include + +#define foreach_dpdk_crypto_input_next \ + _(DROP, "error-drop") \ + _(ENCRYPT_POST, "dpdk-esp-encrypt-post") \ + _(DECRYPT_POST, "dpdk-esp-decrypt-post") + +typedef enum +{ +#define _(f,s) DPDK_CRYPTO_INPUT_NEXT_##f, + foreach_dpdk_crypto_input_next +#undef _ + DPDK_CRYPTO_INPUT_N_NEXT, +} dpdk_crypto_input_next_t; + +#define foreach_dpdk_crypto_input_error \ + _(DQ_COPS, "Crypto ops dequeued") \ + _(COP_FAILED, "Crypto op failed") + +typedef enum +{ +#define _(f,s) DPDK_CRYPTO_INPUT_ERROR_##f, + foreach_dpdk_crypto_input_error +#undef _ + DPDK_CRYPTO_INPUT_N_ERROR, +} dpdk_crypto_input_error_t; + +static char *dpdk_crypto_input_error_strings[] = { +#define _(n, s) s, + foreach_dpdk_crypto_input_error +#undef _ +}; + +vlib_node_registration_t dpdk_crypto_input_node; + +typedef struct +{ + u32 cdev; + u32 qp; + u32 status; + u32 sa_idx; + u32 next_index; +} dpdk_crypto_input_trace_t; + +static u8 * +format_dpdk_crypto_input_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + dpdk_crypto_input_trace_t *t = va_arg (*args, dpdk_crypto_input_trace_t *); + + s = format (s, "dpdk_crypto: cryptodev-id %u queue-pair %u next-index %d", + t->cdev, t->qp, t->next_index); + + s = format (s, " status %u sa-idx %u\n", t->status, t->sa_idx); + + return s; +} + +static_always_inline u32 +dpdk_crypto_dequeue (vlib_main_t * vm, vlib_node_runtime_t * node, + crypto_qp_data_t * qpd) +{ + u32 n_deq, *to_next = 0, next_index, n_cops, def_next_index; + struct rte_crypto_op **cops = qpd->cops; + + if (qpd->inflights == 0) + return 0; + + if (qpd->is_outbound) + def_next_index = DPDK_CRYPTO_INPUT_NEXT_ENCRYPT_POST; + else + def_next_index = DPDK_CRYPTO_INPUT_NEXT_DECRYPT_POST; + + n_cops = rte_cryptodev_dequeue_burst (qpd->dev_id, qpd->qp_id, + cops, VLIB_FRAME_SIZE); + n_deq = n_cops; + next_index = def_next_index; + + qpd->inflights -= n_cops; + ASSERT (qpd->inflights >= 0); + + while (n_cops > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_cops > 0 && n_left_to_next > 0) + { + u32 bi0, next0; + vlib_buffer_t *b0 = 0; + struct rte_crypto_op *cop; + struct rte_crypto_sym_op *sym_cop; + + cop = cops[0]; + cops += 1; + n_cops -= 1; + n_left_to_next -= 1; + + next0 = def_next_index; + + if (PREDICT_FALSE (cop->status != RTE_CRYPTO_OP_STATUS_SUCCESS)) + { + next0 = DPDK_CRYPTO_INPUT_NEXT_DROP; + vlib_node_increment_counter (vm, dpdk_crypto_input_node.index, + DPDK_CRYPTO_INPUT_ERROR_COP_FAILED, + 1); + } + cop->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED; + + sym_cop = (struct rte_crypto_sym_op *) (cop + 1); + b0 = vlib_buffer_from_rte_mbuf (sym_cop->m_src); + bi0 = vlib_get_buffer_index (vm, b0); + + to_next[0] = bi0; + to_next += 1; + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + vlib_trace_next_frame (vm, node, next0); + dpdk_crypto_input_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->cdev = qpd->dev_id; + tr->qp = qpd->qp_id; + tr->status = cop->status; + tr->next_index = next0; + tr->sa_idx = vnet_buffer (b0)->ipsec.sad_index; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + crypto_free_cop (qpd, qpd->cops, n_deq); + + 
vlib_node_increment_counter (vm, dpdk_crypto_input_node.index, + DPDK_CRYPTO_INPUT_ERROR_DQ_COPS, n_deq); + return n_deq; +} + +static uword +dpdk_crypto_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 cpu_index = os_get_cpu_number (); + dpdk_crypto_main_t *dcm = &dpdk_crypto_main; + crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index]; + crypto_qp_data_t *qpd; + u32 n_deq = 0; + + /* *INDENT-OFF* */ + vec_foreach (qpd, cwm->qp_data) + n_deq += dpdk_crypto_dequeue(vm, node, qpd); + /* *INDENT-ON* */ + + return n_deq; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (dpdk_crypto_input_node) = +{ + .function = dpdk_crypto_input_fn, + .name = "dpdk-crypto-input", + .format_trace = format_dpdk_crypto_input_trace, + .type = VLIB_NODE_TYPE_INPUT, + .state = VLIB_NODE_STATE_DISABLED, + .n_errors = DPDK_CRYPTO_INPUT_N_ERROR, + .error_strings = dpdk_crypto_input_error_strings, + .n_next_nodes = DPDK_CRYPTO_INPUT_N_NEXT, + .next_nodes = + { +#define _(s,n) [DPDK_CRYPTO_INPUT_NEXT_##s] = n, + foreach_dpdk_crypto_input_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (dpdk_crypto_input_node, dpdk_crypto_input_fn) +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/ipsec/dir.dox b/src/plugins/dpdk/ipsec/dir.dox new file mode 100644 index 00000000..ffebfc4d --- /dev/null +++ b/src/plugins/dpdk/ipsec/dir.dox @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2016 Intel and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + @dir vnet/vnet/devices/dpdk/ipsec + @brief IPSec ESP encrypt/decrypt using DPDK Cryptodev API +*/ diff --git a/src/plugins/dpdk/ipsec/dpdk_crypto_ipsec_doc.md b/src/plugins/dpdk/ipsec/dpdk_crypto_ipsec_doc.md new file mode 100644 index 00000000..fed2fe0e --- /dev/null +++ b/src/plugins/dpdk/ipsec/dpdk_crypto_ipsec_doc.md @@ -0,0 +1,86 @@ +# VPP IPSec implementation using DPDK Cryptodev API {#dpdk_crypto_ipsec_doc} + +This document is meant to contain all related information about implementation and usability. + + +## VPP IPsec with DPDK Cryptodev + +DPDK Cryptodev is an asynchronous crypto API that supports both Hardware and Software implementations (for more details refer to [DPDK Cryptography Device Library documentation](http://dpdk.org/doc/guides/prog_guide/cryptodev_lib.html)). + +When DPDK support is enabled and there are enough Cryptodev resources for all workers, the node graph is reconfigured by adding and changing default next nodes. + +The following nodes are added: +* dpdk-crypto-input : polling input node, basically dequeuing from crypto devices. +* dpdk-esp-encrypt : internal node. +* dpdk-esp-decrypt : internal node. +* dpdk-esp-encrypt-post : internal node. +* dpdk-esp-decrypt-post : internal node. 
+
+Set new default next nodes:
+* for esp encryption: esp-encrypt -> dpdk-esp-encrypt
+* for esp decryption: esp-decrypt -> dpdk-esp-decrypt
+
+
+### How to enable VPP IPSec with DPDK Cryptodev support
+
+DPDK Cryptodev is supported in DPDK enabled VPP.
+By default, only HW Cryptodev is supported, but it needs to be explicitly enabled with the following config option:
+
+```
+dpdk {
+  enable-cryptodev
+}
+```
+
+To enable SW Cryptodev support (AESNI-MB-PMD and GCM-PMD), the following build option is also needed:
+
+    vpp_uses_dpdk_cryptodev_sw=yes
+
+There are a couple of ways to achieve this:
+* uncomment/add it in the platforms config (i.e. build-data/platforms/vpp.mk)
+* set the option when building vpp (i.e. make vpp_uses_dpdk_cryptodev_sw=yes build-release)
+
+Enabling SW Cryptodev support means that the crypto libraries required by those SW Cryptodev PMDs must be pre-built.
+
+
+### Crypto Resources allocation
+
+VPP allocates crypto resources based on a best-effort approach:
+* first allocate Hardware crypto resources, then Software.
+* if there are not enough crypto resources for all workers, the node graph is not modified and the default OpenSSL-based VPP IPsec implementation is used. The following message is displayed:
+
+      0: dpdk_ipsec_init: not enough cryptodevs for ipsec
+
+
+### Configuration example
+
+To enable DPDK Cryptodev, the user just needs to provide the startup.conf option
+mentioned previously.
+
+Example startup.conf:
+
+```
+dpdk {
+  socket-mem 1024,1024
+  num-mbufs 131072
+  dev 0000:81:00.0
+  dev 0000:81:00.1
+  enable-cryptodev
+  dev 0000:85:01.0
+  dev 0000:85:01.1
+  vdev cryptodev_aesni_mb_pmd,socket_id=1
+  vdev cryptodev_aesni_mb_pmd,socket_id=1
+}
+```
+
+In the above configuration:
+* 0000:85:01.0 and 0000:85:01.1 are crypto BDFs; they require the same driver binding as DPDK Ethernet devices, but they do not support any extra configuration options.
+* Two AESNI-MB Software Cryptodev PMDs are created in NUMA node 1.
+
+For further details refer to the [DPDK Crypto Device Driver documentation](http://dpdk.org/doc/guides/cryptodevs/index.html).
+
+### Operational data
+
+The following CLI command displays the Cryptodev/Worker mapping:
+
+    show crypto device mapping [verbose]
diff --git a/src/plugins/dpdk/ipsec/esp.h b/src/plugins/dpdk/ipsec/esp.h
new file mode 100644
index 00000000..320295b1
--- /dev/null
+++ b/src/plugins/dpdk/ipsec/esp.h
@@ -0,0 +1,249 @@
+/*
+ * Copyright (c) 2016 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +#ifndef __DPDK_ESP_H__ +#define __DPDK_ESP_H__ + +#include +#include +#include + +typedef struct +{ + enum rte_crypto_cipher_algorithm algo; + u8 key_len; + u8 iv_len; +} dpdk_esp_crypto_alg_t; + +typedef struct +{ + enum rte_crypto_auth_algorithm algo; + u8 trunc_size; +} dpdk_esp_integ_alg_t; + +typedef struct +{ + dpdk_esp_crypto_alg_t *esp_crypto_algs; + dpdk_esp_integ_alg_t *esp_integ_algs; +} dpdk_esp_main_t; + +dpdk_esp_main_t dpdk_esp_main; + +static_always_inline void +dpdk_esp_init () +{ + dpdk_esp_main_t *em = &dpdk_esp_main; + dpdk_esp_integ_alg_t *i; + dpdk_esp_crypto_alg_t *c; + + vec_validate (em->esp_crypto_algs, IPSEC_CRYPTO_N_ALG - 1); + + c = &em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_CBC_128]; + c->algo = RTE_CRYPTO_CIPHER_AES_CBC; + c->key_len = 16; + c->iv_len = 16; + + c = &em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_CBC_192]; + c->algo = RTE_CRYPTO_CIPHER_AES_CBC; + c->key_len = 24; + c->iv_len = 16; + + c = &em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_CBC_256]; + c->algo = RTE_CRYPTO_CIPHER_AES_CBC; + c->key_len = 32; + c->iv_len = 16; + + c = &em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_GCM_128]; + c->algo = RTE_CRYPTO_CIPHER_AES_GCM; + c->key_len = 16; + c->iv_len = 8; + + vec_validate (em->esp_integ_algs, IPSEC_INTEG_N_ALG - 1); + + i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA1_96]; + i->algo = RTE_CRYPTO_AUTH_SHA1_HMAC; + i->trunc_size = 12; + + i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_256_96]; + i->algo = RTE_CRYPTO_AUTH_SHA256_HMAC; + i->trunc_size = 12; + + i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_256_128]; + i->algo = RTE_CRYPTO_AUTH_SHA256_HMAC; + i->trunc_size = 16; + + i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_384_192]; + i->algo = RTE_CRYPTO_AUTH_SHA384_HMAC; + i->trunc_size = 24; + + i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_512_256]; + i->algo = RTE_CRYPTO_AUTH_SHA512_HMAC; + i->trunc_size = 32; + + i = &em->esp_integ_algs[IPSEC_INTEG_ALG_AES_GCM_128]; + i->algo = RTE_CRYPTO_AUTH_AES_GCM; + i->trunc_size = 16; +} + +static_always_inline int +translate_crypto_algo (ipsec_crypto_alg_t crypto_algo, + struct rte_crypto_sym_xform *cipher_xform) +{ + switch (crypto_algo) + { + case IPSEC_CRYPTO_ALG_NONE: + cipher_xform->cipher.algo = RTE_CRYPTO_CIPHER_NULL; + break; + case IPSEC_CRYPTO_ALG_AES_CBC_128: + case IPSEC_CRYPTO_ALG_AES_CBC_192: + case IPSEC_CRYPTO_ALG_AES_CBC_256: + cipher_xform->cipher.algo = RTE_CRYPTO_CIPHER_AES_CBC; + break; + case IPSEC_CRYPTO_ALG_AES_GCM_128: + cipher_xform->cipher.algo = RTE_CRYPTO_CIPHER_AES_GCM; + break; + default: + return -1; + } + + cipher_xform->type = RTE_CRYPTO_SYM_XFORM_CIPHER; + + return 0; +} + +static_always_inline int +translate_integ_algo (ipsec_integ_alg_t integ_alg, + struct rte_crypto_sym_xform *auth_xform, int use_esn) +{ + switch (integ_alg) + { + case IPSEC_INTEG_ALG_NONE: + auth_xform->auth.algo = RTE_CRYPTO_AUTH_NULL; + auth_xform->auth.digest_length = 0; + break; + case IPSEC_INTEG_ALG_SHA1_96: + auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA1_HMAC; + auth_xform->auth.digest_length = 12; + break; + case IPSEC_INTEG_ALG_SHA_256_96: + auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA256_HMAC; + auth_xform->auth.digest_length = 12; + break; + case IPSEC_INTEG_ALG_SHA_256_128: + auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA256_HMAC; + auth_xform->auth.digest_length = 16; + break; + case IPSEC_INTEG_ALG_SHA_384_192: + auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA384_HMAC; + auth_xform->auth.digest_length = 24; + break; + case IPSEC_INTEG_ALG_SHA_512_256: + auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA512_HMAC; + 
auth_xform->auth.digest_length = 32; + break; + case IPSEC_INTEG_ALG_AES_GCM_128: + auth_xform->auth.algo = RTE_CRYPTO_AUTH_AES_GCM; + auth_xform->auth.digest_length = 16; + auth_xform->auth.add_auth_data_length = use_esn ? 12 : 8; + break; + default: + return -1; + } + + auth_xform->type = RTE_CRYPTO_SYM_XFORM_AUTH; + + return 0; +} + +static_always_inline int +create_sym_sess (ipsec_sa_t * sa, crypto_sa_session_t * sa_sess, + u8 is_outbound) +{ + u32 cpu_index = os_get_cpu_number (); + dpdk_crypto_main_t *dcm = &dpdk_crypto_main; + crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index]; + struct rte_crypto_sym_xform cipher_xform = { 0 }; + struct rte_crypto_sym_xform auth_xform = { 0 }; + struct rte_crypto_sym_xform *xfs; + uword key = 0, *data; + crypto_worker_qp_key_t *p_key = (crypto_worker_qp_key_t *) & key; + + if (sa->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) + { + sa->crypto_key_len -= 4; + clib_memcpy (&sa->salt, &sa->crypto_key[sa->crypto_key_len], 4); + } + else + { + u32 seed = (u32) clib_cpu_time_now (); + sa->salt = random_u32 (&seed); + } + + cipher_xform.type = RTE_CRYPTO_SYM_XFORM_CIPHER; + cipher_xform.cipher.key.data = sa->crypto_key; + cipher_xform.cipher.key.length = sa->crypto_key_len; + + auth_xform.type = RTE_CRYPTO_SYM_XFORM_AUTH; + auth_xform.auth.key.data = sa->integ_key; + auth_xform.auth.key.length = sa->integ_key_len; + + if (translate_crypto_algo (sa->crypto_alg, &cipher_xform) < 0) + return -1; + p_key->cipher_algo = cipher_xform.cipher.algo; + + if (translate_integ_algo (sa->integ_alg, &auth_xform, sa->use_esn) < 0) + return -1; + p_key->auth_algo = auth_xform.auth.algo; + + if (is_outbound) + { + cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_ENCRYPT; + auth_xform.auth.op = RTE_CRYPTO_AUTH_OP_GENERATE; + cipher_xform.next = &auth_xform; + xfs = &cipher_xform; + } + else + { + cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_DECRYPT; + auth_xform.auth.op = RTE_CRYPTO_AUTH_OP_VERIFY; + auth_xform.next = &cipher_xform; + xfs = &auth_xform; + } + + p_key->is_outbound = is_outbound; + + data = hash_get (cwm->algo_qp_map, key); + if (!data) + return -1; + + sa_sess->sess = + rte_cryptodev_sym_session_create (cwm->qp_data[*data].dev_id, xfs); + + if (!sa_sess->sess) + return -1; + + sa_sess->qp_index = (u8) * data; + + return 0; +} + +#endif /* __DPDK_ESP_H__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/ipsec/esp_decrypt.c b/src/plugins/dpdk/ipsec/esp_decrypt.c new file mode 100644 index 00000000..286e03f8 --- /dev/null +++ b/src/plugins/dpdk/ipsec/esp_decrypt.c @@ -0,0 +1,594 @@ +/* + * esp_decrypt.c : IPSec ESP Decrypt node using DPDK Cryptodev + * + * Copyright (c) 2016 Intel and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +#define foreach_esp_decrypt_next \ +_(DROP, "error-drop") \ +_(IP4_INPUT, "ip4-input") \ +_(IP6_INPUT, "ip6-input") + +#define _(v, s) ESP_DECRYPT_NEXT_##v, +typedef enum { + foreach_esp_decrypt_next +#undef _ + ESP_DECRYPT_N_NEXT, +} esp_decrypt_next_t; + +#define foreach_esp_decrypt_error \ + _(RX_PKTS, "ESP pkts received") \ + _(DECRYPTION_FAILED, "ESP decryption failed") \ + _(REPLAY, "SA replayed packet") \ + _(NOT_IP, "Not IP packet (dropped)") \ + _(ENQ_FAIL, "Enqueue failed (buffer full)") \ + _(NO_CRYPTODEV, "Cryptodev not configured") \ + _(BAD_LEN, "Invalid ciphertext length") \ + _(UNSUPPORTED, "Cipher/Auth not supported") + + +typedef enum { +#define _(sym,str) ESP_DECRYPT_ERROR_##sym, + foreach_esp_decrypt_error +#undef _ + ESP_DECRYPT_N_ERROR, +} esp_decrypt_error_t; + +static char * esp_decrypt_error_strings[] = { +#define _(sym,string) string, + foreach_esp_decrypt_error +#undef _ +}; + +vlib_node_registration_t dpdk_esp_decrypt_node; + +typedef struct { + ipsec_crypto_alg_t crypto_alg; + ipsec_integ_alg_t integ_alg; +} esp_decrypt_trace_t; + +/* packet trace format function */ +static u8 * format_esp_decrypt_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + esp_decrypt_trace_t * t = va_arg (*args, esp_decrypt_trace_t *); + + s = format (s, "esp: crypto %U integrity %U", + format_ipsec_crypto_alg, t->crypto_alg, + format_ipsec_integ_alg, t->integ_alg); + return s; +} + +static uword +dpdk_esp_decrypt_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, *from, *to_next, next_index; + ipsec_main_t *im = &ipsec_main; + u32 cpu_index = os_get_cpu_number(); + dpdk_crypto_main_t * dcm = &dpdk_crypto_main; + dpdk_esp_main_t * em = &dpdk_esp_main; + u32 i; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + if (PREDICT_FALSE(!dcm->workers_main)) + { + vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, + ESP_DECRYPT_ERROR_NO_CRYPTODEV, n_left_from); + vlib_buffer_free(vm, from, n_left_from); + return n_left_from; + } + + crypto_worker_main_t *cwm = vec_elt_at_index(dcm->workers_main, cpu_index); + u32 n_qps = vec_len(cwm->qp_data); + struct rte_crypto_op ** cops_to_enq[n_qps]; + u32 n_cop_qp[n_qps], * bi_to_enq[n_qps]; + + for (i = 0; i < n_qps; i++) + { + bi_to_enq[i] = cwm->qp_data[i].bi; + cops_to_enq[i] = cwm->qp_data[i].cops; + } + + memset(n_cop_qp, 0, n_qps * sizeof(u32)); + + crypto_alloc_cops(); + + next_index = ESP_DECRYPT_NEXT_DROP; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0, sa_index0 = ~0, seq, icv_size, iv_size; + vlib_buffer_t * b0; + esp_header_t * esp0; + ipsec_sa_t * sa0; + struct rte_mbuf * mb0 = 0; + const int BLOCK_SIZE = 16; + crypto_sa_session_t * sa_sess; + void * sess; + u16 qp_index; + struct rte_crypto_op * cop = 0; + + bi0 = from[0]; + from += 1; + n_left_from -= 1; + + b0 = vlib_get_buffer (vm, bi0); + esp0 = vlib_buffer_get_current (b0); + + sa_index0 = vnet_buffer(b0)->ipsec.sad_index; + sa0 = pool_elt_at_index (im->sad, sa_index0); + + seq = clib_host_to_net_u32(esp0->seq); + + /* anti-replay check */ + if (sa0->use_anti_replay) + { + int rv = 0; + + if (PREDICT_TRUE(sa0->use_esn)) + rv = 
esp_replay_check_esn(sa0, seq); + else + rv = esp_replay_check(sa0, seq); + + if (PREDICT_FALSE(rv)) + { + clib_warning ("anti-replay SPI %u seq %u", sa0->spi, seq); + vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, + ESP_DECRYPT_ERROR_REPLAY, 1); + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + goto trace; + } + } + + sa0->total_data_size += b0->current_length; + + if (PREDICT_FALSE(sa0->integ_alg == IPSEC_INTEG_ALG_NONE) || + PREDICT_FALSE(sa0->crypto_alg == IPSEC_CRYPTO_ALG_NONE)) + { + clib_warning ("SPI %u : only cipher + auth supported", sa0->spi); + vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, + ESP_DECRYPT_ERROR_UNSUPPORTED, 1); + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + goto trace; + } + + sa_sess = pool_elt_at_index(cwm->sa_sess_d[0], sa_index0); + + if (PREDICT_FALSE(!sa_sess->sess)) + { + int ret = create_sym_sess(sa0, sa_sess, 0); + + if (PREDICT_FALSE (ret)) + { + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + goto trace; + } + } + + sess = sa_sess->sess; + qp_index = sa_sess->qp_index; + + ASSERT (vec_len (vec_elt (cwm->qp_data, qp_index).free_cops) > 0); + cop = vec_pop (vec_elt (cwm->qp_data, qp_index).free_cops); + ASSERT (cop->status == RTE_CRYPTO_OP_STATUS_NOT_PROCESSED); + + cops_to_enq[qp_index][0] = cop; + cops_to_enq[qp_index] += 1; + n_cop_qp[qp_index] += 1; + bi_to_enq[qp_index][0] = bi0; + bi_to_enq[qp_index] += 1; + + rte_crypto_op_attach_sym_session(cop, sess); + + icv_size = em->esp_integ_algs[sa0->integ_alg].trunc_size; + iv_size = em->esp_crypto_algs[sa0->crypto_alg].iv_len; + + /* Convert vlib buffer to mbuf */ + mb0 = rte_mbuf_from_vlib_buffer(b0); + mb0->data_len = b0->current_length; + mb0->pkt_len = b0->current_length; + mb0->data_off = RTE_PKTMBUF_HEADROOM + b0->current_data; + + /* Outer IP header has already been stripped */ + u16 payload_len = rte_pktmbuf_pkt_len(mb0) - sizeof (esp_header_t) - + iv_size - icv_size; + + if ((payload_len & (BLOCK_SIZE - 1)) || (payload_len <= 0)) + { + clib_warning ("payload %u not multiple of %d\n", + payload_len, BLOCK_SIZE); + vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, + ESP_DECRYPT_ERROR_BAD_LEN, 1); + vec_add (vec_elt (cwm->qp_data, qp_index).free_cops, &cop, 1); + bi_to_enq[qp_index] -= 1; + cops_to_enq[qp_index] -= 1; + n_cop_qp[qp_index] -= 1; + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + goto trace; + } + + struct rte_crypto_sym_op *sym_cop = (struct rte_crypto_sym_op *)(cop + 1); + + sym_cop->m_src = mb0; + sym_cop->cipher.data.offset = sizeof (esp_header_t) + iv_size; + sym_cop->cipher.data.length = payload_len; + + u8 *iv = rte_pktmbuf_mtod_offset(mb0, void*, sizeof (esp_header_t)); + dpdk_cop_priv_t * priv = (dpdk_cop_priv_t *)(sym_cop + 1); + + if (sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) + { + dpdk_gcm_cnt_blk *icb = &priv->cb; + icb->salt = sa0->salt; + clib_memcpy(icb->iv, iv, 8); + icb->cnt = clib_host_to_net_u32(1); + sym_cop->cipher.iv.data = (u8 *)icb; + sym_cop->cipher.iv.phys_addr = cop->phys_addr + + (uintptr_t)icb - (uintptr_t)cop; + sym_cop->cipher.iv.length = 16; + + u8 *aad = priv->aad; + clib_memcpy(aad, iv - sizeof(esp_header_t), 8); + sym_cop->auth.aad.data = aad; + sym_cop->auth.aad.phys_addr = cop->phys_addr + + (uintptr_t)aad - (uintptr_t)cop; + if (sa0->use_esn) + { + *((u32*)&aad[8]) = sa0->seq_hi; + sym_cop->auth.aad.length = 12; + } + else + { + sym_cop->auth.aad.length = 8; + } + + sym_cop->auth.digest.data = rte_pktmbuf_mtod_offset(mb0, void*, + 
rte_pktmbuf_pkt_len(mb0) - icv_size); + sym_cop->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset(mb0, + rte_pktmbuf_pkt_len(mb0) - icv_size); + sym_cop->auth.digest.length = icv_size; + + } + else + { + sym_cop->cipher.iv.data = rte_pktmbuf_mtod_offset(mb0, void*, + sizeof (esp_header_t)); + sym_cop->cipher.iv.phys_addr = rte_pktmbuf_mtophys_offset(mb0, + sizeof (esp_header_t)); + sym_cop->cipher.iv.length = iv_size; + + if (sa0->use_esn) + { + dpdk_cop_priv_t* priv = (dpdk_cop_priv_t*) (sym_cop + 1); + u8* payload_end = rte_pktmbuf_mtod_offset( + mb0, u8*, sizeof(esp_header_t) + iv_size + payload_len); + + clib_memcpy (priv->icv, payload_end, icv_size); + *((u32*) payload_end) = sa0->seq_hi; + sym_cop->auth.data.offset = 0; + sym_cop->auth.data.length = sizeof(esp_header_t) + iv_size + + payload_len + sizeof(sa0->seq_hi); + sym_cop->auth.digest.data = priv->icv; + sym_cop->auth.digest.phys_addr = cop->phys_addr + + (uintptr_t) priv->icv - (uintptr_t) cop; + sym_cop->auth.digest.length = icv_size; + } + else + { + sym_cop->auth.data.offset = 0; + sym_cop->auth.data.length = sizeof(esp_header_t) + + iv_size + payload_len; + + sym_cop->auth.digest.data = rte_pktmbuf_mtod_offset(mb0, void*, + rte_pktmbuf_pkt_len(mb0) - icv_size); + sym_cop->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset(mb0, + rte_pktmbuf_pkt_len(mb0) - icv_size); + sym_cop->auth.digest.length = icv_size; + } + } + +trace: + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + esp_decrypt_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->crypto_alg = sa0->crypto_alg; + tr->integ_alg = sa0->integ_alg; + } + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, + ESP_DECRYPT_ERROR_RX_PKTS, + from_frame->n_vectors); + crypto_qp_data_t *qpd; + /* *INDENT-OFF* */ + vec_foreach_index (i, cwm->qp_data) + { + u32 enq; + + qpd = vec_elt_at_index(cwm->qp_data, i); + enq = rte_cryptodev_enqueue_burst(qpd->dev_id, qpd->qp_id, + qpd->cops, n_cop_qp[i]); + qpd->inflights += enq; + + if (PREDICT_FALSE(enq < n_cop_qp[i])) + { + crypto_free_cop (qpd, &qpd->cops[enq], n_cop_qp[i] - enq); + vlib_buffer_free (vm, &qpd->bi[enq], n_cop_qp[i] - enq); + + vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, + ESP_DECRYPT_ERROR_ENQ_FAIL, + n_cop_qp[i] - enq); + } + } + /* *INDENT-ON* */ + + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (dpdk_esp_decrypt_node) = { + .function = dpdk_esp_decrypt_node_fn, + .name = "dpdk-esp-decrypt", + .vector_size = sizeof (u32), + .format_trace = format_esp_decrypt_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(esp_decrypt_error_strings), + .error_strings = esp_decrypt_error_strings, + + .n_next_nodes = ESP_DECRYPT_N_NEXT, + .next_nodes = { +#define _(s,n) [ESP_DECRYPT_NEXT_##s] = n, + foreach_esp_decrypt_next +#undef _ + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_decrypt_node, dpdk_esp_decrypt_node_fn) + +/* + * Decrypt Post Node + */ + +#define foreach_esp_decrypt_post_error \ + _(PKTS, "ESP post pkts") + +typedef enum { +#define _(sym,str) ESP_DECRYPT_POST_ERROR_##sym, + foreach_esp_decrypt_post_error +#undef _ + ESP_DECRYPT_POST_N_ERROR, +} esp_decrypt_post_error_t; + +static char * esp_decrypt_post_error_strings[] = { +#define _(sym,string) string, + foreach_esp_decrypt_post_error +#undef _ +}; + +vlib_node_registration_t dpdk_esp_decrypt_post_node; + +static u8 * format_esp_decrypt_post_trace (u8 * s, va_list * args) +{ + return s; +} + +static uword 
+dpdk_esp_decrypt_post_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, *from, *to_next = 0, next_index; + ipsec_sa_t * sa0; + u32 sa_index0 = ~0; + ipsec_main_t *im = &ipsec_main; + dpdk_esp_main_t *em = &dpdk_esp_main; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + esp_footer_t * f0; + u32 bi0, next0, icv_size, iv_size; + vlib_buffer_t * b0 = 0; + ip4_header_t *ih4 = 0, *oh4 = 0; + ip6_header_t *ih6 = 0, *oh6 = 0; + u8 tunnel_mode = 1; + u8 transport_ip6 = 0; + + next0 = ESP_DECRYPT_NEXT_DROP; + + bi0 = from[0]; + from += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + sa_index0 = vnet_buffer(b0)->ipsec.sad_index; + sa0 = pool_elt_at_index (im->sad, sa_index0); + + to_next[0] = bi0; + to_next += 1; + + icv_size = em->esp_integ_algs[sa0->integ_alg].trunc_size; + iv_size = em->esp_crypto_algs[sa0->crypto_alg].iv_len; + + if (sa0->use_anti_replay) + { + esp_header_t * esp0 = vlib_buffer_get_current (b0); + u32 seq; + seq = clib_host_to_net_u32(esp0->seq); + if (PREDICT_TRUE(sa0->use_esn)) + esp_replay_advance_esn(sa0, seq); + else + esp_replay_advance(sa0, seq); + } + + ih4 = (ip4_header_t *) (b0->data + sizeof(ethernet_header_t)); + vlib_buffer_advance (b0, sizeof (esp_header_t) + iv_size); + + b0->current_length -= (icv_size + 2); + b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID; + f0 = (esp_footer_t *) ((u8 *) vlib_buffer_get_current (b0) + + b0->current_length); + b0->current_length -= f0->pad_length; + + /* transport mode */ + if (PREDICT_FALSE(!sa0->is_tunnel && !sa0->is_tunnel_ip6)) + { + tunnel_mode = 0; + + if (PREDICT_TRUE((ih4->ip_version_and_header_length & 0xF0) != 0x40)) + { + if (PREDICT_TRUE((ih4->ip_version_and_header_length & 0xF0) == 0x60)) + transport_ip6 = 1; + else + { + clib_warning("next header: 0x%x", f0->next_header); + vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, + ESP_DECRYPT_ERROR_NOT_IP, 1); + goto trace; + } + } + } + + if (PREDICT_TRUE (tunnel_mode)) + { + if (PREDICT_TRUE(f0->next_header == IP_PROTOCOL_IP_IN_IP)) + next0 = ESP_DECRYPT_NEXT_IP4_INPUT; + else if (f0->next_header == IP_PROTOCOL_IPV6) + next0 = ESP_DECRYPT_NEXT_IP6_INPUT; + else + { + clib_warning("next header: 0x%x", f0->next_header); + vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, + ESP_DECRYPT_ERROR_DECRYPTION_FAILED, + 1); + goto trace; + } + } + /* transport mode */ + else + { + if (PREDICT_FALSE(transport_ip6)) + { + ih6 = (ip6_header_t *) (b0->data + sizeof(ethernet_header_t)); + vlib_buffer_advance (b0, -sizeof(ip6_header_t)); + oh6 = vlib_buffer_get_current (b0); + memmove(oh6, ih6, sizeof(ip6_header_t)); + + next0 = ESP_DECRYPT_NEXT_IP6_INPUT; + oh6->protocol = f0->next_header; + oh6->payload_length = + clib_host_to_net_u16 ( + vlib_buffer_length_in_chain(vm, b0) - + sizeof (ip6_header_t)); + } + else + { + vlib_buffer_advance (b0, -sizeof(ip4_header_t)); + oh4 = vlib_buffer_get_current (b0); + memmove(oh4, ih4, sizeof(ip4_header_t)); + + next0 = ESP_DECRYPT_NEXT_IP4_INPUT; + oh4->ip_version_and_header_length = 0x45; + oh4->fragment_id = 0; + oh4->flags_and_fragment_offset = 0; + oh4->protocol = f0->next_header; + oh4->length = clib_host_to_net_u16 ( + vlib_buffer_length_in_chain (vm, b0)); + 
oh4->checksum = ip4_header_checksum (oh4); + } + } + + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32)~0; + +trace: + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + esp_decrypt_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->crypto_alg = sa0->crypto_alg; + tr->integ_alg = sa0->integ_alg; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, dpdk_esp_decrypt_post_node.index, + ESP_DECRYPT_POST_ERROR_PKTS, + from_frame->n_vectors); + + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (dpdk_esp_decrypt_post_node) = { + .function = dpdk_esp_decrypt_post_node_fn, + .name = "dpdk-esp-decrypt-post", + .vector_size = sizeof (u32), + .format_trace = format_esp_decrypt_post_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(esp_decrypt_post_error_strings), + .error_strings = esp_decrypt_post_error_strings, + + .n_next_nodes = ESP_DECRYPT_N_NEXT, + .next_nodes = { +#define _(s,n) [ESP_DECRYPT_NEXT_##s] = n, + foreach_esp_decrypt_next +#undef _ + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_decrypt_post_node, dpdk_esp_decrypt_post_node_fn) diff --git a/src/plugins/dpdk/ipsec/esp_encrypt.c b/src/plugins/dpdk/ipsec/esp_encrypt.c new file mode 100644 index 00000000..5b03de73 --- /dev/null +++ b/src/plugins/dpdk/ipsec/esp_encrypt.c @@ -0,0 +1,609 @@ +/* + * esp_encrypt.c : IPSec ESP encrypt node using DPDK Cryptodev + * + * Copyright (c) 2016 Intel and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +#define foreach_esp_encrypt_next \ +_(DROP, "error-drop") \ +_(IP4_LOOKUP, "ip4-lookup") \ +_(IP6_LOOKUP, "ip6-lookup") \ +_(INTERFACE_OUTPUT, "interface-output") + +#define _(v, s) ESP_ENCRYPT_NEXT_##v, +typedef enum +{ + foreach_esp_encrypt_next +#undef _ + ESP_ENCRYPT_N_NEXT, +} esp_encrypt_next_t; + +#define foreach_esp_encrypt_error \ + _(RX_PKTS, "ESP pkts received") \ + _(SEQ_CYCLED, "sequence number cycled") \ + _(ENQ_FAIL, "Enqueue failed (buffer full)") \ + _(NO_CRYPTODEV, "Cryptodev not configured") \ + _(UNSUPPORTED, "Cipher/Auth not supported") + + +typedef enum +{ +#define _(sym,str) ESP_ENCRYPT_ERROR_##sym, + foreach_esp_encrypt_error +#undef _ + ESP_ENCRYPT_N_ERROR, +} esp_encrypt_error_t; + +static char *esp_encrypt_error_strings[] = { +#define _(sym,string) string, + foreach_esp_encrypt_error +#undef _ +}; + +vlib_node_registration_t dpdk_esp_encrypt_node; + +typedef struct +{ + u32 spi; + u32 seq; + ipsec_crypto_alg_t crypto_alg; + ipsec_integ_alg_t integ_alg; +} esp_encrypt_trace_t; + +/* packet trace format function */ +static u8 * +format_esp_encrypt_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + esp_encrypt_trace_t *t = va_arg (*args, esp_encrypt_trace_t *); + + s = format (s, "esp: spi %u seq %u crypto %U integrity %U", + t->spi, t->seq, + format_ipsec_crypto_alg, t->crypto_alg, + format_ipsec_integ_alg, t->integ_alg); + return s; +} + +static uword +dpdk_esp_encrypt_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, *from, *to_next, next_index; + ipsec_main_t *im = &ipsec_main; + u32 cpu_index = os_get_cpu_number (); + dpdk_crypto_main_t *dcm = &dpdk_crypto_main; + dpdk_esp_main_t *em = &dpdk_esp_main; + u32 i; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + if (PREDICT_FALSE (!dcm->workers_main)) + { + /* Likely there are not enough cryptodevs, so drop frame */ + vlib_node_increment_counter (vm, dpdk_esp_encrypt_node.index, + ESP_ENCRYPT_ERROR_NO_CRYPTODEV, + n_left_from); + vlib_buffer_free (vm, from, n_left_from); + return n_left_from; + } + + crypto_worker_main_t *cwm = vec_elt_at_index (dcm->workers_main, cpu_index); + u32 n_qps = vec_len (cwm->qp_data); + struct rte_crypto_op **cops_to_enq[n_qps]; + u32 n_cop_qp[n_qps], *bi_to_enq[n_qps]; + + for (i = 0; i < n_qps; i++) + { + bi_to_enq[i] = cwm->qp_data[i].bi; + cops_to_enq[i] = cwm->qp_data[i].cops; + } + + memset (n_cop_qp, 0, n_qps * sizeof (u32)); + + crypto_alloc_cops (); + + next_index = ESP_ENCRYPT_NEXT_DROP; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0, next0; + vlib_buffer_t *b0 = 0; + u32 sa_index0; + ipsec_sa_t *sa0; + ip4_and_esp_header_t *ih0, *oh0 = 0; + ip6_and_esp_header_t *ih6_0, *oh6_0 = 0; + struct rte_mbuf *mb0 = 0; + esp_footer_t *f0; + u8 is_ipv6; + u8 ip_hdr_size; + u8 next_hdr_type; + u8 transport_mode = 0; + const int BLOCK_SIZE = 16; + u32 iv_size; + u16 orig_sz; + crypto_sa_session_t *sa_sess; + void *sess; + struct rte_crypto_op *cop = 0; + u16 qp_index; + + bi0 = from[0]; + from += 1; + n_left_from -= 1; + + b0 = vlib_get_buffer (vm, bi0); + sa_index0 = vnet_buffer (b0)->ipsec.sad_index; + sa0 = pool_elt_at_index (im->sad, 
sa_index0); + + if (PREDICT_FALSE (esp_seq_advance (sa0))) + { + clib_warning ("sequence number counter has cycled SPI %u", + sa0->spi); + vlib_node_increment_counter (vm, dpdk_esp_encrypt_node.index, + ESP_ENCRYPT_ERROR_SEQ_CYCLED, 1); + //TODO: rekey SA + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + goto trace; + } + + sa0->total_data_size += b0->current_length; + + sa_sess = pool_elt_at_index (cwm->sa_sess_d[1], sa_index0); + if (PREDICT_FALSE (!sa_sess->sess)) + { + int ret = create_sym_sess (sa0, sa_sess, 1); + + if (PREDICT_FALSE (ret)) + { + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + goto trace; + } + } + + qp_index = sa_sess->qp_index; + sess = sa_sess->sess; + + ASSERT (vec_len (vec_elt (cwm->qp_data, qp_index).free_cops) > 0); + cop = vec_pop (vec_elt (cwm->qp_data, qp_index).free_cops); + ASSERT (cop->status == RTE_CRYPTO_OP_STATUS_NOT_PROCESSED); + + cops_to_enq[qp_index][0] = cop; + cops_to_enq[qp_index] += 1; + n_cop_qp[qp_index] += 1; + bi_to_enq[qp_index][0] = bi0; + bi_to_enq[qp_index] += 1; + + ssize_t adv; + iv_size = em->esp_crypto_algs[sa0->crypto_alg].iv_len; + ih0 = vlib_buffer_get_current (b0); + orig_sz = b0->current_length; + is_ipv6 = (ih0->ip4.ip_version_and_header_length & 0xF0) == 0x60; + /* is ipv6 */ + if (PREDICT_TRUE (sa0->is_tunnel)) + { + if (PREDICT_TRUE (!is_ipv6)) + adv = -sizeof (ip4_and_esp_header_t); + else + adv = -sizeof (ip6_and_esp_header_t); + } + else + { + adv = -sizeof (esp_header_t); + if (PREDICT_TRUE (!is_ipv6)) + orig_sz -= sizeof (ip4_header_t); + else + orig_sz -= sizeof (ip6_header_t); + } + + /*transport mode save the eth header before it is overwritten */ + if (PREDICT_FALSE (!sa0->is_tunnel)) + { + ethernet_header_t *ieh0 = (ethernet_header_t *) + ((u8 *) vlib_buffer_get_current (b0) - + sizeof (ethernet_header_t)); + ethernet_header_t *oeh0 = + (ethernet_header_t *) ((u8 *) ieh0 + (adv - iv_size)); + clib_memcpy (oeh0, ieh0, sizeof (ethernet_header_t)); + } + + vlib_buffer_advance (b0, adv - iv_size); + + /* XXX IP6/ip4 and IP4/IP6 not supported, only IP4/IP4 and IP6/IP6 */ + + /* is ipv6 */ + if (PREDICT_FALSE (is_ipv6)) + { + ih6_0 = (ip6_and_esp_header_t *) ih0; + ip_hdr_size = sizeof (ip6_header_t); + oh6_0 = vlib_buffer_get_current (b0); + + if (PREDICT_TRUE (sa0->is_tunnel)) + { + next_hdr_type = IP_PROTOCOL_IPV6; + oh6_0->ip6.ip_version_traffic_class_and_flow_label = + ih6_0->ip6.ip_version_traffic_class_and_flow_label; + } + else + { + next_hdr_type = ih6_0->ip6.protocol; + memmove (oh6_0, ih6_0, sizeof (ip6_header_t)); + } + + oh6_0->ip6.protocol = IP_PROTOCOL_IPSEC_ESP; + oh6_0->ip6.hop_limit = 254; + oh6_0->esp.spi = clib_net_to_host_u32 (sa0->spi); + oh6_0->esp.seq = clib_net_to_host_u32 (sa0->seq); + } + else + { + ip_hdr_size = sizeof (ip4_header_t); + oh0 = vlib_buffer_get_current (b0); + + if (PREDICT_TRUE (sa0->is_tunnel)) + { + next_hdr_type = IP_PROTOCOL_IP_IN_IP; + oh0->ip4.tos = ih0->ip4.tos; + } + else + { + next_hdr_type = ih0->ip4.protocol; + memmove (oh0, ih0, sizeof (ip4_header_t)); + } + + oh0->ip4.ip_version_and_header_length = 0x45; + oh0->ip4.fragment_id = 0; + oh0->ip4.flags_and_fragment_offset = 0; + oh0->ip4.ttl = 254; + oh0->ip4.protocol = IP_PROTOCOL_IPSEC_ESP; + oh0->esp.spi = clib_net_to_host_u32 (sa0->spi); + oh0->esp.seq = clib_net_to_host_u32 (sa0->seq); + } + + if (PREDICT_TRUE (sa0->is_tunnel && !sa0->is_tunnel_ip6)) + { + oh0->ip4.src_address.as_u32 = sa0->tunnel_src_addr.ip4.as_u32; + oh0->ip4.dst_address.as_u32 = sa0->tunnel_dst_addr.ip4.as_u32; + + /* in 
tunnel mode send it back to FIB */ + next0 = ESP_ENCRYPT_NEXT_IP4_LOOKUP; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + } + else if (sa0->is_tunnel && sa0->is_tunnel_ip6) + { + oh6_0->ip6.src_address.as_u64[0] = + sa0->tunnel_src_addr.ip6.as_u64[0]; + oh6_0->ip6.src_address.as_u64[1] = + sa0->tunnel_src_addr.ip6.as_u64[1]; + oh6_0->ip6.dst_address.as_u64[0] = + sa0->tunnel_dst_addr.ip6.as_u64[0]; + oh6_0->ip6.dst_address.as_u64[1] = + sa0->tunnel_dst_addr.ip6.as_u64[1]; + + /* in tunnel mode send it back to FIB */ + next0 = ESP_ENCRYPT_NEXT_IP6_LOOKUP; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + } + else + { + next0 = ESP_ENCRYPT_NEXT_INTERFACE_OUTPUT; + transport_mode = 1; + } + + ASSERT (sa0->crypto_alg < IPSEC_CRYPTO_N_ALG); + ASSERT (sa0->crypto_alg != IPSEC_CRYPTO_ALG_NONE); + + int blocks = 1 + (orig_sz + 1) / BLOCK_SIZE; + + /* pad packet in input buffer */ + u8 pad_bytes = BLOCK_SIZE * blocks - 2 - orig_sz; + u8 i; + u8 *padding = vlib_buffer_get_current (b0) + b0->current_length; + + for (i = 0; i < pad_bytes; ++i) + padding[i] = i + 1; + + f0 = vlib_buffer_get_current (b0) + b0->current_length + pad_bytes; + f0->pad_length = pad_bytes; + f0->next_header = next_hdr_type; + b0->current_length += pad_bytes + 2 + + em->esp_integ_algs[sa0->integ_alg].trunc_size; + + vnet_buffer (b0)->sw_if_index[VLIB_RX] = + vnet_buffer (b0)->sw_if_index[VLIB_RX]; + b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID; + + struct rte_crypto_sym_op *sym_cop; + sym_cop = (struct rte_crypto_sym_op *) (cop + 1); + + dpdk_cop_priv_t *priv = (dpdk_cop_priv_t *) (sym_cop + 1); + + vnet_buffer (b0)->unused[0] = next0; + + mb0 = rte_mbuf_from_vlib_buffer (b0); + mb0->data_len = b0->current_length; + mb0->pkt_len = b0->current_length; + mb0->data_off = RTE_PKTMBUF_HEADROOM + b0->current_data; + + rte_crypto_op_attach_sym_session (cop, sess); + + sym_cop->m_src = mb0; + + dpdk_gcm_cnt_blk *icb = &priv->cb; + icb->salt = sa0->salt; + icb->iv[0] = sa0->seq; + icb->iv[1] = sa0->seq_hi; + + if (sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) + { + icb->cnt = clib_host_to_net_u32 (1); + clib_memcpy (vlib_buffer_get_current (b0) + ip_hdr_size + + sizeof (esp_header_t), icb->iv, 8); + sym_cop->cipher.data.offset = + ip_hdr_size + sizeof (esp_header_t) + iv_size; + sym_cop->cipher.data.length = BLOCK_SIZE * blocks; + sym_cop->cipher.iv.length = 16; + } + else + { + sym_cop->cipher.data.offset = + ip_hdr_size + sizeof (esp_header_t); + sym_cop->cipher.data.length = BLOCK_SIZE * blocks + iv_size; + sym_cop->cipher.iv.length = iv_size; + } + + sym_cop->cipher.iv.data = (u8 *) icb; + sym_cop->cipher.iv.phys_addr = cop->phys_addr + (uintptr_t) icb + - (uintptr_t) cop; + + + ASSERT (sa0->integ_alg < IPSEC_INTEG_N_ALG); + ASSERT (sa0->integ_alg != IPSEC_INTEG_ALG_NONE); + + if (PREDICT_FALSE (sa0->integ_alg == IPSEC_INTEG_ALG_AES_GCM_128)) + { + u8 *aad = priv->aad; + clib_memcpy (aad, vlib_buffer_get_current (b0) + ip_hdr_size, + 8); + sym_cop->auth.aad.data = aad; + sym_cop->auth.aad.phys_addr = cop->phys_addr + + (uintptr_t) aad - (uintptr_t) cop; + + if (PREDICT_FALSE (sa0->use_esn)) + { + *((u32 *) & aad[8]) = sa0->seq_hi; + sym_cop->auth.aad.length = 12; + } + else + { + sym_cop->auth.aad.length = 8; + } + } + else + { + sym_cop->auth.data.offset = ip_hdr_size; + sym_cop->auth.data.length = b0->current_length - ip_hdr_size + - em->esp_integ_algs[sa0->integ_alg].trunc_size; + + if (PREDICT_FALSE (sa0->use_esn)) + { + u8 *payload_end = + vlib_buffer_get_current (b0) + b0->current_length; + *((u32 *) 
payload_end) = sa0->seq_hi; + sym_cop->auth.data.length += sizeof (sa0->seq_hi); + } + } + sym_cop->auth.digest.data = vlib_buffer_get_current (b0) + + b0->current_length - + em->esp_integ_algs[sa0->integ_alg].trunc_size; + sym_cop->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset (mb0, + b0->current_length + - + em->esp_integ_algs + [sa0->integ_alg].trunc_size); + sym_cop->auth.digest.length = + em->esp_integ_algs[sa0->integ_alg].trunc_size; + + + if (PREDICT_FALSE (is_ipv6)) + { + oh6_0->ip6.payload_length = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - + sizeof (ip6_header_t)); + } + else + { + oh0->ip4.length = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); + oh0->ip4.checksum = ip4_header_checksum (&oh0->ip4); + } + + if (transport_mode) + vlib_buffer_advance (b0, -sizeof (ethernet_header_t)); + + trace: + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + esp_encrypt_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->spi = sa0->spi; + tr->seq = sa0->seq - 1; + tr->crypto_alg = sa0->crypto_alg; + tr->integ_alg = sa0->integ_alg; + } + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, dpdk_esp_encrypt_node.index, + ESP_ENCRYPT_ERROR_RX_PKTS, + from_frame->n_vectors); + crypto_qp_data_t *qpd; + /* *INDENT-OFF* */ + vec_foreach_index (i, cwm->qp_data) + { + u32 enq; + + qpd = vec_elt_at_index(cwm->qp_data, i); + enq = rte_cryptodev_enqueue_burst(qpd->dev_id, qpd->qp_id, + qpd->cops, n_cop_qp[i]); + qpd->inflights += enq; + + if (PREDICT_FALSE(enq < n_cop_qp[i])) + { + crypto_free_cop (qpd, &qpd->cops[enq], n_cop_qp[i] - enq); + vlib_buffer_free (vm, &qpd->bi[enq], n_cop_qp[i] - enq); + + vlib_node_increment_counter (vm, dpdk_esp_encrypt_node.index, + ESP_ENCRYPT_ERROR_ENQ_FAIL, + n_cop_qp[i] - enq); + } + } + /* *INDENT-ON* */ + + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (dpdk_esp_encrypt_node) = +{ + .function = dpdk_esp_encrypt_node_fn,.name = "dpdk-esp-encrypt",.flags = + VLIB_NODE_FLAG_IS_OUTPUT,.vector_size = sizeof (u32),.format_trace = + format_esp_encrypt_trace,.n_errors = + ARRAY_LEN (esp_encrypt_error_strings),.error_strings = + esp_encrypt_error_strings,.n_next_nodes = 1,.next_nodes = + { + [ESP_ENCRYPT_NEXT_DROP] = "error-drop",} +}; + +VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_encrypt_node, dpdk_esp_encrypt_node_fn) +/* + * ESP Encrypt Post Node + */ +#define foreach_esp_encrypt_post_error \ + _(PKTS, "ESP post pkts") + typedef enum + { +#define _(sym,str) ESP_ENCRYPT_POST_ERROR_##sym, + foreach_esp_encrypt_post_error +#undef _ + ESP_ENCRYPT_POST_N_ERROR, + } esp_encrypt_post_error_t; + + static char *esp_encrypt_post_error_strings[] = { +#define _(sym,string) string, + foreach_esp_encrypt_post_error +#undef _ + }; + +vlib_node_registration_t dpdk_esp_encrypt_post_node; + +static u8 * +format_esp_encrypt_post_trace (u8 * s, va_list * args) +{ + return s; +} + +static uword +dpdk_esp_encrypt_post_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, *from, *to_next = 0, next_index; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0, next0; + vlib_buffer_t *b0 = 0; + + bi0 = from[0]; + from += 1; + n_left_from -= 1; + n_left_to_next -= 1; 
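+ /* dpdk-esp-encrypt stashed the chosen next node in vnet_buffer (b0)->unused[0]; it is read back below */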
+ + b0 = vlib_get_buffer (vm, bi0); + + to_next[0] = bi0; + to_next += 1; + + next0 = vnet_buffer (b0)->unused[0]; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, bi0, + next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, dpdk_esp_encrypt_post_node.index, + ESP_ENCRYPT_POST_ERROR_PKTS, + from_frame->n_vectors); + + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (dpdk_esp_encrypt_post_node) = +{ + .function = dpdk_esp_encrypt_post_node_fn,.name = + "dpdk-esp-encrypt-post",.vector_size = sizeof (u32),.format_trace = + format_esp_encrypt_post_trace,.type = VLIB_NODE_TYPE_INTERNAL,.n_errors = + ARRAY_LEN (esp_encrypt_post_error_strings),.error_strings = + esp_encrypt_post_error_strings,.n_next_nodes = + ESP_ENCRYPT_N_NEXT,.next_nodes = + { +#define _(s,n) [ESP_ENCRYPT_NEXT_##s] = n, + foreach_esp_encrypt_next +#undef _ + } +}; + +VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_encrypt_post_node, + dpdk_esp_encrypt_post_node_fn) +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/ipsec/ipsec.c b/src/plugins/dpdk/ipsec/ipsec.c new file mode 100644 index 00000000..16bec20a --- /dev/null +++ b/src/plugins/dpdk/ipsec/ipsec.c @@ -0,0 +1,430 @@ +/* + * Copyright (c) 2016 Intel and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include +#include +#include +#include + +#include +#include +#include + +#define DPDK_CRYPTO_NB_SESS_OBJS 20000 +#define DPDK_CRYPTO_CACHE_SIZE 512 +#define DPDK_CRYPTO_PRIV_SIZE 128 +#define DPDK_CRYPTO_N_QUEUE_DESC 1024 +#define DPDK_CRYPTO_NB_COPS (1024 * 4) + +static int +add_del_sa_sess (u32 sa_index, u8 is_add) +{ + dpdk_crypto_main_t *dcm = &dpdk_crypto_main; + crypto_worker_main_t *cwm; + u8 skip_master = vlib_num_workers () > 0; + + /* *INDENT-OFF* */ + vec_foreach (cwm, dcm->workers_main) + { + crypto_sa_session_t *sa_sess; + u8 is_outbound; + + if (skip_master) + { + skip_master = 0; + continue; + } + + for (is_outbound = 0; is_outbound < 2; is_outbound++) + { + if (is_add) + { + pool_get (cwm->sa_sess_d[is_outbound], sa_sess); + } + else + { + u8 dev_id; + + sa_sess = pool_elt_at_index (cwm->sa_sess_d[is_outbound], sa_index); + dev_id = cwm->qp_data[sa_sess->qp_index].dev_id; + + if (!sa_sess->sess) + continue; + + if (rte_cryptodev_sym_session_free(dev_id, sa_sess->sess)) + { + clib_warning("failed to free session"); + return -1; + } + memset(sa_sess, 0, sizeof(sa_sess[0])); + } + } + } + /* *INDENT-OFF* */ + + return 0; +} + +static void +update_qp_data (crypto_worker_main_t * cwm, + u8 cdev_id, u16 qp_id, u8 is_outbound, u16 * idx) +{ + crypto_qp_data_t *qpd; + + /* *INDENT-OFF* */ + vec_foreach_index (*idx, cwm->qp_data) + { + qpd = vec_elt_at_index(cwm->qp_data, *idx); + + if (qpd->dev_id == cdev_id && qpd->qp_id == qp_id && + qpd->is_outbound == is_outbound) + return; + } + /* *INDENT-ON* */ + + vec_add2 (cwm->qp_data, qpd, 1); + + qpd->dev_id = cdev_id; + qpd->qp_id = qp_id; + qpd->is_outbound = is_outbound; +} + +/* + * return: + * 0: already exist + * 1: mapped + */ +static int +add_mapping (crypto_worker_main_t * cwm, + u8 cdev_id, u16 qp, u8 is_outbound, + const struct rte_cryptodev_capabilities *cipher_cap, + const struct rte_cryptodev_capabilities *auth_cap) +{ + u16 qp_index; + uword key = 0, data, *ret; + crypto_worker_qp_key_t *p_key = (crypto_worker_qp_key_t *) & key; + + p_key->cipher_algo = (u8) cipher_cap->sym.cipher.algo; + p_key->auth_algo = (u8) auth_cap->sym.auth.algo; + p_key->is_outbound = is_outbound; + + ret = hash_get (cwm->algo_qp_map, key); + if (ret) + return 0; + + update_qp_data (cwm, cdev_id, qp, is_outbound, &qp_index); + + data = (uword) qp_index; + hash_set (cwm->algo_qp_map, key, data); + + return 1; +} + +/* + * return: + * 0: already exist + * 1: mapped + */ +static int +add_cdev_mapping (crypto_worker_main_t * cwm, + struct rte_cryptodev_info *dev_info, u8 cdev_id, + u16 qp, u8 is_outbound) +{ + const struct rte_cryptodev_capabilities *i, *j; + u32 mapped = 0; + + for (i = dev_info->capabilities; i->op != RTE_CRYPTO_OP_TYPE_UNDEFINED; i++) + { + if (i->sym.xform_type != RTE_CRYPTO_SYM_XFORM_CIPHER) + continue; + + if (check_algo_is_supported (i, NULL) != 0) + continue; + + for (j = dev_info->capabilities; j->op != RTE_CRYPTO_OP_TYPE_UNDEFINED; + j++) + { + if (j->sym.xform_type != RTE_CRYPTO_SYM_XFORM_AUTH) + continue; + + if (check_algo_is_supported (j, NULL) != 0) + continue; + + mapped |= add_mapping (cwm, cdev_id, qp, is_outbound, i, j); + } + } + + return mapped; +} + +static int +check_cryptodev_queues () +{ + u32 n_qs = 0; + u8 cdev_id; + u32 n_req_qs = 2; + + if (vlib_num_workers () > 0) + n_req_qs = vlib_num_workers () * 2; + + for (cdev_id = 0; cdev_id < rte_cryptodev_count (); cdev_id++) + { + struct rte_cryptodev_info cdev_info; + + rte_cryptodev_info_get (cdev_id, &cdev_info); + + if (! 
+ (cdev_info.feature_flags & RTE_CRYPTODEV_FF_SYM_OPERATION_CHAINING)) + continue; + + n_qs += cdev_info.max_nb_queue_pairs; + } + + if (n_qs >= n_req_qs) + return 0; + else + return -1; +} + +static clib_error_t * +dpdk_ipsec_check_support (ipsec_sa_t * sa) +{ + if (sa->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) + { + if (sa->integ_alg != IPSEC_INTEG_ALG_NONE) + return clib_error_return (0, "unsupported integ-alg %U with " + "crypto-algo aes-gcm-128", + format_ipsec_integ_alg, sa->integ_alg); + sa->integ_alg = IPSEC_INTEG_ALG_AES_GCM_128; + } + else + { + if (sa->integ_alg == IPSEC_INTEG_ALG_NONE || + sa->integ_alg == IPSEC_INTEG_ALG_AES_GCM_128) + return clib_error_return (0, "unsupported integ-alg %U", + format_ipsec_integ_alg, sa->integ_alg); + } + + return 0; +} + +static uword +dpdk_ipsec_process (vlib_main_t * vm, vlib_node_runtime_t * rt, + vlib_frame_t * f) +{ + dpdk_config_main_t *conf = &dpdk_config_main; + ipsec_main_t *im = &ipsec_main; + dpdk_crypto_main_t *dcm = &dpdk_crypto_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + struct rte_cryptodev_config dev_conf; + struct rte_cryptodev_qp_conf qp_conf; + struct rte_cryptodev_info cdev_info; + struct rte_mempool *rmp; + i32 dev_id, ret; + u32 i, skip_master; + + if (!conf->cryptodev) + { + clib_warning ("DPDK Cryptodev support is disabled, " + "default to OpenSSL IPsec"); + return 0; + } + + if (check_cryptodev_queues () < 0) + { + conf->cryptodev = 0; + clib_warning ("not enough Cryptodevs, default to OpenSSL IPsec"); + return 0; + } + + vec_alloc (dcm->workers_main, tm->n_vlib_mains); + _vec_len (dcm->workers_main) = tm->n_vlib_mains; + + fprintf (stdout, "DPDK Cryptodevs info:\n"); + fprintf (stdout, "dev_id\tn_qp\tnb_obj\tcache_size\n"); + /* HW cryptodevs have higher dev_id, use HW first */ + for (dev_id = rte_cryptodev_count () - 1; dev_id >= 0; dev_id--) + { + u16 max_nb_qp, qp = 0; + skip_master = vlib_num_workers () > 0; + + rte_cryptodev_info_get (dev_id, &cdev_info); + + if (! 
+ (cdev_info.feature_flags & RTE_CRYPTODEV_FF_SYM_OPERATION_CHAINING)) + continue; + + max_nb_qp = cdev_info.max_nb_queue_pairs; + + for (i = 0; i < tm->n_vlib_mains; i++) + { + u8 is_outbound; + crypto_worker_main_t *cwm; + uword *map; + + if (skip_master) + { + skip_master = 0; + continue; + } + + cwm = vec_elt_at_index (dcm->workers_main, i); + map = cwm->algo_qp_map; + + if (!map) + { + map = hash_create (0, sizeof (crypto_worker_qp_key_t)); + if (!map) + { + clib_warning ("unable to create hash table for worker %u", + vlib_mains[i]->cpu_index); + goto error; + } + cwm->algo_qp_map = map; + } + + for (is_outbound = 0; is_outbound < 2 && qp < max_nb_qp; + is_outbound++) + qp += add_cdev_mapping (cwm, &cdev_info, dev_id, qp, is_outbound); + } + + if (qp == 0) + continue; + + dev_conf.socket_id = rte_cryptodev_socket_id (dev_id); + dev_conf.nb_queue_pairs = cdev_info.max_nb_queue_pairs; + dev_conf.session_mp.nb_objs = DPDK_CRYPTO_NB_SESS_OBJS; + dev_conf.session_mp.cache_size = DPDK_CRYPTO_CACHE_SIZE; + + ret = rte_cryptodev_configure (dev_id, &dev_conf); + if (ret < 0) + { + clib_warning ("cryptodev %u config error", dev_id); + goto error; + } + + qp_conf.nb_descriptors = DPDK_CRYPTO_N_QUEUE_DESC; + for (qp = 0; qp < dev_conf.nb_queue_pairs; qp++) + { + ret = rte_cryptodev_queue_pair_setup (dev_id, qp, &qp_conf, + dev_conf.socket_id); + if (ret < 0) + { + clib_warning ("cryptodev %u qp %u setup error", dev_id, qp); + goto error; + } + } + vec_validate_aligned (dcm->cop_pools, dev_conf.socket_id, + CLIB_CACHE_LINE_BYTES); + + if (!vec_elt (dcm->cop_pools, dev_conf.socket_id)) + { + u8 *pool_name = format (0, "crypto_op_pool_socket%u%c", + dev_conf.socket_id, 0); + + rmp = rte_crypto_op_pool_create ((char *) pool_name, + RTE_CRYPTO_OP_TYPE_SYMMETRIC, + DPDK_CRYPTO_NB_COPS * + (1 + vlib_num_workers ()), + DPDK_CRYPTO_CACHE_SIZE, + DPDK_CRYPTO_PRIV_SIZE, + dev_conf.socket_id); + vec_free (pool_name); + + if (!rmp) + { + clib_warning ("failed to allocate mempool on socket %u", + dev_conf.socket_id); + goto error; + } + vec_elt (dcm->cop_pools, dev_conf.socket_id) = rmp; + } + + fprintf (stdout, "%u\t%u\t%u\t%u\n", dev_id, dev_conf.nb_queue_pairs, + DPDK_CRYPTO_NB_SESS_OBJS, DPDK_CRYPTO_CACHE_SIZE); + } + + dpdk_esp_init (); + + /* Add new next node and set as default */ + vlib_node_t *node, *next_node; + + next_node = vlib_get_node_by_name (vm, (u8 *) "dpdk-esp-encrypt"); + ASSERT (next_node); + node = vlib_get_node_by_name (vm, (u8 *) "ipsec-output-ip4"); + ASSERT (node); + im->esp_encrypt_node_index = next_node->index; + im->esp_encrypt_next_index = + vlib_node_add_next (vm, node->index, next_node->index); + + next_node = vlib_get_node_by_name (vm, (u8 *) "dpdk-esp-decrypt"); + ASSERT (next_node); + node = vlib_get_node_by_name (vm, (u8 *) "ipsec-input-ip4"); + ASSERT (node); + im->esp_decrypt_node_index = next_node->index; + im->esp_decrypt_next_index = + vlib_node_add_next (vm, node->index, next_node->index); + + im->cb.check_support_cb = dpdk_ipsec_check_support; + im->cb.add_del_sa_sess_cb = add_del_sa_sess; + + if (vec_len (vlib_mains) == 0) + vlib_node_set_state (&vlib_global_main, dpdk_crypto_input_node.index, + VLIB_NODE_STATE_POLLING); + else + for (i = 1; i < tm->n_vlib_mains; i++) + vlib_node_set_state (vlib_mains[i], dpdk_crypto_input_node.index, + VLIB_NODE_STATE_POLLING); + + /* TODO cryptodev counters */ + + return 0; + +error: + ; + crypto_worker_main_t *cwm; + struct rte_mempool **mp; + /* *INDENT-OFF* */ + vec_foreach (cwm, dcm->workers_main) + hash_free 
(cwm->algo_qp_map); + + vec_foreach (mp, dcm->cop_pools) + { + if (mp) + rte_mempool_free (mp[0]); + } + /* *INDENT-ON* */ + vec_free (dcm->workers_main); + vec_free (dcm->cop_pools); + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (dpdk_ipsec_process_node,static) = { + .function = dpdk_ipsec_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "dpdk-ipsec-process", + .process_log2_n_stack_bytes = 17, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/ipsec/ipsec.h b/src/plugins/dpdk/ipsec/ipsec.h new file mode 100644 index 00000000..3465b361 --- /dev/null +++ b/src/plugins/dpdk/ipsec/ipsec.h @@ -0,0 +1,227 @@ +/* + * Copyright (c) 2016 Intel and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __DPDK_IPSEC_H__ +#define __DPDK_IPSEC_H__ + +#include + +#undef always_inline +#include +#include + +#if CLIB_DEBUG > 0 +#define always_inline static inline +#else +#define always_inline static inline __attribute__ ((__always_inline__)) +#endif + + +#define MAX_QP_PER_LCORE 16 + +typedef struct +{ + u32 salt; + u32 iv[2]; + u32 cnt; +} dpdk_gcm_cnt_blk; + +typedef struct +{ + dpdk_gcm_cnt_blk cb; + union + { + u8 aad[12]; + u8 icv[64]; + }; +} dpdk_cop_priv_t; + +typedef struct +{ + u8 cipher_algo; + u8 auth_algo; + u8 is_outbound; +} crypto_worker_qp_key_t; + +typedef struct +{ + u16 dev_id; + u16 qp_id; + u16 is_outbound; + i16 inflights; + u32 bi[VLIB_FRAME_SIZE]; + struct rte_crypto_op *cops[VLIB_FRAME_SIZE]; + struct rte_crypto_op **free_cops; +} crypto_qp_data_t; + +typedef struct +{ + u8 qp_index; + void *sess; +} crypto_sa_session_t; + +typedef struct +{ + crypto_sa_session_t *sa_sess_d[2]; + crypto_qp_data_t *qp_data; + uword *algo_qp_map; +} crypto_worker_main_t; + +typedef struct +{ + struct rte_mempool **cop_pools; + crypto_worker_main_t *workers_main; +} dpdk_crypto_main_t; + +dpdk_crypto_main_t dpdk_crypto_main; + +extern vlib_node_registration_t dpdk_crypto_input_node; + +#define CRYPTO_N_FREE_COPS (VLIB_FRAME_SIZE * 3) + +static_always_inline void +crypto_alloc_cops () +{ + dpdk_crypto_main_t *dcm = &dpdk_crypto_main; + u32 cpu_index = os_get_cpu_number (); + crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index]; + unsigned socket_id = rte_socket_id (); + crypto_qp_data_t *qpd; + + /* *INDENT-OFF* */ + vec_foreach (qpd, cwm->qp_data) + { + u32 l = vec_len (qpd->free_cops); + + if (PREDICT_FALSE (l < VLIB_FRAME_SIZE)) + { + u32 n_alloc; + + if (PREDICT_FALSE (!qpd->free_cops)) + vec_alloc (qpd->free_cops, CRYPTO_N_FREE_COPS); + + n_alloc = rte_crypto_op_bulk_alloc (dcm->cop_pools[socket_id], + RTE_CRYPTO_OP_TYPE_SYMMETRIC, + &qpd->free_cops[l], + CRYPTO_N_FREE_COPS - l - 1); + + _vec_len (qpd->free_cops) = l + n_alloc; + } + } + /* *INDENT-ON* */ +} + +static_always_inline void +crypto_free_cop (crypto_qp_data_t * qpd, struct rte_crypto_op **cops, u32 n) +{ + u32 l = vec_len (qpd->free_cops); + + if (l + n >= 
CRYPTO_N_FREE_COPS) + { + l -= VLIB_FRAME_SIZE; + rte_mempool_put_bulk (cops[0]->mempool, + (void **) &qpd->free_cops[l], VLIB_FRAME_SIZE); + } + clib_memcpy (&qpd->free_cops[l], cops, sizeof (*cops) * n); + + _vec_len (qpd->free_cops) = l + n; +} + +static_always_inline int +check_algo_is_supported (const struct rte_cryptodev_capabilities *cap, + char *name) +{ + struct + { + uint8_t cipher_algo; + enum rte_crypto_sym_xform_type type; + union + { + enum rte_crypto_auth_algorithm auth; + enum rte_crypto_cipher_algorithm cipher; + }; + char *name; + } supported_algo[] = + { + { + .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher = + RTE_CRYPTO_CIPHER_NULL,.name = "NULL"}, + { + .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher = + RTE_CRYPTO_CIPHER_AES_CBC,.name = "AES_CBC"}, + { + .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher = + RTE_CRYPTO_CIPHER_AES_CTR,.name = "AES_CTR"}, + { + .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher = + RTE_CRYPTO_CIPHER_3DES_CBC,.name = "3DES-CBC"}, + { + .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher = + RTE_CRYPTO_CIPHER_AES_GCM,.name = "AES-GCM"}, + { + .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = + RTE_CRYPTO_AUTH_SHA1_HMAC,.name = "HMAC-SHA1"}, + { + .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = + RTE_CRYPTO_AUTH_SHA256_HMAC,.name = "HMAC-SHA256"}, + { + .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = + RTE_CRYPTO_AUTH_SHA384_HMAC,.name = "HMAC-SHA384"}, + { + .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = + RTE_CRYPTO_AUTH_SHA512_HMAC,.name = "HMAC-SHA512"}, + { + .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = + RTE_CRYPTO_AUTH_AES_XCBC_MAC,.name = "AES-XCBC-MAC"}, + { + .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = + RTE_CRYPTO_AUTH_AES_GCM,.name = "AES-GCM"}, + { + /* tail */ + .type = RTE_CRYPTO_SYM_XFORM_NOT_SPECIFIED},}; + uint32_t i = 0; + + if (cap->op != RTE_CRYPTO_OP_TYPE_SYMMETRIC) + return -1; + + while (supported_algo[i].type != RTE_CRYPTO_SYM_XFORM_NOT_SPECIFIED) + { + if (cap->sym.xform_type == supported_algo[i].type) + { + if ((cap->sym.xform_type == RTE_CRYPTO_SYM_XFORM_CIPHER && + cap->sym.cipher.algo == supported_algo[i].cipher) || + (cap->sym.xform_type == RTE_CRYPTO_SYM_XFORM_AUTH && + cap->sym.auth.algo == supported_algo[i].auth)) + { + if (name) + strcpy (name, supported_algo[i].name); + return 0; + } + } + + i++; + } + + return -1; +} + +#endif /* __DPDK_IPSEC_H__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/main.c b/src/plugins/dpdk/main.c new file mode 100644 index 00000000..8073a50a --- /dev/null +++ b/src/plugins/dpdk/main.c @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +/* + * Called by the dpdk driver's rte_delay_us() function. + * Return 0 to have the dpdk do a regular delay loop. + * Return 1 if to skip the delay loop because we are suspending + * the calling vlib process instead. 
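The algo_qp_map populated by add_mapping() earlier stores, per worker, the queue-pair index for each (cipher, auth, direction) combination, keyed by packing a crypto_worker_qp_key_t into a single uword. A minimal consumer-side sketch, assuming only the types declared in ipsec.h above (the helper name example_get_qp is hypothetical; the real lookups live in the ESP encrypt/decrypt nodes, which are outside this hunk):

static_always_inline crypto_qp_data_t *
example_get_qp (crypto_worker_main_t * cwm,
		u8 cipher_algo, u8 auth_algo, u8 is_outbound)
{
  uword key = 0, *val;
  crypto_worker_qp_key_t *p_key = (crypto_worker_qp_key_t *) &key;

  /* pack the key exactly as add_mapping () does */
  p_key->cipher_algo = cipher_algo;
  p_key->auth_algo = auth_algo;
  p_key->is_outbound = is_outbound;

  val = hash_get (cwm->algo_qp_map, key);
  if (!val)
    return 0;			/* no queue pair supports this combination */

  /* the stored value is an index into the per-worker qp_data vector */
  return vec_elt_at_index (cwm->qp_data, val[0]);
}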
+ */ +int +rte_delay_us_override (unsigned us) +{ + vlib_main_t *vm; + + /* Don't bother intercepting for short delays */ + if (us < 10) + return 0; + + /* + * Only intercept if we are in a vlib process. + * If we are called from a vlib worker thread or the vlib main + * thread then do not intercept. (Must not be called from an + * independent pthread). + */ + if (os_get_cpu_number () == 0) + { + /* + * We're in the vlib main thread or a vlib process. Make sure + * the process is running and we're not still initializing. + */ + vm = vlib_get_main (); + if (vlib_in_process_context (vm)) + { + /* Only suspend for the admin_down_process */ + vlib_process_t *proc = vlib_get_current_process (vm); + if (!(proc->flags & VLIB_PROCESS_IS_RUNNING) || + (proc->node_runtime.function != admin_up_down_process)) + return 0; + + f64 delay = 1e-6 * us; + vlib_process_suspend (vm, delay); + return 1; + } + } + return 0; // no override +} + +static void +rte_delay_us_override_cb (unsigned us) +{ + if (rte_delay_us_override (us) == 0) + rte_delay_us_block (us); +} + +static clib_error_t * dpdk_main_init (vlib_main_t * vm) +{ + dpdk_main_t * dm = &dpdk_main; + clib_error_t * error = 0; + + dm->vlib_main = vm; + dm->vnet_main = vnet_get_main (); + + if ((error = vlib_call_init_function (vm, dpdk_init))) + return error; + + /* register custom delay function */ + rte_delay_us_callback_register (rte_delay_us_override_cb); + + return error; +} + +VLIB_INIT_FUNCTION (dpdk_main_init); + +/* *INDENT-OFF* */ +VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, +}; +/* *INDENT-ON* */ diff --git a/src/plugins/dpdk/thread.c b/src/plugins/dpdk/thread.c new file mode 100644 index 00000000..3a3fcc6c --- /dev/null +++ b/src/plugins/dpdk/thread.c @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +static clib_error_t * +dpdk_launch_thread (void *fp, vlib_worker_thread_t * w, unsigned lcore_id) +{ + int r; + r = rte_eal_remote_launch (fp, (void *) w, lcore_id); + if (r) + return clib_error_return (0, "Failed to launch thread %u", lcore_id); + return 0; +} + +static clib_error_t * +dpdk_thread_set_lcore (u32 thread, u16 lcore) +{ + return 0; +} + +static vlib_thread_callbacks_t callbacks = { + .vlib_launch_thread_cb = &dpdk_launch_thread, + .vlib_thread_set_lcore_cb = &dpdk_thread_set_lcore, +}; + +static clib_error_t * +dpdk_thread_init (vlib_main_t * vm) +{ + vlib_thread_cb_register (vm, &callbacks); + return 0; +} + +VLIB_INIT_FUNCTION (dpdk_thread_init); + +/** @endcond */ +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 999f9869..14e78817 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -3970,13 +3970,6 @@ _(feature_enable_disable_reply) \ _(sw_interface_tag_add_del_reply) \ _(sw_interface_set_mtu_reply) -#if DPDK > 0 -#define foreach_standard_dpdk_reply_retval_handler \ -_(sw_interface_set_dpdk_hqos_pipe_reply) \ -_(sw_interface_set_dpdk_hqos_subport_reply) \ -_(sw_interface_set_dpdk_hqos_tctbl_reply) -#endif - #define _(n) \ static void vl_api_##n##_t_handler \ (vl_api_##n##_t * mp) \ @@ -4008,39 +4001,6 @@ foreach_standard_reply_retval_handler; foreach_standard_reply_retval_handler; #undef _ -#if DPDK > 0 -#define _(n) \ - static void vl_api_##n##_t_handler \ - (vl_api_##n##_t * mp) \ - { \ - vat_main_t * vam = &vat_main; \ - i32 retval = ntohl(mp->retval); \ - if (vam->async_mode) { \ - vam->async_errors += (retval < 0); \ - } else { \ - vam->retval = retval; \ - vam->result_ready = 1; \ - } \ - } -foreach_standard_dpdk_reply_retval_handler; -#undef _ - -#define _(n) \ - static void vl_api_##n##_t_handler_json \ - (vl_api_##n##_t * mp) \ - { \ - vat_main_t * vam = &vat_main; \ - vat_json_node_t node; \ - vat_json_init_object(&node); \ - vat_json_object_add_int(&node, "retval", ntohl(mp->retval)); \ - vat_json_print(vam->ofp, &node); \ - vam->retval = ntohl(mp->retval); \ - vam->result_ready = 1; \ - } -foreach_standard_dpdk_reply_retval_handler; -#undef _ -#endif - /* * Table of message reply handlers, must include boilerplate handlers * we just generated @@ -4272,16 +4232,6 @@ _(SW_INTERFACE_SET_MTU_REPLY, sw_interface_set_mtu_reply) \ _(IP_NEIGHBOR_DETAILS, ip_neighbor_details) \ _(SW_INTERFACE_GET_TABLE_REPLY, sw_interface_get_table_reply) -#if DPDK > 0 -#define foreach_vpe_dpdk_api_reply_msg \ -_(SW_INTERFACE_SET_DPDK_HQOS_PIPE_REPLY, \ - sw_interface_set_dpdk_hqos_pipe_reply) \ -_(SW_INTERFACE_SET_DPDK_HQOS_SUBPORT_REPLY, \ - sw_interface_set_dpdk_hqos_subport_reply) \ -_(SW_INTERFACE_SET_DPDK_HQOS_TCTBL_REPLY, \ - sw_interface_set_dpdk_hqos_tctbl_reply) -#endif - typedef struct { u8 *name; @@ -5081,226 +5031,6 @@ api_sw_interface_clear_stats (vat_main_t * vam) return ret; } -#if DPDK >0 -static int -api_sw_interface_set_dpdk_hqos_pipe (vat_main_t * vam) -{ - unformat_input_t *i = vam->input; - vl_api_sw_interface_set_dpdk_hqos_pipe_t *mp; - u32 sw_if_index; - u8 sw_if_index_set = 0; - u32 subport; - u8 subport_set = 0; - u32 
pipe; - u8 pipe_set = 0; - u32 profile; - u8 profile_set = 0; - int ret; - - /* Parse args required to build the message */ - while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) - { - if (unformat (i, "rx %U", api_unformat_sw_if_index, vam, &sw_if_index)) - sw_if_index_set = 1; - else if (unformat (i, "sw_if_index %u", &sw_if_index)) - sw_if_index_set = 1; - else if (unformat (i, "subport %u", &subport)) - subport_set = 1; - else - if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index)) - sw_if_index_set = 1; - else if (unformat (i, "pipe %u", &pipe)) - pipe_set = 1; - else if (unformat (i, "profile %u", &profile)) - profile_set = 1; - else - break; - } - - if (sw_if_index_set == 0) - { - errmsg ("missing interface name or sw_if_index"); - return -99; - } - - if (subport_set == 0) - { - errmsg ("missing subport "); - return -99; - } - - if (pipe_set == 0) - { - errmsg ("missing pipe"); - return -99; - } - - if (profile_set == 0) - { - errmsg ("missing profile"); - return -99; - } - - M (SW_INTERFACE_SET_DPDK_HQOS_PIPE, mp); - - mp->sw_if_index = ntohl (sw_if_index); - mp->subport = ntohl (subport); - mp->pipe = ntohl (pipe); - mp->profile = ntohl (profile); - - - S (mp); - W (ret); - return ret; -} - -static int -api_sw_interface_set_dpdk_hqos_subport (vat_main_t * vam) -{ - unformat_input_t *i = vam->input; - vl_api_sw_interface_set_dpdk_hqos_subport_t *mp; - u32 sw_if_index; - u8 sw_if_index_set = 0; - u32 subport; - u8 subport_set = 0; - u32 tb_rate = 1250000000; /* 10GbE */ - u32 tb_size = 1000000; - u32 tc_rate[] = { 1250000000, 1250000000, 1250000000, 1250000000 }; - u32 tc_period = 10; - int ret; - - /* Parse args required to build the message */ - while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) - { - if (unformat (i, "rx %U", api_unformat_sw_if_index, vam, &sw_if_index)) - sw_if_index_set = 1; - else if (unformat (i, "sw_if_index %u", &sw_if_index)) - sw_if_index_set = 1; - else if (unformat (i, "subport %u", &subport)) - subport_set = 1; - else - if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index)) - sw_if_index_set = 1; - else if (unformat (i, "rate %u", &tb_rate)) - { - u32 tc_id; - - for (tc_id = 0; tc_id < (sizeof (tc_rate) / sizeof (tc_rate[0])); - tc_id++) - tc_rate[tc_id] = tb_rate; - } - else if (unformat (i, "bktsize %u", &tb_size)) - ; - else if (unformat (i, "tc0 %u", &tc_rate[0])) - ; - else if (unformat (i, "tc1 %u", &tc_rate[1])) - ; - else if (unformat (i, "tc2 %u", &tc_rate[2])) - ; - else if (unformat (i, "tc3 %u", &tc_rate[3])) - ; - else if (unformat (i, "period %u", &tc_period)) - ; - else - break; - } - - if (sw_if_index_set == 0) - { - errmsg ("missing interface name or sw_if_index"); - return -99; - } - - if (subport_set == 0) - { - errmsg ("missing subport "); - return -99; - } - - M (SW_INTERFACE_SET_DPDK_HQOS_SUBPORT, mp); - - mp->sw_if_index = ntohl (sw_if_index); - mp->subport = ntohl (subport); - mp->tb_rate = ntohl (tb_rate); - mp->tb_size = ntohl (tb_size); - mp->tc_rate[0] = ntohl (tc_rate[0]); - mp->tc_rate[1] = ntohl (tc_rate[1]); - mp->tc_rate[2] = ntohl (tc_rate[2]); - mp->tc_rate[3] = ntohl (tc_rate[3]); - mp->tc_period = ntohl (tc_period); - - S (mp); - W (ret); - return ret; -} - -static int -api_sw_interface_set_dpdk_hqos_tctbl (vat_main_t * vam) -{ - unformat_input_t *i = vam->input; - vl_api_sw_interface_set_dpdk_hqos_tctbl_t *mp; - u32 sw_if_index; - u8 sw_if_index_set = 0; - u8 entry_set = 0; - u8 tc_set = 0; - u8 queue_set = 0; - u32 entry, tc, queue; - int ret; - - /* Parse args required to 
build the message */ - while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) - { - if (unformat (i, "rx %U", api_unformat_sw_if_index, vam, &sw_if_index)) - sw_if_index_set = 1; - else if (unformat (i, "sw_if_index %u", &sw_if_index)) - sw_if_index_set = 1; - else if (unformat (i, "entry %d", &entry)) - entry_set = 1; - else if (unformat (i, "tc %d", &tc)) - tc_set = 1; - else if (unformat (i, "queue %d", &queue)) - queue_set = 1; - else - break; - } - - if (sw_if_index_set == 0) - { - errmsg ("missing interface name or sw_if_index"); - return -99; - } - - if (entry_set == 0) - { - errmsg ("missing entry "); - return -99; - } - - if (tc_set == 0) - { - errmsg ("missing traffic class "); - return -99; - } - - if (queue_set == 0) - { - errmsg ("missing queue "); - return -99; - } - - M (SW_INTERFACE_SET_DPDK_HQOS_TCTBL, mp); - - mp->sw_if_index = ntohl (sw_if_index); - mp->entry = ntohl (entry); - mp->tc = ntohl (tc); - mp->queue = ntohl (queue); - - S (mp); - W (ret); - return ret; -} -#endif - static int api_sw_interface_add_del_address (vat_main_t * vam) { @@ -18656,18 +18386,6 @@ _(sw_interface_set_mtu, " | sw_if_index mtu ") \ _(ip_neighbor_dump, "[ip6] | sw_if_index ") \ _(sw_interface_get_table, " | sw_if_index [ipv6]") -#if DPDK > 0 -#define foreach_vpe_dpdk_api_msg \ -_(sw_interface_set_dpdk_hqos_pipe, \ - "rx | sw_if_index subport pipe \n" \ - "profile \n") \ -_(sw_interface_set_dpdk_hqos_subport, \ - "rx | sw_if_index subport [rate ]\n" \ - "[bktsize ] [tc0 ] [tc1 ] [tc2 ] [tc3 ] [period ]\n") \ -_(sw_interface_set_dpdk_hqos_tctbl, \ - "rx | sw_if_index entry tc queue \n") -#endif - /* List of command functions, CLI names map directly to functions */ #define foreach_cli_function \ _(comment, "usage: comment ") \ @@ -18705,22 +18423,6 @@ _(unset, "usage: unset ") foreach_vpe_api_reply_msg; #undef _ -#if DPDK > 0 -#define _(N,n) \ - static void vl_api_##n##_t_handler_uni \ - (vl_api_##n##_t * mp) \ - { \ - vat_main_t * vam = &vat_main; \ - if (vam->json_output) { \ - vl_api_##n##_t_handler_json(mp); \ - } else { \ - vl_api_##n##_t_handler(mp); \ - } \ - } -foreach_vpe_dpdk_api_reply_msg; -#undef _ -#endif - void vat_api_hookup (vat_main_t * vam) { @@ -18734,18 +18436,6 @@ vat_api_hookup (vat_main_t * vam) foreach_vpe_api_reply_msg; #undef _ -#if DPDK > 0 -#define _(N,n) \ - vl_msg_api_set_handlers(VL_API_##N, #n, \ - vl_api_##n##_t_handler_uni, \ - vl_noop_handler, \ - vl_api_##n##_t_endian, \ - vl_api_##n##_t_print, \ - sizeof(vl_api_##n##_t), 1); - foreach_vpe_dpdk_api_reply_msg; -#undef _ -#endif - #if (VPP_API_TEST_BUILTIN==0) vl_msg_api_set_first_available_msg_id (VL_MSG_FIRST_AVAILABLE); #endif @@ -18760,21 +18450,11 @@ vat_api_hookup (vat_main_t * vam) #define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n); foreach_vpe_api_msg; #undef _ -#if DPDK >0 -#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n); - foreach_vpe_dpdk_api_msg; -#undef _ -#endif /* Help strings */ #define _(n,h) hash_set_mem (vam->help_by_name, #n, h); foreach_vpe_api_msg; #undef _ -#if DPDK >0 -#define _(n,h) hash_set_mem (vam->help_by_name, #n, h); - foreach_vpe_dpdk_api_msg; -#undef _ -#endif /* CLI functions */ #define _(n,h) hash_set_mem (vam->function_by_name, #n, n); diff --git a/src/vnet.am b/src/vnet.am index 923f61d8..84930f05 100644 --- a/src/vnet.am +++ b/src/vnet.am @@ -23,8 +23,7 @@ libvnet_la_DEPENDENCIES = \ libvlibmemory.la \ libvlibmemoryclient.la -libvnet_la_LIBADD = $(libvnet_la_DEPENDENCIES) -lm -lpthread -ldl -lrt $(DPDK_LD_ADD) -libvnet_la_LDFLAGS = 
$(DPDK_LD_FLAGS) +libvnet_la_LIBADD = $(libvnet_la_DEPENDENCIES) -lm -lpthread -ldl -lrt if WITH_LIBSSL libvnet_la_LIBADD += -lcrypto @@ -396,15 +395,6 @@ libvnet_la_SOURCES += \ vnet/ipsec/ipsec_api.c API_FILES += vnet/ipsec/ipsec.api - -if WITH_DPDK -libvnet_la_SOURCES += \ - vnet/devices/dpdk/ipsec/esp_encrypt.c \ - vnet/devices/dpdk/ipsec/esp_decrypt.c \ - vnet/devices/dpdk/ipsec/crypto_node.c \ - vnet/devices/dpdk/ipsec/cli.c \ - vnet/devices/dpdk/ipsec/ipsec.c -endif endif libvnet_la_SOURCES += \ @@ -416,11 +406,6 @@ nobase_include_HEADERS += \ vnet/ipsec/ikev2.h \ vnet/ipsec/ikev2_priv.h \ vnet/ipsec/ipsec.api.h -if WITH_DPDK -nobase_include_HEADERS += \ - vnet/devices/dpdk/ipsec/ipsec.h \ - vnet/devices/dpdk/ipsec/esp.h -endif ######################################## # Layer 3 protocol: osi @@ -803,29 +788,7 @@ nobase_include_HEADERS += \ vnet/pg/pg.h \ vnet/pg/edit.h -######################################## -# DPDK -######################################## -if WITH_DPDK -libvnet_la_SOURCES += \ - vnet/devices/dpdk/buffer.c \ - vnet/devices/dpdk/dpdk_priv.h \ - vnet/devices/dpdk/device.c \ - vnet/devices/dpdk/format.c \ - vnet/devices/dpdk/init.c \ - vnet/devices/dpdk/main.c \ - vnet/devices/dpdk/node.c \ - vnet/devices/dpdk/thread.c \ - vnet/devices/dpdk/hqos.c \ - vnet/devices/dpdk/cli.c \ - vnet/devices/dpdk/dpdk_api.c - -nobase_include_HEADERS += \ - vnet/devices/dpdk/dpdk.h \ - vnet/devices/dpdk/dpdk.api.h - -API_FILES += vnet/devices/dpdk/dpdk.api -else +if !WITH_DPDK libvnet_la_SOURCES += \ vnet/devices/nic/ixge.c \ vnet/devices/nic/ixge.h \ diff --git a/src/vnet/devices/dpdk/buffer.c b/src/vnet/devices/dpdk/buffer.c deleted file mode 100644 index f95d4cb5..00000000 --- a/src/vnet/devices/dpdk/buffer.c +++ /dev/null @@ -1,588 +0,0 @@ -/* - * Copyright (c) 2017 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - * buffer.c: allocate/free network buffers. - * - * Copyright (c) 2008 Eliot Dresselhaus - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * @file - * - * Allocate/free network buffers. - */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - - -STATIC_ASSERT (VLIB_BUFFER_PRE_DATA_SIZE == RTE_PKTMBUF_HEADROOM, - "VLIB_BUFFER_PRE_DATA_SIZE must be equal to RTE_PKTMBUF_HEADROOM"); - -static_always_inline void -dpdk_rte_pktmbuf_free (vlib_main_t * vm, vlib_buffer_t * b) -{ - vlib_buffer_t *hb = b; - struct rte_mbuf *mb; - u32 next, flags; - mb = rte_mbuf_from_vlib_buffer (hb); - -next: - flags = b->flags; - next = b->next_buffer; - mb = rte_mbuf_from_vlib_buffer (b); - - if (PREDICT_FALSE (b->n_add_refs)) - { - rte_mbuf_refcnt_update (mb, b->n_add_refs); - b->n_add_refs = 0; - } - - rte_pktmbuf_free_seg (mb); - - if (flags & VLIB_BUFFER_NEXT_PRESENT) - { - b = vlib_get_buffer (vm, next); - goto next; - } -} - -static void -del_free_list (vlib_main_t * vm, vlib_buffer_free_list_t * f) -{ - u32 i; - vlib_buffer_t *b; - - for (i = 0; i < vec_len (f->buffers); i++) - { - b = vlib_get_buffer (vm, f->buffers[i]); - dpdk_rte_pktmbuf_free (vm, b); - } - - vec_free (f->name); - vec_free (f->buffers); -} - -/* Add buffer free list. */ -static void -dpdk_buffer_delete_free_list (vlib_main_t * vm, u32 free_list_index) -{ - vlib_buffer_main_t *bm = vm->buffer_main; - vlib_buffer_free_list_t *f; - u32 merge_index; - int i; - - ASSERT (os_get_cpu_number () == 0); - - f = vlib_buffer_get_free_list (vm, free_list_index); - - merge_index = vlib_buffer_get_free_list_with_size (vm, f->n_data_bytes); - if (merge_index != ~0 && merge_index != free_list_index) - { - vlib_buffer_merge_free_lists (pool_elt_at_index - (bm->buffer_free_list_pool, merge_index), - f); - } - - del_free_list (vm, f); - - /* Poison it. */ - memset (f, 0xab, sizeof (f[0])); - - pool_put (bm->buffer_free_list_pool, f); - - for (i = 1; i < vec_len (vlib_mains); i++) - { - bm = vlib_mains[i]->buffer_main; - f = vlib_buffer_get_free_list (vlib_mains[i], free_list_index);; - memset (f, 0xab, sizeof (f[0])); - pool_put (bm->buffer_free_list_pool, f); - } -} - -/* Make sure free list has at least given number of free buffers. */ -static uword -fill_free_list (vlib_main_t * vm, - vlib_buffer_free_list_t * fl, uword min_free_buffers) -{ - dpdk_main_t *dm = &dpdk_main; - vlib_buffer_t *b0, *b1, *b2, *b3; - int n, i; - u32 bi0, bi1, bi2, bi3; - unsigned socket_id = rte_socket_id (); - struct rte_mempool *rmp = dm->pktmbuf_pools[socket_id]; - struct rte_mbuf *mb0, *mb1, *mb2, *mb3; - - /* Too early? */ - if (PREDICT_FALSE (rmp == 0)) - return 0; - - /* Already have enough free buffers on free list? */ - n = min_free_buffers - vec_len (fl->buffers); - if (n <= 0) - return min_free_buffers; - - /* Always allocate round number of buffers. */ - n = round_pow2 (n, CLIB_CACHE_LINE_BYTES / sizeof (u32)); - - /* Always allocate new buffers in reasonably large sized chunks. 
*/ - n = clib_max (n, fl->min_n_buffers_each_physmem_alloc); - - vec_validate (vm->mbuf_alloc_list, n - 1); - - if (rte_mempool_get_bulk (rmp, vm->mbuf_alloc_list, n) < 0) - return 0; - - _vec_len (vm->mbuf_alloc_list) = n; - - i = 0; - - while (i < (n - 7)) - { - vlib_prefetch_buffer_header (vlib_buffer_from_rte_mbuf - (vm->mbuf_alloc_list[i + 4]), STORE); - vlib_prefetch_buffer_header (vlib_buffer_from_rte_mbuf - (vm->mbuf_alloc_list[i + 5]), STORE); - vlib_prefetch_buffer_header (vlib_buffer_from_rte_mbuf - (vm->mbuf_alloc_list[i + 6]), STORE); - vlib_prefetch_buffer_header (vlib_buffer_from_rte_mbuf - (vm->mbuf_alloc_list[i + 7]), STORE); - - mb0 = vm->mbuf_alloc_list[i]; - mb1 = vm->mbuf_alloc_list[i + 1]; - mb2 = vm->mbuf_alloc_list[i + 2]; - mb3 = vm->mbuf_alloc_list[i + 3]; - - ASSERT (rte_mbuf_refcnt_read (mb0) == 0); - ASSERT (rte_mbuf_refcnt_read (mb1) == 0); - ASSERT (rte_mbuf_refcnt_read (mb2) == 0); - ASSERT (rte_mbuf_refcnt_read (mb3) == 0); - - rte_mbuf_refcnt_set (mb0, 1); - rte_mbuf_refcnt_set (mb1, 1); - rte_mbuf_refcnt_set (mb2, 1); - rte_mbuf_refcnt_set (mb3, 1); - - b0 = vlib_buffer_from_rte_mbuf (mb0); - b1 = vlib_buffer_from_rte_mbuf (mb1); - b2 = vlib_buffer_from_rte_mbuf (mb2); - b3 = vlib_buffer_from_rte_mbuf (mb3); - - bi0 = vlib_get_buffer_index (vm, b0); - bi1 = vlib_get_buffer_index (vm, b1); - bi2 = vlib_get_buffer_index (vm, b2); - bi3 = vlib_get_buffer_index (vm, b3); - - vec_add1_aligned (fl->buffers, bi0, CLIB_CACHE_LINE_BYTES); - vec_add1_aligned (fl->buffers, bi1, CLIB_CACHE_LINE_BYTES); - vec_add1_aligned (fl->buffers, bi2, CLIB_CACHE_LINE_BYTES); - vec_add1_aligned (fl->buffers, bi3, CLIB_CACHE_LINE_BYTES); - - vlib_buffer_init_for_free_list (b0, fl); - vlib_buffer_init_for_free_list (b1, fl); - vlib_buffer_init_for_free_list (b2, fl); - vlib_buffer_init_for_free_list (b3, fl); - - if (fl->buffer_init_function) - { - fl->buffer_init_function (vm, fl, &bi0, 1); - fl->buffer_init_function (vm, fl, &bi1, 1); - fl->buffer_init_function (vm, fl, &bi2, 1); - fl->buffer_init_function (vm, fl, &bi3, 1); - } - i += 4; - } - - while (i < n) - { - mb0 = vm->mbuf_alloc_list[i]; - - ASSERT (rte_mbuf_refcnt_read (mb0) == 0); - rte_mbuf_refcnt_set (mb0, 1); - - b0 = vlib_buffer_from_rte_mbuf (mb0); - bi0 = vlib_get_buffer_index (vm, b0); - - vec_add1_aligned (fl->buffers, bi0, CLIB_CACHE_LINE_BYTES); - - vlib_buffer_init_for_free_list (b0, fl); - - if (fl->buffer_init_function) - fl->buffer_init_function (vm, fl, &bi0, 1); - i++; - } - - fl->n_alloc += n; - - return n; -} - -static u32 -alloc_from_free_list (vlib_main_t * vm, - vlib_buffer_free_list_t * free_list, - u32 * alloc_buffers, u32 n_alloc_buffers) -{ - u32 *dst, *src; - uword len, n_filled; - - dst = alloc_buffers; - - n_filled = fill_free_list (vm, free_list, n_alloc_buffers); - if (n_filled == 0) - return 0; - - len = vec_len (free_list->buffers); - ASSERT (len >= n_alloc_buffers); - - src = free_list->buffers + len - n_alloc_buffers; - clib_memcpy (dst, src, n_alloc_buffers * sizeof (u32)); - - _vec_len (free_list->buffers) -= n_alloc_buffers; - - return n_alloc_buffers; -} - -/* Allocate a given number of buffers into given array. - Returns number actually allocated which will be either zero or - number requested. 
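The comment above spells out an all-or-nothing contract: the allocator returns either the full request or zero. A minimal caller-side sketch under that assumption (vlib_buffer_alloc() dispatches to this callback once the vlib_buffer_callbacks_t at the end of the file is registered; the function name example_alloc_frame is hypothetical):

static void
example_alloc_frame (vlib_main_t * vm)
{
  u32 buffers[VLIB_FRAME_SIZE];
  u32 n_alloc;

  /* either VLIB_FRAME_SIZE buffer indices are returned or none at all */
  n_alloc = vlib_buffer_alloc (vm, buffers, VLIB_FRAME_SIZE);
  if (n_alloc == 0)
    return;			/* mempool exhausted, nothing was allocated */

  /* ... fill and hand off the buffers ... */

  vlib_buffer_free (vm, buffers, n_alloc);
}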
*/ -u32 -dpdk_buffer_alloc (vlib_main_t * vm, u32 * buffers, u32 n_buffers) -{ - vlib_buffer_main_t *bm = vm->buffer_main; - - return alloc_from_free_list - (vm, - pool_elt_at_index (bm->buffer_free_list_pool, - VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX), - buffers, n_buffers); -} - - -u32 -dpdk_buffer_alloc_from_free_list (vlib_main_t * vm, - u32 * buffers, - u32 n_buffers, u32 free_list_index) -{ - vlib_buffer_main_t *bm = vm->buffer_main; - vlib_buffer_free_list_t *f; - f = pool_elt_at_index (bm->buffer_free_list_pool, free_list_index); - return alloc_from_free_list (vm, f, buffers, n_buffers); -} - -static_always_inline void -vlib_buffer_free_inline (vlib_main_t * vm, - u32 * buffers, u32 n_buffers, u32 follow_buffer_next) -{ - vlib_buffer_main_t *bm = vm->buffer_main; - vlib_buffer_free_list_t *fl; - u32 fi; - int i; - u32 (*cb) (vlib_main_t * vm, u32 * buffers, u32 n_buffers, - u32 follow_buffer_next); - - cb = bm->buffer_free_callback; - - if (PREDICT_FALSE (cb != 0)) - n_buffers = (*cb) (vm, buffers, n_buffers, follow_buffer_next); - - if (!n_buffers) - return; - - for (i = 0; i < n_buffers; i++) - { - vlib_buffer_t *b; - - b = vlib_get_buffer (vm, buffers[i]); - - fl = vlib_buffer_get_buffer_free_list (vm, b, &fi); - - /* The only current use of this callback: multicast recycle */ - if (PREDICT_FALSE (fl->buffers_added_to_freelist_function != 0)) - { - int j; - - vlib_buffer_add_to_free_list - (vm, fl, buffers[i], (b->flags & VLIB_BUFFER_RECYCLE) == 0); - - for (j = 0; j < vec_len (bm->announce_list); j++) - { - if (fl == bm->announce_list[j]) - goto already_announced; - } - vec_add1 (bm->announce_list, fl); - already_announced: - ; - } - else - { - if (PREDICT_TRUE ((b->flags & VLIB_BUFFER_RECYCLE) == 0)) - dpdk_rte_pktmbuf_free (vm, b); - } - } - if (vec_len (bm->announce_list)) - { - vlib_buffer_free_list_t *fl; - for (i = 0; i < vec_len (bm->announce_list); i++) - { - fl = bm->announce_list[i]; - fl->buffers_added_to_freelist_function (vm, fl); - } - _vec_len (bm->announce_list) = 0; - } -} - -static void -dpdk_buffer_free (vlib_main_t * vm, u32 * buffers, u32 n_buffers) -{ - vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */ - 1); -} - -static void -dpdk_buffer_free_no_next (vlib_main_t * vm, u32 * buffers, u32 n_buffers) -{ - vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */ - 0); -} - -static void -dpdk_packet_template_init (vlib_main_t * vm, - void *vt, - void *packet_data, - uword n_packet_data_bytes, - uword min_n_buffers_each_physmem_alloc, u8 * name) -{ - vlib_packet_template_t *t = (vlib_packet_template_t *) vt; - - vlib_worker_thread_barrier_sync (vm); - memset (t, 0, sizeof (t[0])); - - vec_add (t->packet_data, packet_data, n_packet_data_bytes); - - vlib_worker_thread_barrier_release (vm); -} - -clib_error_t * -vlib_buffer_pool_create (vlib_main_t * vm, unsigned num_mbufs, - unsigned socket_id) -{ - dpdk_main_t *dm = &dpdk_main; - vlib_physmem_main_t *vpm = &vm->physmem_main; - struct rte_mempool *rmp; - int i; - - vec_validate_aligned (dm->pktmbuf_pools, socket_id, CLIB_CACHE_LINE_BYTES); - - /* pool already exists, nothing to do */ - if (dm->pktmbuf_pools[socket_id]) - return 0; - - u8 *pool_name = format (0, "mbuf_pool_socket%u%c", socket_id, 0); - - rmp = rte_pktmbuf_pool_create ((char *) pool_name, /* pool name */ - num_mbufs, /* number of mbufs */ - 512, /* cache size */ - VLIB_BUFFER_HDR_SIZE, /* priv size */ - VLIB_BUFFER_PRE_DATA_SIZE + VLIB_BUFFER_DATA_SIZE, /* dataroom size */ - socket_id); /* cpu socket */ - - if 
(rmp) - { - { - uword this_pool_end; - uword this_pool_start; - uword this_pool_size; - uword save_vpm_start, save_vpm_end, save_vpm_size; - struct rte_mempool_memhdr *memhdr; - - this_pool_start = ~0ULL; - this_pool_end = 0LL; - - STAILQ_FOREACH (memhdr, &rmp->mem_list, next) - { - if (((uword) (memhdr->addr + memhdr->len)) > this_pool_end) - this_pool_end = (uword) (memhdr->addr + memhdr->len); - if (((uword) memhdr->addr) < this_pool_start) - this_pool_start = (uword) (memhdr->addr); - } - ASSERT (this_pool_start < ~0ULL && this_pool_end > 0); - this_pool_size = this_pool_end - this_pool_start; - - if (CLIB_DEBUG > 1) - { - clib_warning ("%s: pool start %llx pool end %llx pool size %lld", - pool_name, this_pool_start, this_pool_end, - this_pool_size); - clib_warning - ("before: virtual.start %llx virtual.end %llx virtual.size %lld", - vpm->virtual.start, vpm->virtual.end, vpm->virtual.size); - } - - save_vpm_start = vpm->virtual.start; - save_vpm_end = vpm->virtual.end; - save_vpm_size = vpm->virtual.size; - - if ((this_pool_start < vpm->virtual.start) || vpm->virtual.start == 0) - vpm->virtual.start = this_pool_start; - if (this_pool_end > vpm->virtual.end) - vpm->virtual.end = this_pool_end; - - vpm->virtual.size = vpm->virtual.end - vpm->virtual.start; - - if (CLIB_DEBUG > 1) - { - clib_warning - ("after: virtual.start %llx virtual.end %llx virtual.size %lld", - vpm->virtual.start, vpm->virtual.end, vpm->virtual.size); - } - - /* check if fits into buffer index range */ - if ((u64) vpm->virtual.size > - ((u64) 1 << (32 + CLIB_LOG2_CACHE_LINE_BYTES))) - { - clib_warning ("physmem: virtual size out of range!"); - vpm->virtual.start = save_vpm_start; - vpm->virtual.end = save_vpm_end; - vpm->virtual.size = save_vpm_size; - rmp = 0; - } - } - if (rmp) - { - dm->pktmbuf_pools[socket_id] = rmp; - vec_free (pool_name); - return 0; - } - } - - vec_free (pool_name); - - /* no usable pool for this socket, try to use pool from another one */ - for (i = 0; i < vec_len (dm->pktmbuf_pools); i++) - { - if (dm->pktmbuf_pools[i]) - { - clib_warning - ("WARNING: Failed to allocate mempool for CPU socket %u. 
" - "Threads running on socket %u will use socket %u mempool.", - socket_id, socket_id, i); - dm->pktmbuf_pools[socket_id] = dm->pktmbuf_pools[i]; - return 0; - } - } - - return clib_error_return (0, "failed to allocate mempool on socket %u", - socket_id); -} - -#if CLIB_DEBUG > 0 - -u32 *vlib_buffer_state_validation_lock; -uword *vlib_buffer_state_validation_hash; -void *vlib_buffer_state_heap; - -static clib_error_t * -buffer_state_validation_init (vlib_main_t * vm) -{ - void *oldheap; - - vlib_buffer_state_heap = mheap_alloc (0, 10 << 20); - - oldheap = clib_mem_set_heap (vlib_buffer_state_heap); - - vlib_buffer_state_validation_hash = hash_create (0, sizeof (uword)); - vec_validate_aligned (vlib_buffer_state_validation_lock, 0, - CLIB_CACHE_LINE_BYTES); - clib_mem_set_heap (oldheap); - return 0; -} - -VLIB_INIT_FUNCTION (buffer_state_validation_init); -#endif - -static vlib_buffer_callbacks_t callbacks = { - .vlib_buffer_alloc_cb = &dpdk_buffer_alloc, - .vlib_buffer_alloc_from_free_list_cb = &dpdk_buffer_alloc_from_free_list, - .vlib_buffer_free_cb = &dpdk_buffer_free, - .vlib_buffer_free_no_next_cb = &dpdk_buffer_free_no_next, - .vlib_packet_template_init_cb = &dpdk_packet_template_init, - .vlib_buffer_delete_free_list_cb = &dpdk_buffer_delete_free_list, -}; - -static clib_error_t * -dpdk_buffer_init (vlib_main_t * vm) -{ - vlib_buffer_cb_register (vm, &callbacks); - return 0; -} - -VLIB_INIT_FUNCTION (dpdk_buffer_init); - -/** @endcond */ -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/cli.c b/src/vnet/devices/dpdk/cli.c deleted file mode 100644 index 99998862..00000000 --- a/src/vnet/devices/dpdk/cli.c +++ /dev/null @@ -1,2079 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "dpdk_priv.h" - -/** - * @file - * @brief CLI for DPDK Abstraction Layer and pcap Tx Trace. - * - * This file contains the source code for CLI for DPDK - * Abstraction Layer and pcap Tx Trace. 
- */ - - -static clib_error_t * -get_hqos (u32 hw_if_index, u32 subport_id, dpdk_device_t ** xd, - dpdk_device_config_t ** devconf) -{ - dpdk_main_t *dm = &dpdk_main; - vnet_hw_interface_t *hw; - struct rte_eth_dev_info dev_info; - uword *p = 0; - clib_error_t *error = NULL; - - - if (hw_if_index == (u32) ~ 0) - { - error = clib_error_return (0, "please specify valid interface name"); - goto done; - } - - if (subport_id != 0) - { - error = clib_error_return (0, "Invalid subport"); - goto done; - } - - hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); - *xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - rte_eth_dev_info_get ((*xd)->device_index, &dev_info); - if (dev_info.pci_dev) - { /* bonded interface has no pci info */ - vlib_pci_addr_t pci_addr; - - pci_addr.domain = dev_info.pci_dev->addr.domain; - pci_addr.bus = dev_info.pci_dev->addr.bus; - pci_addr.slot = dev_info.pci_dev->addr.devid; - pci_addr.function = dev_info.pci_dev->addr.function; - - p = - hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32); - } - - if (p) - (*devconf) = pool_elt_at_index (dm->conf->dev_confs, p[0]); - else - (*devconf) = &dm->conf->default_devconf; - -done: - return error; -} - -static clib_error_t * -pcap_trace_command_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ -#define PCAP_DEF_PKT_TO_CAPTURE (100) - - unformat_input_t _line_input, *line_input = &_line_input; - dpdk_main_t *dm = &dpdk_main; - u8 *filename; - u8 *chroot_filename = 0; - u32 max = 0; - int enabled = 0; - int errorFlag = 0; - clib_error_t *error = 0; - - /* Get a line of input. */ - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (line_input, "on")) - { - if (dm->tx_pcap_enable == 0) - { - enabled = 1; - } - else - { - vlib_cli_output (vm, "pcap tx capture already on..."); - errorFlag = 1; - break; - } - } - else if (unformat (line_input, "off")) - { - if (dm->tx_pcap_enable) - { - vlib_cli_output (vm, "captured %d pkts...", - dm->pcap_main.n_packets_captured + 1); - if (dm->pcap_main.n_packets_captured) - { - dm->pcap_main.n_packets_to_capture = - dm->pcap_main.n_packets_captured; - error = pcap_write (&dm->pcap_main); - if (error) - clib_error_report (error); - else - vlib_cli_output (vm, "saved to %s...", dm->pcap_filename); - } - - dm->tx_pcap_enable = 0; - } - else - { - vlib_cli_output (vm, "pcap tx capture already off..."); - errorFlag = 1; - break; - } - } - else if (unformat (line_input, "max %d", &max)) - { - if (dm->tx_pcap_enable) - { - vlib_cli_output (vm, - "can't change max value while pcap tx capture active..."); - errorFlag = 1; - break; - } - } - else if (unformat (line_input, "intfc %U", - unformat_vnet_sw_interface, dm->vnet_main, - &dm->pcap_sw_if_index)) - ; - - else if (unformat (line_input, "intfc any")) - { - dm->pcap_sw_if_index = 0; - } - else if (unformat (line_input, "file %s", &filename)) - { - if (dm->tx_pcap_enable) - { - vlib_cli_output (vm, - "can't change file while pcap tx capture active..."); - errorFlag = 1; - break; - } - - /* Brain-police user path input */ - if (strstr ((char *) filename, "..") - || index ((char *) filename, '/')) - { - vlib_cli_output (vm, "illegal characters in filename '%s'", - filename); - vlib_cli_output (vm, - "Hint: Only filename, do not enter directory structure."); - vec_free (filename); - errorFlag = 1; - break; - } - - chroot_filename = format (0, "/tmp/%s%c", filename, 0); - vec_free 
(filename); - } - else if (unformat (line_input, "status")) - { - if (dm->pcap_sw_if_index == 0) - { - vlib_cli_output (vm, "max is %d for any interface to file %s", - dm-> - pcap_pkts_to_capture ? dm->pcap_pkts_to_capture - : PCAP_DEF_PKT_TO_CAPTURE, - dm-> - pcap_filename ? dm->pcap_filename : (u8 *) - "/tmp/vpe.pcap"); - } - else - { - vlib_cli_output (vm, "max is %d for interface %U to file %s", - dm-> - pcap_pkts_to_capture ? dm->pcap_pkts_to_capture - : PCAP_DEF_PKT_TO_CAPTURE, - format_vnet_sw_if_index_name, dm->vnet_main, - dm->pcap_sw_if_index, - dm-> - pcap_filename ? dm->pcap_filename : (u8 *) - "/tmp/vpe.pcap"); - } - - if (dm->tx_pcap_enable == 0) - { - vlib_cli_output (vm, "pcap tx capture is off..."); - } - else - { - vlib_cli_output (vm, "pcap tx capture is on: %d of %d pkts...", - dm->pcap_main.n_packets_captured, - dm->pcap_main.n_packets_to_capture); - } - break; - } - - else - { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, line_input); - errorFlag = 1; - break; - } - } - unformat_free (line_input); - - - if (errorFlag == 0) - { - /* Since no error, save configured values. */ - if (chroot_filename) - { - if (dm->pcap_filename) - vec_free (dm->pcap_filename); - vec_add1 (chroot_filename, 0); - dm->pcap_filename = chroot_filename; - } - - if (max) - dm->pcap_pkts_to_capture = max; - - - if (enabled) - { - if (dm->pcap_filename == 0) - dm->pcap_filename = format (0, "/tmp/vpe.pcap%c", 0); - - memset (&dm->pcap_main, 0, sizeof (dm->pcap_main)); - dm->pcap_main.file_name = (char *) dm->pcap_filename; - dm->pcap_main.n_packets_to_capture = PCAP_DEF_PKT_TO_CAPTURE; - if (dm->pcap_pkts_to_capture) - dm->pcap_main.n_packets_to_capture = dm->pcap_pkts_to_capture; - - dm->pcap_main.packet_type = PCAP_PACKET_TYPE_ethernet; - dm->tx_pcap_enable = 1; - vlib_cli_output (vm, "pcap tx capture on..."); - } - } - else if (chroot_filename) - vec_free (chroot_filename); - - - return error; -} - -/*? - * This command is used to start or stop a packet capture, or show - * the status of packet capture. - * - * This command has the following optional parameters: - * - * - on|off - Used to start or stop a packet capture. - * - * - max - Depth of local buffer. Once 'nn' number - * of packets have been received, buffer is flushed to file. Once another - * 'nn' number of packets have been received, buffer is flushed - * to file, overwriting previous write. If not entered, value defaults - * to 100. Can only be updated if packet capture is off. - * - * - intfc |any - Used to specify a given interface, - * or use 'any' to run packet capture on all interfaces. - * 'any' is the default if not provided. Settings from a previous - * packet capture are preserved, so 'any' can be used to reset - * the interface setting. - * - * - file - Used to specify the output filename. The file will - * be placed in the '/tmp' directory, so only the filename is - * supported. Directory should not be entered. If file already exists, file - * will be overwritten. If no filename is provided, '/tmp/vpe.pcap' - * will be used. Can only be updated if packet capture is off. - * - * - status - Displays the current status and configured attributes - * associated with a packet capture. If packet capture is in progress, - * 'status' also will return the number of packets currently in - * the local buffer. All additional attributes entered on command line - * with 'status' will be ingnored and not applied. 
- * - * @cliexpar - * Example of how to display the status of a tx packet capture when off: - * @cliexstart{pcap tx trace status} - * max is 100, for any interface to file /tmp/vpe.pcap - * pcap tx capture is off... - * @cliexend - * Example of how to start a tx packet capture: - * @cliexstart{pcap tx trace on max 35 intfc GigabitEthernet0/8/0 file vppTest.pcap} - * pcap tx capture on... - * @cliexend - * Example of how to display the status of a tx packet capture in progress: - * @cliexstart{pcap tx trace status} - * max is 35, for interface GigabitEthernet0/8/0 to file /tmp/vppTest.pcap - * pcap tx capture is on: 20 of 35 pkts... - * @cliexend - * Example of how to stop a tx packet capture: - * @cliexstart{vppctl pcap tx trace off} - * captured 21 pkts... - * saved to /tmp/vppTest.pcap... - * @cliexend -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (pcap_trace_command, static) = { - .path = "pcap tx trace", - .short_help = - "pcap tx trace [on|off] [max ] [intfc |any] [file ] [status]", - .function = pcap_trace_command_fn, -}; -/* *INDENT-ON* */ - - -static clib_error_t * -show_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - struct rte_mempool *rmp; - int i; - - for (i = 0; i < vec_len (dpdk_main.pktmbuf_pools); i++) - { - rmp = dpdk_main.pktmbuf_pools[i]; - if (rmp) - { - unsigned count = rte_mempool_avail_count (rmp); - unsigned free_count = rte_mempool_in_use_count (rmp); - - vlib_cli_output (vm, - "name=\"%s\" available = %7d allocated = %7d total = %7d\n", - rmp->name, (u32) count, (u32) free_count, - (u32) (count + free_count)); - } - else - { - vlib_cli_output (vm, "rte_mempool is NULL (!)\n"); - } - } - return 0; -} - -/*? - * This command displays statistics of each DPDK mempool. - * - * @cliexpar - * Example of how to display DPDK buffer data: - * @cliexstart{show dpdk buffer} - * name="mbuf_pool_socket0" available = 15104 allocated = 1280 total = 16384 - * @cliexend -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cmd_show_dpdk_bufferr,static) = { - .path = "show dpdk buffer", - .short_help = "show dpdk buffer", - .function = show_dpdk_buffer, - .is_mp_safe = 1, -}; -/* *INDENT-ON* */ - -static clib_error_t * -test_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - static u32 *allocated_buffers; - u32 n_alloc = 0; - u32 n_free = 0; - u32 first, actual_alloc; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "allocate %d", &n_alloc)) - ; - else if (unformat (input, "free %d", &n_free)) - ; - else - break; - } - - if (n_free) - { - if (vec_len (allocated_buffers) < n_free) - return clib_error_return (0, "Can't free %d, only %d allocated", - n_free, vec_len (allocated_buffers)); - - first = vec_len (allocated_buffers) - n_free; - vlib_buffer_free (vm, allocated_buffers + first, n_free); - _vec_len (allocated_buffers) = first; - } - if (n_alloc) - { - first = vec_len (allocated_buffers); - vec_validate (allocated_buffers, - vec_len (allocated_buffers) + n_alloc - 1); - - actual_alloc = vlib_buffer_alloc (vm, allocated_buffers + first, - n_alloc); - _vec_len (allocated_buffers) = first + actual_alloc; - - if (actual_alloc < n_alloc) - vlib_cli_output (vm, "WARNING: only allocated %d buffers", - actual_alloc); - } - - vlib_cli_output (vm, "Currently %d buffers allocated", - vec_len (allocated_buffers)); - - if (allocated_buffers && vec_len (allocated_buffers) == 0) - vec_free (allocated_buffers); - - return 0; -} - -/*? 
- * This command tests the allocation and freeing of DPDK buffers. - * If both 'allocate' and 'free' are entered on the - * same command, the 'free' is executed first. If no - * parameters are provided, this command display how many DPDK buffers - * the test command has allocated. - * - * @cliexpar - * @parblock - * - * Example of how to display how many DPDK buffer test command has allcoated: - * @cliexstart{test dpdk buffer} - * Currently 0 buffers allocated - * @cliexend - * - * Example of how to allocate DPDK buffers using the test command: - * @cliexstart{test dpdk buffer allocate 10} - * Currently 10 buffers allocated - * @cliexend - * - * Example of how to free DPDK buffers allocated by the test command: - * @cliexstart{test dpdk buffer free 10} - * Currently 0 buffers allocated - * @cliexend - * @endparblock -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cmd_test_dpdk_buffer,static) = { - .path = "test dpdk buffer", - .short_help = "test dpdk buffer [allocate ] [free ]", - .function = test_dpdk_buffer, - .is_mp_safe = 1, -}; -/* *INDENT-ON* */ - -static clib_error_t * -set_dpdk_if_desc (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - dpdk_main_t *dm = &dpdk_main; - vnet_hw_interface_t *hw; - dpdk_device_t *xd; - u32 hw_if_index = (u32) ~ 0; - u32 nb_rx_desc = (u32) ~ 0; - u32 nb_tx_desc = (u32) ~ 0; - clib_error_t *error = NULL; - - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat - (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, - &hw_if_index)) - ; - else if (unformat (line_input, "tx %d", &nb_tx_desc)) - ; - else if (unformat (line_input, "rx %d", &nb_rx_desc)) - ; - else - { - error = clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); - goto done; - } - } - - if (hw_if_index == (u32) ~ 0) - { - error = clib_error_return (0, "please specify valid interface name"); - goto done; - } - - hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0) - { - error = - clib_error_return (0, - "number of descriptors can be set only for " - "physical devices"); - goto done; - } - - if ((nb_rx_desc == (u32) ~ 0 || nb_rx_desc == xd->nb_rx_desc) && - (nb_tx_desc == (u32) ~ 0 || nb_tx_desc == xd->nb_tx_desc)) - { - error = clib_error_return (0, "nothing changed"); - goto done; - } - - if (nb_rx_desc != (u32) ~ 0) - xd->nb_rx_desc = nb_rx_desc; - - if (nb_tx_desc != (u32) ~ 0) - xd->nb_tx_desc = nb_tx_desc; - - error = dpdk_port_setup (dm, xd); - -done: - unformat_free (line_input); - - return error; -} - -/*? - * This command sets the number of DPDK 'rx' and - * 'tx' descriptors for the given physical interface. Use - * the command 'show hardware-interface' to display the - * current descriptor allocation. 
- * - * @cliexpar - * Example of how to set the DPDK interface descriptors: - * @cliexcmd{set dpdk interface descriptors GigabitEthernet0/8/0 rx 512 tx 512} -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cmd_set_dpdk_if_desc,static) = { - .path = "set dpdk interface descriptors", - .short_help = "set dpdk interface descriptors [rx ] [tx ]", - .function = set_dpdk_if_desc, -}; -/* *INDENT-ON* */ - -static clib_error_t * -show_dpdk_if_placement (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - vlib_thread_main_t *tm = vlib_get_thread_main (); - dpdk_main_t *dm = &dpdk_main; - dpdk_device_and_queue_t *dq; - int cpu; - - if (tm->n_vlib_mains == 1) - vlib_cli_output (vm, "All interfaces are handled by main thread"); - - for (cpu = 0; cpu < vec_len (dm->devices_by_cpu); cpu++) - { - if (cpu >= dm->input_cpu_first_index && - cpu < (dm->input_cpu_first_index + dm->input_cpu_count)) - vlib_cli_output (vm, "Thread %u (%s at lcore %u):", cpu, - vlib_worker_threads[cpu].name, - vlib_worker_threads[cpu].lcore_id); - - /* *INDENT-OFF* */ - vec_foreach(dq, dm->devices_by_cpu[cpu]) - { - u32 hw_if_index = dm->devices[dq->device].vlib_hw_if_index; - vnet_hw_interface_t * hi = vnet_get_hw_interface(dm->vnet_main, hw_if_index); - vlib_cli_output(vm, " %v queue %u", hi->name, dq->queue_id); - } - /* *INDENT-ON* */ - } - return 0; -} - -/*? - * This command is used to display the thread and core each - * DPDK interface and queue is assigned too. - * - * @cliexpar - * Example of how to display the DPDK interface placement: - * @cliexstart{show dpdk interface placement} - * Thread 1 (vpp_wk_0 at lcore 1): - * GigabitEthernet0/8/0 queue 0 - * GigabitEthernet0/9/0 queue 0 - * Thread 2 (vpp_wk_1 at lcore 2): - * GigabitEthernet0/8/0 queue 1 - * GigabitEthernet0/9/0 queue 1 - * @cliexend -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cmd_show_dpdk_if_placement,static) = { - .path = "show dpdk interface placement", - .short_help = "show dpdk interface placement", - .function = show_dpdk_if_placement, -}; -/* *INDENT-ON* */ - -static int -dpdk_device_queue_sort (void *a1, void *a2) -{ - dpdk_device_and_queue_t *dq1 = a1; - dpdk_device_and_queue_t *dq2 = a2; - - if (dq1->device > dq2->device) - return 1; - else if (dq1->device < dq2->device) - return -1; - else if (dq1->queue_id > dq2->queue_id) - return 1; - else if (dq1->queue_id < dq2->queue_id) - return -1; - else - return 0; -} - -static clib_error_t * -set_dpdk_if_placement (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - dpdk_main_t *dm = &dpdk_main; - dpdk_device_and_queue_t *dq; - vnet_hw_interface_t *hw; - dpdk_device_t *xd; - u32 hw_if_index = (u32) ~ 0; - u32 queue = (u32) 0; - u32 cpu = (u32) ~ 0; - int i; - clib_error_t *error = NULL; - - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat - (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, - &hw_if_index)) - ; - else if (unformat (line_input, "queue %d", &queue)) - ; - else if (unformat (line_input, "thread %d", &cpu)) - ; - else - { - error = clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); - goto done; - } - } - - if (hw_if_index == (u32) ~ 0) - { - error = clib_error_return (0, "please specify valid interface name"); - goto done; - } - - if (cpu < dm->input_cpu_first_index || - cpu >= (dm->input_cpu_first_index + dm->input_cpu_count)) - { - 
error = clib_error_return (0, "please specify valid thread id"); - goto done; - } - - hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - for (i = 0; i < vec_len (dm->devices_by_cpu); i++) - { - /* *INDENT-OFF* */ - vec_foreach(dq, dm->devices_by_cpu[i]) - { - if (hw_if_index == dm->devices[dq->device].vlib_hw_if_index && - queue == dq->queue_id) - { - if (cpu == i) /* nothing to do */ - goto done; - - vec_del1(dm->devices_by_cpu[i], dq - dm->devices_by_cpu[i]); - vec_add2(dm->devices_by_cpu[cpu], dq, 1); - dq->queue_id = queue; - dq->device = xd->device_index; - xd->cpu_socket_id_by_queue[queue] = - rte_lcore_to_socket_id(vlib_worker_threads[cpu].lcore_id); - - vec_sort_with_function(dm->devices_by_cpu[i], - dpdk_device_queue_sort); - - vec_sort_with_function(dm->devices_by_cpu[cpu], - dpdk_device_queue_sort); - - if (vec_len(dm->devices_by_cpu[i]) == 0) - vlib_node_set_state (vlib_mains[i], dpdk_input_node.index, - VLIB_NODE_STATE_DISABLED); - - if (vec_len(dm->devices_by_cpu[cpu]) == 1) - vlib_node_set_state (vlib_mains[cpu], dpdk_input_node.index, - VLIB_NODE_STATE_POLLING); - - goto done; - } - } - /* *INDENT-ON* */ - } - - error = clib_error_return (0, "not found"); - -done: - unformat_free (line_input); - - return error; -} - -/*? - * This command is used to assign a given interface, and optionally a - * given queue, to a different thread. This will not create a thread, - * so the thread must already exist. Use '/etc/vpp/startup.conf' - * for the initial thread creation. If the 'queue' is not provided, - * it defaults to 0. - * - * @cliexpar - * Example of how to display the DPDK interface placement: - * @cliexstart{show dpdk interface placement} - * Thread 1 (vpp_wk_0 at lcore 1): - * GigabitEthernet0/8/0 queue 0 - * GigabitEthernet0/9/0 queue 0 - * Thread 2 (vpp_wk_1 at lcore 2): - * GigabitEthernet0/8/0 queue 1 - * GigabitEthernet0/9/0 queue 1 - * @cliexend - * Example of how to assign a DPDK interface and queue to a thread: - * @cliexcmd{set dpdk interface placement GigabitEthernet0/8/0 queue 1 thread 1} -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cmd_set_dpdk_if_placement,static) = { - .path = "set dpdk interface placement", - .short_help = "set dpdk interface placement [queue ] thread ", - .function = set_dpdk_if_placement, -}; -/* *INDENT-ON* */ - -static clib_error_t * -show_dpdk_if_hqos_placement (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - vlib_thread_main_t *tm = vlib_get_thread_main (); - dpdk_main_t *dm = &dpdk_main; - dpdk_device_and_queue_t *dq; - int cpu; - - if (tm->n_vlib_mains == 1) - vlib_cli_output (vm, "All interfaces are handled by main thread"); - - for (cpu = 0; cpu < vec_len (dm->devices_by_hqos_cpu); cpu++) - { - if (cpu >= dm->hqos_cpu_first_index && - cpu < (dm->hqos_cpu_first_index + dm->hqos_cpu_count)) - vlib_cli_output (vm, "Thread %u (%s at lcore %u):", cpu, - vlib_worker_threads[cpu].name, - vlib_worker_threads[cpu].lcore_id); - - vec_foreach (dq, dm->devices_by_hqos_cpu[cpu]) - { - u32 hw_if_index = dm->devices[dq->device].vlib_hw_if_index; - vnet_hw_interface_t *hi = - vnet_get_hw_interface (dm->vnet_main, hw_if_index); - vlib_cli_output (vm, " %v queue %u", hi->name, dq->queue_id); - } - } - return 0; -} - -/*? - * This command is used to display the thread and core each - * DPDK output interface and HQoS queue is assigned too. 
- * - * @cliexpar - * Example of how to display the DPDK output interface and HQoS queue placement: - * @cliexstart{show dpdk interface hqos placement} - * Thread 1 (vpp_hqos-threads_0 at lcore 3): - * GigabitEthernet0/8/0 queue 0 - * Thread 2 (vpp_hqos-threads_1 at lcore 4): - * GigabitEthernet0/9/0 queue 0 - * @cliexend -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cmd_show_dpdk_if_hqos_placement, static) = { - .path = "show dpdk interface hqos placement", - .short_help = "show dpdk interface hqos placement", - .function = show_dpdk_if_hqos_placement, -}; -/* *INDENT-ON* */ - -static clib_error_t * -set_dpdk_if_hqos_placement (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - dpdk_main_t *dm = &dpdk_main; - dpdk_device_and_queue_t *dq; - vnet_hw_interface_t *hw; - dpdk_device_t *xd; - u32 hw_if_index = (u32) ~ 0; - u32 cpu = (u32) ~ 0; - int i; - clib_error_t *error = NULL; - - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat - (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, - &hw_if_index)) - ; - else if (unformat (line_input, "thread %d", &cpu)) - ; - else - { - error = clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); - goto done; - } - } - - if (hw_if_index == (u32) ~ 0) - return clib_error_return (0, "please specify valid interface name"); - - if (cpu < dm->hqos_cpu_first_index || - cpu >= (dm->hqos_cpu_first_index + dm->hqos_cpu_count)) - { - error = clib_error_return (0, "please specify valid thread id"); - goto done; - } - - hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - for (i = 0; i < vec_len (dm->devices_by_hqos_cpu); i++) - { - vec_foreach (dq, dm->devices_by_hqos_cpu[i]) - { - if (hw_if_index == dm->devices[dq->device].vlib_hw_if_index) - { - if (cpu == i) /* nothing to do */ - goto done; - - vec_del1 (dm->devices_by_hqos_cpu[i], - dq - dm->devices_by_hqos_cpu[i]); - vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1); - dq->queue_id = 0; - dq->device = xd->device_index; - - vec_sort_with_function (dm->devices_by_hqos_cpu[i], - dpdk_device_queue_sort); - - vec_sort_with_function (dm->devices_by_hqos_cpu[cpu], - dpdk_device_queue_sort); - - goto done; - } - } - } - - error = clib_error_return (0, "not found"); - -done: - unformat_free (line_input); - - return error; -} - -/*? - * This command is used to assign a given DPDK output interface and - * HQoS queue to a different thread. This will not create a thread, - * so the thread must already exist. Use '/etc/vpp/startup.conf' - * for the initial thread creation. See @ref qos_doc for more details. 
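Distilled from the handler above: once the interface and the target HQoS thread have been validated, the command simply moves one record between the per-thread vectors and re-sorts them. A sketch only; `old_cpu`, `new_cpu` and the element index `i` are illustrative names for the values the handler looks up:

    /* detach the device from its current HQoS thread ... */
    vec_del1 (dm->devices_by_hqos_cpu[old_cpu], i);

    /* ... attach it to the new one (HQoS output uses a single queue id 0) ... */
    vec_add2 (dm->devices_by_hqos_cpu[new_cpu], dq, 1);
    dq->device = xd->device_index;
    dq->queue_id = 0;

    /* ... and keep both per-thread lists ordered */
    vec_sort_with_function (dm->devices_by_hqos_cpu[old_cpu],
                            dpdk_device_queue_sort);
    vec_sort_with_function (dm->devices_by_hqos_cpu[new_cpu],
                            dpdk_device_queue_sort);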
- * - * @cliexpar - * Example of how to display the DPDK output interface and HQoS queue placement: - * @cliexstart{show dpdk interface hqos placement} - * Thread 1 (vpp_hqos-threads_0 at lcore 3): - * GigabitEthernet0/8/0 queue 0 - * Thread 2 (vpp_hqos-threads_1 at lcore 4): - * GigabitEthernet0/9/0 queue 0 - * @cliexend - * Example of how to assign a DPDK output interface and HQoS queue to a thread: - * @cliexcmd{set dpdk interface hqos placement GigabitEthernet0/8/0 thread 2} -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_placement, static) = { - .path = "set dpdk interface hqos placement", - .short_help = "set dpdk interface hqos placement thread ", - .function = set_dpdk_if_hqos_placement, -}; -/* *INDENT-ON* */ - -static clib_error_t * -set_dpdk_if_hqos_pipe (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - dpdk_main_t *dm = &dpdk_main; - vnet_hw_interface_t *hw; - dpdk_device_t *xd; - u32 hw_if_index = (u32) ~ 0; - u32 subport_id = (u32) ~ 0; - u32 pipe_id = (u32) ~ 0; - u32 profile_id = (u32) ~ 0; - int rv; - clib_error_t *error = NULL; - - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat - (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, - &hw_if_index)) - ; - else if (unformat (line_input, "subport %d", &subport_id)) - ; - else if (unformat (line_input, "pipe %d", &pipe_id)) - ; - else if (unformat (line_input, "profile %d", &profile_id)) - ; - else - { - error = clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); - goto done; - } - } - - if (hw_if_index == (u32) ~ 0) - { - error = clib_error_return (0, "please specify valid interface name"); - goto done; - } - - hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - rv = - rte_sched_pipe_config (xd->hqos_ht->hqos, subport_id, pipe_id, - profile_id); - if (rv) - { - error = clib_error_return (0, "pipe configuration failed"); - goto done; - } - -done: - unformat_free (line_input); - - return error; -} - -/*? - * This command is used to change the profile associate with a HQoS pipe. The - * '' is zero based. Use the command - * 'show dpdk interface hqos' to display the content of each profile. - * See @ref qos_doc for more details. - * - * @note - * Currently there is not an API to create a new HQoS pipe profile. One is - * created by default in the code (search for 'hqos_pipe_params_default''). - * Additional profiles can be created in code and code recompiled. Then use this - * command to assign it. 
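To illustrate the note above: an extra profile would be declared alongside 'hqos_pipe_params_default' and compiled in before this command can reference it. The sketch below assumes only the DPDK 16.11 rte_sched field names already used elsewhere in this file; the name and the rates are made-up example values, not defaults from this code:

    /* hypothetical additional pipe profile (example values only) */
    static struct rte_sched_pipe_params hqos_pipe_params_low_rate = {
      .tb_rate = 125000,                /* token bucket rate, bytes/second */
      .tb_size = 1000000,               /* token bucket size, bytes        */
      .tc_rate = { 125000, 125000, 125000, 125000 },
      .tc_period = 40,                  /* milliseconds                    */
      .wrr_weights = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
    };

Once added to the port's pipe profile table and recompiled, the new profile is selected per pipe through the rte_sched_pipe_config() call shown in the handler above.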
- * - * @cliexpar - * Example of how to assign a new profile to a HQoS pipe: - * @cliexcmd{set dpdk interface hqos pipe GigabitEthernet0/8/0 subport 0 pipe 2 profile 1} -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_pipe, static) = -{ - .path = "set dpdk interface hqos pipe", - .short_help = "set dpdk interface hqos pipe subport pipe " - "profile ", - .function = set_dpdk_if_hqos_pipe, -}; -/* *INDENT-ON* */ - -static clib_error_t * -set_dpdk_if_hqos_subport (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd = NULL; - u32 hw_if_index = (u32) ~ 0; - u32 subport_id = (u32) ~ 0; - struct rte_sched_subport_params p; - int rv; - clib_error_t *error = NULL; - u32 tb_rate = (u32) ~ 0; - u32 tb_size = (u32) ~ 0; - u32 tc_rate[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE] = - { (u32) ~ 0, (u32) ~ 0, (u32) ~ 0, (u32) ~ 0 }; - u32 tc_period = (u32) ~ 0; - dpdk_device_config_t *devconf = NULL; - - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat - (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, - &hw_if_index)) - ; - else if (unformat (line_input, "subport %d", &subport_id)) - ; - else if (unformat (line_input, "rate %d", &tb_rate)) - ; - else if (unformat (line_input, "bktsize %d", &tb_size)) - ; - else if (unformat (line_input, "tc0 %d", &tc_rate[0])) - ; - else if (unformat (line_input, "tc1 %d", &tc_rate[1])) - ; - else if (unformat (line_input, "tc2 %d", &tc_rate[2])) - ; - else if (unformat (line_input, "tc3 %d", &tc_rate[3])) - ; - else if (unformat (line_input, "period %d", &tc_period)) - ; - else - { - error = clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); - goto done; - } - } - - error = get_hqos (hw_if_index, subport_id, &xd, &devconf); - - if (error == NULL) - { - /* Copy the current values over to local structure. */ - memcpy (&p, &devconf->hqos.subport[subport_id], sizeof (p)); - - /* Update local structure with input values. */ - if (tb_rate != (u32) ~ 0) - { - p.tb_rate = tb_rate; - p.tc_rate[0] = tb_rate; - p.tc_rate[1] = tb_rate; - p.tc_rate[2] = tb_rate; - p.tc_rate[3] = tb_rate; - } - if (tb_size != (u32) ~ 0) - { - p.tb_size = tb_size; - } - if (tc_rate[0] != (u32) ~ 0) - { - p.tc_rate[0] = tc_rate[0]; - } - if (tc_rate[1] != (u32) ~ 0) - { - p.tc_rate[1] = tc_rate[1]; - } - if (tc_rate[2] != (u32) ~ 0) - { - p.tc_rate[2] = tc_rate[2]; - } - if (tc_rate[3] != (u32) ~ 0) - { - p.tc_rate[3] = tc_rate[3]; - } - if (tc_period != (u32) ~ 0) - { - p.tc_period = tc_period; - } - - /* Apply changes. */ - rv = rte_sched_subport_config (xd->hqos_ht->hqos, subport_id, &p); - if (rv) - { - error = clib_error_return (0, "subport configuration failed"); - goto done; - } - else - { - /* Successfully applied, so save of the input values. */ - memcpy (&devconf->hqos.subport[subport_id], &p, sizeof (p)); - } - } - -done: - unformat_free (line_input); - - return error; -} - -/*? - * This command is used to set the subport level parameters such as token - * bucket rate (bytes per seconds), token bucket size (bytes), traffic class - * rates (bytes per seconds) and token update period (Milliseconds). - * - * By default, the 'rate' is set to 1250000000 bytes/second (10GbE - * rate) and each of the four traffic classes is set to 100% of the port rate. 
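For reference, the default above is simply the 10 GbE line rate expressed in bytes per second; the 1 GbE example further below follows the same arithmetic (a trivial sketch, not code from this file):

    u64 link_bps = 10000000000ULL;     /* 10 Gbit/s                      */
    u32 tb_rate  = link_bps / 8;       /* = 1250000000 bytes/second      */
    /* for 1 GbE: 1000000000 / 8 = 125000000 bytes/second                */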
- * If the 'rate' is updated by this command, all four traffic classes - * are assigned the same value. Each of the four traffic classes can be updated - * individually. - * - * @cliexpar - * Example of how modify the subport attributes for a 1GbE link: - * @cliexcmd{set dpdk interface hqos subport GigabitEthernet0/8/0 subport 0 rate 125000000} -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_subport, static) = { - .path = "set dpdk interface hqos subport", - .short_help = "set dpdk interface hqos subport subport " - "[rate ] [bktsize ] [tc0 ] [tc1 ] [tc2 ] [tc3 ] " - "[period ]", - .function = set_dpdk_if_hqos_subport, -}; -/* *INDENT-ON* */ - -static clib_error_t * -set_dpdk_if_hqos_tctbl (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - vlib_thread_main_t *tm = vlib_get_thread_main (); - dpdk_main_t *dm = &dpdk_main; - vnet_hw_interface_t *hw; - dpdk_device_t *xd; - u32 hw_if_index = (u32) ~ 0; - u32 tc = (u32) ~ 0; - u32 queue = (u32) ~ 0; - u32 entry = (u32) ~ 0; - u32 val, i; - clib_error_t *error = NULL; - - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat - (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, - &hw_if_index)) - ; - else if (unformat (line_input, "entry %d", &entry)) - ; - else if (unformat (line_input, "tc %d", &tc)) - ; - else if (unformat (line_input, "queue %d", &queue)) - ; - else - { - error = clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); - goto done; - } - } - - if (hw_if_index == (u32) ~ 0) - { - error = clib_error_return (0, "please specify valid interface name"); - goto done; - } - if (entry >= 64) - { - error = clib_error_return (0, "invalid entry"); - goto done; - } - if (tc >= RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE) - { - error = clib_error_return (0, "invalid traffic class"); - goto done; - } - if (queue >= RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS) - { - error = clib_error_return (0, "invalid traffic class queue"); - goto done; - } - - hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - /* Detect the set of worker threads */ - uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers"); - /* Should never happen, shut up Coverity warning */ - if (p == 0) - { - error = clib_error_return (0, "no worker registrations?"); - goto done; - } - - vlib_thread_registration_t *tr = (vlib_thread_registration_t *) p[0]; - int worker_thread_first = tr->first_index; - int worker_thread_count = tr->count; - - val = tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + queue; - for (i = 0; i < worker_thread_count; i++) - xd->hqos_wt[worker_thread_first + i].hqos_tc_table[entry] = val; - -done: - unformat_free (line_input); - - return error; -} - -/*? - * This command is used to set the traffic class translation table. The - * traffic class translation table is used to map 64 values (0-63) to one of - * four traffic class and one of four HQoS input queue. Use the 'show - * dpdk interface hqos' command to display the traffic class translation - * table. See @ref qos_doc for more details. - * - * This command has the following parameters: - * - * - - Used to specify the output interface. - * - * - entry - Mapped value (0-63) to assign traffic class and queue to. - * - * - tc - Traffic class (0-3) to be used by the provided mapped value. 
- * - * - queue - HQoS input queue (0-3) to be used by the provided mapped value. - * - * @cliexpar - * Example of how modify the traffic class translation table: - * @cliexcmd{set dpdk interface hqos tctbl GigabitEthernet0/8/0 entry 16 tc 2 queue 2} -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_tctbl, static) = { - .path = "set dpdk interface hqos tctbl", - .short_help = "set dpdk interface hqos tctbl entry tc queue ", - .function = set_dpdk_if_hqos_tctbl, -}; -/* *INDENT-ON* */ - -static clib_error_t * -set_dpdk_if_hqos_pktfield (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - vlib_thread_main_t *tm = vlib_get_thread_main (); - dpdk_main_t *dm = &dpdk_main; - clib_error_t *error = NULL; - - /* Device specific data */ - struct rte_eth_dev_info dev_info; - dpdk_device_config_t *devconf = 0; - vnet_hw_interface_t *hw; - dpdk_device_t *xd; - u32 hw_if_index = (u32) ~ 0; - - /* Detect the set of worker threads */ - uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers"); - /* Should never happen, shut up Coverity warning */ - if (p == 0) - return clib_error_return (0, "no worker registrations?"); - - vlib_thread_registration_t *tr = (vlib_thread_registration_t *) p[0]; - int worker_thread_first = tr->first_index; - int worker_thread_count = tr->count; - - /* Packet field configuration */ - u64 mask = (u64) ~ 0; - u32 id = (u32) ~ 0; - u32 offset = (u32) ~ 0; - - /* HQoS params */ - u32 n_subports_per_port, n_pipes_per_subport, tctbl_size; - - u32 i; - - /* Parse input arguments */ - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat - (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, - &hw_if_index)) - ; - else if (unformat (line_input, "id subport")) - id = 0; - else if (unformat (line_input, "id pipe")) - id = 1; - else if (unformat (line_input, "id tc")) - id = 2; - else if (unformat (line_input, "id %d", &id)) - ; - else if (unformat (line_input, "offset %d", &offset)) - ; - else if (unformat (line_input, "mask %llx", &mask)) - ; - else - { - error = clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); - goto done; - } - } - - /* Get interface */ - if (hw_if_index == (u32) ~ 0) - { - error = clib_error_return (0, "please specify valid interface name"); - goto done; - } - - hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - rte_eth_dev_info_get (xd->device_index, &dev_info); - if (dev_info.pci_dev) - { /* bonded interface has no pci info */ - vlib_pci_addr_t pci_addr; - - pci_addr.domain = dev_info.pci_dev->addr.domain; - pci_addr.bus = dev_info.pci_dev->addr.bus; - pci_addr.slot = dev_info.pci_dev->addr.devid; - pci_addr.function = dev_info.pci_dev->addr.function; - - p = - hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32); - } - - if (p) - devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]); - else - devconf = &dm->conf->default_devconf; - - if (devconf->hqos_enabled == 0) - { - vlib_cli_output (vm, "HQoS disabled for this interface"); - goto done; - } - - n_subports_per_port = devconf->hqos.port.n_subports_per_port; - n_pipes_per_subport = devconf->hqos.port.n_pipes_per_subport; - tctbl_size = RTE_DIM (devconf->hqos.tc_table); - - /* Validate packet field configuration: id, offset and mask */ - if (id >= 3) - { - error = 
clib_error_return (0, "invalid packet field id"); - goto done; - } - - switch (id) - { - case 0: - if (dpdk_hqos_validate_mask (mask, n_subports_per_port) != 0) - { - error = clib_error_return (0, "invalid subport ID mask " - "(n_subports_per_port = %u)", - n_subports_per_port); - goto done; - } - break; - case 1: - if (dpdk_hqos_validate_mask (mask, n_pipes_per_subport) != 0) - { - error = clib_error_return (0, "invalid pipe ID mask " - "(n_pipes_per_subport = %u)", - n_pipes_per_subport); - goto done; - } - break; - case 2: - default: - if (dpdk_hqos_validate_mask (mask, tctbl_size) != 0) - { - error = clib_error_return (0, "invalid TC table index mask " - "(TC table size = %u)", tctbl_size); - goto done; - } - } - - /* Propagate packet field configuration to all workers */ - for (i = 0; i < worker_thread_count; i++) - switch (id) - { - case 0: - xd->hqos_wt[worker_thread_first + i].hqos_field0_slabpos = offset; - xd->hqos_wt[worker_thread_first + i].hqos_field0_slabmask = mask; - xd->hqos_wt[worker_thread_first + i].hqos_field0_slabshr = - __builtin_ctzll (mask); - break; - case 1: - xd->hqos_wt[worker_thread_first + i].hqos_field1_slabpos = offset; - xd->hqos_wt[worker_thread_first + i].hqos_field1_slabmask = mask; - xd->hqos_wt[worker_thread_first + i].hqos_field1_slabshr = - __builtin_ctzll (mask); - break; - case 2: - default: - xd->hqos_wt[worker_thread_first + i].hqos_field2_slabpos = offset; - xd->hqos_wt[worker_thread_first + i].hqos_field2_slabmask = mask; - xd->hqos_wt[worker_thread_first + i].hqos_field2_slabshr = - __builtin_ctzll (mask); - } - -done: - unformat_free (line_input); - - return error; -} - -/*? - * This command is used to set the packet fields required for classifiying the - * incoming packet. As a result of classification process, packet field - * information will be mapped to 5 tuples (subport, pipe, traffic class, pipe, - * color) and stored in packet mbuf. - * - * This command has the following parameters: - * - * - - Used to specify the output interface. - * - * - id subport|pipe|tc - Classification occurs across three fields. - * This parameter indicates which of the three masks are being configured. Legacy - * code used 0-2 to represent these three fields, so 0-2 is still accepted. - * - subport|0 - Currently only one subport is supported, so only - * an empty mask is supported for the subport classification. - * - pipe|1 - Currently, 4096 pipes per subport are supported, so a - * 12-bit mask should be configure to map to the 0-4095 pipes. - * - tc|2 - The translation table (see 'set dpdk interface hqos - * tctbl' command) maps each value (0-63) into one of the 4 traffic classes - * per pipe. A 6-bit mask should be configure to map this field to a traffic class. - * - * - offset - Offset in the packet to apply the 64-bit mask for classification. - * The offset should be on an 8-byte boundary (0,8,16,24..). - * - * - mask - 64-bit mask to apply to packet at the given 'offset'. - * Bits must be contiguous and should not include '0x'. - * - * The default values for the 'pktfield' assumes Ethernet/IPv4/UDP packets with - * no VLAN. Adjust based on expected packet format and desired classification field. - * - 'subport' is always empty (offset 0 mask 0000000000000000) - * - By default, 'pipe' maps to the UDP payload bits 12 .. 
23 (offset 40 - * mask 0000000fff000000) - * - By default, 'tc' maps to the DSCP field in IP header (offset 48 mask - * 00000000000000fc) - * - * @cliexpar - * Example of how modify the 'pipe' classification filter to match VLAN: - * @cliexcmd{set dpdk interface hqos pktfield GigabitEthernet0/8/0 id pipe offset 8 mask 0000000000000FFF} -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_pktfield, static) = { - .path = "set dpdk interface hqos pktfield", - .short_help = "set dpdk interface hqos pktfield id subport|pipe|tc offset " - "mask ", - .function = set_dpdk_if_hqos_pktfield, -}; -/* *INDENT-ON* */ - -static clib_error_t * -show_dpdk_if_hqos (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - vlib_thread_main_t *tm = vlib_get_thread_main (); - dpdk_main_t *dm = &dpdk_main; - vnet_hw_interface_t *hw; - dpdk_device_t *xd; - dpdk_device_config_hqos_t *cfg; - dpdk_device_hqos_per_hqos_thread_t *ht; - dpdk_device_hqos_per_worker_thread_t *wk; - u32 *tctbl; - u32 hw_if_index = (u32) ~ 0; - u32 profile_id, subport_id, i; - struct rte_eth_dev_info dev_info; - dpdk_device_config_t *devconf = 0; - vlib_thread_registration_t *tr; - uword *p = 0; - clib_error_t *error = NULL; - - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat - (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, - &hw_if_index)) - ; - else - { - error = clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); - goto done; - } - } - - if (hw_if_index == (u32) ~ 0) - { - error = clib_error_return (0, "please specify interface name!!"); - goto done; - } - - hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - rte_eth_dev_info_get (xd->device_index, &dev_info); - if (dev_info.pci_dev) - { /* bonded interface has no pci info */ - vlib_pci_addr_t pci_addr; - - pci_addr.domain = dev_info.pci_dev->addr.domain; - pci_addr.bus = dev_info.pci_dev->addr.bus; - pci_addr.slot = dev_info.pci_dev->addr.devid; - pci_addr.function = dev_info.pci_dev->addr.function; - - p = - hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32); - } - - if (p) - devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]); - else - devconf = &dm->conf->default_devconf; - - if (devconf->hqos_enabled == 0) - { - vlib_cli_output (vm, "HQoS disabled for this interface"); - goto done; - } - - /* Detect the set of worker threads */ - p = hash_get_mem (tm->thread_registrations_by_name, "workers"); - - /* Should never happen, shut up Coverity warning */ - if (p == 0) - { - error = clib_error_return (0, "no worker registrations?"); - goto done; - } - - tr = (vlib_thread_registration_t *) p[0]; - - cfg = &devconf->hqos; - ht = xd->hqos_ht; - wk = &xd->hqos_wt[tr->first_index]; - tctbl = wk->hqos_tc_table; - - vlib_cli_output (vm, " Thread:"); - vlib_cli_output (vm, " Input SWQ size = %u packets", cfg->swq_size); - vlib_cli_output (vm, " Enqueue burst size = %u packets", - ht->hqos_burst_enq); - vlib_cli_output (vm, " Dequeue burst size = %u packets", - ht->hqos_burst_deq); - - vlib_cli_output (vm, - " Packet field 0: slab position = %4u, slab bitmask = 0x%016llx (subport)", - wk->hqos_field0_slabpos, wk->hqos_field0_slabmask); - vlib_cli_output (vm, - " Packet field 1: slab position = %4u, slab bitmask = 0x%016llx (pipe)", - wk->hqos_field1_slabpos, 
wk->hqos_field1_slabmask); - vlib_cli_output (vm, - " Packet field 2: slab position = %4u, slab bitmask = 0x%016llx (tc)", - wk->hqos_field2_slabpos, wk->hqos_field2_slabmask); - vlib_cli_output (vm, - " Packet field 2 tc translation table: ([Mapped Value Range]: tc/queue tc/queue ...)"); - vlib_cli_output (vm, - " [ 0 .. 15]: " - "%u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u", - tctbl[0] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[0] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[1] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[1] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[2] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[2] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[3] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[3] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[4] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[4] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[5] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[5] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[6] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[6] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[7] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[7] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[8] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[8] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[9] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[9] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[10] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[10] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[11] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[11] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[12] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[12] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[13] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[13] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[14] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[14] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[15] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[15] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS); - vlib_cli_output (vm, - " [16 .. 
31]: " - "%u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u", - tctbl[16] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[16] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[17] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[17] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[18] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[18] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[19] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[19] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[20] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[20] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[21] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[21] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[22] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[22] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[23] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[23] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[24] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[24] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[25] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[25] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[26] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[26] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[27] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[27] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[28] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[28] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[29] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[29] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[30] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[30] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[31] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[31] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS); - vlib_cli_output (vm, - " [32 .. 47]: " - "%u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u", - tctbl[32] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[32] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[33] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[33] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[34] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[34] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[35] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[35] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[36] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[36] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[37] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[37] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[38] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[38] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[39] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[39] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[40] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[40] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[41] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[41] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[42] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[42] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[43] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[43] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[44] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[44] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[45] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[45] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[46] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[46] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[47] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[47] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS); - vlib_cli_output (vm, - " [48 .. 
63]: " - "%u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u %u/%u", - tctbl[48] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[48] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[49] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[49] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[50] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[50] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[51] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[51] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[52] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[52] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[53] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[53] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[54] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[54] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[55] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[55] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[56] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[56] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[57] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[57] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[58] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[58] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[59] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[59] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[60] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[60] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[61] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[61] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[62] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[62] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[63] / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS, - tctbl[63] % RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS); - vlib_cli_output (vm, " Port:"); - vlib_cli_output (vm, " Rate = %u bytes/second", cfg->port.rate); - vlib_cli_output (vm, " MTU = %u bytes", cfg->port.mtu); - vlib_cli_output (vm, " Frame overhead = %u bytes", - cfg->port.frame_overhead); - vlib_cli_output (vm, " Number of subports = %u", - cfg->port.n_subports_per_port); - vlib_cli_output (vm, " Number of pipes per subport = %u", - cfg->port.n_pipes_per_subport); - vlib_cli_output (vm, - " Packet queue size: TC0 = %u, TC1 = %u, TC2 = %u, TC3 = %u packets", - cfg->port.qsize[0], cfg->port.qsize[1], cfg->port.qsize[2], - cfg->port.qsize[3]); - vlib_cli_output (vm, " Number of pipe profiles = %u", - cfg->port.n_pipe_profiles); - - for (subport_id = 0; subport_id < vec_len (cfg->subport); subport_id++) - { - vlib_cli_output (vm, " Subport %u:", subport_id); - vlib_cli_output (vm, " Rate = %u bytes/second", - cfg->subport[subport_id].tb_rate); - vlib_cli_output (vm, " Token bucket size = %u bytes", - cfg->subport[subport_id].tb_size); - vlib_cli_output (vm, - " Traffic class rate: TC0 = %u, TC1 = %u, TC2 = %u, TC3 = %u bytes/second", - cfg->subport[subport_id].tc_rate[0], - cfg->subport[subport_id].tc_rate[1], - cfg->subport[subport_id].tc_rate[2], - cfg->subport[subport_id].tc_rate[3]); - vlib_cli_output (vm, " TC period = %u milliseconds", - cfg->subport[subport_id].tc_period); - } - - for (profile_id = 0; profile_id < vec_len (cfg->pipe); profile_id++) - { - vlib_cli_output (vm, " Pipe profile %u:", profile_id); - vlib_cli_output (vm, " Rate = %u bytes/second", - cfg->pipe[profile_id].tb_rate); - vlib_cli_output (vm, " Token bucket size = %u bytes", - cfg->pipe[profile_id].tb_size); - vlib_cli_output (vm, - " Traffic class rate: TC0 = %u, TC1 = %u, TC2 = %u, TC3 = %u bytes/second", - cfg->pipe[profile_id].tc_rate[0], - cfg->pipe[profile_id].tc_rate[1], - 
cfg->pipe[profile_id].tc_rate[2], - cfg->pipe[profile_id].tc_rate[3]); - vlib_cli_output (vm, " TC period = %u milliseconds", - cfg->pipe[profile_id].tc_period); -#ifdef RTE_SCHED_SUBPORT_TC_OV - vlib_cli_output (vm, " TC3 oversubscription_weight = %u", - cfg->pipe[profile_id].tc_ov_weight); -#endif - - for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) - { - vlib_cli_output (vm, - " TC%u WRR weights: Q0 = %u, Q1 = %u, Q2 = %u, Q3 = %u", - i, cfg->pipe[profile_id].wrr_weights[i * 4], - cfg->pipe[profile_id].wrr_weights[i * 4 + 1], - cfg->pipe[profile_id].wrr_weights[i * 4 + 2], - cfg->pipe[profile_id].wrr_weights[i * 4 + 3]); - } - } - -#ifdef RTE_SCHED_RED - vlib_cli_output (vm, " Weighted Random Early Detection (WRED):"); - for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) - { - vlib_cli_output (vm, " TC%u min: G = %u, Y = %u, R = %u", i, - cfg->port.red_params[i][e_RTE_METER_GREEN].min_th, - cfg->port.red_params[i][e_RTE_METER_YELLOW].min_th, - cfg->port.red_params[i][e_RTE_METER_RED].min_th); - - vlib_cli_output (vm, " TC%u max: G = %u, Y = %u, R = %u", i, - cfg->port.red_params[i][e_RTE_METER_GREEN].max_th, - cfg->port.red_params[i][e_RTE_METER_YELLOW].max_th, - cfg->port.red_params[i][e_RTE_METER_RED].max_th); - - vlib_cli_output (vm, - " TC%u inverted probability: G = %u, Y = %u, R = %u", - i, cfg->port.red_params[i][e_RTE_METER_GREEN].maxp_inv, - cfg->port.red_params[i][e_RTE_METER_YELLOW].maxp_inv, - cfg->port.red_params[i][e_RTE_METER_RED].maxp_inv); - - vlib_cli_output (vm, " TC%u weight: R = %u, Y = %u, R = %u", i, - cfg->port.red_params[i][e_RTE_METER_GREEN].wq_log2, - cfg->port.red_params[i][e_RTE_METER_YELLOW].wq_log2, - cfg->port.red_params[i][e_RTE_METER_RED].wq_log2); - } -#endif - -done: - unformat_free (line_input); - - return error; -} - -/*? - * This command is used to display details of an output interface's HQoS - * settings. - * - * @cliexpar - * Example of how to display HQoS settings for an interfaces: - * @cliexstart{show dpdk interface hqos GigabitEthernet0/8/0} - * Thread: - * Input SWQ size = 4096 packets - * Enqueue burst size = 256 packets - * Dequeue burst size = 220 packets - * Packet field 0: slab position = 0, slab bitmask = 0x0000000000000000 (subport) - * Packet field 1: slab position = 40, slab bitmask = 0x0000000fff000000 (pipe) - * Packet field 2: slab position = 8, slab bitmask = 0x00000000000000fc (tc) - * Packet field 2 tc translation table: ([Mapped Value Range]: tc/queue tc/queue ...) - * [ 0 .. 15]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3 - * [16 .. 31]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3 - * [32 .. 47]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3 - * [48 .. 
63]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3 - * Port: - * Rate = 1250000000 bytes/second - * MTU = 1514 bytes - * Frame overhead = 24 bytes - * Number of subports = 1 - * Number of pipes per subport = 4096 - * Packet queue size: TC0 = 64, TC1 = 64, TC2 = 64, TC3 = 64 packets - * Number of pipe profiles = 2 - * Subport 0: - * Rate = 1250000000 bytes/second - * Token bucket size = 1000000 bytes - * Traffic class rate: TC0 = 1250000000, TC1 = 1250000000, TC2 = 1250000000, TC3 = 1250000000 bytes/second - * TC period = 10 milliseconds - * Pipe profile 0: - * Rate = 305175 bytes/second - * Token bucket size = 1000000 bytes - * Traffic class rate: TC0 = 305175, TC1 = 305175, TC2 = 305175, TC3 = 305175 bytes/second - * TC period = 40 milliseconds - * TC0 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1 - * TC1 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1 - * TC2 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1 - * TC3 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1 - * @cliexend -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cmd_show_dpdk_if_hqos, static) = { - .path = "show dpdk interface hqos", - .short_help = "show dpdk interface hqos ", - .function = show_dpdk_if_hqos, -}; - -/* *INDENT-ON* */ - -static clib_error_t * -show_dpdk_hqos_queue_stats (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - clib_error_t *error = NULL; -#ifdef RTE_SCHED_COLLECT_STATS - dpdk_main_t *dm = &dpdk_main; - u32 hw_if_index = (u32) ~ 0; - u32 subport = (u32) ~ 0; - u32 pipe = (u32) ~ 0; - u32 tc = (u32) ~ 0; - u32 tc_q = (u32) ~ 0; - vnet_hw_interface_t *hw; - dpdk_device_t *xd; - uword *p = 0; - struct rte_eth_dev_info dev_info; - dpdk_device_config_t *devconf = 0; - u32 qindex; - struct rte_sched_queue_stats stats; - u16 qlen; - - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat - (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main, - &hw_if_index)) - ; - - else if (unformat (line_input, "subport %d", &subport)) - ; - - else if (unformat (line_input, "pipe %d", &pipe)) - ; - - else if (unformat (line_input, "tc %d", &tc)) - ; - - else if (unformat (line_input, "tc_q %d", &tc_q)) - ; - - else - { - error = clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); - goto done; - } - } - - if (hw_if_index == (u32) ~ 0) - { - error = clib_error_return (0, "please specify interface name!!"); - goto done; - } - - hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index); - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - rte_eth_dev_info_get (xd->device_index, &dev_info); - if (dev_info.pci_dev) - { /* bonded interface has no pci info */ - vlib_pci_addr_t pci_addr; - - pci_addr.domain = dev_info.pci_dev->addr.domain; - pci_addr.bus = dev_info.pci_dev->addr.bus; - pci_addr.slot = dev_info.pci_dev->addr.devid; - pci_addr.function = dev_info.pci_dev->addr.function; - - p = - hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32); - } - - if (p) - devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]); - else - devconf = &dm->conf->default_devconf; - - if (devconf->hqos_enabled == 0) - { - vlib_cli_output (vm, "HQoS disabled for this interface"); - goto done; - } - - /* - * Figure out which queue to query. cf rte_sched_port_qindex. (Not sure why - * that method isn't made public by DPDK - how _should_ we get the queue ID?) 
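Spelled out, the flat queue index computed in the lines below lays queues out per subport, then pipe, then traffic class, then queue within the class (names and constants are the ones used in this file):

    u32 qindex = ((subport * devconf->hqos.port.n_pipes_per_subport + pipe)
                  * RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE + tc)
                 * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + tc_q;
    /* e.g. with 4096 pipes per subport, as in the example configuration
       shown earlier, "subport 0 pipe 3181 tc 0 tc_q 0" gives
       qindex = 3181 * 4 * 4 = 50896 */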
- */ - qindex = subport * devconf->hqos.port.n_pipes_per_subport + pipe; - qindex = qindex * RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE + tc; - qindex = qindex * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + tc_q; - - if (rte_sched_queue_read_stats (xd->hqos_ht->hqos, qindex, &stats, &qlen) != - 0) - { - error = clib_error_return (0, "failed to read stats"); - goto done; - } - - vlib_cli_output (vm, "%=24s%=16s", "Stats Parameter", "Value"); - vlib_cli_output (vm, "%=24s%=16d", "Packets", stats.n_pkts); - vlib_cli_output (vm, "%=24s%=16d", "Packets dropped", stats.n_pkts_dropped); -#ifdef RTE_SCHED_RED - vlib_cli_output (vm, "%=24s%=16d", "Packets dropped (RED)", - stats.n_pkts_red_dropped); -#endif - vlib_cli_output (vm, "%=24s%=16d", "Bytes", stats.n_bytes); - vlib_cli_output (vm, "%=24s%=16d", "Bytes dropped", stats.n_bytes_dropped); - -#else - - /* Get a line of input */ - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - vlib_cli_output (vm, "RTE_SCHED_COLLECT_STATS disabled in DPDK"); - goto done; - -#endif - -done: - unformat_free (line_input); - - return error; -} - -/*? - * This command is used to display statistics associated with a HQoS traffic class - * queue. - * - * @note - * Statistic collection by the scheduler is disabled by default in DPDK. In order to - * turn it on, add the following line to '../vpp/dpdk/Makefile': - * - $(call set,RTE_SCHED_COLLECT_STATS,y) - * - * @cliexpar - * Example of how to display statistics of HQoS a HQoS traffic class queue: - * @cliexstart{show dpdk hqos queue GigabitEthernet0/9/0 subport 0 pipe 3181 tc 0 tc_q 0} - * Stats Parameter Value - * Packets 140 - * Packets dropped 0 - * Bytes 8400 - * Bytes dropped 0 - * @cliexend -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cmd_show_dpdk_hqos_queue_stats, static) = { - .path = "show dpdk hqos queue", - .short_help = "show dpdk hqos queue subport pipe tc tc_q ", - .function = show_dpdk_hqos_queue_stats, -}; -/* *INDENT-ON* */ - -static clib_error_t * -show_dpdk_version_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ -#define _(a,b,c) vlib_cli_output (vm, "%-25s " b, a ":", c); - _("DPDK Version", "%s", rte_version ()); - _("DPDK EAL init args", "%s", dpdk_config_main.eal_init_args_str); -#undef _ - return 0; -} - -/*? - * This command is used to display the current DPDK version and - * the list of arguments passed to DPDK when started. - * - * @cliexpar - * Example of how to display how many DPDK buffer test command has allcoated: - * @cliexstart{show dpdk version} - * DPDK Version: DPDK 16.11.0 - * DPDK EAL init args: -c 1 -n 4 --huge-dir /run/vpp/hugepages --file-prefix vpp -w 0000:00:08.0 -w 0000:00:09.0 --master-lcore 0 --socket-mem 256 - * @cliexend -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (show_vpe_version_command, static) = { - .path = "show dpdk version", - .short_help = "show dpdk version", - .function = show_dpdk_version_command_fn, -}; -/* *INDENT-ON* */ - -clib_error_t * -dpdk_cli_init (vlib_main_t * vm) -{ - return 0; -} - -VLIB_INIT_FUNCTION (dpdk_cli_init); - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/device.c b/src/vnet/devices/dpdk/device.c deleted file mode 100644 index 17397900..00000000 --- a/src/vnet/devices/dpdk/device.c +++ /dev/null @@ -1,852 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. 
- * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include -#include -#include - -#include -#include - -#include "dpdk_priv.h" -#include - -#define foreach_dpdk_tx_func_error \ - _(BAD_RETVAL, "DPDK tx function returned an error") \ - _(RING_FULL, "Tx packet drops (ring full)") \ - _(PKT_DROP, "Tx packet drops (dpdk tx failure)") \ - _(REPL_FAIL, "Tx packet drops (replication failure)") - -typedef enum -{ -#define _(f,s) DPDK_TX_FUNC_ERROR_##f, - foreach_dpdk_tx_func_error -#undef _ - DPDK_TX_FUNC_N_ERROR, -} dpdk_tx_func_error_t; - -static char *dpdk_tx_func_error_strings[] = { -#define _(n,s) s, - foreach_dpdk_tx_func_error -#undef _ -}; - -clib_error_t * -dpdk_set_mac_address (vnet_hw_interface_t * hi, char *address) -{ - int error; - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance); - - error = rte_eth_dev_default_mac_addr_set (xd->device_index, - (struct ether_addr *) address); - - if (error) - { - return clib_error_return (0, "mac address set failed: %d", error); - } - else - { - vec_reset_length (xd->default_mac_address); - vec_add (xd->default_mac_address, address, sizeof (address)); - return NULL; - } -} - -clib_error_t * -dpdk_set_mc_filter (vnet_hw_interface_t * hi, - struct ether_addr mc_addr_vec[], int naddr) -{ - int error; - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance); - - error = rte_eth_dev_set_mc_addr_list (xd->device_index, mc_addr_vec, naddr); - - if (error) - { - return clib_error_return (0, "mc addr list failed: %d", error); - } - else - { - return NULL; - } -} - -struct rte_mbuf * -dpdk_replicate_packet_mb (vlib_buffer_t * b) -{ - dpdk_main_t *dm = &dpdk_main; - struct rte_mbuf **mbufs = 0, *s, *d; - u8 nb_segs; - unsigned socket_id = rte_socket_id (); - int i; - - ASSERT (dm->pktmbuf_pools[socket_id]); - s = rte_mbuf_from_vlib_buffer (b); - nb_segs = s->nb_segs; - vec_validate (mbufs, nb_segs - 1); - - if (rte_pktmbuf_alloc_bulk (dm->pktmbuf_pools[socket_id], mbufs, nb_segs)) - { - vec_free (mbufs); - return 0; - } - - d = mbufs[0]; - d->nb_segs = s->nb_segs; - d->data_len = s->data_len; - d->pkt_len = s->pkt_len; - d->data_off = s->data_off; - clib_memcpy (d->buf_addr, s->buf_addr, RTE_PKTMBUF_HEADROOM + s->data_len); - - for (i = 1; i < nb_segs; i++) - { - d->next = mbufs[i]; - d = mbufs[i]; - s = s->next; - d->data_len = s->data_len; - clib_memcpy (d->buf_addr, s->buf_addr, - RTE_PKTMBUF_HEADROOM + s->data_len); - } - - d = mbufs[0]; - vec_free (mbufs); - return d; -} - -static void -dpdk_tx_trace_buffer (dpdk_main_t * dm, - vlib_node_runtime_t * node, - dpdk_device_t * xd, - u16 queue_id, u32 buffer_index, vlib_buffer_t * buffer) -{ - vlib_main_t *vm = vlib_get_main (); - dpdk_tx_dma_trace_t *t0; - struct rte_mbuf *mb; - - mb = rte_mbuf_from_vlib_buffer (buffer); - - t0 = vlib_add_trace (vm, node, buffer, sizeof (t0[0])); - t0->queue_index = queue_id; - t0->device_index = xd->device_index; - t0->buffer_index = buffer_index; - 
clib_memcpy (&t0->mb, mb, sizeof (t0->mb)); - clib_memcpy (&t0->buffer, buffer, - sizeof (buffer[0]) - sizeof (buffer->pre_data)); - clib_memcpy (t0->buffer.pre_data, buffer->data + buffer->current_data, - sizeof (t0->buffer.pre_data)); -} - -static_always_inline void -dpdk_validate_rte_mbuf (vlib_main_t * vm, vlib_buffer_t * b, - int maybe_multiseg) -{ - struct rte_mbuf *mb, *first_mb, *last_mb; - - /* buffer is coming from non-dpdk source so we need to init - rte_mbuf header */ - if (PREDICT_FALSE ((b->flags & VLIB_BUFFER_EXT_HDR_VALID) == 0)) - { - vlib_buffer_t *b2 = b; - last_mb = mb = rte_mbuf_from_vlib_buffer (b2); - rte_pktmbuf_reset (mb); - while (maybe_multiseg && (b2->flags & VLIB_BUFFER_NEXT_PRESENT)) - { - b2 = vlib_get_buffer (vm, b2->next_buffer); - mb = rte_mbuf_from_vlib_buffer (b2); - rte_pktmbuf_reset (mb); - } - } - - last_mb = first_mb = mb = rte_mbuf_from_vlib_buffer (b); - first_mb->nb_segs = 1; - mb->data_len = b->current_length; - mb->pkt_len = maybe_multiseg ? vlib_buffer_length_in_chain (vm, b) : - b->current_length; - mb->data_off = VLIB_BUFFER_PRE_DATA_SIZE + b->current_data; - - while (maybe_multiseg && (b->flags & VLIB_BUFFER_NEXT_PRESENT)) - { - b = vlib_get_buffer (vm, b->next_buffer); - mb = rte_mbuf_from_vlib_buffer (b); - last_mb->next = mb; - last_mb = mb; - mb->data_len = b->current_length; - mb->pkt_len = b->current_length; - mb->data_off = VLIB_BUFFER_PRE_DATA_SIZE + b->current_data; - first_mb->nb_segs++; - if (PREDICT_FALSE (b->n_add_refs)) - { - rte_mbuf_refcnt_update (mb, b->n_add_refs); - b->n_add_refs = 0; - } - } -} - -/* - * This function calls the dpdk's tx_burst function to transmit the packets - * on the tx_vector. It manages a lock per-device if the device does not - * support multiple queues. It returns the number of packets untransmitted - * on the tx_vector. If all packets are transmitted (the normal case), the - * function returns 0. - * - * The function assumes there is at least one packet on the tx_vector. - */ -static_always_inline - u32 tx_burst_vector_internal (vlib_main_t * vm, - dpdk_device_t * xd, - struct rte_mbuf **tx_vector) -{ - dpdk_main_t *dm = &dpdk_main; - u32 n_packets; - u32 tx_head; - u32 tx_tail; - u32 n_retry; - int rv; - int queue_id; - tx_ring_hdr_t *ring; - - ring = vec_header (tx_vector, sizeof (*ring)); - - n_packets = ring->tx_head - ring->tx_tail; - - tx_head = ring->tx_head % xd->nb_tx_desc; - - /* - * Ensure rte_eth_tx_burst is not called with 0 packets, which can lead to - * unpredictable results. - */ - ASSERT (n_packets > 0); - - /* - * Check for tx_vector overflow. If this fails it is a system configuration - * error. The ring should be sized big enough to handle the largest un-flowed - * off burst from a traffic manager. A larger size also helps performance - * a bit because it decreases the probability of having to issue two tx_burst - * calls due to a ring wrap. - */ - ASSERT (n_packets < xd->nb_tx_desc); - ASSERT (ring->tx_tail == 0); - - n_retry = 16; - queue_id = vm->cpu_index; - - do - { - /* start the burst at the tail */ - tx_tail = ring->tx_tail % xd->nb_tx_desc; - - /* - * This device only supports one TX queue, - * and we're running multi-threaded... 
- */ - if (PREDICT_FALSE (xd->lockp != 0)) - { - queue_id = queue_id % xd->tx_q_used; - while (__sync_lock_test_and_set (xd->lockp[queue_id], 1)) - /* zzzz */ - queue_id = (queue_id + 1) % xd->tx_q_used; - } - - if (PREDICT_FALSE (xd->flags & DPDK_DEVICE_FLAG_HQOS)) /* HQoS ON */ - { - /* no wrap, transmit in one burst */ - dpdk_device_hqos_per_worker_thread_t *hqos = - &xd->hqos_wt[vm->cpu_index]; - - ASSERT (hqos->swq != NULL); - - dpdk_hqos_metadata_set (hqos, - &tx_vector[tx_tail], tx_head - tx_tail); - rv = rte_ring_sp_enqueue_burst (hqos->swq, - (void **) &tx_vector[tx_tail], - (uint16_t) (tx_head - tx_tail)); - } - else if (PREDICT_TRUE (xd->flags & DPDK_DEVICE_FLAG_PMD)) - { - /* no wrap, transmit in one burst */ - rv = rte_eth_tx_burst (xd->device_index, - (uint16_t) queue_id, - &tx_vector[tx_tail], - (uint16_t) (tx_head - tx_tail)); - } - else - { - ASSERT (0); - rv = 0; - } - - if (PREDICT_FALSE (xd->lockp != 0)) - *xd->lockp[queue_id] = 0; - - if (PREDICT_FALSE (rv < 0)) - { - // emit non-fatal message, bump counter - vnet_main_t *vnm = dm->vnet_main; - vnet_interface_main_t *im = &vnm->interface_main; - u32 node_index; - - node_index = vec_elt_at_index (im->hw_interfaces, - xd->vlib_hw_if_index)->tx_node_index; - - vlib_error_count (vm, node_index, DPDK_TX_FUNC_ERROR_BAD_RETVAL, 1); - clib_warning ("rte_eth_tx_burst[%d]: error %d", xd->device_index, - rv); - return n_packets; // untransmitted packets - } - ring->tx_tail += (u16) rv; - n_packets -= (uint16_t) rv; - } - while (rv && n_packets && (n_retry > 0)); - - return n_packets; -} - -static_always_inline void -dpdk_prefetch_buffer_by_index (vlib_main_t * vm, u32 bi) -{ - vlib_buffer_t *b; - struct rte_mbuf *mb; - b = vlib_get_buffer (vm, bi); - mb = rte_mbuf_from_vlib_buffer (b); - CLIB_PREFETCH (mb, CLIB_CACHE_LINE_BYTES, LOAD); - CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, LOAD); -} - -static_always_inline void -dpdk_buffer_recycle (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_buffer_t * b, u32 bi, struct rte_mbuf **mbp) -{ - dpdk_main_t *dm = &dpdk_main; - u32 my_cpu = vm->cpu_index; - struct rte_mbuf *mb_new; - - if (PREDICT_FALSE (b->flags & VLIB_BUFFER_RECYCLE) == 0) - return; - - mb_new = dpdk_replicate_packet_mb (b); - if (PREDICT_FALSE (mb_new == 0)) - { - vlib_error_count (vm, node->node_index, - DPDK_TX_FUNC_ERROR_REPL_FAIL, 1); - b->flags |= VLIB_BUFFER_REPL_FAIL; - } - else - *mbp = mb_new; - - vec_add1 (dm->recycle[my_cpu], bi); -} - -/* - * Transmits the packets on the frame to the interface associated with the - * node. It first copies packets on the frame to a tx_vector containing the - * rte_mbuf pointers. It then passes this vector to tx_burst_vector_internal - * which calls the dpdk tx_burst function. 
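The ring bookkeeping this relies on is compact enough to summarize. Names come from tx_ring_hdr_t and the two functions in this file; `rv` stands for the burst return value, and both indices address tx_vector modulo nb_tx_desc:

    ring->tx_head += n_packets;                /* buffers copied onto tx_vector */
    n_on_ring = ring->tx_head - ring->tx_tail; /* packets still outstanding     */
    /* tx_burst_vector_internal() then advances the tail as bursts complete: */
    ring->tx_tail += rv;                       /* rv = packets the PMD accepted */
    /* anything still outstanding after the burst loop is freed and counted
       as a tx drop against the interface */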
- */ -static uword -dpdk_interface_tx (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * f) -{ - dpdk_main_t *dm = &dpdk_main; - vnet_interface_output_runtime_t *rd = (void *) node->runtime_data; - dpdk_device_t *xd = vec_elt_at_index (dm->devices, rd->dev_instance); - u32 n_packets = f->n_vectors; - u32 n_left; - u32 *from; - struct rte_mbuf **tx_vector; - u16 i; - u16 nb_tx_desc = xd->nb_tx_desc; - int queue_id; - u32 my_cpu; - u32 tx_pkts = 0; - tx_ring_hdr_t *ring; - u32 n_on_ring; - - my_cpu = vm->cpu_index; - - queue_id = my_cpu; - - tx_vector = xd->tx_vectors[queue_id]; - ring = vec_header (tx_vector, sizeof (*ring)); - - n_on_ring = ring->tx_head - ring->tx_tail; - from = vlib_frame_vector_args (f); - - ASSERT (n_packets <= VLIB_FRAME_SIZE); - - if (PREDICT_FALSE (n_on_ring + n_packets > nb_tx_desc)) - { - /* - * Overflowing the ring should never happen. - * If it does then drop the whole frame. - */ - vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_RING_FULL, - n_packets); - - while (n_packets--) - { - u32 bi0 = from[n_packets]; - vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); - struct rte_mbuf *mb0 = rte_mbuf_from_vlib_buffer (b0); - rte_pktmbuf_free (mb0); - } - return n_on_ring; - } - - if (PREDICT_FALSE (dm->tx_pcap_enable)) - { - n_left = n_packets; - while (n_left > 0) - { - u32 bi0 = from[0]; - vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); - if (dm->pcap_sw_if_index == 0 || - dm->pcap_sw_if_index == vnet_buffer (b0)->sw_if_index[VLIB_TX]) - pcap_add_buffer (&dm->pcap_main, vm, bi0, 512); - from++; - n_left--; - } - } - - from = vlib_frame_vector_args (f); - n_left = n_packets; - i = ring->tx_head % nb_tx_desc; - - while (n_left >= 8) - { - u32 bi0, bi1, bi2, bi3; - struct rte_mbuf *mb0, *mb1, *mb2, *mb3; - vlib_buffer_t *b0, *b1, *b2, *b3; - u32 or_flags; - - dpdk_prefetch_buffer_by_index (vm, from[4]); - dpdk_prefetch_buffer_by_index (vm, from[5]); - dpdk_prefetch_buffer_by_index (vm, from[6]); - dpdk_prefetch_buffer_by_index (vm, from[7]); - - bi0 = from[0]; - bi1 = from[1]; - bi2 = from[2]; - bi3 = from[3]; - from += 4; - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - b2 = vlib_get_buffer (vm, bi2); - b3 = vlib_get_buffer (vm, bi3); - - or_flags = b0->flags | b1->flags | b2->flags | b3->flags; - - if (or_flags & VLIB_BUFFER_NEXT_PRESENT) - { - dpdk_validate_rte_mbuf (vm, b0, 1); - dpdk_validate_rte_mbuf (vm, b1, 1); - dpdk_validate_rte_mbuf (vm, b2, 1); - dpdk_validate_rte_mbuf (vm, b3, 1); - } - else - { - dpdk_validate_rte_mbuf (vm, b0, 0); - dpdk_validate_rte_mbuf (vm, b1, 0); - dpdk_validate_rte_mbuf (vm, b2, 0); - dpdk_validate_rte_mbuf (vm, b3, 0); - } - - mb0 = rte_mbuf_from_vlib_buffer (b0); - mb1 = rte_mbuf_from_vlib_buffer (b1); - mb2 = rte_mbuf_from_vlib_buffer (b2); - mb3 = rte_mbuf_from_vlib_buffer (b3); - - if (PREDICT_FALSE (or_flags & VLIB_BUFFER_RECYCLE)) - { - dpdk_buffer_recycle (vm, node, b0, bi0, &mb0); - dpdk_buffer_recycle (vm, node, b1, bi1, &mb1); - dpdk_buffer_recycle (vm, node, b2, bi2, &mb2); - dpdk_buffer_recycle (vm, node, b3, bi3, &mb3); - - /* dont enqueue packets if replication failed as they must - be sent back to recycle */ - if (PREDICT_TRUE ((b0->flags & VLIB_BUFFER_REPL_FAIL) == 0)) - tx_vector[i++ % nb_tx_desc] = mb0; - if (PREDICT_TRUE ((b1->flags & VLIB_BUFFER_REPL_FAIL) == 0)) - tx_vector[i++ % nb_tx_desc] = mb1; - if (PREDICT_TRUE ((b2->flags & VLIB_BUFFER_REPL_FAIL) == 0)) - tx_vector[i++ % nb_tx_desc] = mb2; - if (PREDICT_TRUE ((b3->flags & VLIB_BUFFER_REPL_FAIL) == 0)) - 
tx_vector[i++ % nb_tx_desc] = mb3; - } - else - { - if (PREDICT_FALSE (i + 3 >= nb_tx_desc)) - { - tx_vector[i++ % nb_tx_desc] = mb0; - tx_vector[i++ % nb_tx_desc] = mb1; - tx_vector[i++ % nb_tx_desc] = mb2; - tx_vector[i++ % nb_tx_desc] = mb3; - i %= nb_tx_desc; - } - else - { - tx_vector[i++] = mb0; - tx_vector[i++] = mb1; - tx_vector[i++] = mb2; - tx_vector[i++] = mb3; - } - } - - - if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE)) - { - if (b0->flags & VLIB_BUFFER_IS_TRACED) - dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi0, b0); - if (b1->flags & VLIB_BUFFER_IS_TRACED) - dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi1, b1); - if (b2->flags & VLIB_BUFFER_IS_TRACED) - dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi2, b2); - if (b3->flags & VLIB_BUFFER_IS_TRACED) - dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi3, b3); - } - - n_left -= 4; - } - while (n_left > 0) - { - u32 bi0; - struct rte_mbuf *mb0; - vlib_buffer_t *b0; - - bi0 = from[0]; - from++; - - b0 = vlib_get_buffer (vm, bi0); - - dpdk_validate_rte_mbuf (vm, b0, 1); - - mb0 = rte_mbuf_from_vlib_buffer (b0); - dpdk_buffer_recycle (vm, node, b0, bi0, &mb0); - - if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE)) - if (b0->flags & VLIB_BUFFER_IS_TRACED) - dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi0, b0); - - if (PREDICT_TRUE ((b0->flags & VLIB_BUFFER_REPL_FAIL) == 0)) - { - tx_vector[i % nb_tx_desc] = mb0; - i++; - } - n_left--; - } - - /* account for additional packets in the ring */ - ring->tx_head += n_packets; - n_on_ring = ring->tx_head - ring->tx_tail; - - /* transmit as many packets as possible */ - n_packets = tx_burst_vector_internal (vm, xd, tx_vector); - - /* - * tx_pkts is the number of packets successfully transmitted - * This is the number originally on ring minus the number remaining on ring - */ - tx_pkts = n_on_ring - n_packets; - - { - /* If there is no callback then drop any non-transmitted packets */ - if (PREDICT_FALSE (n_packets)) - { - vlib_simple_counter_main_t *cm; - vnet_main_t *vnm = vnet_get_main (); - - cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, - VNET_INTERFACE_COUNTER_TX_ERROR); - - vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, - n_packets); - - vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_PKT_DROP, - n_packets); - - while (n_packets--) - rte_pktmbuf_free (tx_vector[ring->tx_tail + n_packets]); - } - - /* Reset head/tail to avoid unnecessary wrap */ - ring->tx_head = 0; - ring->tx_tail = 0; - } - - /* Recycle replicated buffers */ - if (PREDICT_FALSE (vec_len (dm->recycle[my_cpu]))) - { - vlib_buffer_free (vm, dm->recycle[my_cpu], - vec_len (dm->recycle[my_cpu])); - _vec_len (dm->recycle[my_cpu]) = 0; - } - - ASSERT (ring->tx_head >= ring->tx_tail); - - return tx_pkts; -} - -static void -dpdk_clear_hw_interface_counters (u32 instance) -{ - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd = vec_elt_at_index (dm->devices, instance); - - /* - * Set the "last_cleared_stats" to the current stats, so that - * things appear to clear from a display perspective. 
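The "clear by snapshot" trick described above can be shown in isolation: nothing is reset in hardware; instead the counters are remembered at clear time and only the difference is reported afterwards. A minimal sketch using the public DPDK stats call (hypothetical helper names, single port):

#include <rte_ethdev.h>

static struct rte_eth_stats last_cleared;      /* snapshot taken at "clear" time */

static void
sketch_clear_counters (uint16_t port)
{
  rte_eth_stats_get (port, &last_cleared);     /* remember the current values */
}

static uint64_t
sketch_tx_packets_since_clear (uint16_t port)
{
  struct rte_eth_stats now;
  rte_eth_stats_get (port, &now);
  return now.opackets - last_cleared.opackets; /* display only the delta */
}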
- */ - dpdk_update_counters (xd, vlib_time_now (dm->vlib_main)); - - clib_memcpy (&xd->last_cleared_stats, &xd->stats, sizeof (xd->stats)); - clib_memcpy (xd->last_cleared_xstats, xd->xstats, - vec_len (xd->last_cleared_xstats) * - sizeof (xd->last_cleared_xstats[0])); - -} - -static clib_error_t * -dpdk_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) -{ - vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index); - uword is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd = vec_elt_at_index (dm->devices, hif->dev_instance); - int rv = 0; - - if (is_up) - { - f64 now = vlib_time_now (dm->vlib_main); - - if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) == 0) - { - rv = rte_eth_dev_start (xd->device_index); - if (!rv && xd->default_mac_address) - rv = rte_eth_dev_default_mac_addr_set (xd->device_index, - (struct ether_addr *) - xd->default_mac_address); - } - - if (xd->flags & DPDK_DEVICE_FLAG_PROMISC) - rte_eth_promiscuous_enable (xd->device_index); - else - rte_eth_promiscuous_disable (xd->device_index); - - rte_eth_allmulticast_enable (xd->device_index); - xd->flags |= DPDK_DEVICE_FLAG_ADMIN_UP; - dpdk_update_counters (xd, now); - dpdk_update_link_state (xd, now); - } - else - { - xd->flags &= ~DPDK_DEVICE_FLAG_ADMIN_UP; - - rte_eth_allmulticast_disable (xd->device_index); - vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, 0); - rte_eth_dev_stop (xd->device_index); - - /* For bonded interface, stop slave links */ - if (xd->pmd == VNET_DPDK_PMD_BOND) - { - u8 slink[16]; - int nlink = rte_eth_bond_slaves_get (xd->device_index, slink, 16); - while (nlink >= 1) - { - u8 dpdk_port = slink[--nlink]; - rte_eth_dev_stop (dpdk_port); - } - } - } - - if (rv < 0) - clib_warning ("rte_eth_dev_%s error: %d", is_up ? 
"start" : "stop", rv); - - return /* no error */ 0; -} - -/* - * Dynamically redirect all pkts from a specific interface - * to the specified node - */ -static void -dpdk_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index, - u32 node_index) -{ - dpdk_main_t *xm = &dpdk_main; - vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); - dpdk_device_t *xd = vec_elt_at_index (xm->devices, hw->dev_instance); - - /* Shut off redirection */ - if (node_index == ~0) - { - xd->per_interface_next_index = node_index; - return; - } - - xd->per_interface_next_index = - vlib_node_add_next (xm->vlib_main, dpdk_input_node.index, node_index); -} - - -static clib_error_t * -dpdk_subif_add_del_function (vnet_main_t * vnm, - u32 hw_if_index, - struct vnet_sw_interface_t *st, int is_add) -{ - dpdk_main_t *xm = &dpdk_main; - vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); - dpdk_device_t *xd = vec_elt_at_index (xm->devices, hw->dev_instance); - vnet_sw_interface_t *t = (vnet_sw_interface_t *) st; - int r, vlan_offload; - u32 prev_subifs = xd->num_subifs; - clib_error_t *err = 0; - - if (is_add) - xd->num_subifs++; - else if (xd->num_subifs) - xd->num_subifs--; - - if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0) - goto done; - - /* currently we program VLANS only for IXGBE VF and I40E VF */ - if ((xd->pmd != VNET_DPDK_PMD_IXGBEVF) && (xd->pmd != VNET_DPDK_PMD_I40EVF)) - goto done; - - if (t->sub.eth.flags.no_tags == 1) - goto done; - - if ((t->sub.eth.flags.one_tag != 1) || (t->sub.eth.flags.exact_match != 1)) - { - xd->num_subifs = prev_subifs; - err = clib_error_return (0, "unsupported VLAN setup"); - goto done; - } - - vlan_offload = rte_eth_dev_get_vlan_offload (xd->device_index); - vlan_offload |= ETH_VLAN_FILTER_OFFLOAD; - - if ((r = rte_eth_dev_set_vlan_offload (xd->device_index, vlan_offload))) - { - xd->num_subifs = prev_subifs; - err = clib_error_return (0, "rte_eth_dev_set_vlan_offload[%d]: err %d", - xd->device_index, r); - goto done; - } - - - if ((r = - rte_eth_dev_vlan_filter (xd->device_index, t->sub.eth.outer_vlan_id, - is_add))) - { - xd->num_subifs = prev_subifs; - err = clib_error_return (0, "rte_eth_dev_vlan_filter[%d]: err %d", - xd->device_index, r); - goto done; - } - -done: - if (xd->num_subifs) - xd->flags |= DPDK_DEVICE_FLAG_HAVE_SUBIF; - else - xd->flags &= ~DPDK_DEVICE_FLAG_HAVE_SUBIF; - - return err; -} - -/* *INDENT-OFF* */ -VNET_DEVICE_CLASS (dpdk_device_class) = { - .name = "dpdk", - .tx_function = dpdk_interface_tx, - .tx_function_n_errors = DPDK_TX_FUNC_N_ERROR, - .tx_function_error_strings = dpdk_tx_func_error_strings, - .format_device_name = format_dpdk_device_name, - .format_device = format_dpdk_device, - .format_tx_trace = format_dpdk_tx_dma_trace, - .clear_counters = dpdk_clear_hw_interface_counters, - .admin_up_down_function = dpdk_interface_admin_up_down, - .subif_add_del_function = dpdk_subif_add_del_function, - .rx_redirect_to_node = dpdk_set_interface_next_node, - .mac_addr_change_function = dpdk_set_mac_address, -}; - -VLIB_DEVICE_TX_FUNCTION_MULTIARCH (dpdk_device_class, dpdk_interface_tx) -/* *INDENT-ON* */ - -#define UP_DOWN_FLAG_EVENT 1 - -uword -admin_up_down_process (vlib_main_t * vm, - vlib_node_runtime_t * rt, vlib_frame_t * f) -{ - clib_error_t *error = 0; - uword event_type; - uword *event_data = 0; - u32 sw_if_index; - u32 flags; - - while (1) - { - vlib_process_wait_for_event (vm); - - event_type = vlib_process_get_events (vm, &event_data); - - dpdk_main.admin_up_down_in_progress = 1; - - switch (event_type) - { - case 
UP_DOWN_FLAG_EVENT: - { - if (vec_len (event_data) == 2) - { - sw_if_index = event_data[0]; - flags = event_data[1]; - error = - vnet_sw_interface_set_flags (vnet_get_main (), sw_if_index, - flags); - clib_error_report (error); - } - } - break; - } - - vec_reset_length (event_data); - - dpdk_main.admin_up_down_in_progress = 0; - - } - return 0; /* or not */ -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (admin_up_down_process_node,static) = { - .function = admin_up_down_process, - .type = VLIB_NODE_TYPE_PROCESS, - .name = "admin-up-down-process", - .process_log2_n_stack_bytes = 17, // 256KB -}; -/* *INDENT-ON* */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/dir.dox b/src/vnet/devices/dpdk/dir.dox deleted file mode 100644 index 43e36753..00000000 --- a/src/vnet/devices/dpdk/dir.dox +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (c) 2017 Cisco and/or its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Doxygen directory documentation */ - -/** -@dir -@brief DPDK Abstraction Layer. - -This directory contains the source code for the DPDK abstraction layer. - -*/ -/*? %%clicmd:group_label DPDK and pcap tx %% ?*/ -/*? %%syscfg:group_label DPDK and pcap tx %% ?*/ diff --git a/src/vnet/devices/dpdk/dpdk.api b/src/vnet/devices/dpdk/dpdk.api deleted file mode 100644 index 21215d45..00000000 --- a/src/vnet/devices/dpdk/dpdk.api +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2015-2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
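admin_up_down_process() above is a standard VLIB process node: it blocks until an event is posted, acts on the event data vector, then resets the vector for reuse. A stripped-down sketch of that shape, with a hypothetical event number:

#define SKETCH_EVENT 1

static uword
sketch_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
{
  uword *event_data = 0;
  while (1)
    {
      vlib_process_wait_for_event (vm);   /* suspend until someone signals us */
      uword event_type = vlib_process_get_events (vm, &event_data);
      if (event_type == SKETCH_EVENT && vec_len (event_data) > 0)
        clib_warning ("event received, %d data words", vec_len (event_data));
      vec_reset_length (event_data);      /* keep the vector for reuse */
    }
  return 0;
}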
- */ - -/** \brief DPDK interface HQoS pipe profile set request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param sw_if_index - the interface - @param subport - subport ID - @param pipe - pipe ID within its subport - @param profile - pipe profile ID -*/ -define sw_interface_set_dpdk_hqos_pipe { - u32 client_index; - u32 context; - u32 sw_if_index; - u32 subport; - u32 pipe; - u32 profile; -}; - -/** \brief DPDK interface HQoS pipe profile set reply - @param context - sender context, to match reply w/ request - @param retval - request return code -*/ -define sw_interface_set_dpdk_hqos_pipe_reply { - u32 context; - i32 retval; -}; - -/** \brief DPDK interface HQoS subport parameters set request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param sw_if_index - the interface - @param subport - subport ID - @param tb_rate - subport token bucket rate (measured in bytes/second) - @param tb_size - subport token bucket size (measured in credits) - @param tc_rate - subport traffic class 0 .. 3 rates (measured in bytes/second) - @param tc_period - enforcement period for rates (measured in milliseconds) -*/ -define sw_interface_set_dpdk_hqos_subport { - u32 client_index; - u32 context; - u32 sw_if_index; - u32 subport; - u32 tb_rate; - u32 tb_size; - u32 tc_rate[4]; - u32 tc_period; -}; - -/** \brief DPDK interface HQoS subport parameters set reply - @param context - sender context, to match reply w/ request - @param retval - request return code -*/ -define sw_interface_set_dpdk_hqos_subport_reply { - u32 context; - i32 retval; -}; - -/** \brief DPDK interface HQoS tctbl entry set request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param sw_if_index - the interface - @param entry - entry index ID - @param tc - traffic class (0 .. 3) - @param queue - traffic class queue (0 .. 3) -*/ -define sw_interface_set_dpdk_hqos_tctbl { - u32 client_index; - u32 context; - u32 sw_if_index; - u32 entry; - u32 tc; - u32 queue; -}; - -/** \brief DPDK interface HQoS tctbl entry set reply - @param context - sender context, to match reply w/ request - @param retval - request return code -*/ -define sw_interface_set_dpdk_hqos_tctbl_reply { - u32 context; - i32 retval; -}; - -/* - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ - \ No newline at end of file diff --git a/src/vnet/devices/dpdk/dpdk.h b/src/vnet/devices/dpdk/dpdk.h deleted file mode 100644 index bf9f2768..00000000 --- a/src/vnet/devices/dpdk/dpdk.h +++ /dev/null @@ -1,487 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
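The numeric parameters in the HQoS requests defined above travel in network byte order (the handlers convert them with ntohl()), while client_index and context are passed through unchanged. A hedged sketch of how a client might fill the pipe-profile request; the actual send path over the API queue is omitted, and the helper name is hypothetical:

static void
sketch_fill_hqos_pipe_request (vl_api_sw_interface_set_dpdk_hqos_pipe_t * mp,
                               u32 my_client_index, u32 my_context,
                               u32 sw_if_index, u32 subport,
                               u32 pipe, u32 profile)
{
  memset (mp, 0, sizeof (*mp));
  mp->client_index = my_client_index;
  mp->context = my_context;               /* opaque; echoed back in the reply */
  mp->sw_if_index = htonl (sw_if_index);  /* the handler does ntohl() */
  mp->subport = htonl (subport);
  mp->pipe = htonl (pipe);
  mp->profile = htonl (profile);
}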
- */ -#ifndef __included_dpdk_h__ -#define __included_dpdk_h__ - -/* $$$$ We should rename always_inline -> clib_always_inline */ -#undef always_inline - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#if CLIB_DEBUG > 0 -#define always_inline static inline -#else -#define always_inline static inline __attribute__ ((__always_inline__)) -#endif - -#include - -#define NB_MBUF (16<<10) - -extern vnet_device_class_t dpdk_device_class; -extern vlib_node_registration_t dpdk_input_node; -extern vlib_node_registration_t handoff_dispatch_node; - -#define foreach_dpdk_pmd \ - _ ("net_thunderx", THUNDERX) \ - _ ("net_e1000_em", E1000EM) \ - _ ("net_e1000_igb", IGB) \ - _ ("net_e1000_igb_vf", IGBVF) \ - _ ("net_ixgbe", IXGBE) \ - _ ("net_ixgbe_vf", IXGBEVF) \ - _ ("net_i40e", I40E) \ - _ ("net_i40e_vf", I40EVF) \ - _ ("net_virtio", VIRTIO) \ - _ ("net_enic", ENIC) \ - _ ("net_vmxnet3", VMXNET3) \ - _ ("AF_PACKET PMD", AF_PACKET) \ - _ ("rte_bond_pmd", BOND) \ - _ ("net_fm10k", FM10K) \ - _ ("net_cxgbe", CXGBE) \ - _ ("net_mlx5", MLX5) \ - _ ("net_dpaa2", DPAA2) - -typedef enum -{ - VNET_DPDK_PMD_NONE, -#define _(s,f) VNET_DPDK_PMD_##f, - foreach_dpdk_pmd -#undef _ - VNET_DPDK_PMD_UNKNOWN, /* must be last */ -} dpdk_pmd_t; - -typedef enum -{ - VNET_DPDK_PORT_TYPE_ETH_1G, - VNET_DPDK_PORT_TYPE_ETH_10G, - VNET_DPDK_PORT_TYPE_ETH_40G, - VNET_DPDK_PORT_TYPE_ETH_100G, - VNET_DPDK_PORT_TYPE_ETH_BOND, - VNET_DPDK_PORT_TYPE_ETH_SWITCH, - VNET_DPDK_PORT_TYPE_AF_PACKET, - VNET_DPDK_PORT_TYPE_UNKNOWN, -} dpdk_port_type_t; - -/* - * The header for the tx_vector in dpdk_device_t. - * Head and tail are indexes into the tx_vector and are of type - * u64 so they never overflow. 
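Because the head and tail described just above only ever grow, the number of outstanding packets is a plain subtraction and the array slot is a modulo, with no wrap-around bookkeeping. A tiny illustration with made-up values:

uint64_t head = 10, tail = 7;        /* monotonically increasing counters */
uint32_t ring_size = 8;              /* hypothetical ring of 8 entries */
uint64_t pending = head - tail;      /* 3 packets still queued */
uint32_t slot = tail % ring_size;    /* next slot to drain: 7 */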
- */ -typedef struct -{ - u64 tx_head; - u64 tx_tail; -} tx_ring_hdr_t; - -typedef struct -{ - struct rte_ring *swq; - - u64 hqos_field0_slabmask; - u32 hqos_field0_slabpos; - u32 hqos_field0_slabshr; - u64 hqos_field1_slabmask; - u32 hqos_field1_slabpos; - u32 hqos_field1_slabshr; - u64 hqos_field2_slabmask; - u32 hqos_field2_slabpos; - u32 hqos_field2_slabshr; - u32 hqos_tc_table[64]; -} dpdk_device_hqos_per_worker_thread_t; - -typedef struct -{ - struct rte_ring **swq; - struct rte_mbuf **pkts_enq; - struct rte_mbuf **pkts_deq; - struct rte_sched_port *hqos; - u32 hqos_burst_enq; - u32 hqos_burst_deq; - u32 pkts_enq_len; - u32 swq_pos; - u32 flush_count; -} dpdk_device_hqos_per_hqos_thread_t; - -typedef struct -{ - CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - volatile u32 **lockp; - - /* Instance ID */ - u32 device_index; - - u32 vlib_hw_if_index; - u32 vlib_sw_if_index; - - /* next node index if we decide to steal the rx graph arc */ - u32 per_interface_next_index; - - /* dpdk rte_mbuf rx and tx vectors, VLIB_FRAME_SIZE */ - struct rte_mbuf ***tx_vectors; /* one per worker thread */ - struct rte_mbuf ***rx_vectors; - - /* vector of traced contexts, per device */ - u32 **d_trace_buffers; - - dpdk_pmd_t pmd:8; - i8 cpu_socket; - - u16 flags; -#define DPDK_DEVICE_FLAG_ADMIN_UP (1 << 0) -#define DPDK_DEVICE_FLAG_PROMISC (1 << 1) -#define DPDK_DEVICE_FLAG_PMD (1 << 2) -#define DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE (1 << 3) -#define DPDK_DEVICE_FLAG_MAYBE_MULTISEG (1 << 4) -#define DPDK_DEVICE_FLAG_HAVE_SUBIF (1 << 5) -#define DPDK_DEVICE_FLAG_HQOS (1 << 6) - - u16 nb_tx_desc; - CLIB_CACHE_LINE_ALIGN_MARK (cacheline1); - - u8 *interface_name_suffix; - - /* number of sub-interfaces */ - u16 num_subifs; - - /* PMD related */ - u16 tx_q_used; - u16 rx_q_used; - u16 nb_rx_desc; - u16 *cpu_socket_id_by_queue; - struct rte_eth_conf port_conf; - struct rte_eth_txconf tx_conf; - - /* HQoS related */ - dpdk_device_hqos_per_worker_thread_t *hqos_wt; - dpdk_device_hqos_per_hqos_thread_t *hqos_ht; - - /* af_packet */ - u8 af_packet_port_id; - - struct rte_eth_link link; - f64 time_last_link_update; - - struct rte_eth_stats stats; - struct rte_eth_stats last_stats; - struct rte_eth_stats last_cleared_stats; - struct rte_eth_xstat *xstats; - struct rte_eth_xstat *last_cleared_xstats; - f64 time_last_stats_update; - dpdk_port_type_t port_type; - - /* mac address */ - u8 *default_mac_address; -} dpdk_device_t; - -#define DPDK_STATS_POLL_INTERVAL (10.0) -#define DPDK_MIN_STATS_POLL_INTERVAL (0.001) /* 1msec */ - -#define DPDK_LINK_POLL_INTERVAL (3.0) -#define DPDK_MIN_LINK_POLL_INTERVAL (0.001) /* 1msec */ - -typedef struct -{ - u32 device; - u16 queue_id; -} dpdk_device_and_queue_t; - -#ifndef DPDK_HQOS_DBG_BYPASS -#define DPDK_HQOS_DBG_BYPASS 0 -#endif - -#ifndef HQOS_FLUSH_COUNT_THRESHOLD -#define HQOS_FLUSH_COUNT_THRESHOLD 100000 -#endif - -typedef struct dpdk_device_config_hqos_t -{ - u32 hqos_thread; - u32 hqos_thread_valid; - - u32 swq_size; - u32 burst_enq; - u32 burst_deq; - - u32 pktfield0_slabpos; - u32 pktfield1_slabpos; - u32 pktfield2_slabpos; - u64 pktfield0_slabmask; - u64 pktfield1_slabmask; - u64 pktfield2_slabmask; - u32 tc_table[64]; - - struct rte_sched_port_params port; - struct rte_sched_subport_params *subport; - struct rte_sched_pipe_params *pipe; - uint32_t *pipe_map; -} dpdk_device_config_hqos_t; - -int dpdk_hqos_validate_mask (u64 mask, u32 n); -void dpdk_device_config_hqos_pipe_profile_default (dpdk_device_config_hqos_t * - hqos, u32 pipe_profile_id); -void 
dpdk_device_config_hqos_default (dpdk_device_config_hqos_t * hqos); -clib_error_t *dpdk_port_setup_hqos (dpdk_device_t * xd, - dpdk_device_config_hqos_t * hqos); -void dpdk_hqos_metadata_set (dpdk_device_hqos_per_worker_thread_t * hqos, - struct rte_mbuf **pkts, u32 n_pkts); - -#define foreach_dpdk_device_config_item \ - _ (num_rx_queues) \ - _ (num_tx_queues) \ - _ (num_rx_desc) \ - _ (num_tx_desc) \ - _ (rss_fn) - -typedef struct -{ - vlib_pci_addr_t pci_addr; - u8 is_blacklisted; - u8 vlan_strip_offload; -#define DPDK_DEVICE_VLAN_STRIP_DEFAULT 0 -#define DPDK_DEVICE_VLAN_STRIP_OFF 1 -#define DPDK_DEVICE_VLAN_STRIP_ON 2 - -#define _(x) uword x; - foreach_dpdk_device_config_item -#undef _ - clib_bitmap_t * workers; - u32 hqos_enabled; - dpdk_device_config_hqos_t hqos; -} dpdk_device_config_t; - -typedef struct -{ - - /* Config stuff */ - u8 **eal_init_args; - u8 *eal_init_args_str; - u8 *uio_driver_name; - u8 no_multi_seg; - u8 enable_tcp_udp_checksum; - u8 cryptodev; - - /* Required config parameters */ - u8 coremask_set_manually; - u8 nchannels_set_manually; - u32 coremask; - u32 nchannels; - u32 num_mbufs; - u8 num_kni; /* while kni_init allows u32, port_id in callback fn is only u8 */ - - /* - * format interface names ala xxxEthernet%d/%d/%d instead of - * xxxEthernet%x/%x/%x. - */ - u8 interface_name_format_decimal; - - /* per-device config */ - dpdk_device_config_t default_devconf; - dpdk_device_config_t *dev_confs; - uword *device_config_index_by_pci_addr; - -} dpdk_config_main_t; - -dpdk_config_main_t dpdk_config_main; - -typedef struct -{ - - /* Devices */ - dpdk_device_t *devices; - dpdk_device_and_queue_t **devices_by_cpu; - dpdk_device_and_queue_t **devices_by_hqos_cpu; - - /* per-thread recycle lists */ - u32 **recycle; - - /* buffer flags template, configurable to enable/disable tcp / udp cksum */ - u32 buffer_flags_template; - - /* vlib buffer free list, must be same size as an rte_mbuf */ - u32 vlib_buffer_free_list_index; - - /* Ethernet input node index */ - u32 ethernet_input_node_index; - - /* pcap tracing [only works if (CLIB_DEBUG > 0)] */ - int tx_pcap_enable; - pcap_main_t pcap_main; - u8 *pcap_filename; - u32 pcap_sw_if_index; - u32 pcap_pkts_to_capture; - - /* hashes */ - uword *dpdk_device_by_kni_port_id; - uword *vu_sw_if_index_by_listener_fd; - uword *vu_sw_if_index_by_sock_fd; - u32 *vu_inactive_interfaces_device_index; - - /* - * flag indicating that a posted admin up/down - * (via post_sw_interface_set_flags) is in progress - */ - u8 admin_up_down_in_progress; - - u8 use_rss; - - /* which cpus are running dpdk-input */ - int input_cpu_first_index; - int input_cpu_count; - - /* which cpus are running I/O TX */ - int hqos_cpu_first_index; - int hqos_cpu_count; - - /* control interval of dpdk link state and stat polling */ - f64 link_state_poll_interval; - f64 stat_poll_interval; - - /* Sleep for this many MS after each device poll */ - u32 poll_sleep; - - /* convenience */ - vlib_main_t *vlib_main; - vnet_main_t *vnet_main; - dpdk_config_main_t *conf; - - /* mempool */ - struct rte_mempool **pktmbuf_pools; -} dpdk_main_t; - -dpdk_main_t dpdk_main; - -typedef struct -{ - u32 buffer_index; - u16 device_index; - u8 queue_index; - struct rte_mbuf mb; - /* Copy of VLIB buffer; packet data stored in pre_data. */ - vlib_buffer_t buffer; -} dpdk_tx_dma_trace_t; - -typedef struct -{ - u32 buffer_index; - u16 device_index; - u16 queue_index; - struct rte_mbuf mb; - vlib_buffer_t buffer; /* Copy of VLIB buffer; pkt data stored in pre_data. 
*/ - u8 data[256]; /* First 256 data bytes, used for hexdump */ -} dpdk_rx_dma_trace_t; - -void vnet_buffer_needs_dpdk_mb (vlib_buffer_t * b); - -clib_error_t *dpdk_set_mac_address (vnet_hw_interface_t * hi, char *address); - -clib_error_t *dpdk_set_mc_filter (vnet_hw_interface_t * hi, - struct ether_addr mc_addr_vec[], int naddr); - -void dpdk_thread_input (dpdk_main_t * dm, dpdk_device_t * xd); - -clib_error_t *dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd); - -u32 dpdk_interface_tx_vector (vlib_main_t * vm, u32 dev_instance); - -struct rte_mbuf *dpdk_replicate_packet_mb (vlib_buffer_t * b); -struct rte_mbuf *dpdk_zerocopy_replicate_packet_mb (vlib_buffer_t * b); - -#define foreach_dpdk_error \ - _(NONE, "no error") \ - _(RX_PACKET_ERROR, "Rx packet errors") \ - _(RX_BAD_FCS, "Rx bad fcs") \ - _(IP_CHECKSUM_ERROR, "Rx ip checksum errors") \ - _(RX_ALLOC_FAIL, "rx buf alloc from free list failed") \ - _(RX_ALLOC_NO_PHYSMEM, "rx buf alloc failed no physmem") \ - _(RX_ALLOC_DROP_PKTS, "rx packets dropped due to alloc error") - -typedef enum -{ -#define _(f,s) DPDK_ERROR_##f, - foreach_dpdk_error -#undef _ - DPDK_N_ERROR, -} dpdk_error_t; - -int dpdk_set_stat_poll_interval (f64 interval); -int dpdk_set_link_state_poll_interval (f64 interval); -void dpdk_update_link_state (dpdk_device_t * xd, f64 now); -void dpdk_device_lock_init (dpdk_device_t * xd); -void dpdk_device_lock_free (dpdk_device_t * xd); - -void dpdk_rx_trace (dpdk_main_t * dm, - vlib_node_runtime_t * node, - dpdk_device_t * xd, - u16 queue_id, u32 * buffers, uword n_buffers); - -#define EFD_OPERATION_LESS_THAN 0 -#define EFD_OPERATION_GREATER_OR_EQUAL 1 - -format_function_t format_dpdk_device_name; -format_function_t format_dpdk_device; -format_function_t format_dpdk_tx_dma_trace; -format_function_t format_dpdk_rx_dma_trace; -format_function_t format_dpdk_rte_mbuf; -format_function_t format_dpdk_rx_rte_mbuf; -unformat_function_t unformat_socket_mem; -clib_error_t *unformat_rss_fn (unformat_input_t * input, uword * rss_fn); -clib_error_t *unformat_hqos (unformat_input_t * input, - dpdk_device_config_hqos_t * hqos); - -uword -admin_up_down_process (vlib_main_t * vm, - vlib_node_runtime_t * rt, vlib_frame_t * f); - -#endif /* __included_dpdk_h__ */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/dpdk_api.c b/src/vnet/devices/dpdk/dpdk_api.c deleted file mode 100644 index 8faf5c2c..00000000 --- a/src/vnet/devices/dpdk/dpdk_api.c +++ /dev/null @@ -1,246 +0,0 @@ -/* - *------------------------------------------------------------------ - * dpdk_api.c - dpdk interface api - * - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
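foreach_dpdk_error above is the usual X-macro idiom: one list, expanded once into enum members and once (elsewhere) into the matching counter strings. Expanding the enum by hand gives roughly:

/* sketch of the expansion, abbreviated */
typedef enum
{
  DPDK_ERROR_NONE,
  DPDK_ERROR_RX_PACKET_ERROR,
  DPDK_ERROR_RX_BAD_FCS,
  DPDK_ERROR_IP_CHECKSUM_ERROR,
  /* ...one member per list entry... */
  DPDK_N_ERROR,
} dpdk_error_t;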
- *------------------------------------------------------------------ - */ - -#include -#include - -#if DPDK > 0 -#include -#endif - -#include - -#define vl_typedefs /* define message structures */ -#include -#undef vl_typedefs - -#define vl_endianfun /* define message structures */ -#include -#undef vl_endianfun - -/* instantiate all the print functions we know about */ -#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) -#define vl_printfun -#include -#undef vl_printfun - -#include - -#define foreach_vpe_api_msg \ -_(SW_INTERFACE_SET_DPDK_HQOS_PIPE, sw_interface_set_dpdk_hqos_pipe) \ -_(SW_INTERFACE_SET_DPDK_HQOS_SUBPORT, sw_interface_set_dpdk_hqos_subport) \ -_(SW_INTERFACE_SET_DPDK_HQOS_TCTBL, sw_interface_set_dpdk_hqos_tctbl) - -static void - vl_api_sw_interface_set_dpdk_hqos_pipe_t_handler - (vl_api_sw_interface_set_dpdk_hqos_pipe_t * mp) -{ - vl_api_sw_interface_set_dpdk_hqos_pipe_reply_t *rmp; - int rv = 0; - -#if DPDK > 0 - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd; - - u32 sw_if_index = ntohl (mp->sw_if_index); - u32 subport = ntohl (mp->subport); - u32 pipe = ntohl (mp->pipe); - u32 profile = ntohl (mp->profile); - vnet_hw_interface_t *hw; - - VALIDATE_SW_IF_INDEX (mp); - - /* hw_if & dpdk device */ - hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); - - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - rv = rte_sched_pipe_config (xd->hqos_ht->hqos, subport, pipe, profile); - - BAD_SW_IF_INDEX_LABEL; -#else - clib_warning ("setting HQoS pipe parameters without DPDK not implemented"); - rv = VNET_API_ERROR_UNIMPLEMENTED; -#endif /* DPDK */ - - REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_PIPE_REPLY); -} - -static void - vl_api_sw_interface_set_dpdk_hqos_subport_t_handler - (vl_api_sw_interface_set_dpdk_hqos_subport_t * mp) -{ - vl_api_sw_interface_set_dpdk_hqos_subport_reply_t *rmp; - int rv = 0; - -#if DPDK > 0 - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd; - struct rte_sched_subport_params p; - - u32 sw_if_index = ntohl (mp->sw_if_index); - u32 subport = ntohl (mp->subport); - p.tb_rate = ntohl (mp->tb_rate); - p.tb_size = ntohl (mp->tb_size); - p.tc_rate[0] = ntohl (mp->tc_rate[0]); - p.tc_rate[1] = ntohl (mp->tc_rate[1]); - p.tc_rate[2] = ntohl (mp->tc_rate[2]); - p.tc_rate[3] = ntohl (mp->tc_rate[3]); - p.tc_period = ntohl (mp->tc_period); - - vnet_hw_interface_t *hw; - - VALIDATE_SW_IF_INDEX (mp); - - /* hw_if & dpdk device */ - hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); - - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - rv = rte_sched_subport_config (xd->hqos_ht->hqos, subport, &p); - - BAD_SW_IF_INDEX_LABEL; -#else - clib_warning - ("setting HQoS subport parameters without DPDK not implemented"); - rv = VNET_API_ERROR_UNIMPLEMENTED; -#endif /* DPDK */ - - REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_SUBPORT_REPLY); -} - -static void - vl_api_sw_interface_set_dpdk_hqos_tctbl_t_handler - (vl_api_sw_interface_set_dpdk_hqos_tctbl_t * mp) -{ - vl_api_sw_interface_set_dpdk_hqos_tctbl_reply_t *rmp; - int rv = 0; - -#if DPDK > 0 - dpdk_main_t *dm = &dpdk_main; - vlib_thread_main_t *tm = vlib_get_thread_main (); - dpdk_device_t *xd; - - u32 sw_if_index = ntohl (mp->sw_if_index); - u32 entry = ntohl (mp->entry); - u32 tc = ntohl (mp->tc); - u32 queue = ntohl (mp->queue); - u32 val, i; - - vnet_hw_interface_t *hw; - - VALIDATE_SW_IF_INDEX (mp); - - /* hw_if & dpdk device */ - hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); - - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - 
if (tc >= RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE) - { - clib_warning ("invalid traffic class !!"); - rv = VNET_API_ERROR_INVALID_VALUE; - goto done; - } - if (queue >= RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS) - { - clib_warning ("invalid queue !!"); - rv = VNET_API_ERROR_INVALID_VALUE; - goto done; - } - - /* Detect the set of worker threads */ - uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers"); - - if (p == 0) - { - clib_warning ("worker thread registration AWOL !!"); - rv = VNET_API_ERROR_INVALID_VALUE_2; - goto done; - } - - vlib_thread_registration_t *tr = (vlib_thread_registration_t *) p[0]; - int worker_thread_first = tr->first_index; - int worker_thread_count = tr->count; - - val = tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + queue; - for (i = 0; i < worker_thread_count; i++) - xd->hqos_wt[worker_thread_first + i].hqos_tc_table[entry] = val; - - BAD_SW_IF_INDEX_LABEL; -done: -#else - clib_warning ("setting HQoS DSCP table entry without DPDK not implemented"); - rv = VNET_API_ERROR_UNIMPLEMENTED; -#endif /* DPDK */ - - REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_TCTBL_REPLY); -} - -/* - * dpdk_api_hookup - * Add vpe's API message handlers to the table. - * vlib has alread mapped shared memory and - * added the client registration handlers. - * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process() - */ -#define vl_msg_name_crc_list -#include -#undef vl_msg_name_crc_list - -static void -setup_message_id_table (api_main_t * am) -{ -#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id); - foreach_vl_msg_name_crc_dpdk; -#undef _ -} - -static clib_error_t * -dpdk_api_hookup (vlib_main_t * vm) -{ - api_main_t *am = &api_main; - -#define _(N,n) \ - vl_msg_api_set_handlers(VL_API_##N, #n, \ - vl_api_##n##_t_handler, \ - vl_noop_handler, \ - vl_api_##n##_t_endian, \ - vl_api_##n##_t_print, \ - sizeof(vl_api_##n##_t), 1); - foreach_vpe_api_msg; -#undef _ - - /* - * Set up the (msg_name, crc, message-id) table - */ - setup_message_id_table (am); - - return 0; -} - -VLIB_API_INIT_FUNCTION (dpdk_api_hookup); - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/dpdk_priv.h b/src/vnet/devices/dpdk/dpdk_priv.h deleted file mode 100644 index dd40ff48..00000000 --- a/src/vnet/devices/dpdk/dpdk_priv.h +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
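The underscore macro in dpdk_api_hookup() above generates one vl_msg_api_set_handlers() call per entry of foreach_vpe_api_msg; written out for the pipe-profile message, the expansion is roughly:

vl_msg_api_set_handlers (VL_API_SW_INTERFACE_SET_DPDK_HQOS_PIPE,
                         "sw_interface_set_dpdk_hqos_pipe",
                         vl_api_sw_interface_set_dpdk_hqos_pipe_t_handler,
                         vl_noop_handler,
                         vl_api_sw_interface_set_dpdk_hqos_pipe_t_endian,
                         vl_api_sw_interface_set_dpdk_hqos_pipe_t_print,
                         sizeof (vl_api_sw_interface_set_dpdk_hqos_pipe_t), 1);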
- */ - -#define rte_mbuf_from_vlib_buffer(x) (((struct rte_mbuf *)x) - 1) -#define vlib_buffer_from_rte_mbuf(x) ((vlib_buffer_t *)(x+1)) - -#define DPDK_NB_RX_DESC_DEFAULT 1024 -#define DPDK_NB_TX_DESC_DEFAULT 1024 -#define DPDK_NB_RX_DESC_VIRTIO 256 -#define DPDK_NB_TX_DESC_VIRTIO 256 - -#define I40E_DEV_ID_SFP_XL710 0x1572 -#define I40E_DEV_ID_QSFP_A 0x1583 -#define I40E_DEV_ID_QSFP_B 0x1584 -#define I40E_DEV_ID_QSFP_C 0x1585 -#define I40E_DEV_ID_10G_BASE_T 0x1586 -#define I40E_DEV_ID_VF 0x154C - -/* These args appear by themselves */ -#define foreach_eal_double_hyphen_predicate_arg \ -_(no-shconf) \ -_(no-hpet) \ -_(no-huge) \ -_(vmware-tsc-map) - -#define foreach_eal_single_hyphen_mandatory_arg \ -_(coremask, c) \ -_(nchannels, n) \ - -#define foreach_eal_single_hyphen_arg \ -_(blacklist, b) \ -_(mem-alloc-request, m) \ -_(force-ranks, r) - -/* These args are preceeded by "--" and followed by a single string */ -#define foreach_eal_double_hyphen_arg \ -_(huge-dir) \ -_(proc-type) \ -_(file-prefix) \ -_(vdev) - -static inline void -dpdk_get_xstats (dpdk_device_t * xd) -{ - int len; - if ((len = rte_eth_xstats_get (xd->device_index, NULL, 0)) > 0) - { - vec_validate (xd->xstats, len - 1); - vec_validate (xd->last_cleared_xstats, len - 1); - - len = - rte_eth_xstats_get (xd->device_index, xd->xstats, - vec_len (xd->xstats)); - - ASSERT (vec_len (xd->xstats) == len); - ASSERT (vec_len (xd->last_cleared_xstats) == len); - - _vec_len (xd->xstats) = len; - _vec_len (xd->last_cleared_xstats) = len; - - } -} - - -static inline void -dpdk_update_counters (dpdk_device_t * xd, f64 now) -{ - vlib_simple_counter_main_t *cm; - vnet_main_t *vnm = vnet_get_main (); - u32 my_cpu = os_get_cpu_number (); - u64 rxerrors, last_rxerrors; - - /* only update counters for PMD interfaces */ - if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0) - return; - - xd->time_last_stats_update = now ? now : xd->time_last_stats_update; - clib_memcpy (&xd->last_stats, &xd->stats, sizeof (xd->last_stats)); - rte_eth_stats_get (xd->device_index, &xd->stats); - - /* maybe bump interface rx no buffer counter */ - if (PREDICT_FALSE (xd->stats.rx_nombuf != xd->last_stats.rx_nombuf)) - { - cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, - VNET_INTERFACE_COUNTER_RX_NO_BUF); - - vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, - xd->stats.rx_nombuf - - xd->last_stats.rx_nombuf); - } - - /* missed pkt counter */ - if (PREDICT_FALSE (xd->stats.imissed != xd->last_stats.imissed)) - { - cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, - VNET_INTERFACE_COUNTER_RX_MISS); - - vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, - xd->stats.imissed - - xd->last_stats.imissed); - } - rxerrors = xd->stats.ierrors; - last_rxerrors = xd->last_stats.ierrors; - - if (PREDICT_FALSE (rxerrors != last_rxerrors)) - { - cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, - VNET_INTERFACE_COUNTER_RX_ERROR); - - vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, - rxerrors - last_rxerrors); - } - - dpdk_get_xstats (xd); -} - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/format.c b/src/vnet/devices/dpdk/format.c deleted file mode 100644 index 1558630c..00000000 --- a/src/vnet/devices/dpdk/format.c +++ /dev/null @@ -1,754 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. 
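dpdk_get_xstats() above follows the common two-step DPDK pattern: query the number of extended stats with a NULL array, size the buffer, then fetch. A standalone sketch using plain calloc instead of VPP vectors (hypothetical helper name):

#include <stdlib.h>
#include <rte_ethdev.h>

static void
sketch_dump_xstats (uint16_t port)
{
  int n = rte_eth_xstats_get (port, NULL, 0);     /* how many entries? */
  if (n <= 0)
    return;
  struct rte_eth_xstat *xs = calloc (n, sizeof (*xs));
  if (xs && rte_eth_xstats_get (port, xs, n) == n)
    {
      /* xs[i].id indexes the name table from rte_eth_xstats_get_names(),
         xs[i].value is the running counter */
    }
  free (xs);
}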
- * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include -#include -#include - -#include -#include - -#include "dpdk_priv.h" -#include - -#define foreach_dpdk_counter \ - _ (tx_frames_ok, opackets) \ - _ (tx_bytes_ok, obytes) \ - _ (tx_errors, oerrors) \ - _ (rx_frames_ok, ipackets) \ - _ (rx_bytes_ok, ibytes) \ - _ (rx_errors, ierrors) \ - _ (rx_missed, imissed) \ - _ (rx_no_bufs, rx_nombuf) - -#define foreach_dpdk_q_counter \ - _ (rx_frames_ok, q_ipackets) \ - _ (tx_frames_ok, q_opackets) \ - _ (rx_bytes_ok, q_ibytes) \ - _ (tx_bytes_ok, q_obytes) \ - _ (rx_errors, q_errors) - -#define foreach_dpdk_rss_hf \ - _(ETH_RSS_FRAG_IPV4, "ipv4-frag") \ - _(ETH_RSS_NONFRAG_IPV4_TCP, "ipv4-tcp") \ - _(ETH_RSS_NONFRAG_IPV4_UDP, "ipv4-udp") \ - _(ETH_RSS_NONFRAG_IPV4_SCTP, "ipv4-sctp") \ - _(ETH_RSS_NONFRAG_IPV4_OTHER, "ipv4-other") \ - _(ETH_RSS_IPV4, "ipv4") \ - _(ETH_RSS_IPV6_TCP_EX, "ipv6-tcp-ex") \ - _(ETH_RSS_IPV6_UDP_EX, "ipv6-udp-ex") \ - _(ETH_RSS_FRAG_IPV6, "ipv6-frag") \ - _(ETH_RSS_NONFRAG_IPV6_TCP, "ipv6-tcp") \ - _(ETH_RSS_NONFRAG_IPV6_UDP, "ipv6-udp") \ - _(ETH_RSS_NONFRAG_IPV6_SCTP, "ipv6-sctp") \ - _(ETH_RSS_NONFRAG_IPV6_OTHER, "ipv6-other") \ - _(ETH_RSS_L2_PAYLOAD, "l2-payload") \ - _(ETH_RSS_IPV6_EX, "ipv6-ex") \ - _(ETH_RSS_IPV6, "ipv6") - - -#define foreach_dpdk_rx_offload_caps \ - _(DEV_RX_OFFLOAD_VLAN_STRIP, "vlan-strip") \ - _(DEV_RX_OFFLOAD_IPV4_CKSUM, "ipv4-cksum") \ - _(DEV_RX_OFFLOAD_UDP_CKSUM , "udp-cksum") \ - _(DEV_RX_OFFLOAD_TCP_CKSUM , "tcp-cksum") \ - _(DEV_RX_OFFLOAD_TCP_LRO , "rcp-lro") \ - _(DEV_RX_OFFLOAD_QINQ_STRIP, "qinq-strip") - -#define foreach_dpdk_tx_offload_caps \ - _(DEV_TX_OFFLOAD_VLAN_INSERT, "vlan-insert") \ - _(DEV_TX_OFFLOAD_IPV4_CKSUM, "ipv4-cksum") \ - _(DEV_TX_OFFLOAD_UDP_CKSUM , "udp-cksum") \ - _(DEV_TX_OFFLOAD_TCP_CKSUM , "tcp-cksum") \ - _(DEV_TX_OFFLOAD_SCTP_CKSUM , "sctp-cksum") \ - _(DEV_TX_OFFLOAD_TCP_TSO , "tcp-tso") \ - _(DEV_TX_OFFLOAD_UDP_TSO , "udp-tso") \ - _(DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM, "outer-ipv4-cksum") \ - _(DEV_TX_OFFLOAD_QINQ_INSERT, "qinq-insert") - -#define foreach_dpdk_pkt_rx_offload_flag \ - _ (PKT_RX_VLAN_PKT, "RX packet is a 802.1q VLAN packet") \ - _ (PKT_RX_RSS_HASH, "RX packet with RSS hash result") \ - _ (PKT_RX_FDIR, "RX packet with FDIR infos") \ - _ (PKT_RX_L4_CKSUM_BAD, "L4 cksum of RX pkt. is not OK") \ - _ (PKT_RX_IP_CKSUM_BAD, "IP cksum of RX pkt. is not OK") \ - _ (PKT_RX_VLAN_STRIPPED, "RX packet VLAN tag stripped") \ - _ (PKT_RX_IP_CKSUM_GOOD, "IP cksum of RX pkt. is valid") \ - _ (PKT_RX_L4_CKSUM_GOOD, "L4 cksum of RX pkt. 
is valid") \ - _ (PKT_RX_IEEE1588_PTP, "RX IEEE1588 L2 Ethernet PT Packet") \ - _ (PKT_RX_IEEE1588_TMST, "RX IEEE1588 L2/L4 timestamped packet") \ - _ (PKT_RX_QINQ_STRIPPED, "RX packet QinQ tags stripped") - -#define foreach_dpdk_pkt_type \ - _ (L2, ETHER, "Ethernet packet") \ - _ (L2, ETHER_TIMESYNC, "Ethernet packet for time sync") \ - _ (L2, ETHER_ARP, "ARP packet") \ - _ (L2, ETHER_LLDP, "LLDP (Link Layer Discovery Protocol) packet") \ - _ (L2, ETHER_NSH, "NSH (Network Service Header) packet") \ - _ (L2, ETHER_VLAN, "VLAN packet") \ - _ (L2, ETHER_QINQ, "QinQ packet") \ - _ (L3, IPV4, "IPv4 packet without extension headers") \ - _ (L3, IPV4_EXT, "IPv4 packet with extension headers") \ - _ (L3, IPV4_EXT_UNKNOWN, "IPv4 packet with or without extension headers") \ - _ (L3, IPV6, "IPv6 packet without extension headers") \ - _ (L3, IPV6_EXT, "IPv6 packet with extension headers") \ - _ (L3, IPV6_EXT_UNKNOWN, "IPv6 packet with or without extension headers") \ - _ (L4, TCP, "TCP packet") \ - _ (L4, UDP, "UDP packet") \ - _ (L4, FRAG, "Fragmented IP packet") \ - _ (L4, SCTP, "SCTP (Stream Control Transmission Protocol) packet") \ - _ (L4, ICMP, "ICMP packet") \ - _ (L4, NONFRAG, "Non-fragmented IP packet") \ - _ (TUNNEL, GRE, "GRE tunneling packet") \ - _ (TUNNEL, VXLAN, "VXLAN tunneling packet") \ - _ (TUNNEL, NVGRE, "NVGRE Tunneling packet") \ - _ (TUNNEL, GENEVE, "GENEVE Tunneling packet") \ - _ (TUNNEL, GRENAT, "Teredo, VXLAN or GRE Tunneling packet") \ - _ (INNER_L2, ETHER, "Inner Ethernet packet") \ - _ (INNER_L2, ETHER_VLAN, "Inner Ethernet packet with VLAN") \ - _ (INNER_L3, IPV4, "Inner IPv4 packet without extension headers") \ - _ (INNER_L3, IPV4_EXT, "Inner IPv4 packet with extension headers") \ - _ (INNER_L3, IPV4_EXT_UNKNOWN, "Inner IPv4 packet with or without extension headers") \ - _ (INNER_L3, IPV6, "Inner IPv6 packet without extension headers") \ - _ (INNER_L3, IPV6_EXT, "Inner IPv6 packet with extension headers") \ - _ (INNER_L3, IPV6_EXT_UNKNOWN, "Inner IPv6 packet with or without extension headers") \ - _ (INNER_L4, TCP, "Inner TCP packet") \ - _ (INNER_L4, UDP, "Inner UDP packet") \ - _ (INNER_L4, FRAG, "Inner fagmented IP packet") \ - _ (INNER_L4, SCTP, "Inner SCTP (Stream Control Transmission Protocol) packet") \ - _ (INNER_L4, ICMP, "Inner ICMP packet") \ - _ (INNER_L4, NONFRAG, "Inner non-fragmented IP packet") - -#define foreach_dpdk_pkt_tx_offload_flag \ - _ (PKT_TX_VLAN_PKT, "TX packet is a 802.1q VLAN packet") \ - _ (PKT_TX_IP_CKSUM, "IP cksum of TX pkt. computed by NIC") \ - _ (PKT_TX_TCP_CKSUM, "TCP cksum of TX pkt. computed by NIC") \ - _ (PKT_TX_SCTP_CKSUM, "SCTP cksum of TX pkt. 
computed by NIC") \ - _ (PKT_TX_IEEE1588_TMST, "TX IEEE1588 packet to timestamp") - -#define foreach_dpdk_pkt_offload_flag \ - foreach_dpdk_pkt_rx_offload_flag \ - foreach_dpdk_pkt_tx_offload_flag - -u8 * -format_dpdk_device_name (u8 * s, va_list * args) -{ - dpdk_main_t *dm = &dpdk_main; - char *devname_format; - char *device_name; - u32 i = va_arg (*args, u32); - struct rte_eth_dev_info dev_info; - u8 *ret; - - if (dm->conf->interface_name_format_decimal) - devname_format = "%s%d/%d/%d"; - else - devname_format = "%s%x/%x/%x"; - - switch (dm->devices[i].port_type) - { - case VNET_DPDK_PORT_TYPE_ETH_1G: - device_name = "GigabitEthernet"; - break; - - case VNET_DPDK_PORT_TYPE_ETH_10G: - device_name = "TenGigabitEthernet"; - break; - - case VNET_DPDK_PORT_TYPE_ETH_40G: - device_name = "FortyGigabitEthernet"; - break; - - case VNET_DPDK_PORT_TYPE_ETH_100G: - device_name = "HundredGigabitEthernet"; - break; - - case VNET_DPDK_PORT_TYPE_ETH_BOND: - return format (s, "BondEthernet%d", dm->devices[i].device_index); - - case VNET_DPDK_PORT_TYPE_ETH_SWITCH: - device_name = "EthernetSwitch"; - break; - - case VNET_DPDK_PORT_TYPE_AF_PACKET: - rte_eth_dev_info_get (i, &dev_info); - return format (s, "af_packet%d", dm->devices[i].af_packet_port_id); - - default: - case VNET_DPDK_PORT_TYPE_UNKNOWN: - device_name = "UnknownEthernet"; - break; - } - - rte_eth_dev_info_get (i, &dev_info); - - if (dev_info.pci_dev) - ret = format (s, devname_format, device_name, dev_info.pci_dev->addr.bus, - dev_info.pci_dev->addr.devid, - dev_info.pci_dev->addr.function); - else - ret = format (s, "%s%d", device_name, dm->devices[i].device_index); - - if (dm->devices[i].interface_name_suffix) - return format (ret, "/%s", dm->devices[i].interface_name_suffix); - return ret; -} - -static u8 * -format_dpdk_device_type (u8 * s, va_list * args) -{ - dpdk_main_t *dm = &dpdk_main; - char *dev_type; - u32 i = va_arg (*args, u32); - - switch (dm->devices[i].pmd) - { - case VNET_DPDK_PMD_E1000EM: - dev_type = "Intel 82540EM (e1000)"; - break; - - case VNET_DPDK_PMD_IGB: - dev_type = "Intel e1000"; - break; - - case VNET_DPDK_PMD_I40E: - dev_type = "Intel X710/XL710 Family"; - break; - - case VNET_DPDK_PMD_I40EVF: - dev_type = "Intel X710/XL710 Family VF"; - break; - - case VNET_DPDK_PMD_FM10K: - dev_type = "Intel FM10000 Family Ethernet Switch"; - break; - - case VNET_DPDK_PMD_IGBVF: - dev_type = "Intel e1000 VF"; - break; - - case VNET_DPDK_PMD_VIRTIO: - dev_type = "Red Hat Virtio"; - break; - - case VNET_DPDK_PMD_IXGBEVF: - dev_type = "Intel 82599 VF"; - break; - - case VNET_DPDK_PMD_IXGBE: - dev_type = "Intel 82599"; - break; - - case VNET_DPDK_PMD_ENIC: - dev_type = "Cisco VIC"; - break; - - case VNET_DPDK_PMD_CXGBE: - dev_type = "Chelsio T4/T5"; - break; - - case VNET_DPDK_PMD_MLX5: - dev_type = "Mellanox ConnectX-4 Family"; - break; - - case VNET_DPDK_PMD_VMXNET3: - dev_type = "VMware VMXNET3"; - break; - - case VNET_DPDK_PMD_AF_PACKET: - dev_type = "af_packet"; - break; - - case VNET_DPDK_PMD_BOND: - dev_type = "Ethernet Bonding"; - break; - - case VNET_DPDK_PMD_DPAA2: - dev_type = "NXP DPAA2 Mac"; - break; - - default: - case VNET_DPDK_PMD_UNKNOWN: - dev_type = "### UNKNOWN ###"; - break; - } - - return format (s, dev_type); -} - -static u8 * -format_dpdk_link_status (u8 * s, va_list * args) -{ - dpdk_device_t *xd = va_arg (*args, dpdk_device_t *); - struct rte_eth_link *l = &xd->link; - vnet_main_t *vnm = vnet_get_main (); - vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, xd->vlib_hw_if_index); - - s = format (s, "%s 
", l->link_status ? "up" : "down"); - if (l->link_status) - { - u32 promisc = rte_eth_promiscuous_get (xd->device_index); - - s = format (s, "%s duplex ", (l->link_duplex == ETH_LINK_FULL_DUPLEX) ? - "full" : "half"); - s = format (s, "speed %u mtu %d %s\n", l->link_speed, - hi->max_packet_bytes, promisc ? " promisc" : ""); - } - else - s = format (s, "\n"); - - return s; -} - -#define _line_len 72 -#define _(v, str) \ -if (bitmap & v) { \ - if (format_get_indent (s) > next_split ) { \ - next_split += _line_len; \ - s = format(s,"\n%U", format_white_space, indent); \ - } \ - s = format(s, "%s ", str); \ -} - -static u8 * -format_dpdk_rss_hf_name (u8 * s, va_list * args) -{ - u64 bitmap = va_arg (*args, u64); - int next_split = _line_len; - int indent = format_get_indent (s); - - if (!bitmap) - return format (s, "none"); - - foreach_dpdk_rss_hf return s; -} - -static u8 * -format_dpdk_rx_offload_caps (u8 * s, va_list * args) -{ - u32 bitmap = va_arg (*args, u32); - int next_split = _line_len; - int indent = format_get_indent (s); - - if (!bitmap) - return format (s, "none"); - - foreach_dpdk_rx_offload_caps return s; -} - -static u8 * -format_dpdk_tx_offload_caps (u8 * s, va_list * args) -{ - u32 bitmap = va_arg (*args, u32); - int next_split = _line_len; - int indent = format_get_indent (s); - if (!bitmap) - return format (s, "none"); - - foreach_dpdk_tx_offload_caps return s; -} - -#undef _line_len -#undef _ - -u8 * -format_dpdk_device (u8 * s, va_list * args) -{ - u32 dev_instance = va_arg (*args, u32); - int verbose = va_arg (*args, int); - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd = vec_elt_at_index (dm->devices, dev_instance); - uword indent = format_get_indent (s); - f64 now = vlib_time_now (dm->vlib_main); - struct rte_eth_dev_info di; - - dpdk_update_counters (xd, now); - dpdk_update_link_state (xd, now); - - s = format (s, "%U\n%Ucarrier %U", - format_dpdk_device_type, xd->device_index, - format_white_space, indent + 2, format_dpdk_link_status, xd); - - rte_eth_dev_info_get (xd->device_index, &di); - - if (verbose > 1 && xd->flags & DPDK_DEVICE_FLAG_PMD) - { - struct rte_pci_device *pci; - struct rte_eth_rss_conf rss_conf; - int vlan_off; - int retval; - - rss_conf.rss_key = 0; - retval = rte_eth_dev_rss_hash_conf_get (xd->device_index, &rss_conf); - if (retval < 0) - clib_warning ("rte_eth_dev_rss_hash_conf_get returned %d", retval); - pci = di.pci_dev; - - if (pci) - s = - format (s, - "%Upci id: device %04x:%04x subsystem %04x:%04x\n" - "%Upci address: %04x:%02x:%02x.%02x\n", - format_white_space, indent + 2, pci->id.vendor_id, - pci->id.device_id, pci->id.subsystem_vendor_id, - pci->id.subsystem_device_id, format_white_space, indent + 2, - pci->addr.domain, pci->addr.bus, pci->addr.devid, - pci->addr.function); - s = - format (s, "%Umax rx packet len: %d\n", format_white_space, - indent + 2, di.max_rx_pktlen); - s = - format (s, "%Umax num of queues: rx %d tx %d\n", format_white_space, - indent + 2, di.max_rx_queues, di.max_tx_queues); - s = - format (s, "%Upromiscuous: unicast %s all-multicast %s\n", - format_white_space, indent + 2, - rte_eth_promiscuous_get (xd->device_index) ? "on" : "off", - rte_eth_promiscuous_get (xd->device_index) ? "on" : "off"); - vlan_off = rte_eth_dev_get_vlan_offload (xd->device_index); - s = format (s, "%Uvlan offload: strip %s filter %s qinq %s\n", - format_white_space, indent + 2, - vlan_off & ETH_VLAN_STRIP_OFFLOAD ? "on" : "off", - vlan_off & ETH_VLAN_FILTER_OFFLOAD ? "on" : "off", - vlan_off & ETH_VLAN_EXTEND_OFFLOAD ? 
"on" : "off"); - s = format (s, "%Urx offload caps: %U\n", - format_white_space, indent + 2, - format_dpdk_rx_offload_caps, di.rx_offload_capa); - s = format (s, "%Utx offload caps: %U\n", - format_white_space, indent + 2, - format_dpdk_tx_offload_caps, di.tx_offload_capa); - s = format (s, "%Urss active: %U\n" - "%Urss supported: %U\n", - format_white_space, indent + 2, - format_dpdk_rss_hf_name, rss_conf.rss_hf, - format_white_space, indent + 2, - format_dpdk_rss_hf_name, di.flow_type_rss_offloads); - } - - s = format (s, "%Urx queues %d, rx desc %d, tx queues %d, tx desc %d\n", - format_white_space, indent + 2, - xd->rx_q_used, xd->nb_rx_desc, xd->tx_q_used, xd->nb_tx_desc); - - if (xd->cpu_socket > -1) - s = format (s, "%Ucpu socket %d\n", - format_white_space, indent + 2, xd->cpu_socket); - - /* $$$ MIB counters */ - { -#define _(N, V) \ - if ((xd->stats.V - xd->last_cleared_stats.V) != 0) { \ - s = format (s, "\n%U%-40U%16Ld", \ - format_white_space, indent + 2, \ - format_c_identifier, #N, \ - xd->stats.V - xd->last_cleared_stats.V); \ - } \ - - foreach_dpdk_counter -#undef _ - } - - u8 *xs = 0; - u32 i = 0; - struct rte_eth_xstat *xstat, *last_xstat; - struct rte_eth_xstat_name *xstat_names = 0; - int len = rte_eth_xstats_get_names (xd->device_index, NULL, 0); - vec_validate (xstat_names, len - 1); - rte_eth_xstats_get_names (xd->device_index, xstat_names, len); - - ASSERT (vec_len (xd->xstats) == vec_len (xd->last_cleared_xstats)); - - /* *INDENT-OFF* */ - vec_foreach_index(i, xd->xstats) - { - u64 delta = 0; - xstat = vec_elt_at_index(xd->xstats, i); - last_xstat = vec_elt_at_index(xd->last_cleared_xstats, i); - - delta = xstat->value - last_xstat->value; - if (verbose == 2 || (verbose && delta)) - { - /* format_c_identifier doesn't like c strings inside vector */ - u8 * name = format(0,"%s", xstat_names[i].name); - xs = format(xs, "\n%U%-38U%16Ld", - format_white_space, indent + 4, - format_c_identifier, name, delta); - vec_free(name); - } - } - /* *INDENT-ON* */ - - vec_free (xstat_names); - - if (xs) - { - s = format (s, "\n%Uextended stats:%v", - format_white_space, indent + 2, xs); - vec_free (xs); - } - - return s; -} - -u8 * -format_dpdk_tx_dma_trace (u8 * s, va_list * va) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); - CLIB_UNUSED (vnet_main_t * vnm) = vnet_get_main (); - dpdk_tx_dma_trace_t *t = va_arg (*va, dpdk_tx_dma_trace_t *); - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd = vec_elt_at_index (dm->devices, t->device_index); - uword indent = format_get_indent (s); - vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, xd->vlib_sw_if_index); - - s = format (s, "%U tx queue %d", - format_vnet_sw_interface_name, vnm, sw, t->queue_index); - - s = format (s, "\n%Ubuffer 0x%x: %U", - format_white_space, indent, - t->buffer_index, format_vlib_buffer, &t->buffer); - - s = format (s, "\n%U%U", format_white_space, indent, - format_ethernet_header_with_length, t->buffer.pre_data, - sizeof (t->buffer.pre_data)); - - return s; -} - -u8 * -format_dpdk_rx_dma_trace (u8 * s, va_list * va) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); - CLIB_UNUSED (vnet_main_t * vnm) = vnet_get_main (); - dpdk_rx_dma_trace_t *t = va_arg (*va, dpdk_rx_dma_trace_t *); - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd = vec_elt_at_index (dm->devices, t->device_index); - format_function_t *f; - uword indent = 
format_get_indent (s); - vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, xd->vlib_sw_if_index); - - s = format (s, "%U rx queue %d", - format_vnet_sw_interface_name, vnm, sw, t->queue_index); - - s = format (s, "\n%Ubuffer 0x%x: %U", - format_white_space, indent, - t->buffer_index, format_vlib_buffer, &t->buffer); - - s = format (s, "\n%U%U", - format_white_space, indent, - format_dpdk_rte_mbuf, &t->mb, &t->data); - - if (vm->trace_main.verbose) - { - s = format (s, "\n%UPacket Dump%s", format_white_space, indent + 2, - t->mb.data_len > sizeof (t->data) ? " (truncated)" : ""); - s = format (s, "\n%U%U", format_white_space, indent + 4, - format_hexdump, &t->data, - t->mb.data_len > - sizeof (t->data) ? sizeof (t->data) : t->mb.data_len); - } - f = node->format_buffer; - if (!f) - f = format_hex_bytes; - s = format (s, "\n%U%U", format_white_space, indent, - f, t->buffer.pre_data, sizeof (t->buffer.pre_data)); - - return s; -} - - -static inline u8 * -format_dpdk_pkt_types (u8 * s, va_list * va) -{ - u32 *pkt_types = va_arg (*va, u32 *); - uword indent __attribute__ ((unused)) = format_get_indent (s) + 2; - - if (!*pkt_types) - return s; - - s = format (s, "Packet Types"); - -#define _(L, F, S) \ - if ((*pkt_types & RTE_PTYPE_##L##_MASK) == RTE_PTYPE_##L##_##F) \ - { \ - s = format (s, "\n%U%s (0x%04x) %s", format_white_space, indent, \ - "RTE_PTYPE_" #L "_" #F, RTE_PTYPE_##L##_##F, S); \ - } - - foreach_dpdk_pkt_type -#undef _ - return s; -} - -static inline u8 * -format_dpdk_pkt_offload_flags (u8 * s, va_list * va) -{ - u64 *ol_flags = va_arg (*va, u64 *); - uword indent = format_get_indent (s) + 2; - - if (!*ol_flags) - return s; - - s = format (s, "Packet Offload Flags"); - -#define _(F, S) \ - if (*ol_flags & F) \ - { \ - s = format (s, "\n%U%s (0x%04x) %s", \ - format_white_space, indent, #F, F, S); \ - } - - foreach_dpdk_pkt_offload_flag -#undef _ - return s; -} - -u8 * -format_dpdk_rte_mbuf_vlan (u8 * s, va_list * va) -{ - ethernet_vlan_header_tv_t *vlan_hdr = - va_arg (*va, ethernet_vlan_header_tv_t *); - - if (clib_net_to_host_u16 (vlan_hdr->type) == ETHERNET_TYPE_DOT1AD) - { - s = format (s, "%U 802.1q vlan ", - format_ethernet_vlan_tci, - clib_net_to_host_u16 (vlan_hdr->priority_cfi_and_id)); - vlan_hdr++; - } - - s = format (s, "%U", - format_ethernet_vlan_tci, - clib_net_to_host_u16 (vlan_hdr->priority_cfi_and_id)); - - return s; -} - -u8 * -format_dpdk_rte_mbuf (u8 * s, va_list * va) -{ - struct rte_mbuf *mb = va_arg (*va, struct rte_mbuf *); - ethernet_header_t *eth_hdr = va_arg (*va, ethernet_header_t *); - uword indent = format_get_indent (s) + 2; - - s = format (s, "PKT MBUF: port %d, nb_segs %d, pkt_len %d" - "\n%Ubuf_len %d, data_len %d, ol_flags 0x%x, data_off %d, phys_addr 0x%x" - "\n%Upacket_type 0x%x", - mb->port, mb->nb_segs, mb->pkt_len, - format_white_space, indent, - mb->buf_len, mb->data_len, mb->ol_flags, mb->data_off, - mb->buf_physaddr, format_white_space, indent, mb->packet_type); - - if (mb->ol_flags) - s = format (s, "\n%U%U", format_white_space, indent, - format_dpdk_pkt_offload_flags, &mb->ol_flags); - - if ((mb->ol_flags & PKT_RX_VLAN_PKT) && - ((mb->ol_flags & (PKT_RX_VLAN_STRIPPED | PKT_RX_QINQ_STRIPPED)) == 0)) - { - ethernet_vlan_header_tv_t *vlan_hdr = - ((ethernet_vlan_header_tv_t *) & (eth_hdr->type)); - s = format (s, " %U", format_dpdk_rte_mbuf_vlan, vlan_hdr); - } - - if (mb->packet_type) - s = format (s, "\n%U%U", format_white_space, indent, - format_dpdk_pkt_types, &mb->packet_type); - - return s; -} - -/* FIXME is this function used? 
*/ -#if 0 -uword -unformat_socket_mem (unformat_input_t * input, va_list * va) -{ - uword **r = va_arg (*va, uword **); - int i = 0; - u32 mem; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, ",")) - hash_set (*r, i, 1024); - else if (unformat (input, "%u,", &mem)) - hash_set (*r, i, mem); - else if (unformat (input, "%u", &mem)) - hash_set (*r, i, mem); - else - { - unformat_put_input (input); - goto done; - } - i++; - } - -done: - return 1; -} -#endif - -clib_error_t * -unformat_rss_fn (unformat_input_t * input, uword * rss_fn) -{ - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (0) - ; -#undef _ -#define _(f, s) \ - else if (unformat (input, s)) \ - *rss_fn |= f; - - foreach_dpdk_rss_hf -#undef _ - else - { - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - } - } - return 0; -} - -clib_error_t * -unformat_hqos (unformat_input_t * input, dpdk_device_config_hqos_t * hqos) -{ - clib_error_t *error = 0; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "hqos-thread %u", &hqos->hqos_thread)) - hqos->hqos_thread_valid = 1; - else - { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - break; - } - } - - return error; -} - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/hqos.c b/src/vnet/devices/dpdk/hqos.c deleted file mode 100644 index d68bc48f..00000000 --- a/src/vnet/devices/dpdk/hqos.c +++ /dev/null @@ -1,775 +0,0 @@ -/* - * Copyright(c) 2016 Intel Corporation. All rights reserved. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include /* enumerate all vlib messages */ - -#define vl_typedefs /* define message structures */ -#include -#undef vl_typedefs - -/* instantiate all the print functions we know about */ -#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) -#define vl_printfun -#include -#undef vl_printfun - -#include "dpdk_priv.h" - -dpdk_main_t dpdk_main; - -/*** - * - * HQoS default configuration values - * - ***/ - -static dpdk_device_config_hqos_t hqos_params_default = { - .hqos_thread_valid = 0, - - .swq_size = 4096, - .burst_enq = 256, - .burst_deq = 220, - - /* - * Packet field to identify the subport. - * - * Default value: Since only one subport is defined by default (see below: - * n_subports_per_port = 1), the subport ID is hardcoded to 0. - */ - .pktfield0_slabpos = 0, - .pktfield0_slabmask = 0, - - /* - * Packet field to identify the pipe. - * - * Default value: Assuming Ethernet/IPv4/UDP packets, UDP payload bits 12 .. 
23 - */ - .pktfield1_slabpos = 40, - .pktfield1_slabmask = 0x0000000FFF000000LLU, - - /* Packet field used as index into TC translation table to identify the traffic - * class and queue. - * - * Default value: Assuming Ethernet/IPv4 packets, IPv4 DSCP field - */ - .pktfield2_slabpos = 8, - .pktfield2_slabmask = 0x00000000000000FCLLU, - .tc_table = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - }, - - /* port */ - .port = { - .name = NULL, /* Set at init */ - .socket = 0, /* Set at init */ - .rate = 1250000000, /* Assuming 10GbE port */ - .mtu = 14 + 1500, /* Assuming Ethernet/IPv4 pkt (Ethernet FCS not included) */ - .frame_overhead = RTE_SCHED_FRAME_OVERHEAD_DEFAULT, - .n_subports_per_port = 1, - .n_pipes_per_subport = 4096, - .qsize = {64, 64, 64, 64}, - .pipe_profiles = NULL, /* Set at config */ - .n_pipe_profiles = 1, - -#ifdef RTE_SCHED_RED - .red_params = { - /* Traffic Class 0 Colors Green / Yellow / Red */ - [0][0] = {.min_th = 48,.max_th = 64,.maxp_inv = - 10,.wq_log2 = 9}, - [0][1] = {.min_th = 40,.max_th = 64,.maxp_inv = - 10,.wq_log2 = 9}, - [0][2] = {.min_th = 32,.max_th = 64,.maxp_inv = - 10,.wq_log2 = 9}, - - /* Traffic Class 1 - Colors Green / Yellow / Red */ - [1][0] = {.min_th = 48,.max_th = 64,.maxp_inv = - 10,.wq_log2 = 9}, - [1][1] = {.min_th = 40,.max_th = 64,.maxp_inv = - 10,.wq_log2 = 9}, - [1][2] = {.min_th = 32,.max_th = 64,.maxp_inv = - 10,.wq_log2 = 9}, - - /* Traffic Class 2 - Colors Green / Yellow / Red */ - [2][0] = {.min_th = 48,.max_th = 64,.maxp_inv = - 10,.wq_log2 = 9}, - [2][1] = {.min_th = 40,.max_th = 64,.maxp_inv = - 10,.wq_log2 = 9}, - [2][2] = {.min_th = 32,.max_th = 64,.maxp_inv = - 10,.wq_log2 = 9}, - - /* Traffic Class 3 - Colors Green / Yellow / Red */ - [3][0] = {.min_th = 48,.max_th = 64,.maxp_inv = - 10,.wq_log2 = 9}, - [3][1] = {.min_th = 40,.max_th = 64,.maxp_inv = - 10,.wq_log2 = 9}, - [3][2] = {.min_th = 32,.max_th = 64,.maxp_inv = - 10,.wq_log2 = 9} - }, -#endif /* RTE_SCHED_RED */ - }, -}; - -static struct rte_sched_subport_params hqos_subport_params_default = { - .tb_rate = 1250000000, /* 10GbE line rate (measured in bytes/second) */ - .tb_size = 1000000, - .tc_rate = {1250000000, 1250000000, 1250000000, 1250000000}, - .tc_period = 10, -}; - -static struct rte_sched_pipe_params hqos_pipe_params_default = { - .tb_rate = 305175, /* 10GbE line rate divided by 4K pipes */ - .tb_size = 1000000, - .tc_rate = {305175, 305175, 305175, 305175}, - .tc_period = 40, -#ifdef RTE_SCHED_SUBPORT_TC_OV - .tc_ov_weight = 1, -#endif - .wrr_weights = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, -}; - -/*** - * - * HQoS configuration - * - ***/ - -int -dpdk_hqos_validate_mask (u64 mask, u32 n) -{ - int count = __builtin_popcountll (mask); - int pos_lead = sizeof (u64) * 8 - __builtin_clzll (mask); - int pos_trail = __builtin_ctzll (mask); - int count_expected = __builtin_popcount (n - 1); - - /* Handle the exceptions */ - if (n == 0) - return -1; /* Error */ - - if ((mask == 0) && (n == 1)) - return 0; /* OK */ - - if (((mask == 0) && (n != 1)) || ((mask != 0) && (n == 1))) - return -2; /* Error */ - - /* Check that mask is contiguous */ - if ((pos_lead - pos_trail) != count) - return -3; /* Error */ - - /* Check that mask contains the expected number of bits set */ - if (count != count_expected) - return -4; /* Error */ - - return 0; /* OK */ -} - -void 
-dpdk_device_config_hqos_pipe_profile_default (dpdk_device_config_hqos_t * - hqos, u32 pipe_profile_id) -{ - memcpy (&hqos->pipe[pipe_profile_id], &hqos_pipe_params_default, - sizeof (hqos_pipe_params_default)); -} - -void -dpdk_device_config_hqos_default (dpdk_device_config_hqos_t * hqos) -{ - struct rte_sched_subport_params *subport_params; - struct rte_sched_pipe_params *pipe_params; - u32 *pipe_map; - u32 i; - - memcpy (hqos, &hqos_params_default, sizeof (hqos_params_default)); - - /* pipe */ - vec_add2 (hqos->pipe, pipe_params, hqos->port.n_pipe_profiles); - - for (i = 0; i < vec_len (hqos->pipe); i++) - memcpy (&pipe_params[i], - &hqos_pipe_params_default, sizeof (hqos_pipe_params_default)); - - hqos->port.pipe_profiles = hqos->pipe; - - /* subport */ - vec_add2 (hqos->subport, subport_params, hqos->port.n_subports_per_port); - - for (i = 0; i < vec_len (hqos->subport); i++) - memcpy (&subport_params[i], - &hqos_subport_params_default, - sizeof (hqos_subport_params_default)); - - /* pipe profile */ - vec_add2 (hqos->pipe_map, - pipe_map, - hqos->port.n_subports_per_port * hqos->port.n_pipes_per_subport); - - for (i = 0; i < vec_len (hqos->pipe_map); i++) - pipe_map[i] = 0; -} - -/*** - * - * HQoS init - * - ***/ - -clib_error_t * -dpdk_port_setup_hqos (dpdk_device_t * xd, dpdk_device_config_hqos_t * hqos) -{ - vlib_thread_main_t *tm = vlib_get_thread_main (); - char name[32]; - u32 subport_id, i; - int rv; - - /* Detect the set of worker threads */ - int worker_thread_first = 0; - int worker_thread_count = 0; - - uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers"); - vlib_thread_registration_t *tr = - p ? (vlib_thread_registration_t *) p[0] : 0; - - if (tr && tr->count > 0) - { - worker_thread_first = tr->first_index; - worker_thread_count = tr->count; - } - - /* Allocate the per-thread device data array */ - vec_validate_aligned (xd->hqos_wt, tm->n_vlib_mains - 1, - CLIB_CACHE_LINE_BYTES); - memset (xd->hqos_wt, 0, tm->n_vlib_mains * sizeof (xd->hqos_wt[0])); - - vec_validate_aligned (xd->hqos_ht, 0, CLIB_CACHE_LINE_BYTES); - memset (xd->hqos_ht, 0, sizeof (xd->hqos_ht[0])); - - /* Allocate space for one SWQ per worker thread in the I/O TX thread data structure */ - vec_validate (xd->hqos_ht->swq, worker_thread_count); - - /* SWQ */ - for (i = 0; i < worker_thread_count + 1; i++) - { - u32 swq_flags = RING_F_SP_ENQ | RING_F_SC_DEQ; - - snprintf (name, sizeof (name), "SWQ-worker%u-to-device%u", i, - xd->device_index); - xd->hqos_ht->swq[i] = - rte_ring_create (name, hqos->swq_size, xd->cpu_socket, swq_flags); - if (xd->hqos_ht->swq[i] == NULL) - return clib_error_return (0, - "SWQ-worker%u-to-device%u: rte_ring_create err", - i, xd->device_index); - } - - /* - * HQoS - */ - - /* HQoS port */ - snprintf (name, sizeof (name), "HQoS%u", xd->device_index); - hqos->port.name = strdup (name); - if (hqos->port.name == NULL) - return clib_error_return (0, "HQoS%u: strdup err", xd->device_index); - - hqos->port.socket = rte_eth_dev_socket_id (xd->device_index); - if (hqos->port.socket == SOCKET_ID_ANY) - hqos->port.socket = 0; - - xd->hqos_ht->hqos = rte_sched_port_config (&hqos->port); - if (xd->hqos_ht->hqos == NULL) - return clib_error_return (0, "HQoS%u: rte_sched_port_config err", - xd->device_index); - - /* HQoS subport */ - for (subport_id = 0; subport_id < hqos->port.n_subports_per_port; - subport_id++) - { - u32 pipe_id; - - rv = - rte_sched_subport_config (xd->hqos_ht->hqos, subport_id, - &hqos->subport[subport_id]); - if (rv) - return clib_error_return (0, - 
"HQoS%u subport %u: rte_sched_subport_config err (%d)", - xd->device_index, subport_id, rv); - - /* HQoS pipe */ - for (pipe_id = 0; pipe_id < hqos->port.n_pipes_per_subport; pipe_id++) - { - u32 pos = subport_id * hqos->port.n_pipes_per_subport + pipe_id; - u32 profile_id = hqos->pipe_map[pos]; - - rv = - rte_sched_pipe_config (xd->hqos_ht->hqos, subport_id, pipe_id, - profile_id); - if (rv) - return clib_error_return (0, - "HQoS%u subport %u pipe %u: rte_sched_pipe_config err (%d)", - xd->device_index, subport_id, pipe_id, - rv); - } - } - - /* Set up per-thread device data for the I/O TX thread */ - xd->hqos_ht->hqos_burst_enq = hqos->burst_enq; - xd->hqos_ht->hqos_burst_deq = hqos->burst_deq; - vec_validate (xd->hqos_ht->pkts_enq, 2 * hqos->burst_enq - 1); - vec_validate (xd->hqos_ht->pkts_deq, hqos->burst_deq - 1); - xd->hqos_ht->pkts_enq_len = 0; - xd->hqos_ht->swq_pos = 0; - xd->hqos_ht->flush_count = 0; - - /* Set up per-thread device data for each worker thread */ - for (i = 0; i < worker_thread_count + 1; i++) - { - u32 tid; - if (i) - tid = worker_thread_first + (i - 1); - else - tid = i; - - xd->hqos_wt[tid].swq = xd->hqos_ht->swq[i]; - xd->hqos_wt[tid].hqos_field0_slabpos = hqos->pktfield0_slabpos; - xd->hqos_wt[tid].hqos_field0_slabmask = hqos->pktfield0_slabmask; - xd->hqos_wt[tid].hqos_field0_slabshr = - __builtin_ctzll (hqos->pktfield0_slabmask); - xd->hqos_wt[tid].hqos_field1_slabpos = hqos->pktfield1_slabpos; - xd->hqos_wt[tid].hqos_field1_slabmask = hqos->pktfield1_slabmask; - xd->hqos_wt[tid].hqos_field1_slabshr = - __builtin_ctzll (hqos->pktfield1_slabmask); - xd->hqos_wt[tid].hqos_field2_slabpos = hqos->pktfield2_slabpos; - xd->hqos_wt[tid].hqos_field2_slabmask = hqos->pktfield2_slabmask; - xd->hqos_wt[tid].hqos_field2_slabshr = - __builtin_ctzll (hqos->pktfield2_slabmask); - memcpy (xd->hqos_wt[tid].hqos_tc_table, hqos->tc_table, - sizeof (hqos->tc_table)); - } - - return 0; -} - -/*** - * - * HQoS run-time - * - ***/ -/* - * dpdk_hqos_thread - Contains the main loop of an HQoS thread. 
- * - * w - * Information for the current thread - */ -static_always_inline void -dpdk_hqos_thread_internal_hqos_dbg_bypass (vlib_main_t * vm) -{ - dpdk_main_t *dm = &dpdk_main; - u32 cpu_index = vm->cpu_index; - u32 dev_pos; - - dev_pos = 0; - while (1) - { - vlib_worker_thread_barrier_check (); - - u32 n_devs = vec_len (dm->devices_by_hqos_cpu[cpu_index]); - if (dev_pos >= n_devs) - dev_pos = 0; - - dpdk_device_and_queue_t *dq = - vec_elt_at_index (dm->devices_by_hqos_cpu[cpu_index], dev_pos); - dpdk_device_t *xd = vec_elt_at_index (dm->devices, dq->device); - - dpdk_device_hqos_per_hqos_thread_t *hqos = xd->hqos_ht; - u32 device_index = xd->device_index; - u16 queue_id = dq->queue_id; - - struct rte_mbuf **pkts_enq = hqos->pkts_enq; - u32 pkts_enq_len = hqos->pkts_enq_len; - u32 swq_pos = hqos->swq_pos; - u32 n_swq = vec_len (hqos->swq), i; - u32 flush_count = hqos->flush_count; - - for (i = 0; i < n_swq; i++) - { - /* Get current SWQ for this device */ - struct rte_ring *swq = hqos->swq[swq_pos]; - - /* Read SWQ burst to packet buffer of this device */ - pkts_enq_len += rte_ring_sc_dequeue_burst (swq, - (void **) - &pkts_enq[pkts_enq_len], - hqos->hqos_burst_enq); - - /* Get next SWQ for this device */ - swq_pos++; - if (swq_pos >= n_swq) - swq_pos = 0; - hqos->swq_pos = swq_pos; - - /* HWQ TX enqueue when burst available */ - if (pkts_enq_len >= hqos->hqos_burst_enq) - { - u32 n_pkts = rte_eth_tx_burst (device_index, - (uint16_t) queue_id, - pkts_enq, - (uint16_t) pkts_enq_len); - - for (; n_pkts < pkts_enq_len; n_pkts++) - rte_pktmbuf_free (pkts_enq[n_pkts]); - - pkts_enq_len = 0; - flush_count = 0; - break; - } - } - if (pkts_enq_len) - { - flush_count++; - if (PREDICT_FALSE (flush_count == HQOS_FLUSH_COUNT_THRESHOLD)) - { - rte_sched_port_enqueue (hqos->hqos, pkts_enq, pkts_enq_len); - - pkts_enq_len = 0; - flush_count = 0; - } - } - hqos->pkts_enq_len = pkts_enq_len; - hqos->flush_count = flush_count; - - /* Advance to next device */ - dev_pos++; - } -} - -static_always_inline void -dpdk_hqos_thread_internal (vlib_main_t * vm) -{ - dpdk_main_t *dm = &dpdk_main; - u32 cpu_index = vm->cpu_index; - u32 dev_pos; - - dev_pos = 0; - while (1) - { - vlib_worker_thread_barrier_check (); - - u32 n_devs = vec_len (dm->devices_by_hqos_cpu[cpu_index]); - if (PREDICT_FALSE (n_devs == 0)) - { - dev_pos = 0; - continue; - } - if (dev_pos >= n_devs) - dev_pos = 0; - - dpdk_device_and_queue_t *dq = - vec_elt_at_index (dm->devices_by_hqos_cpu[cpu_index], dev_pos); - dpdk_device_t *xd = vec_elt_at_index (dm->devices, dq->device); - - dpdk_device_hqos_per_hqos_thread_t *hqos = xd->hqos_ht; - u32 device_index = xd->device_index; - u16 queue_id = dq->queue_id; - - struct rte_mbuf **pkts_enq = hqos->pkts_enq; - struct rte_mbuf **pkts_deq = hqos->pkts_deq; - u32 pkts_enq_len = hqos->pkts_enq_len; - u32 swq_pos = hqos->swq_pos; - u32 n_swq = vec_len (hqos->swq), i; - u32 flush_count = hqos->flush_count; - - /* - * SWQ dequeue and HQoS enqueue for current device - */ - for (i = 0; i < n_swq; i++) - { - /* Get current SWQ for this device */ - struct rte_ring *swq = hqos->swq[swq_pos]; - - /* Read SWQ burst to packet buffer of this device */ - pkts_enq_len += rte_ring_sc_dequeue_burst (swq, - (void **) - &pkts_enq[pkts_enq_len], - hqos->hqos_burst_enq); - - /* Get next SWQ for this device */ - swq_pos++; - if (swq_pos >= n_swq) - swq_pos = 0; - hqos->swq_pos = swq_pos; - - /* HQoS enqueue when burst available */ - if (pkts_enq_len >= hqos->hqos_burst_enq) - { - rte_sched_port_enqueue (hqos->hqos, pkts_enq, 
pkts_enq_len); - - pkts_enq_len = 0; - flush_count = 0; - break; - } - } - if (pkts_enq_len) - { - flush_count++; - if (PREDICT_FALSE (flush_count == HQOS_FLUSH_COUNT_THRESHOLD)) - { - rte_sched_port_enqueue (hqos->hqos, pkts_enq, pkts_enq_len); - - pkts_enq_len = 0; - flush_count = 0; - } - } - hqos->pkts_enq_len = pkts_enq_len; - hqos->flush_count = flush_count; - - /* - * HQoS dequeue and HWQ TX enqueue for current device - */ - { - u32 pkts_deq_len, n_pkts; - - pkts_deq_len = rte_sched_port_dequeue (hqos->hqos, - pkts_deq, - hqos->hqos_burst_deq); - - for (n_pkts = 0; n_pkts < pkts_deq_len;) - n_pkts += rte_eth_tx_burst (device_index, - (uint16_t) queue_id, - &pkts_deq[n_pkts], - (uint16_t) (pkts_deq_len - n_pkts)); - } - - /* Advance to next device */ - dev_pos++; - } -} - -void -dpdk_hqos_thread (vlib_worker_thread_t * w) -{ - vlib_main_t *vm; - vlib_thread_main_t *tm = vlib_get_thread_main (); - dpdk_main_t *dm = &dpdk_main; - - vm = vlib_get_main (); - - ASSERT (vm->cpu_index == os_get_cpu_number ()); - - clib_time_init (&vm->clib_time); - clib_mem_set_heap (w->thread_mheap); - - /* Wait until the dpdk init sequence is complete */ - while (tm->worker_thread_release == 0) - vlib_worker_thread_barrier_check (); - - if (vec_len (dm->devices_by_hqos_cpu[vm->cpu_index]) == 0) - return - clib_error - ("current I/O TX thread does not have any devices assigned to it"); - - if (DPDK_HQOS_DBG_BYPASS) - dpdk_hqos_thread_internal_hqos_dbg_bypass (vm); - else - dpdk_hqos_thread_internal (vm); -} - -void -dpdk_hqos_thread_fn (void *arg) -{ - vlib_worker_thread_t *w = (vlib_worker_thread_t *) arg; - vlib_worker_thread_init (w); - dpdk_hqos_thread (w); -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_THREAD (hqos_thread_reg, static) = -{ - .name = "hqos-threads", - .short_name = "hqos-threads", - .function = dpdk_hqos_thread_fn, -}; -/* *INDENT-ON* */ - -/* - * HQoS run-time code to be called by the worker threads - */ -#define BITFIELD(byte_array, slab_pos, slab_mask, slab_shr) \ -({ \ - u64 slab = *((u64 *) &byte_array[slab_pos]); \ - u64 val = (rte_be_to_cpu_64(slab) & slab_mask) >> slab_shr; \ - val; \ -}) - -#define RTE_SCHED_PORT_HIERARCHY(subport, pipe, traffic_class, queue, color) \ - ((((u64) (queue)) & 0x3) | \ - ((((u64) (traffic_class)) & 0x3) << 2) | \ - ((((u64) (color)) & 0x3) << 4) | \ - ((((u64) (subport)) & 0xFFFF) << 16) | \ - ((((u64) (pipe)) & 0xFFFFFFFF) << 32)) - -void -dpdk_hqos_metadata_set (dpdk_device_hqos_per_worker_thread_t * hqos, - struct rte_mbuf **pkts, u32 n_pkts) -{ - u32 i; - - for (i = 0; i < (n_pkts & (~0x3)); i += 4) - { - struct rte_mbuf *pkt0 = pkts[i]; - struct rte_mbuf *pkt1 = pkts[i + 1]; - struct rte_mbuf *pkt2 = pkts[i + 2]; - struct rte_mbuf *pkt3 = pkts[i + 3]; - - u8 *pkt0_data = rte_pktmbuf_mtod (pkt0, u8 *); - u8 *pkt1_data = rte_pktmbuf_mtod (pkt1, u8 *); - u8 *pkt2_data = rte_pktmbuf_mtod (pkt2, u8 *); - u8 *pkt3_data = rte_pktmbuf_mtod (pkt3, u8 *); - - u64 pkt0_subport = BITFIELD (pkt0_data, hqos->hqos_field0_slabpos, - hqos->hqos_field0_slabmask, - hqos->hqos_field0_slabshr); - u64 pkt0_pipe = BITFIELD (pkt0_data, hqos->hqos_field1_slabpos, - hqos->hqos_field1_slabmask, - hqos->hqos_field1_slabshr); - u64 pkt0_dscp = BITFIELD (pkt0_data, hqos->hqos_field2_slabpos, - hqos->hqos_field2_slabmask, - hqos->hqos_field2_slabshr); - u32 pkt0_tc = hqos->hqos_tc_table[pkt0_dscp & 0x3F] >> 2; - u32 pkt0_tc_q = hqos->hqos_tc_table[pkt0_dscp & 0x3F] & 0x3; - - u64 pkt1_subport = BITFIELD (pkt1_data, hqos->hqos_field0_slabpos, - hqos->hqos_field0_slabmask, - 
hqos->hqos_field0_slabshr); - u64 pkt1_pipe = BITFIELD (pkt1_data, hqos->hqos_field1_slabpos, - hqos->hqos_field1_slabmask, - hqos->hqos_field1_slabshr); - u64 pkt1_dscp = BITFIELD (pkt1_data, hqos->hqos_field2_slabpos, - hqos->hqos_field2_slabmask, - hqos->hqos_field2_slabshr); - u32 pkt1_tc = hqos->hqos_tc_table[pkt1_dscp & 0x3F] >> 2; - u32 pkt1_tc_q = hqos->hqos_tc_table[pkt1_dscp & 0x3F] & 0x3; - - u64 pkt2_subport = BITFIELD (pkt2_data, hqos->hqos_field0_slabpos, - hqos->hqos_field0_slabmask, - hqos->hqos_field0_slabshr); - u64 pkt2_pipe = BITFIELD (pkt2_data, hqos->hqos_field1_slabpos, - hqos->hqos_field1_slabmask, - hqos->hqos_field1_slabshr); - u64 pkt2_dscp = BITFIELD (pkt2_data, hqos->hqos_field2_slabpos, - hqos->hqos_field2_slabmask, - hqos->hqos_field2_slabshr); - u32 pkt2_tc = hqos->hqos_tc_table[pkt2_dscp & 0x3F] >> 2; - u32 pkt2_tc_q = hqos->hqos_tc_table[pkt2_dscp & 0x3F] & 0x3; - - u64 pkt3_subport = BITFIELD (pkt3_data, hqos->hqos_field0_slabpos, - hqos->hqos_field0_slabmask, - hqos->hqos_field0_slabshr); - u64 pkt3_pipe = BITFIELD (pkt3_data, hqos->hqos_field1_slabpos, - hqos->hqos_field1_slabmask, - hqos->hqos_field1_slabshr); - u64 pkt3_dscp = BITFIELD (pkt3_data, hqos->hqos_field2_slabpos, - hqos->hqos_field2_slabmask, - hqos->hqos_field2_slabshr); - u32 pkt3_tc = hqos->hqos_tc_table[pkt3_dscp & 0x3F] >> 2; - u32 pkt3_tc_q = hqos->hqos_tc_table[pkt3_dscp & 0x3F] & 0x3; - - u64 pkt0_sched = RTE_SCHED_PORT_HIERARCHY (pkt0_subport, - pkt0_pipe, - pkt0_tc, - pkt0_tc_q, - 0); - u64 pkt1_sched = RTE_SCHED_PORT_HIERARCHY (pkt1_subport, - pkt1_pipe, - pkt1_tc, - pkt1_tc_q, - 0); - u64 pkt2_sched = RTE_SCHED_PORT_HIERARCHY (pkt2_subport, - pkt2_pipe, - pkt2_tc, - pkt2_tc_q, - 0); - u64 pkt3_sched = RTE_SCHED_PORT_HIERARCHY (pkt3_subport, - pkt3_pipe, - pkt3_tc, - pkt3_tc_q, - 0); - - pkt0->hash.sched.lo = pkt0_sched & 0xFFFFFFFF; - pkt0->hash.sched.hi = pkt0_sched >> 32; - pkt1->hash.sched.lo = pkt1_sched & 0xFFFFFFFF; - pkt1->hash.sched.hi = pkt1_sched >> 32; - pkt2->hash.sched.lo = pkt2_sched & 0xFFFFFFFF; - pkt2->hash.sched.hi = pkt2_sched >> 32; - pkt3->hash.sched.lo = pkt3_sched & 0xFFFFFFFF; - pkt3->hash.sched.hi = pkt3_sched >> 32; - } - - for (; i < n_pkts; i++) - { - struct rte_mbuf *pkt = pkts[i]; - - u8 *pkt_data = rte_pktmbuf_mtod (pkt, u8 *); - - u64 pkt_subport = BITFIELD (pkt_data, hqos->hqos_field0_slabpos, - hqos->hqos_field0_slabmask, - hqos->hqos_field0_slabshr); - u64 pkt_pipe = BITFIELD (pkt_data, hqos->hqos_field1_slabpos, - hqos->hqos_field1_slabmask, - hqos->hqos_field1_slabshr); - u64 pkt_dscp = BITFIELD (pkt_data, hqos->hqos_field2_slabpos, - hqos->hqos_field2_slabmask, - hqos->hqos_field2_slabshr); - u32 pkt_tc = hqos->hqos_tc_table[pkt_dscp & 0x3F] >> 2; - u32 pkt_tc_q = hqos->hqos_tc_table[pkt_dscp & 0x3F] & 0x3; - - u64 pkt_sched = RTE_SCHED_PORT_HIERARCHY (pkt_subport, - pkt_pipe, - pkt_tc, - pkt_tc_q, - 0); - - pkt->hash.sched.lo = pkt_sched & 0xFFFFFFFF; - pkt->hash.sched.hi = pkt_sched >> 32; - } -} - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/init.c b/src/vnet/devices/dpdk/init.c deleted file mode 100755 index 29423e15..00000000 --- a/src/vnet/devices/dpdk/init.c +++ /dev/null @@ -1,1801 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
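/*
 * Editor's sketch (standalone, not from the deleted files): what the BITFIELD
 * and RTE_SCHED_PORT_HIERARCHY macros in hqos.c above compute per packet.
 * rte_be_to_cpu_64() is approximated with __builtin_bswap64(), i.e. a
 * little-endian host is assumed; the helper names are illustrative only.
 */
#include <stdint.h>
#include <string.h>

static uint64_t
slab_extract (const uint8_t * pkt, uint32_t pos, uint64_t mask, uint32_t shr)
{
  uint64_t slab;
  memcpy (&slab, pkt + pos, sizeof (slab));	/* unaligned-safe 8-byte load */
  return (__builtin_bswap64 (slab) & mask) >> shr;
}

static uint64_t
sched_word (uint64_t subport, uint64_t pipe, uint64_t tc, uint64_t queue)
{
  /* queue: bits 0-1, tc: bits 2-3, color (0 here): bits 4-5,
     subport: bits 16-31, pipe: bits 32-63 -- the same packing as above */
  return (queue & 0x3) | ((tc & 0x3) << 2) |
    ((subport & 0xFFFF) << 16) | ((pipe & 0xFFFFFFFF) << 32);
}

/*
 * With the default slabs: pipe = slab_extract (pkt, 40, 0x0000000FFF000000ULL, 24)
 * and dscp = slab_extract (pkt, 8, 0xFCULL, 2) (byte 15, the IPv4 TOS of an
 * untagged Ethernet/IPv4 packet); tc_table[dscp & 0x3F] >> 2 and & 0x3 then give
 * the traffic class and queue packed by sched_word() into mbuf hash.sched.
 */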
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "dpdk_priv.h" - -dpdk_main_t dpdk_main; - -/* force linker to link functions used by vlib and declared weak */ -void *vlib_weakly_linked_functions[] = { - &rte_pktmbuf_init, - &rte_pktmbuf_pool_init, -}; - -#define LINK_STATE_ELOGS 0 - -#define DEFAULT_HUGE_DIR "/run/vpp/hugepages" -#define VPP_RUN_DIR "/run/vpp" - -/* Port configuration, mildly modified Intel app values */ - -static struct rte_eth_conf port_conf_template = { - .rxmode = { - .split_hdr_size = 0, - .header_split = 0, /**< Header Split disabled */ - .hw_ip_checksum = 0, /**< IP checksum offload disabled */ - .hw_vlan_filter = 0, /**< VLAN filtering disabled */ - .hw_strip_crc = 0, /**< CRC stripped by hardware */ - }, - .txmode = { - .mq_mode = ETH_MQ_TX_NONE, - }, -}; - -clib_error_t * -dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd) -{ - int rv; - int j; - - ASSERT (os_get_cpu_number () == 0); - - if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) - { - vnet_hw_interface_set_flags (dm->vnet_main, xd->vlib_hw_if_index, 0); - rte_eth_dev_stop (xd->device_index); - } - - rv = rte_eth_dev_configure (xd->device_index, xd->rx_q_used, - xd->tx_q_used, &xd->port_conf); - - if (rv < 0) - return clib_error_return (0, "rte_eth_dev_configure[%d]: err %d", - xd->device_index, rv); - - /* Set up one TX-queue per worker thread */ - for (j = 0; j < xd->tx_q_used; j++) - { - rv = rte_eth_tx_queue_setup (xd->device_index, j, xd->nb_tx_desc, - xd->cpu_socket, &xd->tx_conf); - - /* retry with any other CPU socket */ - if (rv < 0) - rv = rte_eth_tx_queue_setup (xd->device_index, j, xd->nb_tx_desc, - SOCKET_ID_ANY, &xd->tx_conf); - if (rv < 0) - break; - } - - if (rv < 0) - return clib_error_return (0, "rte_eth_tx_queue_setup[%d]: err %d", - xd->device_index, rv); - - for (j = 0; j < xd->rx_q_used; j++) - { - - rv = rte_eth_rx_queue_setup (xd->device_index, j, xd->nb_rx_desc, - xd->cpu_socket, 0, - dm-> - pktmbuf_pools[xd->cpu_socket_id_by_queue - [j]]); - - /* retry with any other CPU socket */ - if (rv < 0) - rv = rte_eth_rx_queue_setup (xd->device_index, j, xd->nb_rx_desc, - SOCKET_ID_ANY, 0, - dm-> - pktmbuf_pools[xd->cpu_socket_id_by_queue - [j]]); - if (rv < 0) - return clib_error_return (0, "rte_eth_rx_queue_setup[%d]: err %d", - xd->device_index, rv); - } - - if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) - { - int rv; - rv = rte_eth_dev_start (xd->device_index); - if (!rv && xd->default_mac_address) - rv = rte_eth_dev_default_mac_addr_set (xd->device_index, - (struct ether_addr *) - xd->default_mac_address); - if (rv < 0) - clib_warning ("rte_eth_dev_start %d returned %d", - xd->device_index, rv); - } - return 0; -} - -static u32 -dpdk_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags) -{ - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance); - u32 old = 0; - - if (ETHERNET_INTERFACE_FLAG_CONFIG_PROMISC (flags)) - { - old = (xd->flags & DPDK_DEVICE_FLAG_PROMISC) != 0; - - if 
(flags & ETHERNET_INTERFACE_FLAG_ACCEPT_ALL) - xd->flags |= DPDK_DEVICE_FLAG_PROMISC; - else - xd->flags &= ~DPDK_DEVICE_FLAG_PROMISC; - - if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) - { - if (xd->flags & DPDK_DEVICE_FLAG_PROMISC) - rte_eth_promiscuous_enable (xd->device_index); - else - rte_eth_promiscuous_disable (xd->device_index); - } - } - else if (ETHERNET_INTERFACE_FLAG_CONFIG_MTU (flags)) - { - /* - * DAW-FIXME: The Cisco VIC firmware does not provide an api for a - * driver to dynamically change the mtu. If/when the - * VIC firmware gets fixed, then this should be removed. - */ - if (xd->pmd == VNET_DPDK_PMD_ENIC) - { - struct rte_eth_dev_info dev_info; - - /* - * Restore mtu to what has been set by CIMC in the firmware cfg. - */ - rte_eth_dev_info_get (xd->device_index, &dev_info); - hi->max_packet_bytes = dev_info.max_rx_pktlen; - - vlib_cli_output (vlib_get_main (), - "Cisco VIC mtu can only be changed " - "using CIMC then rebooting the server!"); - } - else - { - int rv; - - xd->port_conf.rxmode.max_rx_pkt_len = hi->max_packet_bytes; - - if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) - rte_eth_dev_stop (xd->device_index); - - rv = rte_eth_dev_configure - (xd->device_index, xd->rx_q_used, xd->tx_q_used, &xd->port_conf); - - if (rv < 0) - vlib_cli_output (vlib_get_main (), - "rte_eth_dev_configure[%d]: err %d", - xd->device_index, rv); - - rte_eth_dev_set_mtu (xd->device_index, hi->max_packet_bytes); - - if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) - { - int rv = rte_eth_dev_start (xd->device_index); - if (!rv && xd->default_mac_address) - rv = rte_eth_dev_default_mac_addr_set (xd->device_index, - (struct ether_addr *) - xd->default_mac_address); - if (rv < 0) - clib_warning ("rte_eth_dev_start %d returned %d", - xd->device_index, rv); - } - } - } - return old; -} - -void -dpdk_device_lock_init (dpdk_device_t * xd) -{ - int q; - vec_validate (xd->lockp, xd->tx_q_used - 1); - for (q = 0; q < xd->tx_q_used; q++) - { - xd->lockp[q] = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, - CLIB_CACHE_LINE_BYTES); - memset ((void *) xd->lockp[q], 0, CLIB_CACHE_LINE_BYTES); - } -} - -void -dpdk_device_lock_free (dpdk_device_t * xd) -{ - int q; - - for (q = 0; q < vec_len (xd->lockp); q++) - clib_mem_free ((void *) xd->lockp[q]); - vec_free (xd->lockp); - xd->lockp = 0; -} - -static clib_error_t * -dpdk_lib_init (dpdk_main_t * dm) -{ - u32 nports; - u32 nb_desc = 0; - int i; - clib_error_t *error; - vlib_main_t *vm = vlib_get_main (); - vlib_thread_main_t *tm = vlib_get_thread_main (); - vnet_sw_interface_t *sw; - vnet_hw_interface_t *hi; - dpdk_device_t *xd; - vlib_pci_addr_t last_pci_addr; - u32 last_pci_addr_port = 0; - vlib_thread_registration_t *tr, *tr_hqos; - uword *p, *p_hqos; - - u32 next_cpu = 0, next_hqos_cpu = 0; - u8 af_packet_port_id = 0; - last_pci_addr.as_u32 = ~0; - - dm->input_cpu_first_index = 0; - dm->input_cpu_count = 1; - - /* find out which cpus will be used for input */ - p = hash_get_mem (tm->thread_registrations_by_name, "workers"); - tr = p ? (vlib_thread_registration_t *) p[0] : 0; - - if (tr && tr->count > 0) - { - dm->input_cpu_first_index = tr->first_index; - dm->input_cpu_count = tr->count; - } - - vec_validate_aligned (dm->devices_by_cpu, tm->n_vlib_mains - 1, - CLIB_CACHE_LINE_BYTES); - - dm->hqos_cpu_first_index = 0; - dm->hqos_cpu_count = 0; - - /* find out which cpus will be used for I/O TX */ - p_hqos = hash_get_mem (tm->thread_registrations_by_name, "hqos-threads"); - tr_hqos = p_hqos ? 
(vlib_thread_registration_t *) p_hqos[0] : 0; - - if (tr_hqos && tr_hqos->count > 0) - { - dm->hqos_cpu_first_index = tr_hqos->first_index; - dm->hqos_cpu_count = tr_hqos->count; - } - - vec_validate_aligned (dm->devices_by_hqos_cpu, tm->n_vlib_mains - 1, - CLIB_CACHE_LINE_BYTES); - - nports = rte_eth_dev_count (); - if (nports < 1) - { - clib_warning ("DPDK drivers found no ports..."); - } - - if (CLIB_DEBUG > 0) - clib_warning ("DPDK drivers found %d ports...", nports); - - /* - * All buffers are all allocated from the same rte_mempool. - * Thus they all have the same number of data bytes. - */ - dm->vlib_buffer_free_list_index = - vlib_buffer_get_or_create_free_list (vm, - VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES, - "dpdk rx"); - - if (dm->conf->enable_tcp_udp_checksum) - dm->buffer_flags_template &= ~(IP_BUFFER_L4_CHECKSUM_CORRECT - | IP_BUFFER_L4_CHECKSUM_COMPUTED); - - for (i = 0; i < nports; i++) - { - u8 addr[6]; - u8 vlan_strip = 0; - int j; - struct rte_eth_dev_info dev_info; - clib_error_t *rv; - struct rte_eth_link l; - dpdk_device_config_t *devconf = 0; - vlib_pci_addr_t pci_addr; - uword *p = 0; - - rte_eth_dev_info_get (i, &dev_info); - if (dev_info.pci_dev) /* bonded interface has no pci info */ - { - pci_addr.domain = dev_info.pci_dev->addr.domain; - pci_addr.bus = dev_info.pci_dev->addr.bus; - pci_addr.slot = dev_info.pci_dev->addr.devid; - pci_addr.function = dev_info.pci_dev->addr.function; - p = - hash_get (dm->conf->device_config_index_by_pci_addr, - pci_addr.as_u32); - } - - if (p) - devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]); - else - devconf = &dm->conf->default_devconf; - - /* Create vnet interface */ - vec_add2_aligned (dm->devices, xd, 1, CLIB_CACHE_LINE_BYTES); - xd->nb_rx_desc = DPDK_NB_RX_DESC_DEFAULT; - xd->nb_tx_desc = DPDK_NB_TX_DESC_DEFAULT; - xd->cpu_socket = (i8) rte_eth_dev_socket_id (i); - - /* Handle interface naming for devices with multiple ports sharing same PCI ID */ - if (dev_info.pci_dev) - { - struct rte_eth_dev_info di = { 0 }; - rte_eth_dev_info_get (i + 1, &di); - if (di.pci_dev && pci_addr.as_u32 != last_pci_addr.as_u32 && - memcmp (&dev_info.pci_dev->addr, &di.pci_dev->addr, - sizeof (struct rte_pci_addr)) == 0) - { - xd->interface_name_suffix = format (0, "0"); - last_pci_addr.as_u32 = pci_addr.as_u32; - last_pci_addr_port = i; - } - else if (pci_addr.as_u32 == last_pci_addr.as_u32) - { - xd->interface_name_suffix = - format (0, "%u", i - last_pci_addr_port); - } - else - { - last_pci_addr.as_u32 = ~0; - } - } - else - last_pci_addr.as_u32 = ~0; - - clib_memcpy (&xd->tx_conf, &dev_info.default_txconf, - sizeof (struct rte_eth_txconf)); - if (dm->conf->no_multi_seg) - { - xd->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS; - port_conf_template.rxmode.jumbo_frame = 0; - } - else - { - xd->tx_conf.txq_flags &= ~ETH_TXQ_FLAGS_NOMULTSEGS; - port_conf_template.rxmode.jumbo_frame = 1; - xd->flags |= DPDK_DEVICE_FLAG_MAYBE_MULTISEG; - } - - clib_memcpy (&xd->port_conf, &port_conf_template, - sizeof (struct rte_eth_conf)); - - xd->tx_q_used = clib_min (dev_info.max_tx_queues, tm->n_vlib_mains); - - if (devconf->num_tx_queues > 0 - && devconf->num_tx_queues < xd->tx_q_used) - xd->tx_q_used = clib_min (xd->tx_q_used, devconf->num_tx_queues); - - if (devconf->num_rx_queues > 1 && dm->use_rss == 0) - { - dm->use_rss = 1; - } - - if (devconf->num_rx_queues > 1 - && dev_info.max_rx_queues >= devconf->num_rx_queues) - { - xd->rx_q_used = devconf->num_rx_queues; - xd->port_conf.rxmode.mq_mode = ETH_MQ_RX_RSS; - if (devconf->rss_fn == 0) - 
xd->port_conf.rx_adv_conf.rss_conf.rss_hf = - ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP; - else - xd->port_conf.rx_adv_conf.rss_conf.rss_hf = devconf->rss_fn; - } - else - xd->rx_q_used = 1; - - xd->flags |= DPDK_DEVICE_FLAG_PMD; - - /* workaround for drivers not setting driver_name */ - if ((!dev_info.driver_name) && (dev_info.pci_dev)) - dev_info.driver_name = dev_info.pci_dev->driver->driver.name; - - ASSERT (dev_info.driver_name); - - if (!xd->pmd) - { - - -#define _(s,f) else if (dev_info.driver_name && \ - !strcmp(dev_info.driver_name, s)) \ - xd->pmd = VNET_DPDK_PMD_##f; - if (0) - ; - foreach_dpdk_pmd -#undef _ - else - xd->pmd = VNET_DPDK_PMD_UNKNOWN; - - xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; - xd->nb_rx_desc = DPDK_NB_RX_DESC_DEFAULT; - xd->nb_tx_desc = DPDK_NB_TX_DESC_DEFAULT; - - switch (xd->pmd) - { - /* 1G adapters */ - case VNET_DPDK_PMD_E1000EM: - case VNET_DPDK_PMD_IGB: - case VNET_DPDK_PMD_IGBVF: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G; - break; - - /* 10G adapters */ - case VNET_DPDK_PMD_IXGBE: - case VNET_DPDK_PMD_IXGBEVF: - case VNET_DPDK_PMD_THUNDERX: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; - break; - case VNET_DPDK_PMD_DPAA2: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; - break; - - /* Cisco VIC */ - case VNET_DPDK_PMD_ENIC: - rte_eth_link_get_nowait (i, &l); - xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE; - if (l.link_speed == 40000) - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; - else - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; - break; - - /* Intel Fortville */ - case VNET_DPDK_PMD_I40E: - case VNET_DPDK_PMD_I40EVF: - xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE; - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; - - switch (dev_info.pci_dev->id.device_id) - { - case I40E_DEV_ID_10G_BASE_T: - case I40E_DEV_ID_SFP_XL710: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; - break; - case I40E_DEV_ID_QSFP_A: - case I40E_DEV_ID_QSFP_B: - case I40E_DEV_ID_QSFP_C: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; - break; - case I40E_DEV_ID_VF: - rte_eth_link_get_nowait (i, &l); - xd->port_type = l.link_speed == 10000 ? 
- VNET_DPDK_PORT_TYPE_ETH_10G : VNET_DPDK_PORT_TYPE_ETH_40G; - break; - default: - xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; - } - break; - - case VNET_DPDK_PMD_CXGBE: - switch (dev_info.pci_dev->id.device_id) - { - case 0x540d: /* T580-CR */ - case 0x5410: /* T580-LP-cr */ - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; - break; - case 0x5403: /* T540-CR */ - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; - break; - default: - xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; - } - break; - - case VNET_DPDK_PMD_MLX5: - { - char *pn_100g[] = { "MCX415A-CCAT", "MCX416A-CCAT", 0 }; - char *pn_40g[] = { "MCX413A-BCAT", "MCX414A-BCAT", - "MCX415A-BCAT", "MCX416A-BCAT", "MCX4131A-BCAT", 0 - }; - char *pn_10g[] = { "MCX4111A-XCAT", "MCX4121A-XCAT", 0 }; - - vlib_pci_device_t *pd = vlib_get_pci_device (&pci_addr); - u8 *pn = 0; - char **c; - int found = 0; - pn = format (0, "%U%c", - format_vlib_pci_vpd, pd->vpd_r, "PN", 0); - - if (!pn) - break; - - c = pn_100g; - while (!found && c[0]) - { - if (strncmp ((char *) pn, c[0], strlen (c[0])) == 0) - { - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_100G; - break; - } - c++; - } - - c = pn_40g; - while (!found && c[0]) - { - if (strncmp ((char *) pn, c[0], strlen (c[0])) == 0) - { - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; - break; - } - c++; - } - - c = pn_10g; - while (!found && c[0]) - { - if (strncmp ((char *) pn, c[0], strlen (c[0])) == 0) - { - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; - break; - } - c++; - } - - vec_free (pn); - } - - break; - /* Intel Red Rock Canyon */ - case VNET_DPDK_PMD_FM10K: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_SWITCH; - break; - - /* virtio */ - case VNET_DPDK_PMD_VIRTIO: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G; - xd->nb_rx_desc = DPDK_NB_RX_DESC_VIRTIO; - xd->nb_tx_desc = DPDK_NB_TX_DESC_VIRTIO; - break; - - /* vmxnet3 */ - case VNET_DPDK_PMD_VMXNET3: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G; - xd->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS; - break; - - case VNET_DPDK_PMD_AF_PACKET: - xd->port_type = VNET_DPDK_PORT_TYPE_AF_PACKET; - xd->af_packet_port_id = af_packet_port_id++; - break; - - case VNET_DPDK_PMD_BOND: - xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE; - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_BOND; - break; - - default: - xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; - } - - if (devconf->num_rx_desc) - xd->nb_rx_desc = devconf->num_rx_desc; - - if (devconf->num_tx_desc) - xd->nb_tx_desc = devconf->num_tx_desc; - } - - /* - * Ensure default mtu is not > the mtu read from the hardware. - * Otherwise rte_eth_dev_configure() will fail and the port will - * not be available. - */ - if (ETHERNET_MAX_PACKET_BYTES > dev_info.max_rx_pktlen) - { - /* - * This device does not support the platforms's max frame - * size. Use it's advertised mru instead. - */ - xd->port_conf.rxmode.max_rx_pkt_len = dev_info.max_rx_pktlen; - } - else - { - xd->port_conf.rxmode.max_rx_pkt_len = ETHERNET_MAX_PACKET_BYTES; - - /* - * Some platforms do not account for Ethernet FCS (4 bytes) in - * MTU calculations. To interop with them increase mru but only - * if the device's settings can support it. - */ - if ((dev_info.max_rx_pktlen >= (ETHERNET_MAX_PACKET_BYTES + 4)) && - xd->port_conf.rxmode.hw_strip_crc) - { - /* - * Allow additional 4 bytes (for Ethernet FCS). These bytes are - * stripped by h/w and so will not consume any buffer memory. 
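/*
 * Editor's sketch (not part of the deleted file): the max_rx_pkt_len choice
 * coded just above, reduced to a helper.  "platform_max" stands in for
 * ETHERNET_MAX_PACKET_BYTES and "dev_max" for dev_info.max_rx_pktlen; the +4
 * makes room for the Ethernet FCS when the NIC strips it in hardware.
 */
static unsigned
pick_max_rx_pkt_len (unsigned platform_max, unsigned dev_max, int hw_strip_crc)
{
  if (platform_max > dev_max)
    return dev_max;		/* device cannot carry the platform max frame */
  if (hw_strip_crc && dev_max >= platform_max + 4)
    return platform_max + 4;	/* extra FCS bytes never reach buffer memory */
  return platform_max;
}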
- */ - xd->port_conf.rxmode.max_rx_pkt_len += 4; - } - } - - if (xd->pmd == VNET_DPDK_PMD_AF_PACKET) - { - f64 now = vlib_time_now (vm); - u32 rnd; - rnd = (u32) (now * 1e6); - rnd = random_u32 (&rnd); - clib_memcpy (addr + 2, &rnd, sizeof (rnd)); - addr[0] = 2; - addr[1] = 0xfe; - } - else - rte_eth_macaddr_get (i, (struct ether_addr *) addr); - - if (xd->tx_q_used < tm->n_vlib_mains) - dpdk_device_lock_init (xd); - - xd->device_index = xd - dm->devices; - ASSERT (i == xd->device_index); - xd->per_interface_next_index = ~0; - - /* assign interface to input thread */ - dpdk_device_and_queue_t *dq; - int q; - - if (devconf->workers) - { - int i; - q = 0; - /* *INDENT-OFF* */ - clib_bitmap_foreach (i, devconf->workers, ({ - int cpu = dm->input_cpu_first_index + i; - unsigned lcore = vlib_worker_threads[cpu].lcore_id; - vec_validate(xd->cpu_socket_id_by_queue, q); - xd->cpu_socket_id_by_queue[q] = rte_lcore_to_socket_id(lcore); - vec_add2(dm->devices_by_cpu[cpu], dq, 1); - dq->device = xd->device_index; - dq->queue_id = q++; - })); - /* *INDENT-ON* */ - } - else - for (q = 0; q < xd->rx_q_used; q++) - { - int cpu = dm->input_cpu_first_index + next_cpu; - unsigned lcore = vlib_worker_threads[cpu].lcore_id; - - /* - * numa node for worker thread handling this queue - * needed for taking buffers from the right mempool - */ - vec_validate (xd->cpu_socket_id_by_queue, q); - xd->cpu_socket_id_by_queue[q] = rte_lcore_to_socket_id (lcore); - - /* - * construct vector of (device,queue) pairs for each worker thread - */ - vec_add2 (dm->devices_by_cpu[cpu], dq, 1); - dq->device = xd->device_index; - dq->queue_id = q; - - next_cpu++; - if (next_cpu == dm->input_cpu_count) - next_cpu = 0; - } - - - if (devconf->hqos_enabled) - { - xd->flags |= DPDK_DEVICE_FLAG_HQOS; - - if (devconf->hqos.hqos_thread_valid) - { - int cpu = dm->hqos_cpu_first_index + devconf->hqos.hqos_thread; - - if (devconf->hqos.hqos_thread >= dm->hqos_cpu_count) - return clib_error_return (0, "invalid HQoS thread index"); - - vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1); - dq->device = xd->device_index; - dq->queue_id = 0; - } - else - { - int cpu = dm->hqos_cpu_first_index + next_hqos_cpu; - - if (dm->hqos_cpu_count == 0) - return clib_error_return (0, "no HQoS threads available"); - - vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1); - dq->device = xd->device_index; - dq->queue_id = 0; - - next_hqos_cpu++; - if (next_hqos_cpu == dm->hqos_cpu_count) - next_hqos_cpu = 0; - - devconf->hqos.hqos_thread_valid = 1; - devconf->hqos.hqos_thread = cpu; - } - } - - vec_validate_aligned (xd->tx_vectors, tm->n_vlib_mains, - CLIB_CACHE_LINE_BYTES); - for (j = 0; j < tm->n_vlib_mains; j++) - { - vec_validate_ha (xd->tx_vectors[j], xd->nb_tx_desc, - sizeof (tx_ring_hdr_t), CLIB_CACHE_LINE_BYTES); - vec_reset_length (xd->tx_vectors[j]); - } - - vec_validate_aligned (xd->rx_vectors, xd->rx_q_used, - CLIB_CACHE_LINE_BYTES); - for (j = 0; j < xd->rx_q_used; j++) - { - vec_validate_aligned (xd->rx_vectors[j], VLIB_FRAME_SIZE - 1, - CLIB_CACHE_LINE_BYTES); - vec_reset_length (xd->rx_vectors[j]); - } - - vec_validate_aligned (xd->d_trace_buffers, tm->n_vlib_mains, - CLIB_CACHE_LINE_BYTES); - - rv = dpdk_port_setup (dm, xd); - - if (rv) - return rv; - - if (devconf->hqos_enabled) - { - rv = dpdk_port_setup_hqos (xd, &devconf->hqos); - if (rv) - return rv; - } - - /* count the number of descriptors used for this device */ - nb_desc += xd->nb_rx_desc + xd->nb_tx_desc * xd->tx_q_used; - - error = ethernet_register_interface - (dm->vnet_main, 
dpdk_device_class.index, xd->device_index, - /* ethernet address */ addr, - &xd->vlib_hw_if_index, dpdk_flag_change); - if (error) - return error; - - sw = vnet_get_hw_sw_interface (dm->vnet_main, xd->vlib_hw_if_index); - xd->vlib_sw_if_index = sw->sw_if_index; - hi = vnet_get_hw_interface (dm->vnet_main, xd->vlib_hw_if_index); - - /* - * DAW-FIXME: The Cisco VIC firmware does not provide an api for a - * driver to dynamically change the mtu. If/when the - * VIC firmware gets fixed, then this should be removed. - */ - if (xd->pmd == VNET_DPDK_PMD_ENIC) - { - /* - * Initialize mtu to what has been set by CIMC in the firmware cfg. - */ - hi->max_packet_bytes = dev_info.max_rx_pktlen; - if (devconf->vlan_strip_offload != DPDK_DEVICE_VLAN_STRIP_OFF) - vlan_strip = 1; /* remove vlan tag from VIC port by default */ - else - clib_warning ("VLAN strip disabled for interface\n"); - } - else if (devconf->vlan_strip_offload == DPDK_DEVICE_VLAN_STRIP_ON) - vlan_strip = 1; - - if (vlan_strip) - { - int vlan_off; - vlan_off = rte_eth_dev_get_vlan_offload (xd->device_index); - vlan_off |= ETH_VLAN_STRIP_OFFLOAD; - xd->port_conf.rxmode.hw_vlan_strip = vlan_off; - if (rte_eth_dev_set_vlan_offload (xd->device_index, vlan_off) == 0) - clib_warning ("VLAN strip enabled for interface\n"); - else - clib_warning ("VLAN strip cannot be supported by interface\n"); - } - - hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = - xd->port_conf.rxmode.max_rx_pkt_len - sizeof (ethernet_header_t); - - rte_eth_dev_set_mtu (xd->device_index, hi->max_packet_bytes); - } - - if (nb_desc > dm->conf->num_mbufs) - clib_warning ("%d mbufs allocated but total rx/tx ring size is %d\n", - dm->conf->num_mbufs, nb_desc); - - return 0; -} - -static void -dpdk_bind_devices_to_uio (dpdk_config_main_t * conf) -{ - vlib_pci_main_t *pm = &pci_main; - clib_error_t *error; - vlib_pci_device_t *d; - u8 *pci_addr = 0; - int num_whitelisted = vec_len (conf->dev_confs); - - /* *INDENT-OFF* */ - pool_foreach (d, pm->pci_devs, ({ - dpdk_device_config_t * devconf = 0; - vec_reset_length (pci_addr); - pci_addr = format (pci_addr, "%U%c", format_vlib_pci_addr, &d->bus_address, 0); - - if (d->device_class != PCI_CLASS_NETWORK_ETHERNET && d->device_class != PCI_CLASS_PROCESSOR_CO) - continue; - - if (num_whitelisted) - { - uword * p = hash_get (conf->device_config_index_by_pci_addr, d->bus_address.as_u32); - - if (!p) - continue; - - devconf = pool_elt_at_index (conf->dev_confs, p[0]); - } - - /* virtio */ - if (d->vendor_id == 0x1af4 && d->device_id == 0x1000) - ; - /* vmxnet3 */ - else if (d->vendor_id == 0x15ad && d->device_id == 0x07b0) - ; - /* all Intel devices */ - else if (d->vendor_id == 0x8086) - ; - /* Cisco VIC */ - else if (d->vendor_id == 0x1137 && d->device_id == 0x0043) - ; - /* Chelsio T4/T5 */ - else if (d->vendor_id == 0x1425 && (d->device_id & 0xe000) == 0x4000) - ; - else - { - clib_warning ("Unsupported Ethernet PCI device 0x%04x:0x%04x found " - "at PCI address %s\n", (u16) d->vendor_id, (u16) d->device_id, - pci_addr); - continue; - } - - error = vlib_pci_bind_to_uio (d, (char *) conf->uio_driver_name); - - if (error) - { - if (devconf == 0) - { - pool_get (conf->dev_confs, devconf); - hash_set (conf->device_config_index_by_pci_addr, d->bus_address.as_u32, - devconf - conf->dev_confs); - devconf->pci_addr.as_u32 = d->bus_address.as_u32; - } - devconf->is_blacklisted = 1; - clib_error_report (error); - } - })); - /* *INDENT-ON* */ - vec_free (pci_addr); -} - -static clib_error_t * -dpdk_device_config 
(dpdk_config_main_t * conf, vlib_pci_addr_t pci_addr, - unformat_input_t * input, u8 is_default) -{ - clib_error_t *error = 0; - uword *p; - dpdk_device_config_t *devconf; - unformat_input_t sub_input; - - if (is_default) - { - devconf = &conf->default_devconf; - } - else - { - p = hash_get (conf->device_config_index_by_pci_addr, pci_addr.as_u32); - - if (!p) - { - pool_get (conf->dev_confs, devconf); - hash_set (conf->device_config_index_by_pci_addr, pci_addr.as_u32, - devconf - conf->dev_confs); - } - else - return clib_error_return (0, - "duplicate configuration for PCI address %U", - format_vlib_pci_addr, &pci_addr); - } - - devconf->pci_addr.as_u32 = pci_addr.as_u32; - devconf->hqos_enabled = 0; - dpdk_device_config_hqos_default (&devconf->hqos); - - if (!input) - return 0; - - unformat_skip_white_space (input); - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "num-rx-queues %u", &devconf->num_rx_queues)) - ; - else if (unformat (input, "num-tx-queues %u", &devconf->num_tx_queues)) - ; - else if (unformat (input, "num-rx-desc %u", &devconf->num_rx_desc)) - ; - else if (unformat (input, "num-tx-desc %u", &devconf->num_tx_desc)) - ; - else if (unformat (input, "workers %U", unformat_bitmap_list, - &devconf->workers)) - ; - else - if (unformat - (input, "rss %U", unformat_vlib_cli_sub_input, &sub_input)) - { - error = unformat_rss_fn (&sub_input, &devconf->rss_fn); - if (error) - break; - } - else if (unformat (input, "vlan-strip-offload off")) - devconf->vlan_strip_offload = DPDK_DEVICE_VLAN_STRIP_OFF; - else if (unformat (input, "vlan-strip-offload on")) - devconf->vlan_strip_offload = DPDK_DEVICE_VLAN_STRIP_ON; - else - if (unformat - (input, "hqos %U", unformat_vlib_cli_sub_input, &sub_input)) - { - devconf->hqos_enabled = 1; - error = unformat_hqos (&sub_input, &devconf->hqos); - if (error) - break; - } - else if (unformat (input, "hqos")) - { - devconf->hqos_enabled = 1; - } - else - { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - break; - } - } - - if (error) - return error; - - if (devconf->workers && devconf->num_rx_queues == 0) - devconf->num_rx_queues = clib_bitmap_count_set_bits (devconf->workers); - else if (devconf->workers && - clib_bitmap_count_set_bits (devconf->workers) != - devconf->num_rx_queues) - error = - clib_error_return (0, - "%U: number of worker threadds must be " - "equal to number of rx queues", format_vlib_pci_addr, - &pci_addr); - - return error; -} - -static clib_error_t * -dpdk_config (vlib_main_t * vm, unformat_input_t * input) -{ - clib_error_t *error = 0; - dpdk_main_t *dm = &dpdk_main; - dpdk_config_main_t *conf = &dpdk_config_main; - vlib_thread_main_t *tm = vlib_get_thread_main (); - dpdk_device_config_t *devconf; - vlib_pci_addr_t pci_addr; - unformat_input_t sub_input; - u8 *s, *tmp = 0; - u8 *rte_cmd = 0, *ethname = 0; - u32 log_level; - int ret, i; - int num_whitelisted = 0; - u8 no_pci = 0; - u8 no_huge = 0; - u8 huge_dir = 0; - u8 file_prefix = 0; - u8 *socket_mem = 0; - - conf->device_config_index_by_pci_addr = hash_create (0, sizeof (uword)); - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - /* Prime the pump */ - if (unformat (input, "no-hugetlb")) - { - vec_add1 (conf->eal_init_args, (u8 *) "no-huge"); - no_huge = 1; - } - - else if (unformat (input, "enable-tcp-udp-checksum")) - conf->enable_tcp_udp_checksum = 1; - - else if (unformat (input, "decimal-interface-names")) - conf->interface_name_format_decimal = 1; - - else if 
(unformat (input, "no-multi-seg")) - conf->no_multi_seg = 1; - - else if (unformat (input, "enable-cryptodev")) - conf->cryptodev = 1; - - else if (unformat (input, "dev default %U", unformat_vlib_cli_sub_input, - &sub_input)) - { - error = - dpdk_device_config (conf, (vlib_pci_addr_t) (u32) ~ 1, &sub_input, - 1); - - if (error) - return error; - } - else - if (unformat - (input, "dev %U %U", unformat_vlib_pci_addr, &pci_addr, - unformat_vlib_cli_sub_input, &sub_input)) - { - error = dpdk_device_config (conf, pci_addr, &sub_input, 0); - - if (error) - return error; - - num_whitelisted++; - } - else if (unformat (input, "dev %U", unformat_vlib_pci_addr, &pci_addr)) - { - error = dpdk_device_config (conf, pci_addr, 0, 0); - - if (error) - return error; - - num_whitelisted++; - } - else if (unformat (input, "num-mbufs %d", &conf->num_mbufs)) - ; - else if (unformat (input, "kni %d", &conf->num_kni)) - ; - else if (unformat (input, "uio-driver %s", &conf->uio_driver_name)) - ; - else if (unformat (input, "socket-mem %s", &socket_mem)) - ; - else if (unformat (input, "no-pci")) - { - no_pci = 1; - tmp = format (0, "--no-pci%c", 0); - vec_add1 (conf->eal_init_args, tmp); - } - else if (unformat (input, "poll-sleep %d", &dm->poll_sleep)) - ; - -#define _(a) \ - else if (unformat(input, #a)) \ - { \ - tmp = format (0, "--%s%c", #a, 0); \ - vec_add1 (conf->eal_init_args, tmp); \ - } - foreach_eal_double_hyphen_predicate_arg -#undef _ -#define _(a) \ - else if (unformat(input, #a " %s", &s)) \ - { \ - if (!strncmp(#a, "huge-dir", 8)) \ - huge_dir = 1; \ - else if (!strncmp(#a, "file-prefix", 11)) \ - file_prefix = 1; \ - tmp = format (0, "--%s%c", #a, 0); \ - vec_add1 (conf->eal_init_args, tmp); \ - vec_add1 (s, 0); \ - if (!strncmp(#a, "vdev", 4)) \ - if (strstr((char*)s, "af_packet")) \ - clib_warning ("af_packet obsoleted. Use CLI 'create host-interface'."); \ - vec_add1 (conf->eal_init_args, s); \ - } - foreach_eal_double_hyphen_arg -#undef _ -#define _(a,b) \ - else if (unformat(input, #a " %s", &s)) \ - { \ - tmp = format (0, "-%s%c", #b, 0); \ - vec_add1 (conf->eal_init_args, tmp); \ - vec_add1 (s, 0); \ - vec_add1 (conf->eal_init_args, s); \ - } - foreach_eal_single_hyphen_arg -#undef _ -#define _(a,b) \ - else if (unformat(input, #a " %s", &s)) \ - { \ - tmp = format (0, "-%s%c", #b, 0); \ - vec_add1 (conf->eal_init_args, tmp); \ - vec_add1 (s, 0); \ - vec_add1 (conf->eal_init_args, s); \ - conf->a##_set_manually = 1; \ - } - foreach_eal_single_hyphen_mandatory_arg -#undef _ - else if (unformat (input, "default")) - ; - - else if (unformat_skip_white_space (input)) - ; - else - { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - goto done; - } - } - - if (!conf->uio_driver_name) - conf->uio_driver_name = format (0, "uio_pci_generic%c", 0); - - /* - * Use 1G huge pages if available. - */ - if (!no_huge && !huge_dir) - { - u32 x, *mem_by_socket = 0; - uword c = 0; - u8 use_1g = 1; - u8 use_2m = 1; - u8 less_than_1g = 1; - int rv; - - umount (DEFAULT_HUGE_DIR); - - /* Process "socket-mem" parameter value */ - if (vec_len (socket_mem)) - { - unformat_input_t in; - unformat_init_vector (&in, socket_mem); - while (unformat_check_input (&in) != UNFORMAT_END_OF_INPUT) - { - if (unformat (&in, "%u,", &x)) - ; - else if (unformat (&in, "%u", &x)) - ; - else if (unformat (&in, ",")) - x = 0; - else - break; - - vec_add1 (mem_by_socket, x); - - if (x > 1023) - less_than_1g = 0; - } - /* Note: unformat_free vec_frees(in.buffer), aka socket_mem... 
*/ - unformat_free (&in); - socket_mem = 0; - } - else - { - /* *INDENT-OFF* */ - clib_bitmap_foreach (c, tm->cpu_socket_bitmap, ( - { - vec_validate(mem_by_socket, c); - mem_by_socket[c] = 256; /* default per-socket mem */ - } - )); - /* *INDENT-ON* */ - } - - /* check if available enough 1GB pages for each socket */ - /* *INDENT-OFF* */ - clib_bitmap_foreach (c, tm->cpu_socket_bitmap, ( - { - int pages_avail, page_size, mem; - - vec_validate(mem_by_socket, c); - mem = mem_by_socket[c]; - - page_size = 1024; - pages_avail = vlib_sysfs_get_free_hugepages(c, page_size * 1024); - - if (pages_avail < 0 || page_size * pages_avail < mem) - use_1g = 0; - - page_size = 2; - pages_avail = vlib_sysfs_get_free_hugepages(c, page_size * 1024); - - if (pages_avail < 0 || page_size * pages_avail < mem) - use_2m = 0; - })); - /* *INDENT-ON* */ - - if (mem_by_socket == 0) - { - error = clib_error_return (0, "mem_by_socket NULL"); - goto done; - } - _vec_len (mem_by_socket) = c + 1; - - /* regenerate socket_mem string */ - vec_foreach_index (x, mem_by_socket) - socket_mem = format (socket_mem, "%s%u", - socket_mem ? "," : "", mem_by_socket[x]); - socket_mem = format (socket_mem, "%c", 0); - - vec_free (mem_by_socket); - - rv = mkdir (VPP_RUN_DIR, 0755); - if (rv && errno != EEXIST) - { - error = clib_error_return (0, "mkdir '%s' failed errno %d", - VPP_RUN_DIR, errno); - goto done; - } - - rv = mkdir (DEFAULT_HUGE_DIR, 0755); - if (rv && errno != EEXIST) - { - error = clib_error_return (0, "mkdir '%s' failed errno %d", - DEFAULT_HUGE_DIR, errno); - goto done; - } - - if (use_1g && !(less_than_1g && use_2m)) - { - rv = - mount ("none", DEFAULT_HUGE_DIR, "hugetlbfs", 0, "pagesize=1G"); - } - else if (use_2m) - { - rv = mount ("none", DEFAULT_HUGE_DIR, "hugetlbfs", 0, NULL); - } - else - { - return clib_error_return (0, "not enough free huge pages"); - } - - if (rv) - { - error = clib_error_return (0, "mount failed %d", errno); - goto done; - } - - tmp = format (0, "--huge-dir%c", 0); - vec_add1 (conf->eal_init_args, tmp); - tmp = format (0, "%s%c", DEFAULT_HUGE_DIR, 0); - vec_add1 (conf->eal_init_args, tmp); - if (!file_prefix) - { - tmp = format (0, "--file-prefix%c", 0); - vec_add1 (conf->eal_init_args, tmp); - tmp = format (0, "vpp%c", 0); - vec_add1 (conf->eal_init_args, tmp); - } - } - - vec_free (rte_cmd); - vec_free (ethname); - - if (error) - return error; - - /* I'll bet that -c and -n must be the first and second args... 
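/*
 * Editor's sketch (not part of the deleted file): the hugepage page-size
 * decision coded above.  one_gb_ok / two_mb_ok mean "every socket has enough
 * free pages of that size for its socket-mem request"; all_requests_small
 * means every per-socket request is below 1024 MB.
 */
static const char *
pick_hugetlbfs_pagesize (int one_gb_ok, int two_mb_ok, int all_requests_small)
{
  if (one_gb_ok && !(all_requests_small && two_mb_ok))
    return "pagesize=1G";	/* prefer 1G pages for larger requests */
  if (two_mb_ok)
    return "";			/* mount with the default (2M) page size */
  return (const char *) 0;	/* not enough free huge pages -> error */
}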
*/ - if (!conf->coremask_set_manually) - { - vlib_thread_registration_t *tr; - uword *coremask = 0; - int i; - - /* main thread core */ - coremask = clib_bitmap_set (coremask, tm->main_lcore, 1); - - for (i = 0; i < vec_len (tm->registrations); i++) - { - tr = tm->registrations[i]; - coremask = clib_bitmap_or (coremask, tr->coremask); - } - - vec_insert (conf->eal_init_args, 2, 1); - conf->eal_init_args[1] = (u8 *) "-c"; - tmp = format (0, "%U%c", format_bitmap_hex, coremask, 0); - conf->eal_init_args[2] = tmp; - clib_bitmap_free (coremask); - } - - if (!conf->nchannels_set_manually) - { - vec_insert (conf->eal_init_args, 2, 3); - conf->eal_init_args[3] = (u8 *) "-n"; - tmp = format (0, "%d", conf->nchannels); - conf->eal_init_args[4] = tmp; - } - - if (no_pci == 0 && geteuid () == 0) - dpdk_bind_devices_to_uio (conf); - -#define _(x) \ - if (devconf->x == 0 && conf->default_devconf.x > 0) \ - devconf->x = conf->default_devconf.x ; - - /* *INDENT-OFF* */ - pool_foreach (devconf, conf->dev_confs, ({ - - /* default per-device config items */ - foreach_dpdk_device_config_item - - /* add DPDK EAL whitelist/blacklist entry */ - if (num_whitelisted > 0 && devconf->is_blacklisted == 0) - { - tmp = format (0, "-w%c", 0); - vec_add1 (conf->eal_init_args, tmp); - tmp = format (0, "%U%c", format_vlib_pci_addr, &devconf->pci_addr, 0); - vec_add1 (conf->eal_init_args, tmp); - } - else if (num_whitelisted == 0 && devconf->is_blacklisted != 0) - { - tmp = format (0, "-b%c", 0); - vec_add1 (conf->eal_init_args, tmp); - tmp = format (0, "%U%c", format_vlib_pci_addr, &devconf->pci_addr, 0); - vec_add1 (conf->eal_init_args, tmp); - } - })); - /* *INDENT-ON* */ - -#undef _ - - /* set master-lcore */ - tmp = format (0, "--master-lcore%c", 0); - vec_add1 (conf->eal_init_args, tmp); - tmp = format (0, "%u%c", tm->main_lcore, 0); - vec_add1 (conf->eal_init_args, tmp); - - /* set socket-mem */ - tmp = format (0, "--socket-mem%c", 0); - vec_add1 (conf->eal_init_args, tmp); - tmp = format (0, "%s%c", socket_mem, 0); - vec_add1 (conf->eal_init_args, tmp); - - /* NULL terminate the "argv" vector, in case of stupidity */ - vec_add1 (conf->eal_init_args, 0); - _vec_len (conf->eal_init_args) -= 1; - - /* Set up DPDK eal and packet mbuf pool early. */ - - log_level = (CLIB_DEBUG > 0) ? 
RTE_LOG_DEBUG : RTE_LOG_NOTICE; - - rte_set_log_level (log_level); - - vm = vlib_get_main (); - - /* make copy of args as rte_eal_init tends to mess up with arg array */ - for (i = 1; i < vec_len (conf->eal_init_args); i++) - conf->eal_init_args_str = format (conf->eal_init_args_str, "%s ", - conf->eal_init_args[i]); - - ret = - rte_eal_init (vec_len (conf->eal_init_args), - (char **) conf->eal_init_args); - - /* lazy umount hugepages */ - umount2 (DEFAULT_HUGE_DIR, MNT_DETACH); - - if (ret < 0) - return clib_error_return (0, "rte_eal_init returned %d", ret); - - /* Dump the physical memory layout prior to creating the mbuf_pool */ - fprintf (stdout, "DPDK physical memory layout:\n"); - rte_dump_physmem_layout (stdout); - - /* main thread 1st */ - error = vlib_buffer_pool_create (vm, conf->num_mbufs, rte_socket_id ()); - if (error) - return error; - - for (i = 0; i < RTE_MAX_LCORE; i++) - { - error = vlib_buffer_pool_create (vm, conf->num_mbufs, - rte_lcore_to_socket_id (i)); - if (error) - return error; - } - -done: - return error; -} - -VLIB_CONFIG_FUNCTION (dpdk_config, "dpdk"); - -void -dpdk_update_link_state (dpdk_device_t * xd, f64 now) -{ - vnet_main_t *vnm = vnet_get_main (); - struct rte_eth_link prev_link = xd->link; - u32 hw_flags = 0; - u8 hw_flags_chg = 0; - - /* only update link state for PMD interfaces */ - if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0) - return; - - xd->time_last_link_update = now ? now : xd->time_last_link_update; - memset (&xd->link, 0, sizeof (xd->link)); - rte_eth_link_get_nowait (xd->device_index, &xd->link); - - if (LINK_STATE_ELOGS) - { - vlib_main_t *vm = vlib_get_main (); - ELOG_TYPE_DECLARE (e) = - { - .format = - "update-link-state: sw_if_index %d, admin_up %d," - "old link_state %d new link_state %d",.format_args = "i4i1i1i1",}; - - struct - { - u32 sw_if_index; - u8 admin_up; - u8 old_link_state; - u8 new_link_state; - } *ed; - ed = ELOG_DATA (&vm->elog_main, e); - ed->sw_if_index = xd->vlib_sw_if_index; - ed->admin_up = (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) != 0; - ed->old_link_state = (u8) - vnet_hw_interface_is_link_up (vnm, xd->vlib_hw_if_index); - ed->new_link_state = (u8) xd->link.link_status; - } - - if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) && - ((xd->link.link_status != 0) ^ - vnet_hw_interface_is_link_up (vnm, xd->vlib_hw_if_index))) - { - hw_flags_chg = 1; - hw_flags |= (xd->link.link_status ? 
VNET_HW_INTERFACE_FLAG_LINK_UP : 0); - } - - if (hw_flags_chg || (xd->link.link_duplex != prev_link.link_duplex)) - { - hw_flags_chg = 1; - switch (xd->link.link_duplex) - { - case ETH_LINK_HALF_DUPLEX: - hw_flags |= VNET_HW_INTERFACE_FLAG_HALF_DUPLEX; - break; - case ETH_LINK_FULL_DUPLEX: - hw_flags |= VNET_HW_INTERFACE_FLAG_FULL_DUPLEX; - break; - default: - break; - } - } - if (hw_flags_chg || (xd->link.link_speed != prev_link.link_speed)) - { - hw_flags_chg = 1; - switch (xd->link.link_speed) - { - case ETH_SPEED_NUM_10M: - hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_10M; - break; - case ETH_SPEED_NUM_100M: - hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_100M; - break; - case ETH_SPEED_NUM_1G: - hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_1G; - break; - case ETH_SPEED_NUM_10G: - hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_10G; - break; - case ETH_SPEED_NUM_40G: - hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_40G; - break; - case 0: - break; - default: - clib_warning ("unknown link speed %d", xd->link.link_speed); - break; - } - } - if (hw_flags_chg) - { - if (LINK_STATE_ELOGS) - { - vlib_main_t *vm = vlib_get_main (); - - ELOG_TYPE_DECLARE (e) = - { - .format = - "update-link-state: sw_if_index %d, new flags %d",.format_args - = "i4i4",}; - - struct - { - u32 sw_if_index; - u32 flags; - } *ed; - ed = ELOG_DATA (&vm->elog_main, e); - ed->sw_if_index = xd->vlib_sw_if_index; - ed->flags = hw_flags; - } - vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, hw_flags); - } -} - -static uword -dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) -{ - clib_error_t *error; - vnet_main_t *vnm = vnet_get_main (); - dpdk_main_t *dm = &dpdk_main; - ethernet_main_t *em = ðernet_main; - dpdk_device_t *xd; - vlib_thread_main_t *tm = vlib_get_thread_main (); - int i; - - error = dpdk_lib_init (dm); - - /* - * Turn on the input node if we found some devices to drive - * and we're not running worker threads or i/o threads - */ - - if (error == 0 && vec_len (dm->devices) > 0) - { - if (tm->n_vlib_mains == 1) - vlib_node_set_state (vm, dpdk_input_node.index, - VLIB_NODE_STATE_POLLING); - else - for (i = 0; i < tm->n_vlib_mains; i++) - if (vec_len (dm->devices_by_cpu[i]) > 0) - vlib_node_set_state (vlib_mains[i], dpdk_input_node.index, - VLIB_NODE_STATE_POLLING); - } - - if (error) - clib_error_report (error); - - tm->worker_thread_release = 1; - - f64 now = vlib_time_now (vm); - vec_foreach (xd, dm->devices) - { - dpdk_update_link_state (xd, now); - } - - { - /* - * Extra set up for bond interfaces: - * 1. Setup MACs for bond interfaces and their slave links which was set - * in dpdk_port_setup() but needs to be done again here to take effect. - * 2. Set up info for bond interface related CLI support. 
- */ - int nports = rte_eth_dev_count (); - if (nports > 0) - { - for (i = 0; i < nports; i++) - { - struct rte_eth_dev_info dev_info; - rte_eth_dev_info_get (i, &dev_info); - if (!dev_info.driver_name) - dev_info.driver_name = dev_info.pci_dev->driver->driver.name; - - ASSERT (dev_info.driver_name); - if (strncmp (dev_info.driver_name, "rte_bond_pmd", 12) == 0) - { - u8 addr[6]; - u8 slink[16]; - int nlink = rte_eth_bond_slaves_get (i, slink, 16); - if (nlink > 0) - { - vnet_hw_interface_t *bhi; - ethernet_interface_t *bei; - int rv; - - /* Get MAC of 1st slave link */ - rte_eth_macaddr_get (slink[0], - (struct ether_addr *) addr); - /* Set MAC of bounded interface to that of 1st slave link */ - rv = - rte_eth_bond_mac_address_set (i, - (struct ether_addr *) - addr); - if (rv < 0) - clib_warning ("Failed to set MAC address"); - - /* Populate MAC of bonded interface in VPP hw tables */ - bhi = - vnet_get_hw_interface (vnm, - dm->devices[i].vlib_hw_if_index); - bei = - pool_elt_at_index (em->interfaces, bhi->hw_instance); - clib_memcpy (bhi->hw_address, addr, 6); - clib_memcpy (bei->address, addr, 6); - /* Init l3 packet size allowed on bonded interface */ - bhi->max_packet_bytes = ETHERNET_MAX_PACKET_BYTES; - bhi->max_l3_packet_bytes[VLIB_RX] = - bhi->max_l3_packet_bytes[VLIB_TX] = - ETHERNET_MAX_PACKET_BYTES - sizeof (ethernet_header_t); - while (nlink >= 1) - { /* for all slave links */ - int slave = slink[--nlink]; - dpdk_device_t *sdev = &dm->devices[slave]; - vnet_hw_interface_t *shi; - vnet_sw_interface_t *ssi; - /* Add MAC to all slave links except the first one */ - if (nlink) - rte_eth_dev_mac_addr_add (slave, - (struct ether_addr *) - addr, 0); - /* Set slaves bitmap for bonded interface */ - bhi->bond_info = - clib_bitmap_set (bhi->bond_info, - sdev->vlib_hw_if_index, 1); - /* Set slave link flags on slave interface */ - shi = - vnet_get_hw_interface (vnm, sdev->vlib_hw_if_index); - ssi = - vnet_get_sw_interface (vnm, sdev->vlib_sw_if_index); - shi->bond_info = VNET_HW_INTERFACE_BOND_INFO_SLAVE; - ssi->flags |= VNET_SW_INTERFACE_FLAG_BOND_SLAVE; - - /* Set l3 packet size allowed as the lowest of slave */ - if (bhi->max_l3_packet_bytes[VLIB_RX] > - shi->max_l3_packet_bytes[VLIB_RX]) - bhi->max_l3_packet_bytes[VLIB_RX] = - bhi->max_l3_packet_bytes[VLIB_TX] = - shi->max_l3_packet_bytes[VLIB_RX]; - - /* Set max packet size allowed as the lowest of slave */ - if (bhi->max_packet_bytes > shi->max_packet_bytes) - bhi->max_packet_bytes = shi->max_packet_bytes; - } - } - } - } - } - } - - while (1) - { - /* - * check each time through the loop in case intervals are changed - */ - f64 min_wait = dm->link_state_poll_interval < dm->stat_poll_interval ? 
- dm->link_state_poll_interval : dm->stat_poll_interval; - - vlib_process_wait_for_event_or_clock (vm, min_wait); - - if (dm->admin_up_down_in_progress) - /* skip the poll if an admin up down is in progress (on any interface) */ - continue; - - vec_foreach (xd, dm->devices) - { - f64 now = vlib_time_now (vm); - if ((now - xd->time_last_stats_update) >= dm->stat_poll_interval) - dpdk_update_counters (xd, now); - if ((now - xd->time_last_link_update) >= dm->link_state_poll_interval) - dpdk_update_link_state (xd, now); - - } - } - - return 0; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (dpdk_process_node,static) = { - .function = dpdk_process, - .type = VLIB_NODE_TYPE_PROCESS, - .name = "dpdk-process", - .process_log2_n_stack_bytes = 17, -}; -/* *INDENT-ON* */ - -int -dpdk_set_stat_poll_interval (f64 interval) -{ - if (interval < DPDK_MIN_STATS_POLL_INTERVAL) - return (VNET_API_ERROR_INVALID_VALUE); - - dpdk_main.stat_poll_interval = interval; - - return 0; -} - -int -dpdk_set_link_state_poll_interval (f64 interval) -{ - if (interval < DPDK_MIN_LINK_POLL_INTERVAL) - return (VNET_API_ERROR_INVALID_VALUE); - - dpdk_main.link_state_poll_interval = interval; - - return 0; -} - -clib_error_t * -dpdk_init (vlib_main_t * vm) -{ - dpdk_main_t *dm = &dpdk_main; - vlib_node_t *ei; - clib_error_t *error = 0; - vlib_thread_main_t *tm = vlib_get_thread_main (); - - /* verify that structs are cacheline aligned */ - STATIC_ASSERT (offsetof (dpdk_device_t, cacheline0) == 0, - "Cache line marker must be 1st element in dpdk_device_t"); - STATIC_ASSERT (offsetof (dpdk_device_t, cacheline1) == - CLIB_CACHE_LINE_BYTES, - "Data in cache line 0 is bigger than cache line size"); - STATIC_ASSERT (offsetof (frame_queue_trace_t, cacheline0) == 0, - "Cache line marker must be 1st element in frame_queue_trace_t"); - - dm->vlib_main = vm; - dm->vnet_main = vnet_get_main (); - dm->conf = &dpdk_config_main; - - ei = vlib_get_node_by_name (vm, (u8 *) "ethernet-input"); - if (ei == 0) - return clib_error_return (0, "ethernet-input node AWOL"); - - dm->ethernet_input_node_index = ei->index; - - dm->conf->nchannels = 4; - dm->conf->num_mbufs = dm->conf->num_mbufs ? dm->conf->num_mbufs : NB_MBUF; - vec_add1 (dm->conf->eal_init_args, (u8 *) "vnet"); - - dm->dpdk_device_by_kni_port_id = hash_create (0, sizeof (uword)); - dm->vu_sw_if_index_by_listener_fd = hash_create (0, sizeof (uword)); - dm->vu_sw_if_index_by_sock_fd = hash_create (0, sizeof (uword)); - - /* $$$ use n_thread_stacks since it's known-good at this point */ - vec_validate (dm->recycle, tm->n_thread_stacks - 1); - - /* Default vlib_buffer_t flags, DISABLES tcp/udp checksumming... */ - dm->buffer_flags_template = - (VLIB_BUFFER_TOTAL_LENGTH_VALID | VLIB_BUFFER_EXT_HDR_VALID - | IP_BUFFER_L4_CHECKSUM_COMPUTED | IP_BUFFER_L4_CHECKSUM_CORRECT); - - dm->stat_poll_interval = DPDK_STATS_POLL_INTERVAL; - dm->link_state_poll_interval = DPDK_LINK_POLL_INTERVAL; - - /* init CLI */ - if ((error = vlib_call_init_function (vm, dpdk_cli_init))) - return error; - - return error; -} - -VLIB_INIT_FUNCTION (dpdk_init); - - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/ipsec/cli.c b/src/vnet/devices/dpdk/ipsec/cli.c deleted file mode 100644 index f9d3a5d0..00000000 --- a/src/vnet/devices/dpdk/ipsec/cli.c +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Copyright (c) 2016 Intel and/or its affiliates. 
- * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include - -static void -dpdk_ipsec_show_mapping (vlib_main_t * vm, u16 detail_display) -{ - dpdk_config_main_t *conf = &dpdk_config_main; - dpdk_crypto_main_t *dcm = &dpdk_crypto_main; - vlib_thread_main_t *tm = vlib_get_thread_main (); - u32 i, skip_master; - - if (!conf->cryptodev) - { - vlib_cli_output (vm, "DPDK Cryptodev support is disabled\n"); - return; - } - - if (detail_display) - vlib_cli_output (vm, "worker\t%10s\t%15s\tdir\tdev\tqp\n", - "cipher", "auth"); - else - vlib_cli_output (vm, "worker\tcrypto device id(type)\n"); - - skip_master = vlib_num_workers () > 0; - - for (i = 0; i < tm->n_vlib_mains; i++) - { - uword key, data; - u32 cpu_index = vlib_mains[i]->cpu_index; - crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index]; - u8 *s = 0; - - if (skip_master) - { - skip_master = 0; - continue; - } - - if (!detail_display) - { - i32 last_cdev = -1; - crypto_qp_data_t *qpd; - - s = format (s, "%u\t", cpu_index); - - /* *INDENT-OFF* */ - vec_foreach (qpd, cwm->qp_data) - { - u32 dev_id = qpd->dev_id; - - if ((u16) last_cdev != dev_id) - { - struct rte_cryptodev_info cdev_info; - - rte_cryptodev_info_get (dev_id, &cdev_info); - - s = format(s, "%u(%s)\t", dev_id, cdev_info.feature_flags & - RTE_CRYPTODEV_FF_HW_ACCELERATED ? "HW" : "SW"); - } - last_cdev = dev_id; - } - /* *INDENT-ON* */ - vlib_cli_output (vm, "%s", s); - } - else - { - char cipher_str[15], auth_str[15]; - struct rte_cryptodev_capabilities cap; - crypto_worker_qp_key_t *p_key = (crypto_worker_qp_key_t *) & key; - /* *INDENT-OFF* */ - hash_foreach (key, data, cwm->algo_qp_map, - ({ - cap.op = RTE_CRYPTO_OP_TYPE_SYMMETRIC; - cap.sym.xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER; - cap.sym.cipher.algo = p_key->cipher_algo; - check_algo_is_supported (&cap, cipher_str); - cap.op = RTE_CRYPTO_OP_TYPE_SYMMETRIC; - cap.sym.xform_type = RTE_CRYPTO_SYM_XFORM_AUTH; - cap.sym.auth.algo = p_key->auth_algo; - check_algo_is_supported (&cap, auth_str); - vlib_cli_output (vm, "%u\t%10s\t%15s\t%3s\t%u\t%u\n", - vlib_mains[i]->cpu_index, cipher_str, auth_str, - p_key->is_outbound ? 
"out" : "in", - cwm->qp_data[data].dev_id, - cwm->qp_data[data].qp_id); - })); - /* *INDENT-ON* */ - } - } -} - -static clib_error_t * -lcore_cryptodev_map_fn (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - u16 detail = 0; - clib_error_t *error = NULL; - - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (line_input, "verbose")) - detail = 1; - else - { - error = clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); - goto done; - } - } - - dpdk_ipsec_show_mapping (vm, detail); - -done: - unformat_free (line_input); - - return error; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (lcore_cryptodev_map, static) = { - .path = "show crypto device mapping", - .short_help = - "show cryptodev device mapping ", - .function = lcore_cryptodev_map_fn, -}; -/* *INDENT-ON* */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/ipsec/crypto_node.c b/src/vnet/devices/dpdk/ipsec/crypto_node.c deleted file mode 100644 index e8fef235..00000000 --- a/src/vnet/devices/dpdk/ipsec/crypto_node.c +++ /dev/null @@ -1,215 +0,0 @@ -/* - *------------------------------------------------------------------ - * crypto_node.c - DPDK Cryptodev input node - * - * Copyright (c) 2016 Intel and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- *------------------------------------------------------------------ - */ - -#include -#include -#include -#include - -#include -#include -#include - -#define foreach_dpdk_crypto_input_next \ - _(DROP, "error-drop") \ - _(ENCRYPT_POST, "dpdk-esp-encrypt-post") \ - _(DECRYPT_POST, "dpdk-esp-decrypt-post") - -typedef enum -{ -#define _(f,s) DPDK_CRYPTO_INPUT_NEXT_##f, - foreach_dpdk_crypto_input_next -#undef _ - DPDK_CRYPTO_INPUT_N_NEXT, -} dpdk_crypto_input_next_t; - -#define foreach_dpdk_crypto_input_error \ - _(DQ_COPS, "Crypto ops dequeued") \ - _(COP_FAILED, "Crypto op failed") - -typedef enum -{ -#define _(f,s) DPDK_CRYPTO_INPUT_ERROR_##f, - foreach_dpdk_crypto_input_error -#undef _ - DPDK_CRYPTO_INPUT_N_ERROR, -} dpdk_crypto_input_error_t; - -static char *dpdk_crypto_input_error_strings[] = { -#define _(n, s) s, - foreach_dpdk_crypto_input_error -#undef _ -}; - -vlib_node_registration_t dpdk_crypto_input_node; - -typedef struct -{ - u32 cdev; - u32 qp; - u32 status; - u32 sa_idx; - u32 next_index; -} dpdk_crypto_input_trace_t; - -static u8 * -format_dpdk_crypto_input_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - dpdk_crypto_input_trace_t *t = va_arg (*args, dpdk_crypto_input_trace_t *); - - s = format (s, "dpdk_crypto: cryptodev-id %u queue-pair %u next-index %d", - t->cdev, t->qp, t->next_index); - - s = format (s, " status %u sa-idx %u\n", t->status, t->sa_idx); - - return s; -} - -static_always_inline u32 -dpdk_crypto_dequeue (vlib_main_t * vm, vlib_node_runtime_t * node, - crypto_qp_data_t * qpd) -{ - u32 n_deq, *to_next = 0, next_index, n_cops, def_next_index; - struct rte_crypto_op **cops = qpd->cops; - - if (qpd->inflights == 0) - return 0; - - if (qpd->is_outbound) - def_next_index = DPDK_CRYPTO_INPUT_NEXT_ENCRYPT_POST; - else - def_next_index = DPDK_CRYPTO_INPUT_NEXT_DECRYPT_POST; - - n_cops = rte_cryptodev_dequeue_burst (qpd->dev_id, qpd->qp_id, - cops, VLIB_FRAME_SIZE); - n_deq = n_cops; - next_index = def_next_index; - - qpd->inflights -= n_cops; - ASSERT (qpd->inflights >= 0); - - while (n_cops > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_cops > 0 && n_left_to_next > 0) - { - u32 bi0, next0; - vlib_buffer_t *b0 = 0; - struct rte_crypto_op *cop; - struct rte_crypto_sym_op *sym_cop; - - cop = cops[0]; - cops += 1; - n_cops -= 1; - n_left_to_next -= 1; - - next0 = def_next_index; - - if (PREDICT_FALSE (cop->status != RTE_CRYPTO_OP_STATUS_SUCCESS)) - { - next0 = DPDK_CRYPTO_INPUT_NEXT_DROP; - vlib_node_increment_counter (vm, dpdk_crypto_input_node.index, - DPDK_CRYPTO_INPUT_ERROR_COP_FAILED, - 1); - } - cop->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED; - - sym_cop = (struct rte_crypto_sym_op *) (cop + 1); - b0 = vlib_buffer_from_rte_mbuf (sym_cop->m_src); - bi0 = vlib_get_buffer_index (vm, b0); - - to_next[0] = bi0; - to_next += 1; - - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - vlib_trace_next_frame (vm, node, next0); - dpdk_crypto_input_trace_t *tr = - vlib_add_trace (vm, node, b0, sizeof (*tr)); - tr->cdev = qpd->dev_id; - tr->qp = qpd->qp_id; - tr->status = cop->status; - tr->next_index = next0; - tr->sa_idx = vnet_buffer (b0)->ipsec.sad_index; - } - - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, bi0, next0); - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - crypto_free_cop (qpd, qpd->cops, n_deq); - - 
vlib_node_increment_counter (vm, dpdk_crypto_input_node.index, - DPDK_CRYPTO_INPUT_ERROR_DQ_COPS, n_deq); - return n_deq; -} - -static uword -dpdk_crypto_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * frame) -{ - u32 cpu_index = os_get_cpu_number (); - dpdk_crypto_main_t *dcm = &dpdk_crypto_main; - crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index]; - crypto_qp_data_t *qpd; - u32 n_deq = 0; - - /* *INDENT-OFF* */ - vec_foreach (qpd, cwm->qp_data) - n_deq += dpdk_crypto_dequeue(vm, node, qpd); - /* *INDENT-ON* */ - - return n_deq; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (dpdk_crypto_input_node) = -{ - .function = dpdk_crypto_input_fn, - .name = "dpdk-crypto-input", - .format_trace = format_dpdk_crypto_input_trace, - .type = VLIB_NODE_TYPE_INPUT, - .state = VLIB_NODE_STATE_DISABLED, - .n_errors = DPDK_CRYPTO_INPUT_N_ERROR, - .error_strings = dpdk_crypto_input_error_strings, - .n_next_nodes = DPDK_CRYPTO_INPUT_N_NEXT, - .next_nodes = - { -#define _(s,n) [DPDK_CRYPTO_INPUT_NEXT_##s] = n, - foreach_dpdk_crypto_input_next -#undef _ - }, -}; -/* *INDENT-ON* */ - -VLIB_NODE_FUNCTION_MULTIARCH (dpdk_crypto_input_node, dpdk_crypto_input_fn) -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/ipsec/dir.dox b/src/vnet/devices/dpdk/ipsec/dir.dox deleted file mode 100644 index ffebfc4d..00000000 --- a/src/vnet/devices/dpdk/ipsec/dir.dox +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Copyright (c) 2016 Intel and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/** - @dir vnet/vnet/devices/dpdk/ipsec - @brief IPSec ESP encrypt/decrypt using DPDK Cryptodev API -*/ diff --git a/src/vnet/devices/dpdk/ipsec/dpdk_crypto_ipsec_doc.md b/src/vnet/devices/dpdk/ipsec/dpdk_crypto_ipsec_doc.md deleted file mode 100644 index fed2fe0e..00000000 --- a/src/vnet/devices/dpdk/ipsec/dpdk_crypto_ipsec_doc.md +++ /dev/null @@ -1,86 +0,0 @@ -# VPP IPSec implementation using DPDK Cryptodev API {#dpdk_crypto_ipsec_doc} - -This document is meant to contain all related information about implementation and usability. - - -## VPP IPsec with DPDK Cryptodev - -DPDK Cryptodev is an asynchronous crypto API that supports both Hardware and Software implementations (for more details refer to [DPDK Cryptography Device Library documentation](http://dpdk.org/doc/guides/prog_guide/cryptodev_lib.html)). - -When DPDK support is enabled and there are enough Cryptodev resources for all workers, the node graph is reconfigured by adding and changing default next nodes. - -The following nodes are added: -* dpdk-crypto-input : polling input node, basically dequeuing from crypto devices. -* dpdk-esp-encrypt : internal node. -* dpdk-esp-decrypt : internal node. -* dpdk-esp-encrypt-post : internal node. -* dpdk-esp-decrypt-post : internal node. 
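As a rough sketch of how such a graph rewiring can be expressed (illustration only, assuming the generic vlib graph API; the helper function name below is hypothetical and this is not the plugin's actual initialization code):

```
/*
 * Illustration only: add a next-node arc so "esp-encrypt" can hand
 * packets to "dpdk-esp-encrypt".  The function name is hypothetical;
 * per the description above, the plugin performs the equivalent
 * rewiring only when there are enough Cryptodev resources for all
 * workers.
 */
#include <vlib/vlib.h>

static clib_error_t *
example_redirect_esp_encrypt (vlib_main_t * vm)
{
  vlib_node_t *esp = vlib_get_node_by_name (vm, (u8 *) "esp-encrypt");
  vlib_node_t *dpdk_esp = vlib_get_node_by_name (vm, (u8 *) "dpdk-esp-encrypt");
  u32 next_slot;

  if (esp == 0 || dpdk_esp == 0)
    return clib_error_return (0, "ESP nodes not found");

  /* Create (or look up) the arc esp-encrypt -> dpdk-esp-encrypt and
     keep the returned slot so it can be used as the default next index. */
  next_slot = vlib_node_add_next (vm, esp->index, dpdk_esp->index);
  (void) next_slot;

  return 0;
}
```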
- -Set new default next nodes: -* for esp encryption: esp-encrypt -> dpdk-esp-encrypt -* for esp decryption: esp-decrypt -> dpdk-esp-decrypt - - -### How to enable VPP IPSec with DPDK Cryptodev support - -DPDK Cryptodev is supported in DPDK-enabled VPP. -By default, only HW Cryptodev is supported but needs to be explicitly enabled with the following config option: - -``` -dpdk { - enable-cryptodev -} -``` - -To enable SW Cryptodev support (AESNI-MB-PMD and GCM-PMD), we need the following env option: - - vpp_uses_dpdk_cryptodev_sw=yes - -A couple of ways to achieve this: -* uncomment/add it in the platforms config (i.e. build-data/platforms/vpp.mk) -* set the option when building vpp (i.e. make vpp_uses_dpdk_cryptodev_sw=yes build-release) - -Enabling SW Cryptodev support means that you need to pre-build the crypto libraries required by those SW Cryptodev PMDs. - - -### Crypto Resources allocation - -VPP allocates crypto resources based on a best effort approach: -* first allocate Hardware crypto resources, then Software. -* if there are not enough crypto resources for all workers, the graph node is not modified, therefore the default VPP IPsec implementation based on OpenSSL is used. The following message is displayed: - - 0: dpdk_ipsec_init: not enough cryptodevs for ipsec - - -### Configuration example - -To enable DPDK Cryptodev the user just needs to provide the startup.conf option -as mentioned previously. - -Example startup.conf: - -``` -dpdk { - socket-mem 1024,1024 - num-mbufs 131072 - dev 0000:81:00.0 - dev 0000:81:00.1 - enable-cryptodev - dev 0000:85:01.0 - dev 0000:85:01.1 - vdev cryptodev_aesni_mb_pmd,socket_id=1 - vdev cryptodev_aesni_mb_pmd,socket_id=1 -} -``` - -In the above configuration: -* 0000:85:01.0 and 0000:85:01.1 are crypto BDFs; they require the same driver binding as DPDK Ethernet devices but do not support any extra configuration options. -* Two AESNI-MB Software Cryptodev PMDs are created in NUMA node 1. - -For further details refer to the [DPDK Crypto Device Driver documentation](http://dpdk.org/doc/guides/cryptodevs/index.html). - -### Operational data - -The following CLI command displays the Cryptodev/Worker mapping: - - show crypto device mapping [verbose] diff --git a/src/vnet/devices/dpdk/ipsec/esp.h b/src/vnet/devices/dpdk/ipsec/esp.h deleted file mode 100644 index d0b27618..00000000 --- a/src/vnet/devices/dpdk/ipsec/esp.h +++ /dev/null @@ -1,249 +0,0 @@ -/* - * Copyright (c) 2016 Intel and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#ifndef __DPDK_ESP_H__ -#define __DPDK_ESP_H__ - -#include -#include -#include - -typedef struct -{ - enum rte_crypto_cipher_algorithm algo; - u8 key_len; - u8 iv_len; -} dpdk_esp_crypto_alg_t; - -typedef struct -{ - enum rte_crypto_auth_algorithm algo; - u8 trunc_size; -} dpdk_esp_integ_alg_t; - -typedef struct -{ - dpdk_esp_crypto_alg_t *esp_crypto_algs; - dpdk_esp_integ_alg_t *esp_integ_algs; -} dpdk_esp_main_t; - -dpdk_esp_main_t dpdk_esp_main; - -static_always_inline void -dpdk_esp_init () -{ - dpdk_esp_main_t *em = &dpdk_esp_main; - dpdk_esp_integ_alg_t *i; - dpdk_esp_crypto_alg_t *c; - - vec_validate (em->esp_crypto_algs, IPSEC_CRYPTO_N_ALG - 1); - - c = &em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_CBC_128]; - c->algo = RTE_CRYPTO_CIPHER_AES_CBC; - c->key_len = 16; - c->iv_len = 16; - - c = &em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_CBC_192]; - c->algo = RTE_CRYPTO_CIPHER_AES_CBC; - c->key_len = 24; - c->iv_len = 16; - - c = &em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_CBC_256]; - c->algo = RTE_CRYPTO_CIPHER_AES_CBC; - c->key_len = 32; - c->iv_len = 16; - - c = &em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_GCM_128]; - c->algo = RTE_CRYPTO_CIPHER_AES_GCM; - c->key_len = 16; - c->iv_len = 8; - - vec_validate (em->esp_integ_algs, IPSEC_INTEG_N_ALG - 1); - - i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA1_96]; - i->algo = RTE_CRYPTO_AUTH_SHA1_HMAC; - i->trunc_size = 12; - - i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_256_96]; - i->algo = RTE_CRYPTO_AUTH_SHA256_HMAC; - i->trunc_size = 12; - - i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_256_128]; - i->algo = RTE_CRYPTO_AUTH_SHA256_HMAC; - i->trunc_size = 16; - - i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_384_192]; - i->algo = RTE_CRYPTO_AUTH_SHA384_HMAC; - i->trunc_size = 24; - - i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_512_256]; - i->algo = RTE_CRYPTO_AUTH_SHA512_HMAC; - i->trunc_size = 32; - - i = &em->esp_integ_algs[IPSEC_INTEG_ALG_AES_GCM_128]; - i->algo = RTE_CRYPTO_AUTH_AES_GCM; - i->trunc_size = 16; -} - -static_always_inline int -translate_crypto_algo (ipsec_crypto_alg_t crypto_algo, - struct rte_crypto_sym_xform *cipher_xform) -{ - switch (crypto_algo) - { - case IPSEC_CRYPTO_ALG_NONE: - cipher_xform->cipher.algo = RTE_CRYPTO_CIPHER_NULL; - break; - case IPSEC_CRYPTO_ALG_AES_CBC_128: - case IPSEC_CRYPTO_ALG_AES_CBC_192: - case IPSEC_CRYPTO_ALG_AES_CBC_256: - cipher_xform->cipher.algo = RTE_CRYPTO_CIPHER_AES_CBC; - break; - case IPSEC_CRYPTO_ALG_AES_GCM_128: - cipher_xform->cipher.algo = RTE_CRYPTO_CIPHER_AES_GCM; - break; - default: - return -1; - } - - cipher_xform->type = RTE_CRYPTO_SYM_XFORM_CIPHER; - - return 0; -} - -static_always_inline int -translate_integ_algo (ipsec_integ_alg_t integ_alg, - struct rte_crypto_sym_xform *auth_xform, int use_esn) -{ - switch (integ_alg) - { - case IPSEC_INTEG_ALG_NONE: - auth_xform->auth.algo = RTE_CRYPTO_AUTH_NULL; - auth_xform->auth.digest_length = 0; - break; - case IPSEC_INTEG_ALG_SHA1_96: - auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA1_HMAC; - auth_xform->auth.digest_length = 12; - break; - case IPSEC_INTEG_ALG_SHA_256_96: - auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA256_HMAC; - auth_xform->auth.digest_length = 12; - break; - case IPSEC_INTEG_ALG_SHA_256_128: - auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA256_HMAC; - auth_xform->auth.digest_length = 16; - break; - case IPSEC_INTEG_ALG_SHA_384_192: - auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA384_HMAC; - auth_xform->auth.digest_length = 24; - break; - case IPSEC_INTEG_ALG_SHA_512_256: - auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA512_HMAC; - 
auth_xform->auth.digest_length = 32; - break; - case IPSEC_INTEG_ALG_AES_GCM_128: - auth_xform->auth.algo = RTE_CRYPTO_AUTH_AES_GCM; - auth_xform->auth.digest_length = 16; - auth_xform->auth.add_auth_data_length = use_esn ? 12 : 8; - break; - default: - return -1; - } - - auth_xform->type = RTE_CRYPTO_SYM_XFORM_AUTH; - - return 0; -} - -static_always_inline int -create_sym_sess (ipsec_sa_t * sa, crypto_sa_session_t * sa_sess, - u8 is_outbound) -{ - u32 cpu_index = os_get_cpu_number (); - dpdk_crypto_main_t *dcm = &dpdk_crypto_main; - crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index]; - struct rte_crypto_sym_xform cipher_xform = { 0 }; - struct rte_crypto_sym_xform auth_xform = { 0 }; - struct rte_crypto_sym_xform *xfs; - uword key = 0, *data; - crypto_worker_qp_key_t *p_key = (crypto_worker_qp_key_t *) & key; - - if (sa->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) - { - sa->crypto_key_len -= 4; - clib_memcpy (&sa->salt, &sa->crypto_key[sa->crypto_key_len], 4); - } - else - { - u32 seed = (u32) clib_cpu_time_now (); - sa->salt = random_u32 (&seed); - } - - cipher_xform.type = RTE_CRYPTO_SYM_XFORM_CIPHER; - cipher_xform.cipher.key.data = sa->crypto_key; - cipher_xform.cipher.key.length = sa->crypto_key_len; - - auth_xform.type = RTE_CRYPTO_SYM_XFORM_AUTH; - auth_xform.auth.key.data = sa->integ_key; - auth_xform.auth.key.length = sa->integ_key_len; - - if (translate_crypto_algo (sa->crypto_alg, &cipher_xform) < 0) - return -1; - p_key->cipher_algo = cipher_xform.cipher.algo; - - if (translate_integ_algo (sa->integ_alg, &auth_xform, sa->use_esn) < 0) - return -1; - p_key->auth_algo = auth_xform.auth.algo; - - if (is_outbound) - { - cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_ENCRYPT; - auth_xform.auth.op = RTE_CRYPTO_AUTH_OP_GENERATE; - cipher_xform.next = &auth_xform; - xfs = &cipher_xform; - } - else - { - cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_DECRYPT; - auth_xform.auth.op = RTE_CRYPTO_AUTH_OP_VERIFY; - auth_xform.next = &cipher_xform; - xfs = &auth_xform; - } - - p_key->is_outbound = is_outbound; - - data = hash_get (cwm->algo_qp_map, key); - if (!data) - return -1; - - sa_sess->sess = - rte_cryptodev_sym_session_create (cwm->qp_data[*data].dev_id, xfs); - - if (!sa_sess->sess) - return -1; - - sa_sess->qp_index = (u8) * data; - - return 0; -} - -#endif /* __DPDK_ESP_H__ */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/ipsec/esp_decrypt.c b/src/vnet/devices/dpdk/ipsec/esp_decrypt.c deleted file mode 100644 index 76007609..00000000 --- a/src/vnet/devices/dpdk/ipsec/esp_decrypt.c +++ /dev/null @@ -1,594 +0,0 @@ -/* - * esp_decrypt.c : IPSec ESP Decrypt node using DPDK Cryptodev - * - * Copyright (c) 2016 Intel and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include - -#include -#include -#include -#include -#include - -#define foreach_esp_decrypt_next \ -_(DROP, "error-drop") \ -_(IP4_INPUT, "ip4-input") \ -_(IP6_INPUT, "ip6-input") - -#define _(v, s) ESP_DECRYPT_NEXT_##v, -typedef enum { - foreach_esp_decrypt_next -#undef _ - ESP_DECRYPT_N_NEXT, -} esp_decrypt_next_t; - -#define foreach_esp_decrypt_error \ - _(RX_PKTS, "ESP pkts received") \ - _(DECRYPTION_FAILED, "ESP decryption failed") \ - _(REPLAY, "SA replayed packet") \ - _(NOT_IP, "Not IP packet (dropped)") \ - _(ENQ_FAIL, "Enqueue failed (buffer full)") \ - _(NO_CRYPTODEV, "Cryptodev not configured") \ - _(BAD_LEN, "Invalid ciphertext length") \ - _(UNSUPPORTED, "Cipher/Auth not supported") - - -typedef enum { -#define _(sym,str) ESP_DECRYPT_ERROR_##sym, - foreach_esp_decrypt_error -#undef _ - ESP_DECRYPT_N_ERROR, -} esp_decrypt_error_t; - -static char * esp_decrypt_error_strings[] = { -#define _(sym,string) string, - foreach_esp_decrypt_error -#undef _ -}; - -vlib_node_registration_t dpdk_esp_decrypt_node; - -typedef struct { - ipsec_crypto_alg_t crypto_alg; - ipsec_integ_alg_t integ_alg; -} esp_decrypt_trace_t; - -/* packet trace format function */ -static u8 * format_esp_decrypt_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - esp_decrypt_trace_t * t = va_arg (*args, esp_decrypt_trace_t *); - - s = format (s, "esp: crypto %U integrity %U", - format_ipsec_crypto_alg, t->crypto_alg, - format_ipsec_integ_alg, t->integ_alg); - return s; -} - -static uword -dpdk_esp_decrypt_node_fn (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * from_frame) -{ - u32 n_left_from, *from, *to_next, next_index; - ipsec_main_t *im = &ipsec_main; - u32 cpu_index = os_get_cpu_number(); - dpdk_crypto_main_t * dcm = &dpdk_crypto_main; - dpdk_esp_main_t * em = &dpdk_esp_main; - u32 i; - - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - - if (PREDICT_FALSE(!dcm->workers_main)) - { - vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, - ESP_DECRYPT_ERROR_NO_CRYPTODEV, n_left_from); - vlib_buffer_free(vm, from, n_left_from); - return n_left_from; - } - - crypto_worker_main_t *cwm = vec_elt_at_index(dcm->workers_main, cpu_index); - u32 n_qps = vec_len(cwm->qp_data); - struct rte_crypto_op ** cops_to_enq[n_qps]; - u32 n_cop_qp[n_qps], * bi_to_enq[n_qps]; - - for (i = 0; i < n_qps; i++) - { - bi_to_enq[i] = cwm->qp_data[i].bi; - cops_to_enq[i] = cwm->qp_data[i].cops; - } - - memset(n_cop_qp, 0, n_qps * sizeof(u32)); - - crypto_alloc_cops(); - - next_index = ESP_DECRYPT_NEXT_DROP; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0, sa_index0 = ~0, seq, icv_size, iv_size; - vlib_buffer_t * b0; - esp_header_t * esp0; - ipsec_sa_t * sa0; - struct rte_mbuf * mb0 = 0; - const int BLOCK_SIZE = 16; - crypto_sa_session_t * sa_sess; - void * sess; - u16 qp_index; - struct rte_crypto_op * cop = 0; - - bi0 = from[0]; - from += 1; - n_left_from -= 1; - - b0 = vlib_get_buffer (vm, bi0); - esp0 = vlib_buffer_get_current (b0); - - sa_index0 = vnet_buffer(b0)->ipsec.sad_index; - sa0 = pool_elt_at_index (im->sad, sa_index0); - - seq = clib_host_to_net_u32(esp0->seq); - - /* anti-replay check */ - if (sa0->use_anti_replay) - { - int rv = 0; - - if (PREDICT_TRUE(sa0->use_esn)) - rv = 
esp_replay_check_esn(sa0, seq); - else - rv = esp_replay_check(sa0, seq); - - if (PREDICT_FALSE(rv)) - { - clib_warning ("anti-replay SPI %u seq %u", sa0->spi, seq); - vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, - ESP_DECRYPT_ERROR_REPLAY, 1); - to_next[0] = bi0; - to_next += 1; - n_left_to_next -= 1; - goto trace; - } - } - - sa0->total_data_size += b0->current_length; - - if (PREDICT_FALSE(sa0->integ_alg == IPSEC_INTEG_ALG_NONE) || - PREDICT_FALSE(sa0->crypto_alg == IPSEC_CRYPTO_ALG_NONE)) - { - clib_warning ("SPI %u : only cipher + auth supported", sa0->spi); - vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, - ESP_DECRYPT_ERROR_UNSUPPORTED, 1); - to_next[0] = bi0; - to_next += 1; - n_left_to_next -= 1; - goto trace; - } - - sa_sess = pool_elt_at_index(cwm->sa_sess_d[0], sa_index0); - - if (PREDICT_FALSE(!sa_sess->sess)) - { - int ret = create_sym_sess(sa0, sa_sess, 0); - - if (PREDICT_FALSE (ret)) - { - to_next[0] = bi0; - to_next += 1; - n_left_to_next -= 1; - goto trace; - } - } - - sess = sa_sess->sess; - qp_index = sa_sess->qp_index; - - ASSERT (vec_len (vec_elt (cwm->qp_data, qp_index).free_cops) > 0); - cop = vec_pop (vec_elt (cwm->qp_data, qp_index).free_cops); - ASSERT (cop->status == RTE_CRYPTO_OP_STATUS_NOT_PROCESSED); - - cops_to_enq[qp_index][0] = cop; - cops_to_enq[qp_index] += 1; - n_cop_qp[qp_index] += 1; - bi_to_enq[qp_index][0] = bi0; - bi_to_enq[qp_index] += 1; - - rte_crypto_op_attach_sym_session(cop, sess); - - icv_size = em->esp_integ_algs[sa0->integ_alg].trunc_size; - iv_size = em->esp_crypto_algs[sa0->crypto_alg].iv_len; - - /* Convert vlib buffer to mbuf */ - mb0 = rte_mbuf_from_vlib_buffer(b0); - mb0->data_len = b0->current_length; - mb0->pkt_len = b0->current_length; - mb0->data_off = RTE_PKTMBUF_HEADROOM + b0->current_data; - - /* Outer IP header has already been stripped */ - u16 payload_len = rte_pktmbuf_pkt_len(mb0) - sizeof (esp_header_t) - - iv_size - icv_size; - - if ((payload_len & (BLOCK_SIZE - 1)) || (payload_len <= 0)) - { - clib_warning ("payload %u not multiple of %d\n", - payload_len, BLOCK_SIZE); - vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, - ESP_DECRYPT_ERROR_BAD_LEN, 1); - vec_add (vec_elt (cwm->qp_data, qp_index).free_cops, &cop, 1); - bi_to_enq[qp_index] -= 1; - cops_to_enq[qp_index] -= 1; - n_cop_qp[qp_index] -= 1; - to_next[0] = bi0; - to_next += 1; - n_left_to_next -= 1; - goto trace; - } - - struct rte_crypto_sym_op *sym_cop = (struct rte_crypto_sym_op *)(cop + 1); - - sym_cop->m_src = mb0; - sym_cop->cipher.data.offset = sizeof (esp_header_t) + iv_size; - sym_cop->cipher.data.length = payload_len; - - u8 *iv = rte_pktmbuf_mtod_offset(mb0, void*, sizeof (esp_header_t)); - dpdk_cop_priv_t * priv = (dpdk_cop_priv_t *)(sym_cop + 1); - - if (sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) - { - dpdk_gcm_cnt_blk *icb = &priv->cb; - icb->salt = sa0->salt; - clib_memcpy(icb->iv, iv, 8); - icb->cnt = clib_host_to_net_u32(1); - sym_cop->cipher.iv.data = (u8 *)icb; - sym_cop->cipher.iv.phys_addr = cop->phys_addr + - (uintptr_t)icb - (uintptr_t)cop; - sym_cop->cipher.iv.length = 16; - - u8 *aad = priv->aad; - clib_memcpy(aad, iv - sizeof(esp_header_t), 8); - sym_cop->auth.aad.data = aad; - sym_cop->auth.aad.phys_addr = cop->phys_addr + - (uintptr_t)aad - (uintptr_t)cop; - if (sa0->use_esn) - { - *((u32*)&aad[8]) = sa0->seq_hi; - sym_cop->auth.aad.length = 12; - } - else - { - sym_cop->auth.aad.length = 8; - } - - sym_cop->auth.digest.data = rte_pktmbuf_mtod_offset(mb0, void*, - 
rte_pktmbuf_pkt_len(mb0) - icv_size); - sym_cop->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset(mb0, - rte_pktmbuf_pkt_len(mb0) - icv_size); - sym_cop->auth.digest.length = icv_size; - - } - else - { - sym_cop->cipher.iv.data = rte_pktmbuf_mtod_offset(mb0, void*, - sizeof (esp_header_t)); - sym_cop->cipher.iv.phys_addr = rte_pktmbuf_mtophys_offset(mb0, - sizeof (esp_header_t)); - sym_cop->cipher.iv.length = iv_size; - - if (sa0->use_esn) - { - dpdk_cop_priv_t* priv = (dpdk_cop_priv_t*) (sym_cop + 1); - u8* payload_end = rte_pktmbuf_mtod_offset( - mb0, u8*, sizeof(esp_header_t) + iv_size + payload_len); - - clib_memcpy (priv->icv, payload_end, icv_size); - *((u32*) payload_end) = sa0->seq_hi; - sym_cop->auth.data.offset = 0; - sym_cop->auth.data.length = sizeof(esp_header_t) + iv_size - + payload_len + sizeof(sa0->seq_hi); - sym_cop->auth.digest.data = priv->icv; - sym_cop->auth.digest.phys_addr = cop->phys_addr - + (uintptr_t) priv->icv - (uintptr_t) cop; - sym_cop->auth.digest.length = icv_size; - } - else - { - sym_cop->auth.data.offset = 0; - sym_cop->auth.data.length = sizeof(esp_header_t) + - iv_size + payload_len; - - sym_cop->auth.digest.data = rte_pktmbuf_mtod_offset(mb0, void*, - rte_pktmbuf_pkt_len(mb0) - icv_size); - sym_cop->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset(mb0, - rte_pktmbuf_pkt_len(mb0) - icv_size); - sym_cop->auth.digest.length = icv_size; - } - } - -trace: - if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) - { - esp_decrypt_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); - tr->crypto_alg = sa0->crypto_alg; - tr->integ_alg = sa0->integ_alg; - } - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, - ESP_DECRYPT_ERROR_RX_PKTS, - from_frame->n_vectors); - crypto_qp_data_t *qpd; - /* *INDENT-OFF* */ - vec_foreach_index (i, cwm->qp_data) - { - u32 enq; - - qpd = vec_elt_at_index(cwm->qp_data, i); - enq = rte_cryptodev_enqueue_burst(qpd->dev_id, qpd->qp_id, - qpd->cops, n_cop_qp[i]); - qpd->inflights += enq; - - if (PREDICT_FALSE(enq < n_cop_qp[i])) - { - crypto_free_cop (qpd, &qpd->cops[enq], n_cop_qp[i] - enq); - vlib_buffer_free (vm, &qpd->bi[enq], n_cop_qp[i] - enq); - - vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, - ESP_DECRYPT_ERROR_ENQ_FAIL, - n_cop_qp[i] - enq); - } - } - /* *INDENT-ON* */ - - return from_frame->n_vectors; -} - -VLIB_REGISTER_NODE (dpdk_esp_decrypt_node) = { - .function = dpdk_esp_decrypt_node_fn, - .name = "dpdk-esp-decrypt", - .vector_size = sizeof (u32), - .format_trace = format_esp_decrypt_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - - .n_errors = ARRAY_LEN(esp_decrypt_error_strings), - .error_strings = esp_decrypt_error_strings, - - .n_next_nodes = ESP_DECRYPT_N_NEXT, - .next_nodes = { -#define _(s,n) [ESP_DECRYPT_NEXT_##s] = n, - foreach_esp_decrypt_next -#undef _ - }, -}; - -VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_decrypt_node, dpdk_esp_decrypt_node_fn) - -/* - * Decrypt Post Node - */ - -#define foreach_esp_decrypt_post_error \ - _(PKTS, "ESP post pkts") - -typedef enum { -#define _(sym,str) ESP_DECRYPT_POST_ERROR_##sym, - foreach_esp_decrypt_post_error -#undef _ - ESP_DECRYPT_POST_N_ERROR, -} esp_decrypt_post_error_t; - -static char * esp_decrypt_post_error_strings[] = { -#define _(sym,string) string, - foreach_esp_decrypt_post_error -#undef _ -}; - -vlib_node_registration_t dpdk_esp_decrypt_post_node; - -static u8 * format_esp_decrypt_post_trace (u8 * s, va_list * args) -{ - return s; -} - -static uword 
-dpdk_esp_decrypt_post_node_fn (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * from_frame) -{ - u32 n_left_from, *from, *to_next = 0, next_index; - ipsec_sa_t * sa0; - u32 sa_index0 = ~0; - ipsec_main_t *im = &ipsec_main; - dpdk_esp_main_t *em = &dpdk_esp_main; - - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - - next_index = node->cached_next_index; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from > 0 && n_left_to_next > 0) - { - esp_footer_t * f0; - u32 bi0, next0, icv_size, iv_size; - vlib_buffer_t * b0 = 0; - ip4_header_t *ih4 = 0, *oh4 = 0; - ip6_header_t *ih6 = 0, *oh6 = 0; - u8 tunnel_mode = 1; - u8 transport_ip6 = 0; - - next0 = ESP_DECRYPT_NEXT_DROP; - - bi0 = from[0]; - from += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - - sa_index0 = vnet_buffer(b0)->ipsec.sad_index; - sa0 = pool_elt_at_index (im->sad, sa_index0); - - to_next[0] = bi0; - to_next += 1; - - icv_size = em->esp_integ_algs[sa0->integ_alg].trunc_size; - iv_size = em->esp_crypto_algs[sa0->crypto_alg].iv_len; - - if (sa0->use_anti_replay) - { - esp_header_t * esp0 = vlib_buffer_get_current (b0); - u32 seq; - seq = clib_host_to_net_u32(esp0->seq); - if (PREDICT_TRUE(sa0->use_esn)) - esp_replay_advance_esn(sa0, seq); - else - esp_replay_advance(sa0, seq); - } - - ih4 = (ip4_header_t *) (b0->data + sizeof(ethernet_header_t)); - vlib_buffer_advance (b0, sizeof (esp_header_t) + iv_size); - - b0->current_length -= (icv_size + 2); - b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID; - f0 = (esp_footer_t *) ((u8 *) vlib_buffer_get_current (b0) + - b0->current_length); - b0->current_length -= f0->pad_length; - - /* transport mode */ - if (PREDICT_FALSE(!sa0->is_tunnel && !sa0->is_tunnel_ip6)) - { - tunnel_mode = 0; - - if (PREDICT_TRUE((ih4->ip_version_and_header_length & 0xF0) != 0x40)) - { - if (PREDICT_TRUE((ih4->ip_version_and_header_length & 0xF0) == 0x60)) - transport_ip6 = 1; - else - { - clib_warning("next header: 0x%x", f0->next_header); - vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, - ESP_DECRYPT_ERROR_NOT_IP, 1); - goto trace; - } - } - } - - if (PREDICT_TRUE (tunnel_mode)) - { - if (PREDICT_TRUE(f0->next_header == IP_PROTOCOL_IP_IN_IP)) - next0 = ESP_DECRYPT_NEXT_IP4_INPUT; - else if (f0->next_header == IP_PROTOCOL_IPV6) - next0 = ESP_DECRYPT_NEXT_IP6_INPUT; - else - { - clib_warning("next header: 0x%x", f0->next_header); - vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, - ESP_DECRYPT_ERROR_DECRYPTION_FAILED, - 1); - goto trace; - } - } - /* transport mode */ - else - { - if (PREDICT_FALSE(transport_ip6)) - { - ih6 = (ip6_header_t *) (b0->data + sizeof(ethernet_header_t)); - vlib_buffer_advance (b0, -sizeof(ip6_header_t)); - oh6 = vlib_buffer_get_current (b0); - memmove(oh6, ih6, sizeof(ip6_header_t)); - - next0 = ESP_DECRYPT_NEXT_IP6_INPUT; - oh6->protocol = f0->next_header; - oh6->payload_length = - clib_host_to_net_u16 ( - vlib_buffer_length_in_chain(vm, b0) - - sizeof (ip6_header_t)); - } - else - { - vlib_buffer_advance (b0, -sizeof(ip4_header_t)); - oh4 = vlib_buffer_get_current (b0); - memmove(oh4, ih4, sizeof(ip4_header_t)); - - next0 = ESP_DECRYPT_NEXT_IP4_INPUT; - oh4->ip_version_and_header_length = 0x45; - oh4->fragment_id = 0; - oh4->flags_and_fragment_offset = 0; - oh4->protocol = f0->next_header; - oh4->length = clib_host_to_net_u16 ( - vlib_buffer_length_in_chain (vm, b0)); - 
oh4->checksum = ip4_header_checksum (oh4); - } - } - - vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32)~0; - -trace: - if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) - { - esp_decrypt_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); - tr->crypto_alg = sa0->crypto_alg; - tr->integ_alg = sa0->integ_alg; - } - - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, bi0, next0); - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - vlib_node_increment_counter (vm, dpdk_esp_decrypt_post_node.index, - ESP_DECRYPT_POST_ERROR_PKTS, - from_frame->n_vectors); - - return from_frame->n_vectors; -} - -VLIB_REGISTER_NODE (dpdk_esp_decrypt_post_node) = { - .function = dpdk_esp_decrypt_post_node_fn, - .name = "dpdk-esp-decrypt-post", - .vector_size = sizeof (u32), - .format_trace = format_esp_decrypt_post_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - - .n_errors = ARRAY_LEN(esp_decrypt_post_error_strings), - .error_strings = esp_decrypt_post_error_strings, - - .n_next_nodes = ESP_DECRYPT_N_NEXT, - .next_nodes = { -#define _(s,n) [ESP_DECRYPT_NEXT_##s] = n, - foreach_esp_decrypt_next -#undef _ - }, -}; - -VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_decrypt_post_node, dpdk_esp_decrypt_post_node_fn) diff --git a/src/vnet/devices/dpdk/ipsec/esp_encrypt.c b/src/vnet/devices/dpdk/ipsec/esp_encrypt.c deleted file mode 100644 index 6eb1afc9..00000000 --- a/src/vnet/devices/dpdk/ipsec/esp_encrypt.c +++ /dev/null @@ -1,609 +0,0 @@ -/* - * esp_encrypt.c : IPSec ESP encrypt node using DPDK Cryptodev - * - * Copyright (c) 2016 Intel and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include - -#include -#include -#include -#include -#include - -#define foreach_esp_encrypt_next \ -_(DROP, "error-drop") \ -_(IP4_LOOKUP, "ip4-lookup") \ -_(IP6_LOOKUP, "ip6-lookup") \ -_(INTERFACE_OUTPUT, "interface-output") - -#define _(v, s) ESP_ENCRYPT_NEXT_##v, -typedef enum -{ - foreach_esp_encrypt_next -#undef _ - ESP_ENCRYPT_N_NEXT, -} esp_encrypt_next_t; - -#define foreach_esp_encrypt_error \ - _(RX_PKTS, "ESP pkts received") \ - _(SEQ_CYCLED, "sequence number cycled") \ - _(ENQ_FAIL, "Enqueue failed (buffer full)") \ - _(NO_CRYPTODEV, "Cryptodev not configured") \ - _(UNSUPPORTED, "Cipher/Auth not supported") - - -typedef enum -{ -#define _(sym,str) ESP_ENCRYPT_ERROR_##sym, - foreach_esp_encrypt_error -#undef _ - ESP_ENCRYPT_N_ERROR, -} esp_encrypt_error_t; - -static char *esp_encrypt_error_strings[] = { -#define _(sym,string) string, - foreach_esp_encrypt_error -#undef _ -}; - -vlib_node_registration_t dpdk_esp_encrypt_node; - -typedef struct -{ - u32 spi; - u32 seq; - ipsec_crypto_alg_t crypto_alg; - ipsec_integ_alg_t integ_alg; -} esp_encrypt_trace_t; - -/* packet trace format function */ -static u8 * -format_esp_encrypt_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - esp_encrypt_trace_t *t = va_arg (*args, esp_encrypt_trace_t *); - - s = format (s, "esp: spi %u seq %u crypto %U integrity %U", - t->spi, t->seq, - format_ipsec_crypto_alg, t->crypto_alg, - format_ipsec_integ_alg, t->integ_alg); - return s; -} - -static uword -dpdk_esp_encrypt_node_fn (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * from_frame) -{ - u32 n_left_from, *from, *to_next, next_index; - ipsec_main_t *im = &ipsec_main; - u32 cpu_index = os_get_cpu_number (); - dpdk_crypto_main_t *dcm = &dpdk_crypto_main; - dpdk_esp_main_t *em = &dpdk_esp_main; - u32 i; - - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - - if (PREDICT_FALSE (!dcm->workers_main)) - { - /* Likely there are not enough cryptodevs, so drop frame */ - vlib_node_increment_counter (vm, dpdk_esp_encrypt_node.index, - ESP_ENCRYPT_ERROR_NO_CRYPTODEV, - n_left_from); - vlib_buffer_free (vm, from, n_left_from); - return n_left_from; - } - - crypto_worker_main_t *cwm = vec_elt_at_index (dcm->workers_main, cpu_index); - u32 n_qps = vec_len (cwm->qp_data); - struct rte_crypto_op **cops_to_enq[n_qps]; - u32 n_cop_qp[n_qps], *bi_to_enq[n_qps]; - - for (i = 0; i < n_qps; i++) - { - bi_to_enq[i] = cwm->qp_data[i].bi; - cops_to_enq[i] = cwm->qp_data[i].cops; - } - - memset (n_cop_qp, 0, n_qps * sizeof (u32)); - - crypto_alloc_cops (); - - next_index = ESP_ENCRYPT_NEXT_DROP; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0, next0; - vlib_buffer_t *b0 = 0; - u32 sa_index0; - ipsec_sa_t *sa0; - ip4_and_esp_header_t *ih0, *oh0 = 0; - ip6_and_esp_header_t *ih6_0, *oh6_0 = 0; - struct rte_mbuf *mb0 = 0; - esp_footer_t *f0; - u8 is_ipv6; - u8 ip_hdr_size; - u8 next_hdr_type; - u8 transport_mode = 0; - const int BLOCK_SIZE = 16; - u32 iv_size; - u16 orig_sz; - crypto_sa_session_t *sa_sess; - void *sess; - struct rte_crypto_op *cop = 0; - u16 qp_index; - - bi0 = from[0]; - from += 1; - n_left_from -= 1; - - b0 = vlib_get_buffer (vm, bi0); - sa_index0 = vnet_buffer (b0)->ipsec.sad_index; - sa0 = pool_elt_at_index (im->sad, 
sa_index0); - - if (PREDICT_FALSE (esp_seq_advance (sa0))) - { - clib_warning ("sequence number counter has cycled SPI %u", - sa0->spi); - vlib_node_increment_counter (vm, dpdk_esp_encrypt_node.index, - ESP_ENCRYPT_ERROR_SEQ_CYCLED, 1); - //TODO: rekey SA - to_next[0] = bi0; - to_next += 1; - n_left_to_next -= 1; - goto trace; - } - - sa0->total_data_size += b0->current_length; - - sa_sess = pool_elt_at_index (cwm->sa_sess_d[1], sa_index0); - if (PREDICT_FALSE (!sa_sess->sess)) - { - int ret = create_sym_sess (sa0, sa_sess, 1); - - if (PREDICT_FALSE (ret)) - { - to_next[0] = bi0; - to_next += 1; - n_left_to_next -= 1; - goto trace; - } - } - - qp_index = sa_sess->qp_index; - sess = sa_sess->sess; - - ASSERT (vec_len (vec_elt (cwm->qp_data, qp_index).free_cops) > 0); - cop = vec_pop (vec_elt (cwm->qp_data, qp_index).free_cops); - ASSERT (cop->status == RTE_CRYPTO_OP_STATUS_NOT_PROCESSED); - - cops_to_enq[qp_index][0] = cop; - cops_to_enq[qp_index] += 1; - n_cop_qp[qp_index] += 1; - bi_to_enq[qp_index][0] = bi0; - bi_to_enq[qp_index] += 1; - - ssize_t adv; - iv_size = em->esp_crypto_algs[sa0->crypto_alg].iv_len; - ih0 = vlib_buffer_get_current (b0); - orig_sz = b0->current_length; - is_ipv6 = (ih0->ip4.ip_version_and_header_length & 0xF0) == 0x60; - /* is ipv6 */ - if (PREDICT_TRUE (sa0->is_tunnel)) - { - if (PREDICT_TRUE (!is_ipv6)) - adv = -sizeof (ip4_and_esp_header_t); - else - adv = -sizeof (ip6_and_esp_header_t); - } - else - { - adv = -sizeof (esp_header_t); - if (PREDICT_TRUE (!is_ipv6)) - orig_sz -= sizeof (ip4_header_t); - else - orig_sz -= sizeof (ip6_header_t); - } - - /*transport mode save the eth header before it is overwritten */ - if (PREDICT_FALSE (!sa0->is_tunnel)) - { - ethernet_header_t *ieh0 = (ethernet_header_t *) - ((u8 *) vlib_buffer_get_current (b0) - - sizeof (ethernet_header_t)); - ethernet_header_t *oeh0 = - (ethernet_header_t *) ((u8 *) ieh0 + (adv - iv_size)); - clib_memcpy (oeh0, ieh0, sizeof (ethernet_header_t)); - } - - vlib_buffer_advance (b0, adv - iv_size); - - /* XXX IP6/ip4 and IP4/IP6 not supported, only IP4/IP4 and IP6/IP6 */ - - /* is ipv6 */ - if (PREDICT_FALSE (is_ipv6)) - { - ih6_0 = (ip6_and_esp_header_t *) ih0; - ip_hdr_size = sizeof (ip6_header_t); - oh6_0 = vlib_buffer_get_current (b0); - - if (PREDICT_TRUE (sa0->is_tunnel)) - { - next_hdr_type = IP_PROTOCOL_IPV6; - oh6_0->ip6.ip_version_traffic_class_and_flow_label = - ih6_0->ip6.ip_version_traffic_class_and_flow_label; - } - else - { - next_hdr_type = ih6_0->ip6.protocol; - memmove (oh6_0, ih6_0, sizeof (ip6_header_t)); - } - - oh6_0->ip6.protocol = IP_PROTOCOL_IPSEC_ESP; - oh6_0->ip6.hop_limit = 254; - oh6_0->esp.spi = clib_net_to_host_u32 (sa0->spi); - oh6_0->esp.seq = clib_net_to_host_u32 (sa0->seq); - } - else - { - ip_hdr_size = sizeof (ip4_header_t); - oh0 = vlib_buffer_get_current (b0); - - if (PREDICT_TRUE (sa0->is_tunnel)) - { - next_hdr_type = IP_PROTOCOL_IP_IN_IP; - oh0->ip4.tos = ih0->ip4.tos; - } - else - { - next_hdr_type = ih0->ip4.protocol; - memmove (oh0, ih0, sizeof (ip4_header_t)); - } - - oh0->ip4.ip_version_and_header_length = 0x45; - oh0->ip4.fragment_id = 0; - oh0->ip4.flags_and_fragment_offset = 0; - oh0->ip4.ttl = 254; - oh0->ip4.protocol = IP_PROTOCOL_IPSEC_ESP; - oh0->esp.spi = clib_net_to_host_u32 (sa0->spi); - oh0->esp.seq = clib_net_to_host_u32 (sa0->seq); - } - - if (PREDICT_TRUE (sa0->is_tunnel && !sa0->is_tunnel_ip6)) - { - oh0->ip4.src_address.as_u32 = sa0->tunnel_src_addr.ip4.as_u32; - oh0->ip4.dst_address.as_u32 = sa0->tunnel_dst_addr.ip4.as_u32; - - /* in 
tunnel mode send it back to FIB */ - next0 = ESP_ENCRYPT_NEXT_IP4_LOOKUP; - vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; - } - else if (sa0->is_tunnel && sa0->is_tunnel_ip6) - { - oh6_0->ip6.src_address.as_u64[0] = - sa0->tunnel_src_addr.ip6.as_u64[0]; - oh6_0->ip6.src_address.as_u64[1] = - sa0->tunnel_src_addr.ip6.as_u64[1]; - oh6_0->ip6.dst_address.as_u64[0] = - sa0->tunnel_dst_addr.ip6.as_u64[0]; - oh6_0->ip6.dst_address.as_u64[1] = - sa0->tunnel_dst_addr.ip6.as_u64[1]; - - /* in tunnel mode send it back to FIB */ - next0 = ESP_ENCRYPT_NEXT_IP6_LOOKUP; - vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; - } - else - { - next0 = ESP_ENCRYPT_NEXT_INTERFACE_OUTPUT; - transport_mode = 1; - } - - ASSERT (sa0->crypto_alg < IPSEC_CRYPTO_N_ALG); - ASSERT (sa0->crypto_alg != IPSEC_CRYPTO_ALG_NONE); - - int blocks = 1 + (orig_sz + 1) / BLOCK_SIZE; - - /* pad packet in input buffer */ - u8 pad_bytes = BLOCK_SIZE * blocks - 2 - orig_sz; - u8 i; - u8 *padding = vlib_buffer_get_current (b0) + b0->current_length; - - for (i = 0; i < pad_bytes; ++i) - padding[i] = i + 1; - - f0 = vlib_buffer_get_current (b0) + b0->current_length + pad_bytes; - f0->pad_length = pad_bytes; - f0->next_header = next_hdr_type; - b0->current_length += pad_bytes + 2 + - em->esp_integ_algs[sa0->integ_alg].trunc_size; - - vnet_buffer (b0)->sw_if_index[VLIB_RX] = - vnet_buffer (b0)->sw_if_index[VLIB_RX]; - b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID; - - struct rte_crypto_sym_op *sym_cop; - sym_cop = (struct rte_crypto_sym_op *) (cop + 1); - - dpdk_cop_priv_t *priv = (dpdk_cop_priv_t *) (sym_cop + 1); - - vnet_buffer (b0)->unused[0] = next0; - - mb0 = rte_mbuf_from_vlib_buffer (b0); - mb0->data_len = b0->current_length; - mb0->pkt_len = b0->current_length; - mb0->data_off = RTE_PKTMBUF_HEADROOM + b0->current_data; - - rte_crypto_op_attach_sym_session (cop, sess); - - sym_cop->m_src = mb0; - - dpdk_gcm_cnt_blk *icb = &priv->cb; - icb->salt = sa0->salt; - icb->iv[0] = sa0->seq; - icb->iv[1] = sa0->seq_hi; - - if (sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) - { - icb->cnt = clib_host_to_net_u32 (1); - clib_memcpy (vlib_buffer_get_current (b0) + ip_hdr_size + - sizeof (esp_header_t), icb->iv, 8); - sym_cop->cipher.data.offset = - ip_hdr_size + sizeof (esp_header_t) + iv_size; - sym_cop->cipher.data.length = BLOCK_SIZE * blocks; - sym_cop->cipher.iv.length = 16; - } - else - { - sym_cop->cipher.data.offset = - ip_hdr_size + sizeof (esp_header_t); - sym_cop->cipher.data.length = BLOCK_SIZE * blocks + iv_size; - sym_cop->cipher.iv.length = iv_size; - } - - sym_cop->cipher.iv.data = (u8 *) icb; - sym_cop->cipher.iv.phys_addr = cop->phys_addr + (uintptr_t) icb - - (uintptr_t) cop; - - - ASSERT (sa0->integ_alg < IPSEC_INTEG_N_ALG); - ASSERT (sa0->integ_alg != IPSEC_INTEG_ALG_NONE); - - if (PREDICT_FALSE (sa0->integ_alg == IPSEC_INTEG_ALG_AES_GCM_128)) - { - u8 *aad = priv->aad; - clib_memcpy (aad, vlib_buffer_get_current (b0) + ip_hdr_size, - 8); - sym_cop->auth.aad.data = aad; - sym_cop->auth.aad.phys_addr = cop->phys_addr + - (uintptr_t) aad - (uintptr_t) cop; - - if (PREDICT_FALSE (sa0->use_esn)) - { - *((u32 *) & aad[8]) = sa0->seq_hi; - sym_cop->auth.aad.length = 12; - } - else - { - sym_cop->auth.aad.length = 8; - } - } - else - { - sym_cop->auth.data.offset = ip_hdr_size; - sym_cop->auth.data.length = b0->current_length - ip_hdr_size - - em->esp_integ_algs[sa0->integ_alg].trunc_size; - - if (PREDICT_FALSE (sa0->use_esn)) - { - u8 *payload_end = - vlib_buffer_get_current (b0) + b0->current_length; - *((u32 *) 
payload_end) = sa0->seq_hi; - sym_cop->auth.data.length += sizeof (sa0->seq_hi); - } - } - sym_cop->auth.digest.data = vlib_buffer_get_current (b0) + - b0->current_length - - em->esp_integ_algs[sa0->integ_alg].trunc_size; - sym_cop->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset (mb0, - b0->current_length - - - em->esp_integ_algs - [sa0->integ_alg].trunc_size); - sym_cop->auth.digest.length = - em->esp_integ_algs[sa0->integ_alg].trunc_size; - - - if (PREDICT_FALSE (is_ipv6)) - { - oh6_0->ip6.payload_length = - clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - - sizeof (ip6_header_t)); - } - else - { - oh0->ip4.length = - clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); - oh0->ip4.checksum = ip4_header_checksum (&oh0->ip4); - } - - if (transport_mode) - vlib_buffer_advance (b0, -sizeof (ethernet_header_t)); - - trace: - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - esp_encrypt_trace_t *tr = - vlib_add_trace (vm, node, b0, sizeof (*tr)); - tr->spi = sa0->spi; - tr->seq = sa0->seq - 1; - tr->crypto_alg = sa0->crypto_alg; - tr->integ_alg = sa0->integ_alg; - } - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - vlib_node_increment_counter (vm, dpdk_esp_encrypt_node.index, - ESP_ENCRYPT_ERROR_RX_PKTS, - from_frame->n_vectors); - crypto_qp_data_t *qpd; - /* *INDENT-OFF* */ - vec_foreach_index (i, cwm->qp_data) - { - u32 enq; - - qpd = vec_elt_at_index(cwm->qp_data, i); - enq = rte_cryptodev_enqueue_burst(qpd->dev_id, qpd->qp_id, - qpd->cops, n_cop_qp[i]); - qpd->inflights += enq; - - if (PREDICT_FALSE(enq < n_cop_qp[i])) - { - crypto_free_cop (qpd, &qpd->cops[enq], n_cop_qp[i] - enq); - vlib_buffer_free (vm, &qpd->bi[enq], n_cop_qp[i] - enq); - - vlib_node_increment_counter (vm, dpdk_esp_encrypt_node.index, - ESP_ENCRYPT_ERROR_ENQ_FAIL, - n_cop_qp[i] - enq); - } - } - /* *INDENT-ON* */ - - return from_frame->n_vectors; -} - -VLIB_REGISTER_NODE (dpdk_esp_encrypt_node) = -{ - .function = dpdk_esp_encrypt_node_fn,.name = "dpdk-esp-encrypt",.flags = - VLIB_NODE_FLAG_IS_OUTPUT,.vector_size = sizeof (u32),.format_trace = - format_esp_encrypt_trace,.n_errors = - ARRAY_LEN (esp_encrypt_error_strings),.error_strings = - esp_encrypt_error_strings,.n_next_nodes = 1,.next_nodes = - { - [ESP_ENCRYPT_NEXT_DROP] = "error-drop",} -}; - -VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_encrypt_node, dpdk_esp_encrypt_node_fn) -/* - * ESP Encrypt Post Node - */ -#define foreach_esp_encrypt_post_error \ - _(PKTS, "ESP post pkts") - typedef enum - { -#define _(sym,str) ESP_ENCRYPT_POST_ERROR_##sym, - foreach_esp_encrypt_post_error -#undef _ - ESP_ENCRYPT_POST_N_ERROR, - } esp_encrypt_post_error_t; - - static char *esp_encrypt_post_error_strings[] = { -#define _(sym,string) string, - foreach_esp_encrypt_post_error -#undef _ - }; - -vlib_node_registration_t dpdk_esp_encrypt_post_node; - -static u8 * -format_esp_encrypt_post_trace (u8 * s, va_list * args) -{ - return s; -} - -static uword -dpdk_esp_encrypt_post_node_fn (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * from_frame) -{ - u32 n_left_from, *from, *to_next = 0, next_index; - - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - - next_index = node->cached_next_index; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0, next0; - vlib_buffer_t *b0 = 0; - - bi0 = from[0]; - from += 1; - n_left_from -= 1; - n_left_to_next -= 1; 
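          /* The dpdk-esp-encrypt node stashed the post-crypto next index in
           * the buffer's opaque unused[0] field before handing the packet to
           * the crypto device; recover it below and enqueue the buffer to
           * that next node. */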
- - b0 = vlib_get_buffer (vm, bi0); - - to_next[0] = bi0; - to_next += 1; - - next0 = vnet_buffer (b0)->unused[0]; - - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, bi0, - next0); - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - vlib_node_increment_counter (vm, dpdk_esp_encrypt_post_node.index, - ESP_ENCRYPT_POST_ERROR_PKTS, - from_frame->n_vectors); - - return from_frame->n_vectors; -} - -VLIB_REGISTER_NODE (dpdk_esp_encrypt_post_node) = -{ - .function = dpdk_esp_encrypt_post_node_fn,.name = - "dpdk-esp-encrypt-post",.vector_size = sizeof (u32),.format_trace = - format_esp_encrypt_post_trace,.type = VLIB_NODE_TYPE_INTERNAL,.n_errors = - ARRAY_LEN (esp_encrypt_post_error_strings),.error_strings = - esp_encrypt_post_error_strings,.n_next_nodes = - ESP_ENCRYPT_N_NEXT,.next_nodes = - { -#define _(s,n) [ESP_ENCRYPT_NEXT_##s] = n, - foreach_esp_encrypt_next -#undef _ - } -}; - -VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_encrypt_post_node, - dpdk_esp_encrypt_post_node_fn) -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/ipsec/ipsec.c b/src/vnet/devices/dpdk/ipsec/ipsec.c deleted file mode 100644 index 05c17c99..00000000 --- a/src/vnet/devices/dpdk/ipsec/ipsec.c +++ /dev/null @@ -1,430 +0,0 @@ -/* - * Copyright (c) 2016 Intel and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include -#include -#include -#include -#include - -#include -#include -#include - -#define DPDK_CRYPTO_NB_SESS_OBJS 20000 -#define DPDK_CRYPTO_CACHE_SIZE 512 -#define DPDK_CRYPTO_PRIV_SIZE 128 -#define DPDK_CRYPTO_N_QUEUE_DESC 1024 -#define DPDK_CRYPTO_NB_COPS (1024 * 4) - -static int -add_del_sa_sess (u32 sa_index, u8 is_add) -{ - dpdk_crypto_main_t *dcm = &dpdk_crypto_main; - crypto_worker_main_t *cwm; - u8 skip_master = vlib_num_workers () > 0; - - /* *INDENT-OFF* */ - vec_foreach (cwm, dcm->workers_main) - { - crypto_sa_session_t *sa_sess; - u8 is_outbound; - - if (skip_master) - { - skip_master = 0; - continue; - } - - for (is_outbound = 0; is_outbound < 2; is_outbound++) - { - if (is_add) - { - pool_get (cwm->sa_sess_d[is_outbound], sa_sess); - } - else - { - u8 dev_id; - - sa_sess = pool_elt_at_index (cwm->sa_sess_d[is_outbound], sa_index); - dev_id = cwm->qp_data[sa_sess->qp_index].dev_id; - - if (!sa_sess->sess) - continue; - - if (rte_cryptodev_sym_session_free(dev_id, sa_sess->sess)) - { - clib_warning("failed to free session"); - return -1; - } - memset(sa_sess, 0, sizeof(sa_sess[0])); - } - } - } - /* *INDENT-OFF* */ - - return 0; -} - -static void -update_qp_data (crypto_worker_main_t * cwm, - u8 cdev_id, u16 qp_id, u8 is_outbound, u16 * idx) -{ - crypto_qp_data_t *qpd; - - /* *INDENT-OFF* */ - vec_foreach_index (*idx, cwm->qp_data) - { - qpd = vec_elt_at_index(cwm->qp_data, *idx); - - if (qpd->dev_id == cdev_id && qpd->qp_id == qp_id && - qpd->is_outbound == is_outbound) - return; - } - /* *INDENT-ON* */ - - vec_add2 (cwm->qp_data, qpd, 1); - - qpd->dev_id = cdev_id; - qpd->qp_id = qp_id; - qpd->is_outbound = is_outbound; -} - -/* - * return: - * 0: already exist - * 1: mapped - */ -static int -add_mapping (crypto_worker_main_t * cwm, - u8 cdev_id, u16 qp, u8 is_outbound, - const struct rte_cryptodev_capabilities *cipher_cap, - const struct rte_cryptodev_capabilities *auth_cap) -{ - u16 qp_index; - uword key = 0, data, *ret; - crypto_worker_qp_key_t *p_key = (crypto_worker_qp_key_t *) & key; - - p_key->cipher_algo = (u8) cipher_cap->sym.cipher.algo; - p_key->auth_algo = (u8) auth_cap->sym.auth.algo; - p_key->is_outbound = is_outbound; - - ret = hash_get (cwm->algo_qp_map, key); - if (ret) - return 0; - - update_qp_data (cwm, cdev_id, qp, is_outbound, &qp_index); - - data = (uword) qp_index; - hash_set (cwm->algo_qp_map, key, data); - - return 1; -} - -/* - * return: - * 0: already exist - * 1: mapped - */ -static int -add_cdev_mapping (crypto_worker_main_t * cwm, - struct rte_cryptodev_info *dev_info, u8 cdev_id, - u16 qp, u8 is_outbound) -{ - const struct rte_cryptodev_capabilities *i, *j; - u32 mapped = 0; - - for (i = dev_info->capabilities; i->op != RTE_CRYPTO_OP_TYPE_UNDEFINED; i++) - { - if (i->sym.xform_type != RTE_CRYPTO_SYM_XFORM_CIPHER) - continue; - - if (check_algo_is_supported (i, NULL) != 0) - continue; - - for (j = dev_info->capabilities; j->op != RTE_CRYPTO_OP_TYPE_UNDEFINED; - j++) - { - if (j->sym.xform_type != RTE_CRYPTO_SYM_XFORM_AUTH) - continue; - - if (check_algo_is_supported (j, NULL) != 0) - continue; - - mapped |= add_mapping (cwm, cdev_id, qp, is_outbound, i, j); - } - } - - return mapped; -} - -static int -check_cryptodev_queues () -{ - u32 n_qs = 0; - u8 cdev_id; - u32 n_req_qs = 2; - - if (vlib_num_workers () > 0) - n_req_qs = vlib_num_workers () * 2; - - for (cdev_id = 0; cdev_id < rte_cryptodev_count (); cdev_id++) - { - struct rte_cryptodev_info cdev_info; - - rte_cryptodev_info_get (cdev_id, &cdev_info); - - if (! 
- (cdev_info.feature_flags & RTE_CRYPTODEV_FF_SYM_OPERATION_CHAINING)) - continue; - - n_qs += cdev_info.max_nb_queue_pairs; - } - - if (n_qs >= n_req_qs) - return 0; - else - return -1; -} - -static clib_error_t * -dpdk_ipsec_check_support (ipsec_sa_t * sa) -{ - if (sa->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) - { - if (sa->integ_alg != IPSEC_INTEG_ALG_NONE) - return clib_error_return (0, "unsupported integ-alg %U with " - "crypto-algo aes-gcm-128", - format_ipsec_integ_alg, sa->integ_alg); - sa->integ_alg = IPSEC_INTEG_ALG_AES_GCM_128; - } - else - { - if (sa->integ_alg == IPSEC_INTEG_ALG_NONE || - sa->integ_alg == IPSEC_INTEG_ALG_AES_GCM_128) - return clib_error_return (0, "unsupported integ-alg %U", - format_ipsec_integ_alg, sa->integ_alg); - } - - return 0; -} - -static uword -dpdk_ipsec_process (vlib_main_t * vm, vlib_node_runtime_t * rt, - vlib_frame_t * f) -{ - dpdk_config_main_t *conf = &dpdk_config_main; - ipsec_main_t *im = &ipsec_main; - dpdk_crypto_main_t *dcm = &dpdk_crypto_main; - vlib_thread_main_t *tm = vlib_get_thread_main (); - struct rte_cryptodev_config dev_conf; - struct rte_cryptodev_qp_conf qp_conf; - struct rte_cryptodev_info cdev_info; - struct rte_mempool *rmp; - i32 dev_id, ret; - u32 i, skip_master; - - if (!conf->cryptodev) - { - clib_warning ("DPDK Cryptodev support is disabled, " - "default to OpenSSL IPsec"); - return 0; - } - - if (check_cryptodev_queues () < 0) - { - conf->cryptodev = 0; - clib_warning ("not enough Cryptodevs, default to OpenSSL IPsec"); - return 0; - } - - vec_alloc (dcm->workers_main, tm->n_vlib_mains); - _vec_len (dcm->workers_main) = tm->n_vlib_mains; - - fprintf (stdout, "DPDK Cryptodevs info:\n"); - fprintf (stdout, "dev_id\tn_qp\tnb_obj\tcache_size\n"); - /* HW cryptodevs have higher dev_id, use HW first */ - for (dev_id = rte_cryptodev_count () - 1; dev_id >= 0; dev_id--) - { - u16 max_nb_qp, qp = 0; - skip_master = vlib_num_workers () > 0; - - rte_cryptodev_info_get (dev_id, &cdev_info); - - if (! 
- (cdev_info.feature_flags & RTE_CRYPTODEV_FF_SYM_OPERATION_CHAINING)) - continue; - - max_nb_qp = cdev_info.max_nb_queue_pairs; - - for (i = 0; i < tm->n_vlib_mains; i++) - { - u8 is_outbound; - crypto_worker_main_t *cwm; - uword *map; - - if (skip_master) - { - skip_master = 0; - continue; - } - - cwm = vec_elt_at_index (dcm->workers_main, i); - map = cwm->algo_qp_map; - - if (!map) - { - map = hash_create (0, sizeof (crypto_worker_qp_key_t)); - if (!map) - { - clib_warning ("unable to create hash table for worker %u", - vlib_mains[i]->cpu_index); - goto error; - } - cwm->algo_qp_map = map; - } - - for (is_outbound = 0; is_outbound < 2 && qp < max_nb_qp; - is_outbound++) - qp += add_cdev_mapping (cwm, &cdev_info, dev_id, qp, is_outbound); - } - - if (qp == 0) - continue; - - dev_conf.socket_id = rte_cryptodev_socket_id (dev_id); - dev_conf.nb_queue_pairs = cdev_info.max_nb_queue_pairs; - dev_conf.session_mp.nb_objs = DPDK_CRYPTO_NB_SESS_OBJS; - dev_conf.session_mp.cache_size = DPDK_CRYPTO_CACHE_SIZE; - - ret = rte_cryptodev_configure (dev_id, &dev_conf); - if (ret < 0) - { - clib_warning ("cryptodev %u config error", dev_id); - goto error; - } - - qp_conf.nb_descriptors = DPDK_CRYPTO_N_QUEUE_DESC; - for (qp = 0; qp < dev_conf.nb_queue_pairs; qp++) - { - ret = rte_cryptodev_queue_pair_setup (dev_id, qp, &qp_conf, - dev_conf.socket_id); - if (ret < 0) - { - clib_warning ("cryptodev %u qp %u setup error", dev_id, qp); - goto error; - } - } - vec_validate_aligned (dcm->cop_pools, dev_conf.socket_id, - CLIB_CACHE_LINE_BYTES); - - if (!vec_elt (dcm->cop_pools, dev_conf.socket_id)) - { - u8 *pool_name = format (0, "crypto_op_pool_socket%u%c", - dev_conf.socket_id, 0); - - rmp = rte_crypto_op_pool_create ((char *) pool_name, - RTE_CRYPTO_OP_TYPE_SYMMETRIC, - DPDK_CRYPTO_NB_COPS * - (1 + vlib_num_workers ()), - DPDK_CRYPTO_CACHE_SIZE, - DPDK_CRYPTO_PRIV_SIZE, - dev_conf.socket_id); - vec_free (pool_name); - - if (!rmp) - { - clib_warning ("failed to allocate mempool on socket %u", - dev_conf.socket_id); - goto error; - } - vec_elt (dcm->cop_pools, dev_conf.socket_id) = rmp; - } - - fprintf (stdout, "%u\t%u\t%u\t%u\n", dev_id, dev_conf.nb_queue_pairs, - DPDK_CRYPTO_NB_SESS_OBJS, DPDK_CRYPTO_CACHE_SIZE); - } - - dpdk_esp_init (); - - /* Add new next node and set as default */ - vlib_node_t *node, *next_node; - - next_node = vlib_get_node_by_name (vm, (u8 *) "dpdk-esp-encrypt"); - ASSERT (next_node); - node = vlib_get_node_by_name (vm, (u8 *) "ipsec-output-ip4"); - ASSERT (node); - im->esp_encrypt_node_index = next_node->index; - im->esp_encrypt_next_index = - vlib_node_add_next (vm, node->index, next_node->index); - - next_node = vlib_get_node_by_name (vm, (u8 *) "dpdk-esp-decrypt"); - ASSERT (next_node); - node = vlib_get_node_by_name (vm, (u8 *) "ipsec-input-ip4"); - ASSERT (node); - im->esp_decrypt_node_index = next_node->index; - im->esp_decrypt_next_index = - vlib_node_add_next (vm, node->index, next_node->index); - - im->cb.check_support_cb = dpdk_ipsec_check_support; - im->cb.add_del_sa_sess_cb = add_del_sa_sess; - - if (vec_len (vlib_mains) == 0) - vlib_node_set_state (&vlib_global_main, dpdk_crypto_input_node.index, - VLIB_NODE_STATE_POLLING); - else - for (i = 1; i < tm->n_vlib_mains; i++) - vlib_node_set_state (vlib_mains[i], dpdk_crypto_input_node.index, - VLIB_NODE_STATE_POLLING); - - /* TODO cryptodev counters */ - - return 0; - -error: - ; - crypto_worker_main_t *cwm; - struct rte_mempool **mp; - /* *INDENT-OFF* */ - vec_foreach (cwm, dcm->workers_main) - hash_free 
(cwm->algo_qp_map); - - vec_foreach (mp, dcm->cop_pools) - { - if (mp) - rte_mempool_free (mp[0]); - } - /* *INDENT-ON* */ - vec_free (dcm->workers_main); - vec_free (dcm->cop_pools); - - return 0; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (dpdk_ipsec_process_node,static) = { - .function = dpdk_ipsec_process, - .type = VLIB_NODE_TYPE_PROCESS, - .name = "dpdk-ipsec-process", - .process_log2_n_stack_bytes = 17, -}; -/* *INDENT-ON* */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/ipsec/ipsec.h b/src/vnet/devices/dpdk/ipsec/ipsec.h deleted file mode 100644 index 3465b361..00000000 --- a/src/vnet/devices/dpdk/ipsec/ipsec.h +++ /dev/null @@ -1,227 +0,0 @@ -/* - * Copyright (c) 2016 Intel and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __DPDK_IPSEC_H__ -#define __DPDK_IPSEC_H__ - -#include - -#undef always_inline -#include -#include - -#if CLIB_DEBUG > 0 -#define always_inline static inline -#else -#define always_inline static inline __attribute__ ((__always_inline__)) -#endif - - -#define MAX_QP_PER_LCORE 16 - -typedef struct -{ - u32 salt; - u32 iv[2]; - u32 cnt; -} dpdk_gcm_cnt_blk; - -typedef struct -{ - dpdk_gcm_cnt_blk cb; - union - { - u8 aad[12]; - u8 icv[64]; - }; -} dpdk_cop_priv_t; - -typedef struct -{ - u8 cipher_algo; - u8 auth_algo; - u8 is_outbound; -} crypto_worker_qp_key_t; - -typedef struct -{ - u16 dev_id; - u16 qp_id; - u16 is_outbound; - i16 inflights; - u32 bi[VLIB_FRAME_SIZE]; - struct rte_crypto_op *cops[VLIB_FRAME_SIZE]; - struct rte_crypto_op **free_cops; -} crypto_qp_data_t; - -typedef struct -{ - u8 qp_index; - void *sess; -} crypto_sa_session_t; - -typedef struct -{ - crypto_sa_session_t *sa_sess_d[2]; - crypto_qp_data_t *qp_data; - uword *algo_qp_map; -} crypto_worker_main_t; - -typedef struct -{ - struct rte_mempool **cop_pools; - crypto_worker_main_t *workers_main; -} dpdk_crypto_main_t; - -dpdk_crypto_main_t dpdk_crypto_main; - -extern vlib_node_registration_t dpdk_crypto_input_node; - -#define CRYPTO_N_FREE_COPS (VLIB_FRAME_SIZE * 3) - -static_always_inline void -crypto_alloc_cops () -{ - dpdk_crypto_main_t *dcm = &dpdk_crypto_main; - u32 cpu_index = os_get_cpu_number (); - crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index]; - unsigned socket_id = rte_socket_id (); - crypto_qp_data_t *qpd; - - /* *INDENT-OFF* */ - vec_foreach (qpd, cwm->qp_data) - { - u32 l = vec_len (qpd->free_cops); - - if (PREDICT_FALSE (l < VLIB_FRAME_SIZE)) - { - u32 n_alloc; - - if (PREDICT_FALSE (!qpd->free_cops)) - vec_alloc (qpd->free_cops, CRYPTO_N_FREE_COPS); - - n_alloc = rte_crypto_op_bulk_alloc (dcm->cop_pools[socket_id], - RTE_CRYPTO_OP_TYPE_SYMMETRIC, - &qpd->free_cops[l], - CRYPTO_N_FREE_COPS - l - 1); - - _vec_len (qpd->free_cops) = l + n_alloc; - } - } - /* *INDENT-ON* */ -} - -static_always_inline void -crypto_free_cop (crypto_qp_data_t * qpd, struct rte_crypto_op **cops, u32 n) -{ - u32 l = vec_len (qpd->free_cops); 
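  /* Recycle completed crypto ops into the per-queue-pair free list; if the
   * cached list would overflow CRYPTO_N_FREE_COPS, first return one
   * VLIB_FRAME_SIZE batch to the DPDK mempool before appending. */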
- - if (l + n >= CRYPTO_N_FREE_COPS) - { - l -= VLIB_FRAME_SIZE; - rte_mempool_put_bulk (cops[0]->mempool, - (void **) &qpd->free_cops[l], VLIB_FRAME_SIZE); - } - clib_memcpy (&qpd->free_cops[l], cops, sizeof (*cops) * n); - - _vec_len (qpd->free_cops) = l + n; -} - -static_always_inline int -check_algo_is_supported (const struct rte_cryptodev_capabilities *cap, - char *name) -{ - struct - { - uint8_t cipher_algo; - enum rte_crypto_sym_xform_type type; - union - { - enum rte_crypto_auth_algorithm auth; - enum rte_crypto_cipher_algorithm cipher; - }; - char *name; - } supported_algo[] = - { - { - .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher = - RTE_CRYPTO_CIPHER_NULL,.name = "NULL"}, - { - .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher = - RTE_CRYPTO_CIPHER_AES_CBC,.name = "AES_CBC"}, - { - .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher = - RTE_CRYPTO_CIPHER_AES_CTR,.name = "AES_CTR"}, - { - .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher = - RTE_CRYPTO_CIPHER_3DES_CBC,.name = "3DES-CBC"}, - { - .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher = - RTE_CRYPTO_CIPHER_AES_GCM,.name = "AES-GCM"}, - { - .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = - RTE_CRYPTO_AUTH_SHA1_HMAC,.name = "HMAC-SHA1"}, - { - .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = - RTE_CRYPTO_AUTH_SHA256_HMAC,.name = "HMAC-SHA256"}, - { - .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = - RTE_CRYPTO_AUTH_SHA384_HMAC,.name = "HMAC-SHA384"}, - { - .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = - RTE_CRYPTO_AUTH_SHA512_HMAC,.name = "HMAC-SHA512"}, - { - .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = - RTE_CRYPTO_AUTH_AES_XCBC_MAC,.name = "AES-XCBC-MAC"}, - { - .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = - RTE_CRYPTO_AUTH_AES_GCM,.name = "AES-GCM"}, - { - /* tail */ - .type = RTE_CRYPTO_SYM_XFORM_NOT_SPECIFIED},}; - uint32_t i = 0; - - if (cap->op != RTE_CRYPTO_OP_TYPE_SYMMETRIC) - return -1; - - while (supported_algo[i].type != RTE_CRYPTO_SYM_XFORM_NOT_SPECIFIED) - { - if (cap->sym.xform_type == supported_algo[i].type) - { - if ((cap->sym.xform_type == RTE_CRYPTO_SYM_XFORM_CIPHER && - cap->sym.cipher.algo == supported_algo[i].cipher) || - (cap->sym.xform_type == RTE_CRYPTO_SYM_XFORM_AUTH && - cap->sym.auth.algo == supported_algo[i].auth)) - { - if (name) - strcpy (name, supported_algo[i].name); - return 0; - } - } - - i++; - } - - return -1; -} - -#endif /* __DPDK_IPSEC_H__ */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/main.c b/src/vnet/devices/dpdk/main.c deleted file mode 100644 index 9ea3aa04..00000000 --- a/src/vnet/devices/dpdk/main.c +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2017 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include - - -/* - * Called by the dpdk driver's rte_delay_us() function. - * Return 0 to have the dpdk do a regular delay loop. - * Return 1 if to skip the delay loop because we are suspending - * the calling vlib process instead. 
- */ -int -rte_delay_us_override (unsigned us) -{ - vlib_main_t *vm; - - /* Don't bother intercepting for short delays */ - if (us < 10) - return 0; - - /* - * Only intercept if we are in a vlib process. - * If we are called from a vlib worker thread or the vlib main - * thread then do not intercept. (Must not be called from an - * independent pthread). - */ - if (os_get_cpu_number () == 0) - { - /* - * We're in the vlib main thread or a vlib process. Make sure - * the process is running and we're not still initializing. - */ - vm = vlib_get_main (); - if (vlib_in_process_context (vm)) - { - /* Only suspend for the admin_down_process */ - vlib_process_t *proc = vlib_get_current_process (vm); - if (!(proc->flags & VLIB_PROCESS_IS_RUNNING) || - (proc->node_runtime.function != admin_up_down_process)) - return 0; - - f64 delay = 1e-6 * us; - vlib_process_suspend (vm, delay); - return 1; - } - } - return 0; // no override -} - -static void -rte_delay_us_override_cb (unsigned us) -{ - if (rte_delay_us_override (us) == 0) - rte_delay_us_block (us); -} - -static clib_error_t * dpdk_main_init (vlib_main_t * vm) -{ - clib_error_t * error = 0; - - if ((error = vlib_call_init_function (vm, dpdk_init))) - return error; - - /* register custom delay function */ - rte_delay_us_callback_register (rte_delay_us_override_cb); - - return error; -} - -VLIB_INIT_FUNCTION (dpdk_main_init); - diff --git a/src/vnet/devices/dpdk/node.c b/src/vnet/devices/dpdk/node.c deleted file mode 100644 index 0d64ae08..00000000 --- a/src/vnet/devices/dpdk/node.c +++ /dev/null @@ -1,674 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "dpdk_priv.h" - -static char *dpdk_error_strings[] = { -#define _(n,s) s, - foreach_dpdk_error -#undef _ -}; - -always_inline int -vlib_buffer_is_ip4 (vlib_buffer_t * b) -{ - ethernet_header_t *h = (ethernet_header_t *) b->data; - return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP4)); -} - -always_inline int -vlib_buffer_is_ip6 (vlib_buffer_t * b) -{ - ethernet_header_t *h = (ethernet_header_t *) b->data; - return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP6)); -} - -always_inline int -vlib_buffer_is_mpls (vlib_buffer_t * b) -{ - ethernet_header_t *h = (ethernet_header_t *) b->data; - return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS_UNICAST)); -} - -always_inline u32 -dpdk_rx_next_from_etype (struct rte_mbuf * mb, vlib_buffer_t * b0) -{ - if (PREDICT_TRUE (vlib_buffer_is_ip4 (b0))) - if (PREDICT_TRUE ((mb->ol_flags & PKT_RX_IP_CKSUM_GOOD) != 0)) - return VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT; - else - return VNET_DEVICE_INPUT_NEXT_IP4_INPUT; - else if (PREDICT_TRUE (vlib_buffer_is_ip6 (b0))) - return VNET_DEVICE_INPUT_NEXT_IP6_INPUT; - else if (PREDICT_TRUE (vlib_buffer_is_mpls (b0))) - return VNET_DEVICE_INPUT_NEXT_MPLS_INPUT; - else - return VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; -} - -always_inline int -dpdk_mbuf_is_vlan (struct rte_mbuf *mb) -{ - return (mb->packet_type & RTE_PTYPE_L2_ETHER_VLAN) == - RTE_PTYPE_L2_ETHER_VLAN; -} - -always_inline int -dpdk_mbuf_is_ip4 (struct rte_mbuf *mb) -{ - return RTE_ETH_IS_IPV4_HDR (mb->packet_type) != 0; -} - -always_inline int -dpdk_mbuf_is_ip6 (struct rte_mbuf *mb) -{ - return RTE_ETH_IS_IPV6_HDR (mb->packet_type) != 0; -} - -always_inline u32 -dpdk_rx_next_from_mb (struct rte_mbuf * mb, vlib_buffer_t * b0) -{ - if (PREDICT_FALSE (dpdk_mbuf_is_vlan (mb))) - return VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; - else if (PREDICT_TRUE (dpdk_mbuf_is_ip4 (mb))) - return VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT; - else if (PREDICT_TRUE (dpdk_mbuf_is_ip6 (mb))) - return VNET_DEVICE_INPUT_NEXT_IP6_INPUT; - else if (PREDICT_TRUE (vlib_buffer_is_mpls (b0))) - return VNET_DEVICE_INPUT_NEXT_MPLS_INPUT; - else - return dpdk_rx_next_from_etype (mb, b0); -} - -always_inline void -dpdk_rx_error_from_mb (struct rte_mbuf *mb, u32 * next, u8 * error) -{ - if (mb->ol_flags & PKT_RX_IP_CKSUM_BAD) - { - *error = DPDK_ERROR_IP_CHECKSUM_ERROR; - *next = VNET_DEVICE_INPUT_NEXT_DROP; - } - else - *error = DPDK_ERROR_NONE; -} - -void -dpdk_rx_trace (dpdk_main_t * dm, - vlib_node_runtime_t * node, - dpdk_device_t * xd, - u16 queue_id, u32 * buffers, uword n_buffers) -{ - vlib_main_t *vm = vlib_get_main (); - u32 *b, n_left; - u32 next0; - - n_left = n_buffers; - b = buffers; - - while (n_left >= 1) - { - u32 bi0; - vlib_buffer_t *b0; - dpdk_rx_dma_trace_t *t0; - struct rte_mbuf *mb; - u8 error0; - - bi0 = b[0]; - n_left -= 1; - - b0 = vlib_get_buffer (vm, bi0); - mb = rte_mbuf_from_vlib_buffer (b0); - - if (PREDICT_FALSE (xd->per_interface_next_index != ~0)) - next0 = xd->per_interface_next_index; - else if (PREDICT_TRUE - ((xd->flags & DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE) != 0)) - next0 = dpdk_rx_next_from_mb (mb, b0); - else - next0 = dpdk_rx_next_from_etype (mb, b0); - - dpdk_rx_error_from_mb (mb, &next0, &error0); - - vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0); - t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); - t0->queue_index = queue_id; - t0->device_index = xd->device_index; - 
t0->buffer_index = bi0; - - clib_memcpy (&t0->mb, mb, sizeof (t0->mb)); - clib_memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data)); - clib_memcpy (t0->buffer.pre_data, b0->data, - sizeof (t0->buffer.pre_data)); - clib_memcpy (&t0->data, mb->buf_addr + mb->data_off, sizeof (t0->data)); - - b += 1; - } -} - -static inline u32 -dpdk_rx_burst (dpdk_main_t * dm, dpdk_device_t * xd, u16 queue_id) -{ - u32 n_buffers; - u32 n_left; - u32 n_this_chunk; - - n_left = VLIB_FRAME_SIZE; - n_buffers = 0; - - if (PREDICT_TRUE (xd->flags & DPDK_DEVICE_FLAG_PMD)) - { - while (n_left) - { - n_this_chunk = rte_eth_rx_burst (xd->device_index, queue_id, - xd->rx_vectors[queue_id] + - n_buffers, n_left); - n_buffers += n_this_chunk; - n_left -= n_this_chunk; - - /* Empirically, DPDK r1.8 produces vectors w/ 32 or fewer elts */ - if (n_this_chunk < 32) - break; - } - } - else - { - ASSERT (0); - } - - return n_buffers; -} - - -static_always_inline void -dpdk_process_subseq_segs (vlib_main_t * vm, vlib_buffer_t * b, - struct rte_mbuf *mb, vlib_buffer_free_list_t * fl) -{ - u8 nb_seg = 1; - struct rte_mbuf *mb_seg = 0; - vlib_buffer_t *b_seg, *b_chain = 0; - mb_seg = mb->next; - b_chain = b; - - while ((mb->nb_segs > 1) && (nb_seg < mb->nb_segs)) - { - ASSERT (mb_seg != 0); - - b_seg = vlib_buffer_from_rte_mbuf (mb_seg); - vlib_buffer_init_for_free_list (b_seg, fl); - - ASSERT ((b_seg->flags & VLIB_BUFFER_NEXT_PRESENT) == 0); - ASSERT (b_seg->current_data == 0); - - /* - * The driver (e.g. virtio) may not put the packet data at the start - * of the segment, so don't assume b_seg->current_data == 0 is correct. - */ - b_seg->current_data = - (mb_seg->buf_addr + mb_seg->data_off) - (void *) b_seg->data; - - b_seg->current_length = mb_seg->data_len; - b->total_length_not_including_first_buffer += mb_seg->data_len; - - b_chain->flags |= VLIB_BUFFER_NEXT_PRESENT; - b_chain->next_buffer = vlib_get_buffer_index (vm, b_seg); - - b_chain = b_seg; - mb_seg = mb_seg->next; - nb_seg++; - } -} - -static_always_inline void -dpdk_prefetch_buffer (struct rte_mbuf *mb) -{ - vlib_buffer_t *b = vlib_buffer_from_rte_mbuf (mb); - CLIB_PREFETCH (mb, CLIB_CACHE_LINE_BYTES, LOAD); - CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, STORE); -} - -/* - * This function is used when there are no worker threads. - * The main thread performs IO and forwards the packets. 
- */ -static_always_inline u32 -dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, - vlib_node_runtime_t * node, u32 cpu_index, u16 queue_id) -{ - u32 n_buffers; - u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; - u32 n_left_to_next, *to_next; - u32 mb_index; - vlib_main_t *vm = vlib_get_main (); - uword n_rx_bytes = 0; - u32 n_trace, trace_cnt __attribute__ ((unused)); - vlib_buffer_free_list_t *fl; - u32 buffer_flags_template; - - if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) == 0) - return 0; - - n_buffers = dpdk_rx_burst (dm, xd, queue_id); - - if (n_buffers == 0) - { - return 0; - } - - buffer_flags_template = dm->buffer_flags_template; - - vec_reset_length (xd->d_trace_buffers[cpu_index]); - trace_cnt = n_trace = vlib_get_trace_count (vm, node); - - if (n_trace > 0) - { - u32 n = clib_min (n_trace, n_buffers); - mb_index = 0; - - while (n--) - { - struct rte_mbuf *mb = xd->rx_vectors[queue_id][mb_index++]; - vlib_buffer_t *b = vlib_buffer_from_rte_mbuf (mb); - vec_add1 (xd->d_trace_buffers[cpu_index], - vlib_get_buffer_index (vm, b)); - } - } - - fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); - - mb_index = 0; - - while (n_buffers > 0) - { - vlib_buffer_t *b0, *b1, *b2, *b3; - u32 bi0, next0, l3_offset0; - u32 bi1, next1, l3_offset1; - u32 bi2, next2, l3_offset2; - u32 bi3, next3, l3_offset3; - u8 error0, error1, error2, error3; - u64 or_ol_flags; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_buffers > 8 && n_left_to_next > 4) - { - struct rte_mbuf *mb0 = xd->rx_vectors[queue_id][mb_index]; - struct rte_mbuf *mb1 = xd->rx_vectors[queue_id][mb_index + 1]; - struct rte_mbuf *mb2 = xd->rx_vectors[queue_id][mb_index + 2]; - struct rte_mbuf *mb3 = xd->rx_vectors[queue_id][mb_index + 3]; - - dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 4]); - dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 5]); - dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 6]); - dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 7]); - - if (xd->flags & DPDK_DEVICE_FLAG_MAYBE_MULTISEG) - { - if (PREDICT_FALSE (mb0->nb_segs > 1)) - dpdk_prefetch_buffer (mb0->next); - if (PREDICT_FALSE (mb1->nb_segs > 1)) - dpdk_prefetch_buffer (mb1->next); - if (PREDICT_FALSE (mb2->nb_segs > 1)) - dpdk_prefetch_buffer (mb2->next); - if (PREDICT_FALSE (mb3->nb_segs > 1)) - dpdk_prefetch_buffer (mb3->next); - } - - ASSERT (mb0); - ASSERT (mb1); - ASSERT (mb2); - ASSERT (mb3); - - or_ol_flags = (mb0->ol_flags | mb1->ol_flags | - mb2->ol_flags | mb3->ol_flags); - b0 = vlib_buffer_from_rte_mbuf (mb0); - b1 = vlib_buffer_from_rte_mbuf (mb1); - b2 = vlib_buffer_from_rte_mbuf (mb2); - b3 = vlib_buffer_from_rte_mbuf (mb3); - - vlib_buffer_init_for_free_list (b0, fl); - vlib_buffer_init_for_free_list (b1, fl); - vlib_buffer_init_for_free_list (b2, fl); - vlib_buffer_init_for_free_list (b3, fl); - - bi0 = vlib_get_buffer_index (vm, b0); - bi1 = vlib_get_buffer_index (vm, b1); - bi2 = vlib_get_buffer_index (vm, b2); - bi3 = vlib_get_buffer_index (vm, b3); - - to_next[0] = bi0; - to_next[1] = bi1; - to_next[2] = bi2; - to_next[3] = bi3; - to_next += 4; - n_left_to_next -= 4; - - if (PREDICT_FALSE (xd->per_interface_next_index != ~0)) - { - next0 = next1 = next2 = next3 = xd->per_interface_next_index; - } - else if (PREDICT_TRUE - ((xd->flags & DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE) != 0)) - { - next0 = dpdk_rx_next_from_mb (mb0, b0); - next1 = dpdk_rx_next_from_mb (mb1, b1); - next2 = dpdk_rx_next_from_mb (mb2, b2); - next3 = 
dpdk_rx_next_from_mb (mb3, b3); - } - else - { - next0 = dpdk_rx_next_from_etype (mb0, b0); - next1 = dpdk_rx_next_from_etype (mb1, b1); - next2 = dpdk_rx_next_from_etype (mb2, b2); - next3 = dpdk_rx_next_from_etype (mb3, b3); - } - - if (PREDICT_FALSE (or_ol_flags & PKT_RX_IP_CKSUM_BAD)) - { - dpdk_rx_error_from_mb (mb0, &next0, &error0); - dpdk_rx_error_from_mb (mb1, &next1, &error1); - dpdk_rx_error_from_mb (mb2, &next2, &error2); - dpdk_rx_error_from_mb (mb3, &next3, &error3); - b0->error = node->errors[error0]; - b1->error = node->errors[error1]; - b2->error = node->errors[error2]; - b3->error = node->errors[error3]; - } - else - { - b0->error = b1->error = node->errors[DPDK_ERROR_NONE]; - b2->error = b3->error = node->errors[DPDK_ERROR_NONE]; - } - - l3_offset0 = device_input_next_node_advance[next0]; - l3_offset1 = device_input_next_node_advance[next1]; - l3_offset2 = device_input_next_node_advance[next2]; - l3_offset3 = device_input_next_node_advance[next3]; - - b0->current_data = l3_offset0 + mb0->data_off; - b1->current_data = l3_offset1 + mb1->data_off; - b2->current_data = l3_offset2 + mb2->data_off; - b3->current_data = l3_offset3 + mb3->data_off; - - b0->current_data -= RTE_PKTMBUF_HEADROOM; - b1->current_data -= RTE_PKTMBUF_HEADROOM; - b2->current_data -= RTE_PKTMBUF_HEADROOM; - b3->current_data -= RTE_PKTMBUF_HEADROOM; - - b0->current_length = mb0->data_len - l3_offset0; - b1->current_length = mb1->data_len - l3_offset1; - b2->current_length = mb2->data_len - l3_offset2; - b3->current_length = mb3->data_len - l3_offset3; - - b0->flags = buffer_flags_template; - b1->flags = buffer_flags_template; - b2->flags = buffer_flags_template; - b3->flags = buffer_flags_template; - - vnet_buffer (b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; - vnet_buffer (b1)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; - vnet_buffer (b2)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; - vnet_buffer (b3)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; - - vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; - vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0; - vnet_buffer (b2)->sw_if_index[VLIB_TX] = (u32) ~ 0; - vnet_buffer (b3)->sw_if_index[VLIB_TX] = (u32) ~ 0; - - n_rx_bytes += mb0->pkt_len; - n_rx_bytes += mb1->pkt_len; - n_rx_bytes += mb2->pkt_len; - n_rx_bytes += mb3->pkt_len; - - /* Process subsequent segments of multi-segment packets */ - if (xd->flags & DPDK_DEVICE_FLAG_MAYBE_MULTISEG) - { - dpdk_process_subseq_segs (vm, b0, mb0, fl); - dpdk_process_subseq_segs (vm, b1, mb1, fl); - dpdk_process_subseq_segs (vm, b2, mb2, fl); - dpdk_process_subseq_segs (vm, b3, mb3, fl); - } - - /* - * Turn this on if you run into - * "bad monkey" contexts, and you want to know exactly - * which nodes they've visited... See main.c... - */ - VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); - VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b1); - VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b2); - VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b3); - - /* Do we have any driver RX features configured on the interface? 
*/ - vnet_feature_start_device_input_x4 (xd->vlib_sw_if_index, - &next0, &next1, &next2, &next3, - b0, b1, b2, b3, - l3_offset0, l3_offset1, - l3_offset2, l3_offset3); - - vlib_validate_buffer_enqueue_x4 (vm, node, next_index, - to_next, n_left_to_next, - bi0, bi1, bi2, bi3, - next0, next1, next2, next3); - n_buffers -= 4; - mb_index += 4; - } - while (n_buffers > 0 && n_left_to_next > 0) - { - struct rte_mbuf *mb0 = xd->rx_vectors[queue_id][mb_index]; - - ASSERT (mb0); - - b0 = vlib_buffer_from_rte_mbuf (mb0); - - /* Prefetch one next segment if it exists. */ - if (PREDICT_FALSE (mb0->nb_segs > 1)) - dpdk_prefetch_buffer (mb0->next); - - vlib_buffer_init_for_free_list (b0, fl); - - bi0 = vlib_get_buffer_index (vm, b0); - - to_next[0] = bi0; - to_next++; - n_left_to_next--; - - if (PREDICT_FALSE (xd->per_interface_next_index != ~0)) - next0 = xd->per_interface_next_index; - else if (PREDICT_TRUE - ((xd->flags & DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE) != 0)) - next0 = dpdk_rx_next_from_mb (mb0, b0); - else - next0 = dpdk_rx_next_from_etype (mb0, b0); - - dpdk_rx_error_from_mb (mb0, &next0, &error0); - b0->error = node->errors[error0]; - - l3_offset0 = device_input_next_node_advance[next0]; - - b0->current_data = l3_offset0; - b0->current_data += mb0->data_off - RTE_PKTMBUF_HEADROOM; - b0->current_length = mb0->data_len - l3_offset0; - - b0->flags = buffer_flags_template; - - vnet_buffer (b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; - vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; - n_rx_bytes += mb0->pkt_len; - - /* Process subsequent segments of multi-segment packets */ - dpdk_process_subseq_segs (vm, b0, mb0, fl); - - /* - * Turn this on if you run into - * "bad monkey" contexts, and you want to know exactly - * which nodes they've visited... See main.c... - */ - VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); - - /* Do we have any driver RX features configured on the interface? */ - vnet_feature_start_device_input_x1 (xd->vlib_sw_if_index, &next0, - b0, l3_offset0); - - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - n_buffers--; - mb_index++; - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - if (PREDICT_FALSE (vec_len (xd->d_trace_buffers[cpu_index]) > 0)) - { - dpdk_rx_trace (dm, node, xd, queue_id, xd->d_trace_buffers[cpu_index], - vec_len (xd->d_trace_buffers[cpu_index])); - vlib_set_trace_count (vm, node, n_trace - - vec_len (xd->d_trace_buffers[cpu_index])); - } - - vlib_increment_combined_counter - (vnet_get_main ()->interface_main.combined_sw_if_counters - + VNET_INTERFACE_COUNTER_RX, - cpu_index, xd->vlib_sw_if_index, mb_index, n_rx_bytes); - - vnet_device_increment_rx_packets (cpu_index, mb_index); - - return mb_index; -} - -static inline void -poll_rate_limit (dpdk_main_t * dm) -{ - /* Limit the poll rate by sleeping for N msec between polls */ - if (PREDICT_FALSE (dm->poll_sleep != 0)) - { - struct timespec ts, tsrem; - - ts.tv_sec = 0; - ts.tv_nsec = 1000 * 1000 * dm->poll_sleep; /* 1ms */ - - while (nanosleep (&ts, &tsrem) < 0) - { - ts = tsrem; - } - } -} - -/** \brief Main DPDK input node - @node dpdk-input - - This is the main DPDK input node: across each assigned interface, - call rte_eth_rx_burst(...) or similar to obtain a vector of - packets to process. Handle early packet discard. Derive @c - vlib_buffer_t metadata from struct rte_mbuf metadata, - Depending on the resulting metadata: adjust b->current_data, - b->current_length and dispatch directly to - ip4-input-no-checksum, or ip6-input. 
Trace the packet if required. - - @param vm vlib_main_t corresponding to the current thread - @param node vlib_node_runtime_t - @param f vlib_frame_t input-node, not used. - - @par Graph mechanics: buffer metadata, next index usage - - @em Uses: - - struct rte_mbuf mb->ol_flags - - PKT_RX_IP_CKSUM_BAD - - RTE_ETH_IS_xxx_HDR(mb->packet_type) - - packet classification result - - @em Sets: - - b->error if the packet is to be dropped immediately - - b->current_data, b->current_length - - adjusted as needed to skip the L2 header in direct-dispatch cases - - vnet_buffer(b)->sw_if_index[VLIB_RX] - - rx interface sw_if_index - - vnet_buffer(b)->sw_if_index[VLIB_TX] = ~0 - - required by ipX-lookup - - b->flags - - to indicate multi-segment pkts (VLIB_BUFFER_NEXT_PRESENT), etc. - - Next Nodes: - - Static arcs to: error-drop, ethernet-input, - ip4-input-no-checksum, ip6-input, mpls-input - - per-interface redirection, controlled by - xd->per_interface_next_index -*/ - -static uword -dpdk_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f) -{ - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd; - uword n_rx_packets = 0; - dpdk_device_and_queue_t *dq; - u32 cpu_index = os_get_cpu_number (); - - /* - * Poll all devices on this cpu for input/interrupts. - */ - /* *INDENT-OFF* */ - vec_foreach (dq, dm->devices_by_cpu[cpu_index]) - { - xd = vec_elt_at_index(dm->devices, dq->device); - n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id); - } - /* *INDENT-ON* */ - - poll_rate_limit (dm); - - return n_rx_packets; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (dpdk_input_node) = { - .function = dpdk_input, - .type = VLIB_NODE_TYPE_INPUT, - .name = "dpdk-input", - .sibling_of = "device-input", - - /* Will be enabled if/when hardware is detected. */ - .state = VLIB_NODE_STATE_DISABLED, - - .format_buffer = format_ethernet_header_with_length, - .format_trace = format_dpdk_rx_dma_trace, - - .n_errors = DPDK_N_ERROR, - .error_strings = dpdk_error_strings, -}; - -VLIB_NODE_FUNCTION_MULTIARCH (dpdk_input_node, dpdk_input); -/* *INDENT-ON* */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/dpdk/qos_doc.md b/src/vnet/devices/dpdk/qos_doc.md deleted file mode 100644 index 7c064246..00000000 --- a/src/vnet/devices/dpdk/qos_doc.md +++ /dev/null @@ -1,411 +0,0 @@ -# QoS Hierarchical Scheduler {#qos_doc} - -The Quality-of-Service (QoS) scheduler performs egress-traffic management by -prioritizing the transmission of the packets of different type services and -subcribers based on the Service Level Agreements (SLAs). The QoS scheduler can -be enabled on one or more NIC output interfaces depending upon the -requirement. - - -## Overview - -The QoS schdeuler supports a number of scheduling and shaping levels which -construct hierarchical-tree. The first level in the hierarchy is port (i.e. -the physical interface) that constitutes the root node of the tree. The -subsequent level is subport which represents the group of the -users/subscribers. The individual user/subscriber is represented by the pipe -at the next level. Each user can have different traffic type based on the -criteria of specific loss rate, jitter, and latency. These traffic types are -represented at the traffic-class level in the form of different traffic- -classes. The last level contains number of queues which are grouped together -to host the packets of the specific class type traffic. 
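To make the levels concrete, the sketch below shows one way to represent the
result of classifying a packet against this hierarchy. The struct name and
field layout are illustrative only; the scheduler actually carries this
information packed into the mbuf sched field, whose exact bit layout is
internal to DPDK.

```
/* Illustrative only: the five classification levels as a plain C struct
 * (hypothetical type, not part of the scheduler code). */
typedef struct
{
  u32 subport;   /* group of subscribers (default config: 1 per port) */
  u32 pipe;      /* individual subscriber (default config: 4096 per subport) */
  u8 tc;         /* traffic class 0..3, chosen by loss/jitter/latency needs */
  u8 queue;      /* queue 0..3 within the traffic class, served by WRR */
  u8 color;      /* green/yellow/red, consumed by WRED at enqueue time */
} hqos_pkt_class_t;
```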
- -The QoS scheduler implementation requires flow classification, enqueue and -dequeue operations. The flow classification is mandatory stage for HQoS where -incoming packets are classified by mapping the packet fields information to -5-tuple (HQoS subport, pipe, traffic class, queue within traffic class, and -color) and storing that information in mbuf sched field. The enqueue operation -uses this information to determine the queue for storing the packet, and at -this stage, if the specific queue is full, QoS drops the packet. The dequeue -operation consists of scheduling the packet based on its length and available -credits, and handing over the scheduled packet to the output interface. - -For more information on QoS Scheduler, please refer DPDK Programmer's Guide- -http://dpdk.org/doc/guides/prog_guide/qos_framework.html - - -### QoS Schdeuler Parameters - -Following illustrates the default HQoS configuration for each 10GbE output -port: - -Single subport (subport 0): - - Subport rate set to 100% of port rate - - Each of the 4 traffic classes has rate set to 100% of port rate - -4K pipes per subport 0 (pipes 0 .. 4095) with identical configuration: - - Pipe rate set to 1/4K of port rate - - Each of the 4 traffic classes has rate set to 100% of pipe rate - - Within each traffic class, the byte-level WRR weights for the 4 queues are set to 1:1:1:1 - - -#### Port configuration - -``` -port { - rate 1250000000 /* Assuming 10GbE port */ - frame_overhead 24 /* Overhead fields per Ethernet frame: - * 7B (Preamble) + - * 1B (Start of Frame Delimiter (SFD)) + - * 4B (Frame Check Sequence (FCS)) + - * 12B (Inter Frame Gap (IFG)) - */ - mtu 1522 /* Assuming Ethernet/IPv4 pkt (FCS not included) */ - n_subports_per_port 1 /* Number of subports per output interface */ - n_pipes_per_subport 4096 /* Number of pipes (users/subscribers) */ - queue_sizes 64 64 64 64 /* Packet queue size for each traffic class. - * All queues within the same pipe traffic class - * have the same size. Queues from different - * pipes serving the same traffic class have - * the same size. 
*/ -} -``` - - -#### Subport configuration - -``` -subport 0 { - tb_rate 1250000000 /* Subport level token bucket rate (bytes per second) */ - tb_size 1000000 /* Subport level token bucket size (bytes) */ - tc0_rate 1250000000 /* Subport level token bucket rate for traffic class 0 (bytes per second) */ - tc1_rate 1250000000 /* Subport level token bucket rate for traffic class 1 (bytes per second) */ - tc2_rate 1250000000 /* Subport level token bucket rate for traffic class 2 (bytes per second) */ - tc3_rate 1250000000 /* Subport level token bucket rate for traffic class 3 (bytes per second) */ - tc_period 10 /* Time interval for refilling the token bucket associated with traffic class (Milliseconds) */ - pipe 0 4095 profile 0 /* pipes (users/subscribers) configured with pipe profile 0 */ -} -``` - - -#### Pipe configuration - -``` -pipe_profile 0 { - tb_rate 305175 /* Pipe level token bucket rate (bytes per second) */ - tb_size 1000000 /* Pipe level token bucket size (bytes) */ - tc0_rate 305175 /* Pipe level token bucket rate for traffic class 0 (bytes per second) */ - tc1_rate 305175 /* Pipe level token bucket rate for traffic class 1 (bytes per second) */ - tc2_rate 305175 /* Pipe level token bucket rate for traffic class 2 (bytes per second) */ - tc3_rate 305175 /* Pipe level token bucket rate for traffic class 3 (bytes per second) */ - tc_period 40 /* Time interval for refilling the token bucket associated with traffic class at pipe level (Milliseconds) */ - tc3_oversubscription_weight 1 /* Weight traffic class 3 oversubscription */ - tc0_wrr_weights 1 1 1 1 /* Pipe queues WRR weights for traffic class 0 */ - tc1_wrr_weights 1 1 1 1 /* Pipe queues WRR weights for traffic class 1 */ - tc2_wrr_weights 1 1 1 1 /* Pipe queues WRR weights for traffic class 2 */ - tc3_wrr_weights 1 1 1 1 /* Pipe queues WRR weights for traffic class 3 */ -} -``` - - -#### Random Early Detection (RED) parameters per traffic class and color (Green / Yellow / Red) - -``` -red { - tc0_wred_min 48 40 32 /* Minimum threshold for traffic class 0 queue (min_th) in number of packets */ - tc0_wred_max 64 64 64 /* Maximum threshold for traffic class 0 queue (max_th) in number of packets */ - tc0_wred_inv_prob 10 10 10 /* Inverse of packet marking probability for traffic class 0 queue (maxp = 1 / maxp_inv) */ - tc0_wred_weight 9 9 9 /* Traffic Class 0 queue weight */ - tc1_wred_min 48 40 32 /* Minimum threshold for traffic class 1 queue (min_th) in number of packets */ - tc1_wred_max 64 64 64 /* Maximum threshold for traffic class 1 queue (max_th) in number of packets */ - tc1_wred_inv_prob 10 10 10 /* Inverse of packet marking probability for traffic class 1 queue (maxp = 1 / maxp_inv) */ - tc1_wred_weight 9 9 9 /* Traffic Class 1 queue weight */ - tc2_wred_min 48 40 32 /* Minimum threshold for traffic class 2 queue (min_th) in number of packets */ - tc2_wred_max 64 64 64 /* Maximum threshold for traffic class 2 queue (max_th) in number of packets */ - tc2_wred_inv_prob 10 10 10 /* Inverse of packet marking probability for traffic class 2 queue (maxp = 1 / maxp_inv) */ - tc2_wred_weight 9 9 9 /* Traffic Class 2 queue weight */ - tc3_wred_min 48 40 32 /* Minimum threshold for traffic class 3 queue (min_th) in number of packets */ - tc3_wred_max 64 64 64 /* Maximum threshold for traffic class 3 queue (max_th) in number of packets */ - tc3_wred_inv_prob 10 10 10 /* Inverse of packet marking probability for traffic class 3 queue (maxp = 1 / maxp_inv) */ - tc3_wred_weight 9 9 9 /* Traffic Class 3 queue weight */ -} -``` - - 
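The 305175 bytes/second pipe rate in the profile above is simply the port rate
divided across the 4K pipes of the default configuration. A quick standalone
sketch of that arithmetic (not part of the scheduler code):

```
#include <stdio.h>

int
main (void)
{
  unsigned long long port_rate = 1250000000ULL; /* 10GbE port, bytes/second */
  unsigned int pipes_per_subport = 4096;        /* default n_pipes_per_subport */

  /* 1250000000 / 4096 = 305175 (integer part), matching tb_rate above */
  printf ("pipe tb_rate = %llu bytes/second\n", port_rate / pipes_per_subport);
  return 0;
}
```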
-### DPDK QoS Scheduler Integration in VPP - -The Hierarchical Quaity-of-Service (HQoS) scheduler object could be seen as -part of the logical NIC output interface. To enable HQoS on specific output -interface, vpp startup.conf file has to be configured accordingly. The output -interface that requires HQoS, should have "hqos" parameter specified in dpdk -section. Another optional parameter "hqos-thread" has been defined which can -be used to associate the output interface with specific hqos thread. In cpu -section of the config file, "corelist-hqos-threads" is introduced to assign -logical cpu cores to run the HQoS threads. A HQoS thread can run multiple HQoS -objects each associated with different output interfaces. All worker threads -instead of writing packets to NIC TX queue directly, write the packets to a -software queues. The hqos_threads read the software queues, and enqueue the -packets to HQoS objects, as well as dequeue packets from HQOS objects and -write them to NIC output interfaces. The worker threads need to be able to -send the packets to any output interface, therefore, each HQoS object -associated with NIC output interface should have software queues equal to -worker threads count. - -Following illustrates the sample startup configuration file with 4x worker -threads feeding 2x hqos threads that handle each QoS scheduler for 1x output -interface. - -``` -dpdk { - socket-mem 16384,16384 - - dev 0000:02:00.0 { - num-rx-queues 2 - hqos - } - dev 0000:06:00.0 { - num-rx-queues 2 - hqos - } - - num-mbufs 1000000 -} - -cpu { - main-core 0 - corelist-workers 1, 2, 3, 4 - corelist-hqos-threads 5, 6 -} -``` - - -### QoS scheduler CLI Commands - -Each QoS scheduler instance is initialised with default parameters required to -configure hqos port, subport, pipe and queues. Some of the parameters can be -re-configured in run-time through CLI commands. - - -#### Configuration - -Following commands can be used to configure QoS scheduler parameters. - -The command below can be used to set the subport level parameters such as -token bucket rate (bytes per seconds), token bucket size (bytes), traffic -class rates (bytes per seconds) and token update period (Milliseconds). - -``` -set dpdk interface hqos subport subport [rate ] - [bktsize ] [tc0 ] [tc1 ] [tc2 ] [tc3 ] [period ] -``` - -For setting the pipe profile, following command can be used. - -``` -set dpdk interface hqos pipe subport pipe - profile -``` - -To assign QoS scheduler instance to the specific thread, following command can -be used. - -``` -set dpdk interface hqos placement thread -``` - -The command below is used to set the packet fields required for classifiying -the incoming packet. As a result of classification process, packet field -information will be mapped to 5 tuples (subport, pipe, traffic class, pipe, -color) and stored in packet mbuf. - -``` -set dpdk interface hqos pktfield id subport|pipe|tc offset - mask -``` - -The DSCP table entries used for idenfiying the traffic class and queue can be set using the command below; - -``` -set dpdk interface hqos tctbl entry tc queue -``` - - -#### Show Command - -The QoS Scheduler configuration can displayed using the command below. 
- -``` - vpp# show dpdk interface hqos TenGigabitEthernet2/0/0 - Thread: - Input SWQ size = 4096 packets - Enqueue burst size = 256 packets - Dequeue burst size = 220 packets - Packet field 0: slab position = 0, slab bitmask = 0x0000000000000000 (subport) - Packet field 1: slab position = 40, slab bitmask = 0x0000000fff000000 (pipe) - Packet field 2: slab position = 8, slab bitmask = 0x00000000000000fc (tc) - Packet field 2 tc translation table: ([Mapped Value Range]: tc/queue tc/queue ...) - [ 0 .. 15]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3 - [16 .. 31]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3 - [32 .. 47]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3 - [48 .. 63]: 0/0 0/1 0/2 0/3 1/0 1/1 1/2 1/3 2/0 2/1 2/2 2/3 3/0 3/1 3/2 3/3 - Port: - Rate = 1250000000 bytes/second - MTU = 1514 bytes - Frame overhead = 24 bytes - Number of subports = 1 - Number of pipes per subport = 4096 - Packet queue size: TC0 = 64, TC1 = 64, TC2 = 64, TC3 = 64 packets - Number of pipe profiles = 1 - Subport 0: - Rate = 120000000 bytes/second - Token bucket size = 1000000 bytes - Traffic class rate: TC0 = 120000000, TC1 = 120000000, TC2 = 120000000, TC3 = 120000000 bytes/second - TC period = 10 milliseconds - Pipe profile 0: - Rate = 305175 bytes/second - Token bucket size = 1000000 bytes - Traffic class rate: TC0 = 305175, TC1 = 305175, TC2 = 305175, TC3 = 305175 bytes/second - TC period = 40 milliseconds - TC0 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1 - TC1 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1 - TC2 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1 - TC3 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1 -``` - -The QoS Scheduler placement over the logical cpu cores can be displayed using -below command. - -``` - vpp# show dpdk interface hqos placement - Thread 5 (vpp_hqos-threads_0 at lcore 5): - TenGigabitEthernet2/0/0 queue 0 - Thread 6 (vpp_hqos-threads_1 at lcore 6): - TenGigabitEthernet4/0/1 queue 0 -``` - - -### QoS Scheduler Binary APIs - -This section explans the available binary APIs for configuring QoS scheduler -parameters in run-time. - -The following API can be used to set the pipe profile of a pipe that belongs -to a given subport: - -``` -sw_interface_set_dpdk_hqos_pipe rx | sw_if_index - subport pipe profile -``` - -The data structures used for set the pipe profile parameter are as follows; - -``` - /** \\brief DPDK interface HQoS pipe profile set request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param sw_if_index - the interface - @param subport - subport ID - @param pipe - pipe ID within its subport - @param profile - pipe profile ID - */ - define sw_interface_set_dpdk_hqos_pipe { - u32 client_index; - u32 context; - u32 sw_if_index; - u32 subport; - u32 pipe; - u32 profile; - }; - - /** \\brief DPDK interface HQoS pipe profile set reply - @param context - sender context, to match reply w/ request - @param retval - request return code - */ - define sw_interface_set_dpdk_hqos_pipe_reply { - u32 context; - i32 retval; - }; -``` - -The following API can be used to set the subport level parameters, for -example- token bucket rate (bytes per seconds), token bucket size (bytes), -traffic class rate (bytes per seconds) and tokens update period. 
- -``` -sw_interface_set_dpdk_hqos_subport rx | sw_if_index - subport [rate ] [bktsize ] - [tc0 ] [tc1 ] [tc2 ] [tc3 ] [period ] -``` - -The data structures used for set the subport level parameter are as follows; - -``` - /** \\brief DPDK interface HQoS subport parameters set request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param sw_if_index - the interface - @param subport - subport ID - @param tb_rate - subport token bucket rate (measured in bytes/second) - @param tb_size - subport token bucket size (measured in credits) - @param tc_rate - subport traffic class 0 .. 3 rates (measured in bytes/second) - @param tc_period - enforcement period for rates (measured in milliseconds) - */ - define sw_interface_set_dpdk_hqos_subport { - u32 client_index; - u32 context; - u32 sw_if_index; - u32 subport; - u32 tb_rate; - u32 tb_size; - u32 tc_rate[4]; - u32 tc_period; - }; - - /** \\brief DPDK interface HQoS subport parameters set reply - @param context - sender context, to match reply w/ request - @param retval - request return code - */ - define sw_interface_set_dpdk_hqos_subport_reply { - u32 context; - i32 retval; - }; -``` - -The following API can be used set the DSCP table entry. The DSCP table have -64 entries to map the packet DSCP field onto traffic class and hqos input -queue. - -``` -sw_interface_set_dpdk_hqos_tctbl rx | sw_if_index - entry tc queue -``` - -The data structures used for setting DSCP table entries are given below. - -``` - /** \\brief DPDK interface HQoS tctbl entry set request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param sw_if_index - the interface - @param entry - entry index ID - @param tc - traffic class (0 .. 3) - @param queue - traffic class queue (0 .. 3) - */ - define sw_interface_set_dpdk_hqos_tctbl { - u32 client_index; - u32 context; - u32 sw_if_index; - u32 entry; - u32 tc; - u32 queue; - }; - - /** \\brief DPDK interface HQoS tctbl entry set reply - @param context - sender context, to match reply w/ request - @param retval - request return code - */ - define sw_interface_set_dpdk_hqos_tctbl_reply { - u32 context; - i32 retval; - }; -``` diff --git a/src/vnet/devices/dpdk/thread.c b/src/vnet/devices/dpdk/thread.c deleted file mode 100644 index 475dd142..00000000 --- a/src/vnet/devices/dpdk/thread.c +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2017 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -static clib_error_t * -dpdk_launch_thread (void *fp, vlib_worker_thread_t * w, unsigned lcore_id) -{ - int r; - r = rte_eal_remote_launch (fp, (void *) w, lcore_id); - if (r) - return clib_error_return (0, "Failed to launch thread %u", lcore_id); - return 0; -} - -static clib_error_t * -dpdk_thread_set_lcore (u32 thread, u16 lcore) -{ - return 0; -} - -static vlib_thread_callbacks_t callbacks = { - .vlib_launch_thread_cb = &dpdk_launch_thread, - .vlib_thread_set_lcore_cb = &dpdk_thread_set_lcore, -}; - -static clib_error_t * -dpdk_thread_init (vlib_main_t * vm) -{ - vlib_thread_cb_register (vm, &callbacks); - return 0; -} - -VLIB_INIT_FUNCTION (dpdk_thread_init); - -/** @endcond */ -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/virtio/vhost-user.h b/src/vnet/devices/virtio/vhost-user.h index 3083b614..dd23a909 100644 --- a/src/vnet/devices/virtio/vhost-user.h +++ b/src/vnet/devices/virtio/vhost-user.h @@ -328,17 +328,6 @@ typedef struct int vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm, vhost_user_intf_details_t ** out_vuids); -// CLI commands to be used from dpdk -clib_error_t *vhost_user_connect_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd); -clib_error_t *vhost_user_delete_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd); -clib_error_t *show_vhost_user_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd); - #endif /* diff --git a/src/vnet/ipsec/ipsec_api.c b/src/vnet/ipsec/ipsec_api.c index 49b475cf..e37bccee 100644 --- a/src/vnet/ipsec/ipsec_api.c +++ b/src/vnet/ipsec/ipsec_api.c @@ -79,11 +79,7 @@ static void vl_api_ipsec_spd_add_del_t_handler vl_api_ipsec_spd_add_del_reply_t *rmp; int rv; -#if DPDK > 0 rv = ipsec_add_del_spd (vm, ntohl (mp->spd_id), mp->is_add); -#else - rv = VNET_API_ERROR_UNIMPLEMENTED; -#endif REPLY_MACRO (VL_API_IPSEC_SPD_ADD_DEL_REPLY); #endif diff --git a/src/vnet/pg/input.c b/src/vnet/pg/input.c index e15faeb8..4a65b024 100644 --- a/src/vnet/pg/input.c +++ b/src/vnet/pg/input.c @@ -1212,10 +1212,10 @@ pg_stream_fill_helper (pg_main_t * pg, /* * Historically, the pg maintained its own free lists and - * device drivers tx paths would return pkts. With the DPDK, - * that doesn't happen. + * device drivers tx paths would return pkts. 
*/ - if (DPDK == 0 && !(s->flags & PG_STREAM_FLAGS_DISABLE_BUFFER_RECYCLE)) + if (vm->buffer_main->extern_buffer_mgmt == 0 && + !(s->flags & PG_STREAM_FLAGS_DISABLE_BUFFER_RECYCLE)) f->buffer_init_function = pg_buffer_init; f->buffer_init_function_opaque = (s - pg->streams) | ((bi - s->buffer_indices) << 24); @@ -1238,7 +1238,7 @@ pg_stream_fill_helper (pg_main_t * pg, n_alloc = n_allocated; /* Reinitialize buffers */ - if (DPDK == 0 || CLIB_DEBUG > 0 + if (vm->buffer_main->extern_buffer_mgmt == 0 || CLIB_DEBUG > 0 || (s->flags & PG_STREAM_FLAGS_DISABLE_BUFFER_RECYCLE)) init_buffers_inline (vm, s, @@ -1246,7 +1246,8 @@ pg_stream_fill_helper (pg_main_t * pg, n_alloc, (bi - s->buffer_indices) * s->buffer_bytes /* data offset */ , s->buffer_bytes, /* set_data */ - DPDK == 1 || (s->flags & PG_STREAM_FLAGS_DISABLE_BUFFER_RECYCLE) != 0); + vm->buffer_main->extern_buffer_mgmt != 0 + || (s->flags & PG_STREAM_FLAGS_DISABLE_BUFFER_RECYCLE) != 0); if (next_buffers) pg_set_next_buffer_pointers (pg, s, buffers, next_buffers, n_alloc); diff --git a/src/vnet/pg/stream.c b/src/vnet/pg/stream.c index c46875e1..560c4b07 100644 --- a/src/vnet/pg/stream.c +++ b/src/vnet/pg/stream.c @@ -442,9 +442,8 @@ pg_stream_add (pg_main_t * pg, pg_stream_t * s_init) pg_buffer_index_t *bi; int n; -#if DPDK > 0 - s->buffer_bytes = VLIB_BUFFER_DATA_SIZE; -#endif + if (vm->buffer_main->extern_buffer_mgmt) + s->buffer_bytes = VLIB_BUFFER_DATA_SIZE; if (!s->buffer_bytes) s->buffer_bytes = s->max_packet_bytes; diff --git a/src/vnet/replication.c b/src/vnet/replication.c index 02755195..86d922b5 100644 --- a/src/vnet/replication.c +++ b/src/vnet/replication.c @@ -214,9 +214,9 @@ replication_recycle_callback (vlib_main_t * vm, vlib_buffer_free_list_t * fl) b0->flags |= VLIB_BUFFER_IS_RECYCLED; #if (CLIB_DEBUG > 0) -#if DPDK == 0 - vlib_buffer_set_known_state (vm, bi0, VLIB_BUFFER_KNOWN_ALLOCATED); -#endif + if (vm->buffer_main->extern_buffer_mgmt == 0) + vlib_buffer_set_known_state (vm, bi0, + VLIB_BUFFER_KNOWN_ALLOCATED); #endif /* If buffer is traced, mark frame as traced */ diff --git a/src/vnet/vnet_all_api_h.h b/src/vnet/vnet_all_api_h.h index c4075db6..9d3abae5 100644 --- a/src/vnet/vnet_all_api_h.h +++ b/src/vnet/vnet_all_api_h.h @@ -30,9 +30,6 @@ #endif /* included_from_layer_3 */ #include -#if DPDK > 0 -#include -#endif #include #include #include diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c index 4cc6aa73..3871601b 100644 --- a/src/vpp/api/custom_dump.c +++ b/src/vpp/api/custom_dump.c @@ -237,58 +237,6 @@ static void *vl_api_sw_interface_set_l2_bridge_t_print FINISH; } -#if DPDK > 0 -static void *vl_api_sw_interface_set_dpdk_hqos_pipe_t_print - (vl_api_sw_interface_set_dpdk_hqos_pipe_t * mp, void *handle) -{ - u8 *s; - - s = format (0, "SCRIPT: sw_interface_set_dpdk_hqos_pipe "); - - s = format (s, "sw_if_index %u ", ntohl (mp->sw_if_index)); - - s = format (s, "subport %u pipe %u profile %u ", - ntohl (mp->subport), ntohl (mp->pipe), ntohl (mp->profile)); - - FINISH; -} - -static void *vl_api_sw_interface_set_dpdk_hqos_subport_t_print - (vl_api_sw_interface_set_dpdk_hqos_subport_t * mp, void *handle) -{ - u8 *s; - - s = format (0, "SCRIPT: sw_interface_set_dpdk_hqos_subport "); - - s = format (s, "sw_if_index %u ", ntohl (mp->sw_if_index)); - - s = - format (s, - "subport %u rate %u bkt_size %u tc0 %u tc1 %u tc2 %u tc3 %u period %u", - ntohl (mp->subport), ntohl (mp->tb_rate), ntohl (mp->tb_size), - ntohl (mp->tc_rate[0]), ntohl (mp->tc_rate[1]), - ntohl (mp->tc_rate[2]), ntohl (mp->tc_rate[3]), - 
ntohl (mp->tc_period)); - - FINISH; -} - -static void *vl_api_sw_interface_set_dpdk_hqos_tctbl_t_print - (vl_api_sw_interface_set_dpdk_hqos_tctbl_t * mp, void *handle) -{ - u8 *s; - - s = format (0, "SCRIPT: sw_interface_set_dpdk_hqos_tctbl "); - - s = format (s, "sw_if_index %u ", ntohl (mp->sw_if_index)); - - s = format (s, "entry %u tc %u queue %u", - ntohl (mp->entry), ntohl (mp->tc), ntohl (mp->queue)); - - FINISH; -} -#endif - static void *vl_api_bridge_domain_add_del_t_print (vl_api_bridge_domain_add_del_t * mp, void *handle) { @@ -3036,18 +2984,6 @@ vl_msg_api_custom_dump_configure (api_main_t * am) = (void *) vl_api_##f##_t_print; foreach_custom_print_function; #undef _ - -#if DPDK > 0 - /* - * manually add DPDK hqos print handlers - */ - am->msg_print_handlers[VL_API_SW_INTERFACE_SET_DPDK_HQOS_PIPE] = - (void *) vl_api_sw_interface_set_dpdk_hqos_pipe_t_print; - am->msg_print_handlers[VL_API_SW_INTERFACE_SET_DPDK_HQOS_SUBPORT] = - (void *) vl_api_sw_interface_set_dpdk_hqos_subport_t_print; - am->msg_print_handlers[VL_API_SW_INTERFACE_SET_DPDK_HQOS_TCTBL] = - (void *) vl_api_sw_interface_set_dpdk_hqos_tctbl_t_print; -#endif } /* diff --git a/src/vpp/api/gmon.c b/src/vpp/api/gmon.c index b28608f0..610f40ed 100644 --- a/src/vpp/api/gmon.c +++ b/src/vpp/api/gmon.c @@ -137,7 +137,8 @@ gmon_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) *gm->vector_rate_ptr = vector_rate; now = vlib_time_now (vm); dt = now - last_runtime; - input_packets = vnet_get_aggregate_rx_packets (); + // TODO + //input_packets = vnet_get_aggregate_rx_packets (); *gm->input_rate_ptr = (f64) (input_packets - last_input_packets) / dt; last_runtime = now; last_input_packets = input_packets; diff --git a/src/vpp/api/vpe.api b/src/vpp/api/vpe.api index 2d6e4f37..7f9c2038 100644 --- a/src/vpp/api/vpe.api +++ b/src/vpp/api/vpe.api @@ -41,7 +41,6 @@ * SESSION APIs: .../vnet/session/{session.api session_api.c} * MPLS APIs: see .../src/vnet/mpls/{mpls.api, mpls_api.c} * SR APIs: see .../src/vnet/sr/{sr.api, sr_api.c} - * DPDK APIs: see ... /src/vnet/devices/dpdk/{dpdk.api, dpdk_api.c} * CLASSIFY APIs: see ... /src/vnet/classify/{classify.api, classify_api.c} * FLOW APIs: see ... /src/vnet/flow/{flow.api, flow_api.c} * DHCP APIs: see ... /src/vnet/dhcp/{dhcpk.api, dhcp_api.c} diff --git a/src/vpp/app/l2t.c b/src/vpp/app/l2t.c deleted file mode 100644 index e1eda155..00000000 --- a/src/vpp/app/l2t.c +++ /dev/null @@ -1,562 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include - -#if DPDK == 0 -#include -#else -#include -#endif - -#include -#include -#include - -l2t_main_t l2t_main; - -/* $$$$ unused? 
- * get_interface_ethernet_address - * paints the ethernet address for a given interface - * into the supplied destination - */ -void -get_interface_ethernet_address (l2t_main_t * lm, u8 * dst, u32 sw_if_index) -{ - ethernet_main_t *em = ethernet_get_main (lm->vlib_main); - ethernet_interface_t *ei; - vnet_hw_interface_t *hi; - - hi = vnet_get_sup_hw_interface (lm->vnet_main, sw_if_index); - ei = pool_elt_at_index (em->interfaces, hi->hw_instance); - clib_memcpy (dst, ei->address, sizeof (ei->address)); -} - -/* packet trace format function */ -u8 * -format_l2t_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - l2t_trace_t *t = va_arg (*args, l2t_trace_t *); - - if (t->is_user_to_network) - s = format (s, "L2T: %U (client) -> %U (our) session %d", - format_ip6_address, &t->client_address, - format_ip6_address, &t->our_address, t->session_index); - else - s = format (s, "L2T: %U (our) -> %U (client) session %d)", - format_ip6_address, &t->our_address, - format_ip6_address, &t->client_address, t->session_index); - return s; -} - -u8 * -format_l2t_session (u8 * s, va_list * args) -{ - l2t_session_t *session = va_arg (*args, l2t_session_t *); - l2t_main_t *lm = &l2t_main; - u32 counter_index; - vlib_counter_t v; - - s = format (s, "[%d] %U (our) %U (client) vlan-id %d rx_sw_if_index %d\n", - session - lm->sessions, - format_ip6_address, &session->our_address, - format_ip6_address, &session->client_address, - clib_net_to_host_u16 (session->vlan_id), session->sw_if_index); - - s = format (s, " local cookie %llx remote cookie %llx\n", - clib_net_to_host_u64 (session->local_cookie), - clib_net_to_host_u64 (session->remote_cookie)); - - if (session->cookie_flags & L2TP_COOKIE_ROLLOVER_LOCAL) - { - s = format (s, " local rollover cookie %llx\n", - clib_net_to_host_u64 (session->lcl_ro_cookie)); - } - - s = format (s, " local session-id %d remote session-id %d\n", - clib_net_to_host_u32 (session->local_session_id), - clib_net_to_host_u32 (session->remote_session_id)); - - s = format (s, " l2 specific sublayer %s\n", - session->l2_sublayer_present ? 
"preset" : "absent"); - - counter_index = - session_index_to_counter_index (session - lm->sessions, - SESSION_COUNTER_USER_TO_NETWORK); - - vlib_get_combined_counter (&lm->counter_main, counter_index, &v); - if (v.packets != 0) - s = format (s, " user-to-net: %llu pkts %llu bytes\n", - v.packets, v.bytes); - - vlib_get_combined_counter (&lm->counter_main, counter_index + 1, &v); - - if (v.packets != 0) - s = format (s, " net-to-user: %llu pkts %llu bytes\n", - v.packets, v.bytes); - return s; -} - -static clib_error_t * -show_session_summary_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - l2t_main_t *lm = &l2t_main; - - vlib_cli_output (vm, "%d active sessions\n", pool_elts (lm->sessions)); - - return 0; -} - -/* *INDENT-OFF* */ -static VLIB_CLI_COMMAND (show_session_summary_command) = { - .path = "show session", - .short_help = "show session summary", - .function = show_session_summary_command_fn, -}; -/* *INDENT-ON* */ - -static clib_error_t * -show_session_detail_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - l2t_session_t *session; - l2t_main_t *lm = &l2t_main; - - /* *INDENT-OFF* */ - pool_foreach (session, lm->sessions, - ({ - vlib_cli_output (vm, "%U", format_l2t_session, session); - })); - /* *INDENT-ON* */ - - return 0; -} - -/* *INDENT-OFF* */ -static VLIB_CLI_COMMAND (show_session_detail_command) = { - .path = "show session detail", - .short_help = "show session table detail", - .function = show_session_detail_command_fn, -}; -/* *INDENT-ON* */ - -static clib_error_t * -test_counters_command_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - l2t_session_t *session; - l2t_main_t *lm = &l2t_main; - u32 session_index; - u32 counter_index; - u32 nincr = 0; - - /* *INDENT-OFF* */ - pool_foreach (session, lm->sessions, - ({ - session_index = session - lm->sessions; - counter_index = - session_index_to_counter_index (session_index, - SESSION_COUNTER_USER_TO_NETWORK); - vlib_increment_combined_counter (&lm->counter_main, - counter_index, - 1/*pkt*/, 1111 /*bytes*/); - vlib_increment_combined_counter (&lm->counter_main, - counter_index+1, - 1/*pkt*/, 2222 /*bytes*/); - nincr++; - })); - /* *INDENT-ON* */ - vlib_cli_output (vm, "Incremented %d active counters\n", nincr); - - return 0; -} - -/* *INDENT-OFF* */ -static VLIB_CLI_COMMAND (test_counters_command) = { - .path = "test counters", - .short_help = "increment all active counters", - .function = test_counters_command_fn, -}; -/* *INDENT-ON* */ - -static clib_error_t * -clear_counters_command_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - l2t_session_t *session; - l2t_main_t *lm = &l2t_main; - u32 session_index; - u32 counter_index; - u32 nincr = 0; - - /* *INDENT-OFF* */ - pool_foreach (session, lm->sessions, - ({ - session_index = session - lm->sessions; - counter_index = - session_index_to_counter_index (session_index, - SESSION_COUNTER_USER_TO_NETWORK); - vlib_zero_combined_counter (&lm->counter_main, counter_index); - vlib_zero_combined_counter (&lm->counter_main, counter_index+1); - nincr++; - })); - /* *INDENT-ON* */ - vlib_cli_output (vm, "Cleared %d active counters\n", nincr); - - return 0; -} - -/* *INDENT-OFF* */ -static VLIB_CLI_COMMAND (clear_counters_command) = { - .path = "clear counters", - .short_help = "clear all active counters", - .function = clear_counters_command_fn, -}; -/* *INDENT-ON* */ - -static clib_error_t * -l2tp_session_add_command_fn (vlib_main_t * vm, - 
unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - ip6_address_t client_address, our_address; - ip6_address_t *dst_address_copy, *src_address_copy; - unformat_input_t _line_input, *line_input = &_line_input; - u32 vlan_id; - u32 sw_if_index = (u32) ~ 0; - l2t_main_t *lm = &l2t_main; - l2t_session_t *s; - uword *p; - vnet_hw_interface_t *hi; - vnet_sw_interface_t *si; - u32 next_index; - uword vlan_and_sw_if_index_key; - u32 counter_index; - u64 local_cookie = (u64) ~ 0, remote_cookie = (u64) ~ 0; - u32 local_session_id = 1, remote_session_id = 1; - int our_address_set = 0, client_address_set = 0; - int l2_sublayer_present = 0; - clib_error_t *error = NULL; - - /* Get a line of input. */ - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (line_input, "client %U", - unformat_ip6_address, &client_address)) - client_address_set = 1; - else if (unformat (line_input, "our %U", - unformat_ip6_address, &our_address)) - our_address_set = 1; - else if (unformat (line_input, "vlan %d", &vlan_id)) - ; - else if (unformat (line_input, "l2-interface %U", - unformat_vnet_sw_interface, - vnet_get_main (), &sw_if_index)) - ; - else if (unformat (line_input, "interface %U", - unformat_vnet_sw_interface, - vnet_get_main (), &sw_if_index)) - ; - else if (unformat (line_input, "local-cookie %llx", &local_cookie)) - ; - else if (unformat (line_input, "remote-cookie %llx", &remote_cookie)) - ; - else if (unformat (line_input, "local-session-id %d", - &local_session_id)) - ; - else if (unformat (line_input, "remote-session-id %d", - &remote_session_id)) - ; - else if (unformat (line_input, "l2-sublayer-present")) - l2_sublayer_present = 1; - else - { - error = clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); - unformat_free (line_input); - return error; - } - } - - unformat_free (line_input); - - if (sw_if_index == (u32) ~ 0) - return clib_error_return (0, "l2-interface not specified"); - if (our_address_set == 0) - return clib_error_return (0, "our address not specified"); - if (client_address_set == 0) - return clib_error_return (0, "client address not specified"); - - remote_session_id = clib_host_to_net_u32 (remote_session_id); - local_session_id = clib_host_to_net_u32 (local_session_id); - - switch (lm->lookup_type) - { - case L2T_LOOKUP_SRC_ADDRESS: - p = hash_get_mem (lm->session_by_src_address, &client_address); - if (p) - return clib_error_return - (0, "Session w/ client address %U already exists", - format_ip6_address, &client_address); - break; - - case L2T_LOOKUP_DST_ADDRESS: - p = hash_get_mem (lm->session_by_dst_address, &our_address); - if (p) - return clib_error_return - (0, "Session w/ our address %U already exists", - format_ip6_address, &our_address); - break; - - case L2T_LOOKUP_SESSION_ID: - p = hash_get (lm->session_by_session_id, local_session_id); - if (p) - return clib_error_return - (0, - "Session w/ local session id %d already exists", - clib_net_to_host_u32 (local_session_id)); - break; - - default: - ASSERT (0); - } - - pool_get (lm->sessions, s); - memset (s, 0, sizeof (*s)); - clib_memcpy (&s->our_address, &our_address, sizeof (s->our_address)); - clib_memcpy (&s->client_address, &client_address, - sizeof (s->client_address)); - s->sw_if_index = sw_if_index; - s->vlan_id = clib_host_to_net_u16 (vlan_id); - s->local_cookie = clib_host_to_net_u64 (local_cookie); - l2tp_session_set_remote_cookie (s, remote_cookie); - 
s->local_session_id = local_session_id; - s->remote_session_id = remote_session_id; - s->l2_sublayer_present = l2_sublayer_present; - - hi = vnet_get_sup_hw_interface (lm->vnet_main, sw_if_index); - si = vnet_get_sup_sw_interface (lm->vnet_main, sw_if_index); - - next_index = vlib_node_add_next (vm, l2t_ip6_node.index, - hi->output_node_index); - s->l2_output_next_index = next_index; - s->l2_output_sw_if_index = si->sw_if_index; - - /* Setup hash table entries */ - switch (lm->lookup_type) - { - case L2T_LOOKUP_SRC_ADDRESS: - src_address_copy = clib_mem_alloc (sizeof (*src_address_copy)); - clib_memcpy (src_address_copy, &client_address, - sizeof (*src_address_copy)); - hash_set_mem (lm->session_by_src_address, src_address_copy, - s - lm->sessions); - break; - case L2T_LOOKUP_DST_ADDRESS: - dst_address_copy = clib_mem_alloc (sizeof (*dst_address_copy)); - clib_memcpy (dst_address_copy, &our_address, - sizeof (*dst_address_copy)); - hash_set_mem (lm->session_by_dst_address, dst_address_copy, - s - lm->sessions); - break; - case L2T_LOOKUP_SESSION_ID: - hash_set (lm->session_by_session_id, local_session_id, - s - lm->sessions); - break; - - default: - ASSERT (0); - } - - vlan_and_sw_if_index_key = ((uword) (s->vlan_id) << 32) | sw_if_index; - hash_set (lm->session_by_vlan_and_rx_sw_if_index, - vlan_and_sw_if_index_key, s - lm->sessions); - - /* validate counters */ - counter_index = - session_index_to_counter_index (s - lm->sessions, - SESSION_COUNTER_USER_TO_NETWORK); - vlib_validate_counter (&lm->counter_main, counter_index); - vlib_validate_counter (&lm->counter_main, counter_index + 1); - - /* Set promiscuous mode on the l2 interface */ - ethernet_set_flags (lm->vnet_main, hi->hw_if_index, - ETHERNET_INTERFACE_FLAG_ACCEPT_ALL); - vnet_hw_interface_rx_redirect_to_node (lm->vnet_main, hi->hw_if_index, - l2t_l2_node.index); - return 0; -} - -/* *INDENT-OFF* */ -static VLIB_CLI_COMMAND (l2tp_session_add_command) = { - .path = "l2tp session add", - .short_help = - "l2tp session add client our vlan local-cookie remote-cookie local-session remote-session l2-interface ", - .function = l2tp_session_add_command_fn, -}; -/* *INDENT-ON* */ - -static clib_error_t * -l2tp_session_del_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - l2t_main_t *lm = &l2t_main; - u32 session_index; - l2t_session_t *s; - hash_pair_t *hp; - void *key; - uword vlan_and_sw_if_index_key; - - if (!unformat (input, "%d", &session_index)) - return clib_error_return (0, "missing session index: '%U'", - format_unformat_error, input); - - if (pool_is_free_index (lm->sessions, session_index)) - return clib_error_return (0, "session %d not in use", session_index); - - s = pool_elt_at_index (lm->sessions, session_index); - - switch (lm->lookup_type) - { - case L2T_LOOKUP_SRC_ADDRESS: - hp = hash_get_pair_mem (lm->session_by_src_address, &s->client_address); - if (hp) - { - key = (void *) (hp->key); - hash_unset_mem (lm->session_by_src_address, &s->client_address); - clib_mem_free (key); - } - else - clib_warning ("session %d src address key %U AWOL", - s - lm->sessions, - format_ip6_address, &s->client_address); - break; - - case L2T_LOOKUP_DST_ADDRESS: - hp = hash_get_pair_mem (lm->session_by_dst_address, &s->our_address); - if (hp) - { - key = (void *) (hp->key); - hash_unset_mem (lm->session_by_dst_address, &s->our_address); - clib_mem_free (key); - } - else - clib_warning ("session %d dst address key %U AWOL", - s - lm->sessions, format_ip6_address, &s->our_address); - break; - - case 
L2T_LOOKUP_SESSION_ID: - hash_unset (lm->session_by_session_id, s->local_session_id); - break; - - default: - ASSERT (0); - } - - vlan_and_sw_if_index_key = ((uword) (s->vlan_id) << 32) | s->sw_if_index; - - hash_unset (lm->session_by_vlan_and_rx_sw_if_index, - vlan_and_sw_if_index_key); - - pool_put (lm->sessions, s); - return 0; -} - -/* *INDENT-OFF* */ -static VLIB_CLI_COMMAND (l2tp_session_del_command) = { - .path = "l2tp session delete", - .short_help = - "l2tp session delete ", - .function = l2tp_session_del_command_fn, -}; -/* *INDENT-ON* */ - -static clib_error_t * -l2tp_session_cookie_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - l2t_main_t *lm = &l2t_main; - u32 session_index; - l2t_session_t *s; - u64 lcl_ro_cookie = (u64) ~ 0, rem_ro_cookie = (u64) ~ 0; - u8 cookie_flags = 0; - - if (!unformat (input, "%d", &session_index)) - return clib_error_return (0, "missing session index: '%U'", - format_unformat_error, input); - - if (pool_is_free_index (lm->sessions, session_index)) - return clib_error_return (0, "session %d not in use", session_index); - - s = pool_elt_at_index (lm->sessions, session_index); - - if (unformat (input, "commit")) - { - if (!s->cookie_flags) - { - return clib_error_return (0, "no rollover cookie ready to commit"); - } - else - { - l2tp_session_cookie_commit (s); - return 0; - } - } - if (!unformat (input, "rollover")) - return clib_error_return (0, "missing 'commit|rollover': '%U'", - format_unformat_error, input); - if (unformat (input, "local %llx", &lcl_ro_cookie)) - { - cookie_flags |= L2TP_COOKIE_ROLLOVER_LOCAL; - l2tp_session_set_local_rollover_cookie (s, lcl_ro_cookie); - } - if (unformat (input, "remote %llx", &rem_ro_cookie)) - { - cookie_flags |= L2TP_COOKIE_ROLLOVER_REMOTE; - l2tp_session_set_remote_cookie (s, rem_ro_cookie); - } - if (!cookie_flags) - return clib_error_return (0, "no rollover cookie specified"); - - return 0; -} - -/* *INDENT-OFF* */ -static VLIB_CLI_COMMAND (l2tp_session_cookie_command) = { - .path = "l2tp session cookie", - .short_help = - "l2tp session cookie commit|rollover [local ] [remote ]", - .function = l2tp_session_cookie_command_fn, -}; -/* *INDENT-ON* */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vpp/app/l2t_l2.c b/src/vpp/app/l2t_l2.c deleted file mode 100644 index 07d30d9a..00000000 --- a/src/vpp/app/l2t_l2.c +++ /dev/null @@ -1,267 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include -#include -#include - -#if DPDK == 0 -#include -#include -#include -#else -#include -#endif - -#include -#include -#include - -l2t_main_t l2t_main; - -/* Statistics (not really errors) */ -#define foreach_l2t_l2_error \ -_(NETWORK_TO_USER, "L2 network to user (ip6) pkts") - -static char *l2t_l2_error_strings[] = { -#define _(sym,string) string, - foreach_l2t_l2_error -#undef _ -}; - -typedef enum -{ -#define _(sym,str) L2T_L2_ERROR_##sym, - foreach_l2t_l2_error -#undef _ - L2T_L2_N_ERROR, -} l2t_l2_error_t; - -/* - * Packets go to ethernet-input when they don't match a mapping - */ -typedef enum -{ - L2T_L2_NEXT_DROP, - L2T_L2_NEXT_ETHERNET_INPUT, - L2T_L2_NEXT_IP6_LOOKUP, - L2T_L2_N_NEXT, -} l2t_l2_next_t; - -vlib_node_registration_t l2t_l2_node; - -#define NSTAGES 3 - -static inline void -stage0 (vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index) -{ - vlib_buffer_t *b = vlib_get_buffer (vm, buffer_index); - vlib_prefetch_buffer_header (b, STORE); - CLIB_PREFETCH (b->data, 2 * CLIB_CACHE_LINE_BYTES, STORE); -} - -static inline void -stage1 (vlib_main_t * vm, vlib_node_runtime_t * node, u32 bi) -{ - vlib_buffer_t *b = vlib_get_buffer (vm, bi); - l2t_main_t *lm = &l2t_main; - ethernet_header_t *eh; - ethernet_vlan_header_t *vh; - u32 session_index; - uword *p; - uword vlan_and_sw_if_index_key; - - /* just in case, needed to test with the tun/tap device */ - vlib_buffer_reset (b); - - eh = vlib_buffer_get_current (b); - - /* Not a VLAN pkt? send to ethernet-input... */ - if (PREDICT_FALSE (eh->type != clib_host_to_net_u16 (0x8100))) - { - vnet_buffer (b)->l2t.next_index = L2T_L2_NEXT_ETHERNET_INPUT; - return; - } - vh = (ethernet_vlan_header_t *) (eh + 1); - - /* look up session */ - vlan_and_sw_if_index_key = ((uword) (vh->priority_cfi_and_id) << 32) - | vnet_buffer (b)->sw_if_index[VLIB_RX]; - - p = hash_get (lm->session_by_vlan_and_rx_sw_if_index, - vlan_and_sw_if_index_key); - - if (PREDICT_FALSE (p == 0)) - { - /* $$$ drop here if not for our MAC? */ - vnet_buffer (b)->l2t.next_index = L2T_L2_NEXT_ETHERNET_INPUT; - return; - } - else - { - session_index = p[0]; - } - - /* Remember mapping index, prefetch the mini counter */ - vnet_buffer (b)->l2t.next_index = L2T_L2_NEXT_IP6_LOOKUP; - vnet_buffer (b)->l2t.session_index = session_index; - - /* Each mapping has 2 x (pkt, byte) counters, hence the shift */ - CLIB_PREFETCH (lm->counter_main.mini + (p[0] << 1), CLIB_CACHE_LINE_BYTES, - STORE); -} - -static inline u32 -last_stage (vlib_main_t * vm, vlib_node_runtime_t * node, u32 bi) -{ - vlib_buffer_t *b = vlib_get_buffer (vm, bi); - l2t_main_t *lm = &l2t_main; - ethernet_header_t *eh = vlib_buffer_get_current (b); - vlib_node_t *n = vlib_get_node (vm, l2t_l2_node.index); - u32 node_counter_base_index = n->error_heap_index; - vlib_error_main_t *em = &vm->error_main; - l2tpv3_header_t *l2t; /* l2 header */ - ethernet_vlan_header_t *vh; /* 802.1q vlan header */ - u32 counter_index; - l2t_session_t *s; - ip6_header_t *ip6; - u16 payload_ethertype; - u8 dst_mac_address[6]; - u8 src_mac_address[6]; - u16 payload_length; - i32 backup; - - /* Other-than-output pkt? We're done... 
*/ - if (vnet_buffer (b)->l2t.next_index != L2T_L2_NEXT_IP6_LOOKUP) - return vnet_buffer (b)->l2t.next_index; - - vh = (ethernet_vlan_header_t *) (eh + 1); - - em->counters[node_counter_base_index + L2T_L2_ERROR_NETWORK_TO_USER] += 1; - - counter_index = - session_index_to_counter_index (vnet_buffer (b)->l2t.session_index, - SESSION_COUNTER_NETWORK_TO_USER); - - /* per-mapping byte stats include the ethernet header */ - vlib_increment_combined_counter (&lm->counter_main, counter_index, - 1 /* packet_increment */ , - vlib_buffer_length_in_chain (vm, b) + - sizeof (ethernet_header_t)); - - s = pool_elt_at_index (lm->sessions, vnet_buffer (b)->l2t.session_index); - - /* Save src/dst MAC addresses */ -#define _(i) dst_mac_address[i] = eh->dst_address[i]; - _(0) _(1) _(2) _(3) _(4) _(5); -#undef _ -#define _(i) src_mac_address[i] = eh->src_address[i]; - _(0) _(1) _(2) _(3) _(4) _(5); -#undef _ - - payload_ethertype = vh->type; - - /* Splice out the 802.1q vlan tag */ - vlib_buffer_advance (b, 4); - eh = vlib_buffer_get_current (b); - - /* restore src/dst MAC addresses */ -#define _(i) eh->dst_address[i] = dst_mac_address[i]; - _(0) _(1) _(2) _(3) _(4) _(5); -#undef _ -#define _(i) eh->src_address[i] = src_mac_address[i]; - _(0) _(1) _(2) _(3) _(4) _(5); -#undef _ - eh->type = payload_ethertype; - - /* Paint on an l2tpv3 hdr */ - backup = sizeof (*l2t); -#if 0 - /* back up 4 bytes less if no l2 sublayer */ - backup -= s->l2_sublayer_present ? 0 : 4; -#endif - - vlib_buffer_advance (b, -backup); - l2t = vlib_buffer_get_current (b); - - l2t->session_id = s->remote_session_id; - l2t->cookie = s->remote_cookie; - -#if 0 - if (s->l2_sublayer_present) - l2t->l2_specific_sublayer = 0; -#endif - - /* Paint on an ip6 header */ - vlib_buffer_advance (b, -(sizeof (*ip6))); - ip6 = vlib_buffer_get_current (b); - - ip6->ip_version_traffic_class_and_flow_label = - clib_host_to_net_u32 (0x6 << 28); - - /* calculate ip6 payload length */ - payload_length = vlib_buffer_length_in_chain (vm, b); - payload_length -= sizeof (*ip6); - - ip6->payload_length = clib_host_to_net_u16 (payload_length); - ip6->protocol = 0x73; /* l2tpv3 */ - ip6->hop_limit = 0xff; - ip6->src_address.as_u64[0] = s->our_address.as_u64[0]; - ip6->src_address.as_u64[1] = s->our_address.as_u64[1]; - ip6->dst_address.as_u64[0] = s->client_address.as_u64[0]; - ip6->dst_address.as_u64[1] = s->client_address.as_u64[1]; - - return L2T_L2_NEXT_IP6_LOOKUP; -} - -#include - -static uword -l2t_l2_node_fn (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) -{ - return dispatch_pipeline (vm, node, frame); -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (l2t_l2_node) = { - .function = l2t_l2_node_fn, - .name = "l2t-l2-input", - .vector_size = sizeof (u32), - .format_trace = format_l2t_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - - .n_errors = ARRAY_LEN(l2t_l2_error_strings), - .error_strings = l2t_l2_error_strings, - - .n_next_nodes = L2T_L2_N_NEXT, - - /* edit / add dispositions here */ - .next_nodes = { - [L2T_L2_NEXT_IP6_LOOKUP] = "ip6-lookup", - [L2T_L2_NEXT_ETHERNET_INPUT] = "ethernet-input", - [L2T_L2_NEXT_DROP] = "error-drop", - }, -}; -/* *INDENT-ON* */ - -VLIB_NODE_FUNCTION_MULTIARCH (l2t_l2_node, l2t_l2_node_fn); - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ -- cgit 1.2.3-korg From baf2e90a91fa862c15572491c730d01cd6d19f5d Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Sat, 25 Feb 2017 04:20:00 -0800 Subject: Remove the unused VRF ID parameter from the IP 
neighbour Add/Del API Change-Id: Icf0d72f6af1f98c86f78e586c354515ac69804aa Signed-off-by: Neale Ranns --- src/vat/api_format.c | 4 ---- src/vnet/ip/ip.api | 2 -- src/vpp/api/custom_dump.c | 2 -- test/test_ip4_vrf_multi_instance.py | 2 +- test/test_ip6_vrf_multi_instance.py | 2 +- test/vpp_interface.py | 8 ++++---- test/vpp_papi_provider.py | 5 +---- 7 files changed, 7 insertions(+), 18 deletions(-) (limited to 'src/vpp/api') diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 14e78817..1321bade 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -7071,7 +7071,6 @@ api_ip_neighbor_add_del (vat_main_t * vam) vl_api_ip_neighbor_add_del_t *mp; u32 sw_if_index; u8 sw_if_index_set = 0; - u32 vrf_id = 0; u8 is_add = 1; u8 is_static = 0; u8 mac_address[6]; @@ -7100,8 +7099,6 @@ api_ip_neighbor_add_del (vat_main_t * vam) sw_if_index_set = 1; else if (unformat (i, "is_static")) is_static = 1; - else if (unformat (i, "vrf %d", &vrf_id)) - ; else if (unformat (i, "dst %U", unformat_ip4_address, &v4address)) v4_address_set = 1; else if (unformat (i, "dst %U", unformat_ip6_address, &v6address)) @@ -7134,7 +7131,6 @@ api_ip_neighbor_add_del (vat_main_t * vam) mp->sw_if_index = ntohl (sw_if_index); mp->is_add = is_add; - mp->vrf_id = ntohl (vrf_id); mp->is_static = is_static; if (mac_set) clib_memcpy (mp->mac_address, mac_address, 6); diff --git a/src/vnet/ip/ip.api b/src/vnet/ip/ip.api index d982b04c..326c825b 100644 --- a/src/vnet/ip/ip.api +++ b/src/vnet/ip/ip.api @@ -125,7 +125,6 @@ define ip_neighbor_details { /** \brief IP neighbor add / del request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request - @param vrf_id - vrf_id, only for IP4 @param sw_if_index - interface used to reach neighbor @param is_add - 1 to add neighbor, 0 to delete @param is_ipv6 - 1 for IPv6 neighbor, 0 for IPv4 @@ -137,7 +136,6 @@ define ip_neighbor_add_del { u32 client_index; u32 context; - u32 vrf_id; /* only makes sense for ip4 */ u32 sw_if_index; /* 1 = add, 0 = delete */ u8 is_add; diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c index 3871601b..c61e31bb 100644 --- a/src/vpp/api/custom_dump.c +++ b/src/vpp/api/custom_dump.c @@ -581,8 +581,6 @@ static void *vl_api_ip_neighbor_add_del_t_print if (mp->is_static) s = format (s, "is_static "); - s = format (s, "vrf_id %d ", ntohl (mp->vrf_id)); - if (memcmp (mp->mac_address, null_mac, 6)) s = format (s, "mac %U ", format_ethernet_address, mp->mac_address); diff --git a/test/test_ip4_vrf_multi_instance.py b/test/test_ip4_vrf_multi_instance.py index b84086ae..ddf8f593 100644 --- a/test/test_ip4_vrf_multi_instance.py +++ b/test/test_ip4_vrf_multi_instance.py @@ -190,7 +190,7 @@ class TestIp4VrfMultiInst(VppTestCase): if pg_if in self.pg_not_in_vrf: self.pg_not_in_vrf.remove(pg_if) pg_if.config_ip4() - pg_if.configure_ipv4_neighbors(vrf_id) + pg_if.configure_ipv4_neighbors() self.logger.debug(self.vapi.ppcli("show ip fib")) self.logger.debug(self.vapi.ppcli("show ip arp")) diff --git a/test/test_ip6_vrf_multi_instance.py b/test/test_ip6_vrf_multi_instance.py index b3b080eb..7bd4d89c 100644 --- a/test/test_ip6_vrf_multi_instance.py +++ b/test/test_ip6_vrf_multi_instance.py @@ -206,7 +206,7 @@ class TestIP6VrfMultiInst(VppTestCase): self.pg_not_in_vrf.remove(pg_if) pg_if.config_ip6() pg_if.disable_ipv6_ra() - pg_if.configure_ipv6_neighbors(vrf_id) + pg_if.configure_ipv6_neighbors() self.logger.debug(self.vapi.ppcli("show ip6 fib")) self.logger.debug(self.vapi.ppcli("show ip6 
neighbors")) diff --git a/test/vpp_interface.py b/test/vpp_interface.py index 125d8f04..4588943d 100644 --- a/test/vpp_interface.py +++ b/test/vpp_interface.py @@ -196,7 +196,7 @@ class VppInterface(object): self.has_ip4_config = False self.has_ip4_config = False - def configure_ipv4_neighbors(self, vrf_id=0): + def configure_ipv4_neighbors(self): """For every remote host assign neighbor's MAC to IPv4 addresses. :param vrf_id: The FIB table / VRF ID. (Default value = 0) @@ -205,7 +205,7 @@ class VppInterface(object): macn = host.mac.replace(":", "").decode('hex') ipn = host.ip4n self.test.vapi.ip_neighbor_add_del( - self.sw_if_index, macn, ipn, vrf_id) + self.sw_if_index, macn, ipn) def config_ip6(self): """Configure IPv6 address on the VPP interface.""" @@ -227,7 +227,7 @@ class VppInterface(object): self.has_ip6_config = False self.has_ip6_config = False - def configure_ipv6_neighbors(self, vrf_id=0): + def configure_ipv6_neighbors(self): """For every remote host assign neighbor's MAC to IPv6 addresses. :param vrf_id: The FIB table / VRF ID. (Default value = 0) @@ -236,7 +236,7 @@ class VppInterface(object): macn = host.mac.replace(":", "").decode('hex') ipn = host.ip6n self.test.vapi.ip_neighbor_add_del( - self.sw_if_index, macn, ipn, vrf_id, is_ipv6=1) + self.sw_if_index, macn, ipn, is_ipv6=1) def unconfig(self): """Unconfigure IPv6 and IPv4 address on the VPP interface.""" diff --git a/test/vpp_papi_provider.py b/test/vpp_papi_provider.py index bebbe76d..ea574b36 100644 --- a/test/vpp_papi_provider.py +++ b/test/vpp_papi_provider.py @@ -629,7 +629,6 @@ class VppPapiProvider(object): sw_if_index, mac_address, dst_address, - vrf_id=0, is_add=1, is_ipv6=0, is_static=0, @@ -639,7 +638,6 @@ class VppPapiProvider(object): :param sw_if_index: :param mac_address: :param dst_address: - :param vrf_id: (Default value = 0) :param is_add: (Default value = 1) :param is_ipv6: (Default value = 0) :param is_static: (Default value = 0) @@ -647,8 +645,7 @@ class VppPapiProvider(object): return self.api( self.papi.ip_neighbor_add_del, - {'vrf_id': vrf_id, - 'sw_if_index': sw_if_index, + {'sw_if_index': sw_if_index, 'is_add': is_add, 'is_ipv6': is_ipv6, 'is_static': is_static, -- cgit 1.2.3-korg From a1a093d4e46e38503332a97ad216f80053a15f2b Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Thu, 2 Mar 2017 13:13:23 -0500 Subject: Clean up binary api message handler registration issues Removed a fair number of "BUG" message handlers, due to conflicts with actual message handlers in api_format.c. Vpp itself had no business receiving certain messages, up to the point where we started building in relevant code from vpp_api_test. Eliminated all but one duplicate registration complaint. That one needs attention from the vxlan team since the duplicated handlers have diverged. 
Change-Id: Iafce5429d2f906270643b4ea5f0130e20beb4d1d Signed-off-by: Dave Barach --- src/vat/api_format.c | 43 ++++++- src/vlib/unix/input.c | 31 ++++- src/vlibapi/api.h | 15 +++ src/vlibapi/api_shared.c | 4 + src/vnet/classify/classify_api.c | 8 -- src/vnet/devices/virtio/vhost_user_api.c | 10 +- src/vnet/dhcp/dhcp_api.c | 8 -- src/vnet/interface_api.c | 7 - src/vnet/ip/ip_api.c | 83 ------------ src/vnet/l2/l2_api.c | 22 ---- src/vnet/mpls/mpls_api.c | 28 +--- src/vpp/api/api.c | 211 ------------------------------- src/vpp/api/api_main.c | 1 - src/vpp/stats/stats.c | 7 - 14 files changed, 81 insertions(+), 397 deletions(-) (limited to 'src/vpp/api') diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 1321bade..52436917 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -944,6 +944,7 @@ static void vl_api_sw_interface_details_t_handler_json } } +#if VPP_API_TEST_BUILTIN == 0 static void vl_api_sw_interface_set_flags_t_handler (vl_api_sw_interface_set_flags_t * mp) { @@ -954,6 +955,7 @@ static void vl_api_sw_interface_set_flags_t_handler mp->admin_up_down ? "admin-up" : "admin-down", mp->link_up_down ? "link-up" : "link-down"); } +#endif static void vl_api_sw_interface_set_flags_t_handler_json (vl_api_sw_interface_set_flags_t * mp) @@ -4009,7 +4011,6 @@ foreach_standard_reply_retval_handler; #define foreach_vpe_api_reply_msg \ _(CREATE_LOOPBACK_REPLY, create_loopback_reply) \ _(SW_INTERFACE_DETAILS, sw_interface_details) \ -_(SW_INTERFACE_SET_FLAGS, sw_interface_set_flags) \ _(SW_INTERFACE_SET_FLAGS_REPLY, sw_interface_set_flags_reply) \ _(CONTROL_PING_REPLY, control_ping_reply) \ _(CLI_REPLY, cli_reply) \ @@ -4126,11 +4127,6 @@ _(IKEV2_INITIATE_REKEY_CHILD_SA_REPLY, ikev2_initiate_rekey_child_sa_reply) \ _(DELETE_LOOPBACK_REPLY, delete_loopback_reply) \ _(BD_IP_MAC_ADD_DEL_REPLY, bd_ip_mac_add_del_reply) \ _(DHCP_COMPL_EVENT, dhcp_compl_event) \ -_(VNET_INTERFACE_COUNTERS, vnet_interface_counters) \ -_(VNET_IP4_FIB_COUNTERS, vnet_ip4_fib_counters) \ -_(VNET_IP6_FIB_COUNTERS, vnet_ip6_fib_counters) \ -_(VNET_IP4_NBR_COUNTERS, vnet_ip4_nbr_counters) \ -_(VNET_IP6_NBR_COUNTERS, vnet_ip6_nbr_counters) \ _(MAP_ADD_DOMAIN_REPLY, map_add_domain_reply) \ _(MAP_DEL_DOMAIN_REPLY, map_del_domain_reply) \ _(MAP_ADD_DEL_RULE_REPLY, map_add_del_rule_reply) \ @@ -4232,6 +4228,14 @@ _(SW_INTERFACE_SET_MTU_REPLY, sw_interface_set_mtu_reply) \ _(IP_NEIGHBOR_DETAILS, ip_neighbor_details) \ _(SW_INTERFACE_GET_TABLE_REPLY, sw_interface_get_table_reply) +#define foreach_standalone_reply_msg \ +_(SW_INTERFACE_SET_FLAGS, sw_interface_set_flags) \ +_(VNET_INTERFACE_COUNTERS, vnet_interface_counters) \ +_(VNET_IP4_FIB_COUNTERS, vnet_ip4_fib_counters) \ +_(VNET_IP6_FIB_COUNTERS, vnet_ip6_fib_counters) \ +_(VNET_IP4_NBR_COUNTERS, vnet_ip4_nbr_counters) \ +_(VNET_IP6_NBR_COUNTERS, vnet_ip6_nbr_counters) + typedef struct { u8 *name; @@ -15425,7 +15429,15 @@ api_af_packet_create (vat_main_t * vam) vec_free (host_if_name); S (mp); - W2 (ret, fprintf (vam->ofp, " new sw_if_index = %d ", vam->sw_if_index)); + + /* *INDENT-OFF* */ + W2 (ret, + ({ + if (ret == 0) + fprintf (vam->ofp ? 
vam->ofp : stderr, + " new sw_if_index = %d\n", vam->sw_if_index); + })); + /* *INDENT-ON* */ return ret; } @@ -18417,6 +18429,9 @@ _(unset, "usage: unset ") } \ } foreach_vpe_api_reply_msg; +#if VPP_API_TEST_BUILTIN == 0 +foreach_standalone_reply_msg; +#endif #undef _ void @@ -18430,6 +18445,9 @@ vat_api_hookup (vat_main_t * vam) vl_api_##n##_t_print, \ sizeof(vl_api_##n##_t), 1); foreach_vpe_api_reply_msg; +#if VPP_API_TEST_BUILTIN == 0 + foreach_standalone_reply_msg; +#endif #undef _ #if (VPP_API_TEST_BUILTIN==0) @@ -18463,6 +18481,17 @@ vat_api_hookup (vat_main_t * vam) #undef _ } +#if VPP_API_TEST_BUILTIN +static clib_error_t * +vat_api_hookup_shim (vlib_main_t * vm) +{ + vat_api_hookup (&vat_main); + return 0; +} + +VLIB_API_INIT_FUNCTION (vat_api_hookup_shim); +#endif + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vlib/unix/input.c b/src/vlib/unix/input.c index 07096ed2..7b4183a4 100644 --- a/src/vlib/unix/input.c +++ b/src/vlib/unix/input.c @@ -66,6 +66,7 @@ linux_epoll_file_update (unix_file_t * f, unix_file_update_type_t update_type) unix_main_t *um = &unix_main; linux_epoll_main_t *em = &linux_epoll_main; struct epoll_event e; + int op; memset (&e, 0, sizeof (e)); @@ -76,13 +77,29 @@ linux_epoll_file_update (unix_file_t * f, unix_file_update_type_t update_type) e.events |= EPOLLET; e.data.u32 = f - um->file_pool; - if (epoll_ctl (em->epoll_fd, - (update_type == UNIX_FILE_UPDATE_ADD - ? EPOLL_CTL_ADD - : (update_type == UNIX_FILE_UPDATE_MODIFY - ? EPOLL_CTL_MOD - : EPOLL_CTL_DEL)), f->file_descriptor, &e) < 0) - clib_warning ("epoll_ctl"); + op = -1; + + switch (update_type) + { + case UNIX_FILE_UPDATE_ADD: + op = EPOLL_CTL_ADD; + break; + + case UNIX_FILE_UPDATE_MODIFY: + op = EPOLL_CTL_MOD; + break; + + case UNIX_FILE_UPDATE_DELETE: + op = EPOLL_CTL_DEL; + break; + + default: + clib_warning ("unknown update_type %d", update_type); + return; + } + + if (epoll_ctl (em->epoll_fd, op, f->file_descriptor, &e) < 0) + clib_unix_warning ("epoll_ctl"); } static uword diff --git a/src/vlibapi/api.h b/src/vlibapi/api.h index fcb101d7..b40ece15 100644 --- a/src/vlibapi/api.h +++ b/src/vlibapi/api.h @@ -271,6 +271,21 @@ vlib_node_t **vlib_node_unserialize (u8 * vector); #define VLIB_API_INIT_FUNCTION(x) VLIB_DECLARE_INIT_FUNCTION(x,api_init) +/* Call given init function: used for init function dependencies. */ +#define vlib_call_api_init_function(vm, x) \ + ({ \ + extern vlib_init_function_t * _VLIB_INIT_FUNCTION_SYMBOL (x,api_init); \ + vlib_init_function_t * _f = _VLIB_INIT_FUNCTION_SYMBOL (x,api_init); \ + clib_error_t * _error = 0; \ + if (! 
hash_get (vm->init_functions_called, _f)) \ + { \ + hash_set1 (vm->init_functions_called, _f); \ + _error = _f (vm); \ + } \ + _error; \ + }) + + #endif /* included_api_h */ /* diff --git a/src/vlibapi/api_shared.c b/src/vlibapi/api_shared.c index 1a2740e2..79921afe 100644 --- a/src/vlibapi/api_shared.c +++ b/src/vlibapi/api_shared.c @@ -667,6 +667,10 @@ vl_msg_api_config (vl_msg_api_msg_config_t * c) foreach_msg_api_vector; #undef _ + if (am->msg_names[c->id]) + clib_warning ("BUG: multiple registrations of 'vl_api_%s_t_handler'", + c->name); + am->msg_names[c->id] = c->name; am->msg_handlers[c->id] = c->handler; am->msg_cleanup_handlers[c->id] = c->cleanup; diff --git a/src/vnet/classify/classify_api.c b/src/vnet/classify/classify_api.c index 77a8b434..24c7a2b9 100644 --- a/src/vnet/classify/classify_api.c +++ b/src/vnet/classify/classify_api.c @@ -53,7 +53,6 @@ _(CLASSIFY_TABLE_IDS,classify_table_ids) \ _(CLASSIFY_TABLE_BY_INTERFACE, classify_table_by_interface) \ _(CLASSIFY_TABLE_INFO,classify_table_info) \ _(CLASSIFY_SESSION_DUMP,classify_session_dump) \ -_(CLASSIFY_SESSION_DETAILS,classify_session_details) \ _(POLICER_CLASSIFY_SET_INTERFACE, policer_classify_set_interface) \ _(POLICER_CLASSIFY_DUMP, policer_classify_dump) \ _(FLOW_CLASSIFY_SET_INTERFACE, flow_classify_set_interface) \ @@ -356,13 +355,6 @@ vl_api_classify_table_info_t_handler (vl_api_classify_table_info_t * mp) vl_msg_api_send_shmem (q, (u8 *) & rmp); } -static void -vl_api_classify_session_details_t_handler (vl_api_classify_session_details_t * - mp) -{ - clib_warning ("BUG"); -} - static void send_classify_session_details (unix_shared_memory_queue_t * q, u32 table_id, diff --git a/src/vnet/devices/virtio/vhost_user_api.c b/src/vnet/devices/virtio/vhost_user_api.c index dd517c26..8dbd032b 100644 --- a/src/vnet/devices/virtio/vhost_user_api.c +++ b/src/vnet/devices/virtio/vhost_user_api.c @@ -46,8 +46,7 @@ _(CREATE_VHOST_USER_IF, create_vhost_user_if) \ _(MODIFY_VHOST_USER_IF, modify_vhost_user_if) \ _(DELETE_VHOST_USER_IF, delete_vhost_user_if) \ -_(SW_INTERFACE_VHOST_USER_DUMP, sw_interface_vhost_user_dump) \ -_(SW_INTERFACE_VHOST_USER_DETAILS, sw_interface_vhost_user_details) +_(SW_INTERFACE_VHOST_USER_DUMP, sw_interface_vhost_user_dump) /* * WARNING: replicated pending api refactor completion @@ -148,13 +147,6 @@ vl_api_delete_vhost_user_if_t_handler (vl_api_delete_vhost_user_if_t * mp) } } -static void - vl_api_sw_interface_vhost_user_details_t_handler - (vl_api_sw_interface_vhost_user_details_t * mp) -{ - clib_warning ("BUG"); -} - static void send_sw_interface_vhost_user_details (vpe_api_main_t * am, unix_shared_memory_queue_t * q, diff --git a/src/vnet/dhcp/dhcp_api.c b/src/vnet/dhcp/dhcp_api.c index bdf02cae..ce34f6a4 100644 --- a/src/vnet/dhcp/dhcp_api.c +++ b/src/vnet/dhcp/dhcp_api.c @@ -46,7 +46,6 @@ #define foreach_vpe_api_msg \ _(DHCP_PROXY_CONFIG,dhcp_proxy_config) \ _(DHCP_PROXY_DUMP,dhcp_proxy_dump) \ -_(DHCP_PROXY_DETAILS,dhcp_proxy_details) \ _(DHCP_PROXY_SET_VSS,dhcp_proxy_set_vss) \ _(DHCP_CLIENT_CONFIG, dhcp_client_config) @@ -158,13 +157,6 @@ dhcp_send_details (fib_protocol_t proto, vl_msg_api_send_shmem (q, (u8 *) & mp); } - -static void -vl_api_dhcp_proxy_details_t_handler (vl_api_dhcp_proxy_details_t * mp) -{ - clib_warning ("BUG"); -} - void dhcp_compl_event_callback (u32 client_index, u32 pid, u8 * hostname, u8 is_ipv6, u8 * host_address, u8 * router_address, diff --git a/src/vnet/interface_api.c b/src/vnet/interface_api.c index 63f7cad4..60cd6d40 100644 --- a/src/vnet/interface_api.c +++ 
b/src/vnet/interface_api.c @@ -50,7 +50,6 @@ _(SW_INTERFACE_SET_FLAGS, sw_interface_set_flags) \ _(SW_INTERFACE_SET_MTU, sw_interface_set_mtu) \ _(WANT_INTERFACE_EVENTS, want_interface_events) \ _(SW_INTERFACE_DUMP, sw_interface_dump) \ -_(SW_INTERFACE_DETAILS, sw_interface_details) \ _(SW_INTERFACE_ADD_DEL_ADDRESS, sw_interface_add_del_address) \ _(SW_INTERFACE_SET_TABLE, sw_interface_set_table) \ _(SW_INTERFACE_GET_TABLE, sw_interface_get_table) \ @@ -684,12 +683,6 @@ out: REPLY_MACRO (VL_API_SW_INTERFACE_TAG_ADD_DEL_REPLY); } -static void -vl_api_sw_interface_details_t_handler (vl_api_sw_interface_details_t * mp) -{ - clib_warning ("BUG"); -} - /* * vpe_api_hookup * Add vpe's API message handlers to the table. diff --git a/src/vnet/ip/ip_api.c b/src/vnet/ip/ip_api.c index 49d941c2..ab164a5f 100644 --- a/src/vnet/ip/ip_api.c +++ b/src/vnet/ip/ip_api.c @@ -59,17 +59,12 @@ #define foreach_ip_api_msg \ _(IP_FIB_DUMP, ip_fib_dump) \ -_(IP_FIB_DETAILS, ip_fib_details) \ _(IP6_FIB_DUMP, ip6_fib_dump) \ -_(IP6_FIB_DETAILS, ip6_fib_details) \ _(IP_MFIB_DUMP, ip_mfib_dump) \ -_(IP_MFIB_DETAILS, ip_mfib_details) \ _(IP6_MFIB_DUMP, ip6_mfib_dump) \ -_(IP6_MFIB_DETAILS, ip6_mfib_details) \ _(IP_NEIGHBOR_DUMP, ip_neighbor_dump) \ _(IP_MROUTE_ADD_DEL, ip_mroute_add_del) \ _(MFIB_SIGNAL_DUMP, mfib_signal_dump) \ -_(IP_NEIGHBOR_DETAILS, ip_neighbor_details) \ _(IP_ADDRESS_DUMP, ip_address_dump) \ _(IP_DUMP, ip_dump) \ _(IP_NEIGHBOR_ADD_DEL, ip_neighbor_add_del) \ @@ -105,12 +100,6 @@ send_ip_neighbor_details (u8 is_ipv6, vl_msg_api_send_shmem (q, (u8 *) & mp); } -static void -vl_api_ip_neighbor_details_t_handler (vl_api_ip_neighbor_details_t * mp) -{ - clib_warning ("BUG"); -} - static void vl_api_ip_neighbor_dump_t_handler (vl_api_ip_neighbor_dump_t * mp) { @@ -185,24 +174,6 @@ copy_fib_next_hop (fib_route_path_encode_t * api_rpath, void *fp_arg) sizeof (api_rpath->rpath.frp_addr.ip6)); } -static void -vl_api_ip_fib_details_t_handler (vl_api_ip_fib_details_t * mp) -{ - clib_warning ("BUG"); -} - -static void -vl_api_ip_fib_details_t_endian (vl_api_ip_fib_details_t * mp) -{ - clib_warning ("BUG"); -} - -static void -vl_api_ip_fib_details_t_print (vl_api_ip_fib_details_t * mp) -{ - clib_warning ("BUG"); -} - static void send_ip_fib_details (vpe_api_main_t * am, unix_shared_memory_queue_t * q, @@ -316,24 +287,6 @@ vl_api_ip_fib_dump_t_handler (vl_api_ip_fib_dump_t * mp) vec_free (lfeis); } -static void -vl_api_ip6_fib_details_t_handler (vl_api_ip6_fib_details_t * mp) -{ - clib_warning ("BUG"); -} - -static void -vl_api_ip6_fib_details_t_endian (vl_api_ip6_fib_details_t * mp) -{ - clib_warning ("BUG"); -} - -static void -vl_api_ip6_fib_details_t_print (vl_api_ip6_fib_details_t * mp) -{ - clib_warning ("BUG"); -} - static void send_ip6_fib_details (vpe_api_main_t * am, unix_shared_memory_queue_t * q, @@ -469,24 +422,6 @@ vl_api_ip6_fib_dump_t_handler (vl_api_ip6_fib_dump_t * mp) /* *INDENT-ON* */ } -static void -vl_api_ip_mfib_details_t_handler (vl_api_ip_mfib_details_t * mp) -{ - clib_warning ("BUG"); -} - -static void -vl_api_ip_mfib_details_t_endian (vl_api_ip_mfib_details_t * mp) -{ - clib_warning ("BUG"); -} - -static void -vl_api_ip_mfib_details_t_print (vl_api_ip_mfib_details_t * mp) -{ - clib_warning ("BUG"); -} - static void send_ip_mfib_details (vpe_api_main_t * am, unix_shared_memory_queue_t * q, @@ -591,24 +526,6 @@ vl_api_ip_mfib_dump_t_handler (vl_api_ip_mfib_dump_t * mp) vec_free (api_rpaths); } -static void -vl_api_ip6_mfib_details_t_handler (vl_api_ip6_mfib_details_t * mp) -{ - clib_warning 
("BUG"); -} - -static void -vl_api_ip6_mfib_details_t_endian (vl_api_ip6_mfib_details_t * mp) -{ - clib_warning ("BUG"); -} - -static void -vl_api_ip6_mfib_details_t_print (vl_api_ip6_mfib_details_t * mp) -{ - clib_warning ("BUG"); -} - static void send_ip6_mfib_details (vpe_api_main_t * am, unix_shared_memory_queue_t * q, diff --git a/src/vnet/l2/l2_api.c b/src/vnet/l2/l2_api.c index a3cc49bf..a985852c 100644 --- a/src/vnet/l2/l2_api.c +++ b/src/vnet/l2/l2_api.c @@ -48,13 +48,10 @@ _(L2_XCONNECT_DUMP, l2_xconnect_dump) \ _(L2_FIB_CLEAR_TABLE, l2_fib_clear_table) \ _(L2_FIB_TABLE_DUMP, l2_fib_table_dump) \ -_(L2_FIB_TABLE_ENTRY, l2_fib_table_entry) \ _(L2FIB_ADD_DEL, l2fib_add_del) \ _(L2_FLAGS, l2_flags) \ _(BRIDGE_DOMAIN_ADD_DEL, bridge_domain_add_del) \ _(BRIDGE_DOMAIN_DUMP, bridge_domain_dump) \ -_(BRIDGE_DOMAIN_DETAILS, bridge_domain_details) \ -_(BRIDGE_DOMAIN_SW_IF_DETAILS, bridge_domain_sw_if_details) \ _(BRIDGE_FLAGS, bridge_flags) \ _(L2_INTERFACE_VLAN_TAG_REWRITE, l2_interface_vlan_tag_rewrite) \ _(L2_INTERFACE_PBB_TAG_REWRITE, l2_interface_pbb_tag_rewrite) @@ -140,12 +137,6 @@ send_l2fib_table_entry (vpe_api_main_t * am, vl_msg_api_send_shmem (q, (u8 *) & mp); } -static void -vl_api_l2_fib_table_entry_t_handler (vl_api_l2_fib_table_entry_t * mp) -{ - clib_warning ("BUG"); -} - static void vl_api_l2_fib_table_dump_t_handler (vl_api_l2_fib_table_dump_t * mp) { @@ -329,19 +320,6 @@ vl_api_bridge_domain_add_del_t_handler (vl_api_bridge_domain_add_del_t * mp) REPLY_MACRO (VL_API_BRIDGE_DOMAIN_ADD_DEL_REPLY); } -static void -vl_api_bridge_domain_details_t_handler (vl_api_bridge_domain_details_t * mp) -{ - clib_warning ("BUG"); -} - -static void - vl_api_bridge_domain_sw_if_details_t_handler - (vl_api_bridge_domain_sw_if_details_t * mp) -{ - clib_warning ("BUG"); -} - static void send_bridge_domain_details (unix_shared_memory_queue_t * q, l2_bridge_domain_t * bd_config, diff --git a/src/vnet/mpls/mpls_api.c b/src/vnet/mpls/mpls_api.c index ebbeba69..a36a5046 100644 --- a/src/vnet/mpls/mpls_api.c +++ b/src/vnet/mpls/mpls_api.c @@ -50,9 +50,7 @@ _(MPLS_IP_BIND_UNBIND, mpls_ip_bind_unbind) \ _(MPLS_ROUTE_ADD_DEL, mpls_route_add_del) \ _(MPLS_TUNNEL_ADD_DEL, mpls_tunnel_add_del) \ _(MPLS_TUNNEL_DUMP, mpls_tunnel_dump) \ -_(MPLS_TUNNEL_DETAILS, mpls_tunnel_details) \ -_(MPLS_FIB_DUMP, mpls_fib_dump) \ -_(MPLS_FIB_DETAILS, mpls_fib_details) +_(MPLS_FIB_DUMP, mpls_fib_dump) extern void stats_dslock_with_hint (int hint, int tag); extern void stats_dsunlock (void); @@ -280,12 +278,6 @@ vl_api_mpls_tunnel_add_del_t_handler (vl_api_mpls_tunnel_add_del_t * mp) /* *INDENT-ON* */ } -static void -vl_api_mpls_tunnel_details_t_handler (vl_api_mpls_tunnel_details_t * mp) -{ - clib_warning ("BUG"); -} - typedef struct mpls_tunnel_send_walk_ctx_t_ { unix_shared_memory_queue_t *q; @@ -340,24 +332,6 @@ vl_api_mpls_tunnel_dump_t_handler (vl_api_mpls_tunnel_dump_t * mp) mpls_tunnel_walk (send_mpls_tunnel_entry, &ctx); } -static void -vl_api_mpls_fib_details_t_handler (vl_api_mpls_fib_details_t * mp) -{ - clib_warning ("BUG"); -} - -static void -vl_api_mpls_fib_details_t_endian (vl_api_mpls_fib_details_t * mp) -{ - clib_warning ("BUG"); -} - -static void -vl_api_mpls_fib_details_t_print (vl_api_mpls_fib_details_t * mp) -{ - clib_warning ("BUG"); -} - static void send_mpls_fib_details (vpe_api_main_t * am, unix_shared_memory_queue_t * q, diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index 60fd0199..a8f471e8 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -128,12 +128,8 @@ 
_(CLASSIFY_SET_INTERFACE_IP_TABLE, classify_set_interface_ip_table) \ _(CLASSIFY_SET_INTERFACE_L2_TABLES, classify_set_interface_l2_tables) \ _(GET_NODE_INDEX, get_node_index) \ _(ADD_NODE_NEXT, add_node_next) \ -_(VXLAN_ADD_DEL_TUNNEL, vxlan_add_del_tunnel) \ -_(VXLAN_TUNNEL_DUMP, vxlan_tunnel_dump) \ _(L2_INTERFACE_EFP_FILTER, l2_interface_efp_filter) \ _(SHOW_VERSION, show_version) \ -_(VXLAN_GPE_ADD_DEL_TUNNEL, vxlan_gpe_add_del_tunnel) \ -_(VXLAN_GPE_TUNNEL_DUMP, vxlan_gpe_tunnel_dump) \ _(INTERFACE_NAME_RENUMBER, interface_name_renumber) \ _(WANT_IP4_ARP_EVENTS, want_ip4_arp_events) \ _(WANT_IP6_ND_EVENTS, want_ip6_nd_events) \ @@ -1436,62 +1432,6 @@ out: /* *INDENT-ON* */ } -static void vl_api_vxlan_add_del_tunnel_t_handler - (vl_api_vxlan_add_del_tunnel_t * mp) -{ - vl_api_vxlan_add_del_tunnel_reply_t *rmp; - int rv = 0; - vnet_vxlan_add_del_tunnel_args_t _a, *a = &_a; - u32 encap_fib_index; - uword *p; - ip4_main_t *im = &ip4_main; - vnet_main_t *vnm = vnet_get_main (); - u32 sw_if_index = ~0; - - p = hash_get (im->fib_index_by_table_id, ntohl (mp->encap_vrf_id)); - if (!p) - { - rv = VNET_API_ERROR_NO_SUCH_FIB; - goto out; - } - encap_fib_index = p[0]; - memset (a, 0, sizeof (*a)); - - a->is_add = mp->is_add; - a->is_ip6 = mp->is_ipv6; - - /* ip addresses sent in network byte order */ - ip46_from_addr_buf (mp->is_ipv6, mp->dst_address, &a->dst); - ip46_from_addr_buf (mp->is_ipv6, mp->src_address, &a->src); - - /* Check src & dst are different */ - if (ip46_address_cmp (&a->dst, &a->src) == 0) - { - rv = VNET_API_ERROR_SAME_SRC_DST; - goto out; - } - a->mcast_sw_if_index = ntohl (mp->mcast_sw_if_index); - if (ip46_address_is_multicast (&a->dst) && - pool_is_free_index (vnm->interface_main.sw_interfaces, - a->mcast_sw_if_index)) - { - rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; - goto out; - } - a->encap_fib_index = encap_fib_index; - a->decap_next_index = ntohl (mp->decap_next_index); - a->vni = ntohl (mp->vni); - rv = vnet_vxlan_add_del_tunnel (a, &sw_if_index); - -out: - /* *INDENT-OFF* */ - REPLY_MACRO2(VL_API_VXLAN_ADD_DEL_TUNNEL_REPLY, - ({ - rmp->sw_if_index = ntohl (sw_if_index); - })); - /* *INDENT-ON* */ -} - static void send_vxlan_tunnel_details (vxlan_tunnel_t * t, unix_shared_memory_queue_t * q, u32 context) { @@ -1525,43 +1465,6 @@ static void send_vxlan_tunnel_details vl_msg_api_send_shmem (q, (u8 *) & rmp); } -static void vl_api_vxlan_tunnel_dump_t_handler - (vl_api_vxlan_tunnel_dump_t * mp) -{ - unix_shared_memory_queue_t *q; - vxlan_main_t *vxm = &vxlan_main; - vxlan_tunnel_t *t; - u32 sw_if_index; - - q = vl_api_client_index_to_input_queue (mp->client_index); - if (q == 0) - { - return; - } - - sw_if_index = ntohl (mp->sw_if_index); - - if (~0 == sw_if_index) - { - /* *INDENT-OFF* */ - pool_foreach (t, vxm->tunnels, - ({ - send_vxlan_tunnel_details(t, q, mp->context); - })); - /* *INDENT-ON* */ - } - else - { - if ((sw_if_index >= vec_len (vxm->tunnel_index_by_sw_if_index)) || - (~0 == vxm->tunnel_index_by_sw_if_index[sw_if_index])) - { - return; - } - t = &vxm->tunnels[vxm->tunnel_index_by_sw_if_index[sw_if_index]]; - send_vxlan_tunnel_details (t, q, mp->context); - } -} - static void vl_api_l2_patch_add_del_t_handler (vl_api_l2_patch_add_del_t * mp) { @@ -1585,83 +1488,6 @@ vl_api_l2_patch_add_del_t_handler (vl_api_l2_patch_add_del_t * mp) REPLY_MACRO (VL_API_L2_PATCH_ADD_DEL_REPLY); } -static void - vl_api_vxlan_gpe_add_del_tunnel_t_handler - (vl_api_vxlan_gpe_add_del_tunnel_t * mp) -{ - vl_api_vxlan_gpe_add_del_tunnel_reply_t *rmp; - int rv = 0; - 
vnet_vxlan_gpe_add_del_tunnel_args_t _a, *a = &_a; - u32 encap_fib_index, decap_fib_index; - u8 protocol; - uword *p; - ip4_main_t *im = &ip4_main; - u32 sw_if_index = ~0; - - - p = hash_get (im->fib_index_by_table_id, ntohl (mp->encap_vrf_id)); - if (!p) - { - rv = VNET_API_ERROR_NO_SUCH_FIB; - goto out; - } - encap_fib_index = p[0]; - - protocol = mp->protocol; - - /* Interpret decap_vrf_id as an opaque if sending to other-than-ip4-input */ - if (protocol == VXLAN_GPE_INPUT_NEXT_IP4_INPUT) - { - p = hash_get (im->fib_index_by_table_id, ntohl (mp->decap_vrf_id)); - if (!p) - { - rv = VNET_API_ERROR_NO_SUCH_INNER_FIB; - goto out; - } - decap_fib_index = p[0]; - } - else - { - decap_fib_index = ntohl (mp->decap_vrf_id); - } - - /* Check src & dst are different */ - if ((mp->is_ipv6 && memcmp (mp->local, mp->remote, 16) == 0) || - (!mp->is_ipv6 && memcmp (mp->local, mp->remote, 4) == 0)) - { - rv = VNET_API_ERROR_SAME_SRC_DST; - goto out; - } - memset (a, 0, sizeof (*a)); - - a->is_add = mp->is_add; - a->is_ip6 = mp->is_ipv6; - /* ip addresses sent in network byte order */ - if (a->is_ip6) - { - clib_memcpy (&(a->local.ip6), mp->local, 16); - clib_memcpy (&(a->remote.ip6), mp->remote, 16); - } - else - { - clib_memcpy (&(a->local.ip4), mp->local, 4); - clib_memcpy (&(a->remote.ip4), mp->remote, 4); - } - a->encap_fib_index = encap_fib_index; - a->decap_fib_index = decap_fib_index; - a->protocol = protocol; - a->vni = ntohl (mp->vni); - rv = vnet_vxlan_gpe_add_del_tunnel (a, &sw_if_index); - -out: - /* *INDENT-OFF* */ - REPLY_MACRO2(VL_API_VXLAN_GPE_ADD_DEL_TUNNEL_REPLY, - ({ - rmp->sw_if_index = ntohl (sw_if_index); - })); - /* *INDENT-ON* */ -} - static void send_vxlan_gpe_tunnel_details (vxlan_gpe_tunnel_t * t, unix_shared_memory_queue_t * q, u32 context) { @@ -1696,43 +1522,6 @@ static void send_vxlan_gpe_tunnel_details vl_msg_api_send_shmem (q, (u8 *) & rmp); } -static void vl_api_vxlan_gpe_tunnel_dump_t_handler - (vl_api_vxlan_gpe_tunnel_dump_t * mp) -{ - unix_shared_memory_queue_t *q; - vxlan_gpe_main_t *vgm = &vxlan_gpe_main; - vxlan_gpe_tunnel_t *t; - u32 sw_if_index; - - q = vl_api_client_index_to_input_queue (mp->client_index); - if (q == 0) - { - return; - } - - sw_if_index = ntohl (mp->sw_if_index); - - if (~0 == sw_if_index) - { - /* *INDENT-OFF* */ - pool_foreach (t, vgm->tunnels, - ({ - send_vxlan_gpe_tunnel_details(t, q, mp->context); - })); - /* *INDENT-ON* */ - } - else - { - if ((sw_if_index >= vec_len (vgm->tunnel_index_by_sw_if_index)) || - (~0 == vgm->tunnel_index_by_sw_if_index[sw_if_index])) - { - return; - } - t = &vgm->tunnels[vgm->tunnel_index_by_sw_if_index[sw_if_index]]; - send_vxlan_gpe_tunnel_details (t, q, mp->context); - } -} - static void vl_api_interface_name_renumber_t_handler (vl_api_interface_name_renumber_t * mp) diff --git a/src/vpp/api/api_main.c b/src/vpp/api/api_main.c index 97b501e0..6ae510b1 100644 --- a/src/vpp/api/api_main.c +++ b/src/vpp/api/api_main.c @@ -48,7 +48,6 @@ api_main_init (vlib_main_t * vm) vam->vlib_main = vm; vam->my_client_index = (u32) ~ 0; init_error_string_table (vam); - vat_api_hookup (vam); rv = vat_plugin_init (vam); if (rv) clib_warning ("vat_plugin_init returned %d", rv); diff --git a/src/vpp/stats/stats.c b/src/vpp/stats/stats.c index 5e9b0d69..c46d441a 100644 --- a/src/vpp/stats/stats.c +++ b/src/vpp/stats/stats.c @@ -46,7 +46,6 @@ stats_main_t stats_main; #define foreach_stats_msg \ _(WANT_STATS, want_stats) \ -_(WANT_STATS_REPLY, want_stats_reply) \ _(VNET_INTERFACE_COUNTERS, vnet_interface_counters) \ 
_(VNET_IP4_FIB_COUNTERS, vnet_ip4_fib_counters) \ _(VNET_IP6_FIB_COUNTERS, vnet_ip6_fib_counters) \ @@ -1226,12 +1225,6 @@ vl_api_vnet_ip6_nbr_counters_t_handler (vl_api_vnet_ip6_nbr_counters_t * mp) } } -static void -vl_api_want_stats_reply_t_handler (vl_api_want_stats_reply_t * mp) -{ - clib_warning ("BUG"); -} - static void vl_api_want_stats_t_handler (vl_api_want_stats_t * mp) { -- cgit 1.2.3-korg From 0eb22efbb54fe2e56ce8928476fffa381cb1ca8b Mon Sep 17 00:00:00 2001 From: John Lo Date: Thu, 2 Mar 2017 12:51:45 -0500 Subject: Fix create_vlan_subif API using sw_if_index as hw_if_index Also added check for bonded interface. Change-Id: I44b981d5b6fbe360e0b95c326f3f8b0e6c715468 Signed-off-by: John Lo --- src/vpp/api/api.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'src/vpp/api') diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index a8f471e8..6b2329fa 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -492,7 +492,7 @@ vl_api_create_vlan_subif_t_handler (vl_api_create_vlan_subif_t * mp) { vl_api_create_vlan_subif_reply_t *rmp; vnet_main_t *vnm = vnet_get_main (); - u32 hw_if_index, sw_if_index = (u32) ~ 0; + u32 sw_if_index = (u32) ~ 0; vnet_hw_interface_t *hi; int rv = 0; u32 id; @@ -506,8 +506,13 @@ vl_api_create_vlan_subif_t_handler (vl_api_create_vlan_subif_t * mp) VALIDATE_SW_IF_INDEX (mp); - hw_if_index = ntohl (mp->sw_if_index); - hi = vnet_get_hw_interface (vnm, hw_if_index); + hi = vnet_get_sup_hw_interface (vnm, ntohl (mp->sw_if_index)); + + if (hi->bond_info == VNET_HW_INTERFACE_BOND_INFO_SLAVE) + { + rv = VNET_API_ERROR_BOND_SLAVE_NOT_ALLOWED; + goto out; + } id = ntohl (mp->vlan_id); if (id == 0 || id > 4095) -- cgit 1.2.3-korg From 114f846f1919e4982c7170f14d63f3cced0d887c Mon Sep 17 00:00:00 2001 From: John Lo Date: Thu, 2 Mar 2017 19:21:36 -0500 Subject: Clean up VXLAN api message handler registration issues Remove the duplicate VXLAN related API handlers from api.c and keep the proper ones in ./src/vnet/vxlan/vxlan_api.c.
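For reference, a sketch of why the duplicate table entries matter (illustrative only, not part of this change; exact arguments vary between VPP versions): both api.c and vxlan_api.c expand their message tables through a macro along the following lines, so a message listed in both files has vl_msg_api_set_handlers() invoked twice for the same message id, and whichever registration runs last silently wins.

/* Illustrative sketch of the registration pattern (see vpe_api_hookup);
 * argument details may differ between versions. */
#define _(N, n)                                                 \
  vl_msg_api_set_handlers (VL_API_##N, #n,                      \
                           vl_api_##n##_t_handler,              \
                           vl_noop_handler,                     \
                           vl_api_##n##_t_endian,               \
                           vl_api_##n##_t_print,                \
                           sizeof (vl_api_##n##_t), 1);
  foreach_vpe_api_msg;
#undef _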
Change-Id: I3b17e17d735bb453d080243bfa2783ce0de64885 Signed-off-by: John Lo --- src/vpp/api/api.c | 94 ------------------------------------------------------- 1 file changed, 94 deletions(-) (limited to 'src/vpp/api') diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index 6b2329fa..e028fad4 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -61,8 +61,6 @@ #include #include #include -#include -#include #include #include #include @@ -109,7 +107,6 @@ _(OAM_ADD_DEL, oam_add_del) \ _(IS_ADDRESS_REACHABLE, is_address_reachable) \ _(SW_INTERFACE_SET_MPLS_ENABLE, sw_interface_set_mpls_enable) \ _(SW_INTERFACE_SET_VPATH, sw_interface_set_vpath) \ -_(SW_INTERFACE_SET_VXLAN_BYPASS, sw_interface_set_vxlan_bypass) \ _(SW_INTERFACE_SET_L2_XCONNECT, sw_interface_set_l2_xconnect) \ _(SW_INTERFACE_SET_L2_BRIDGE, sw_interface_set_l2_bridge) \ _(CREATE_VLAN_SUBIF, create_vlan_subif) \ @@ -373,29 +370,6 @@ vl_api_sw_interface_set_vpath_t_handler (vl_api_sw_interface_set_vpath_t * mp) REPLY_MACRO (VL_API_SW_INTERFACE_SET_VPATH_REPLY); } -static void - vl_api_sw_interface_set_vxlan_bypass_t_handler - (vl_api_sw_interface_set_vxlan_bypass_t * mp) -{ - vl_api_sw_interface_set_vxlan_bypass_reply_t *rmp; - int rv = 0; - u32 sw_if_index = ntohl (mp->sw_if_index); - - VALIDATE_SW_IF_INDEX (mp); - - if (mp->is_ipv6) - { - /* not yet implemented */ - } - else - vnet_feature_enable_disable ("ip4-unicast", "ip4-vxlan-bypass", - sw_if_index, mp->enable, 0, 0); - - BAD_SW_IF_INDEX_LABEL; - - REPLY_MACRO (VL_API_SW_INTERFACE_SET_VXLAN_BYPASS_REPLY); -} - static void vl_api_sw_interface_set_l2_xconnect_t_handler (vl_api_sw_interface_set_l2_xconnect_t * mp) @@ -1437,39 +1411,6 @@ out: /* *INDENT-ON* */ } -static void send_vxlan_tunnel_details - (vxlan_tunnel_t * t, unix_shared_memory_queue_t * q, u32 context) -{ - vl_api_vxlan_tunnel_details_t *rmp; - ip4_main_t *im4 = &ip4_main; - ip6_main_t *im6 = &ip6_main; - u8 is_ipv6 = !ip46_address_is_ip4 (&t->dst); - - rmp = vl_msg_api_alloc (sizeof (*rmp)); - memset (rmp, 0, sizeof (*rmp)); - rmp->_vl_msg_id = ntohs (VL_API_VXLAN_TUNNEL_DETAILS); - if (is_ipv6) - { - memcpy (rmp->src_address, t->src.ip6.as_u8, 16); - memcpy (rmp->dst_address, t->dst.ip6.as_u8, 16); - rmp->encap_vrf_id = htonl (im6->fibs[t->encap_fib_index].ft_table_id); - } - else - { - memcpy (rmp->src_address, t->src.ip4.as_u8, 4); - memcpy (rmp->dst_address, t->dst.ip4.as_u8, 4); - rmp->encap_vrf_id = htonl (im4->fibs[t->encap_fib_index].ft_table_id); - } - rmp->mcast_sw_if_index = htonl (t->mcast_sw_if_index); - rmp->vni = htonl (t->vni); - rmp->decap_next_index = htonl (t->decap_next_index); - rmp->sw_if_index = htonl (t->sw_if_index); - rmp->is_ipv6 = is_ipv6; - rmp->context = context; - - vl_msg_api_send_shmem (q, (u8 *) & rmp); -} - static void vl_api_l2_patch_add_del_t_handler (vl_api_l2_patch_add_del_t * mp) { @@ -1493,40 +1434,6 @@ vl_api_l2_patch_add_del_t_handler (vl_api_l2_patch_add_del_t * mp) REPLY_MACRO (VL_API_L2_PATCH_ADD_DEL_REPLY); } -static void send_vxlan_gpe_tunnel_details - (vxlan_gpe_tunnel_t * t, unix_shared_memory_queue_t * q, u32 context) -{ - vl_api_vxlan_gpe_tunnel_details_t *rmp; - ip4_main_t *im4 = &ip4_main; - ip6_main_t *im6 = &ip6_main; - u8 is_ipv6 = !(t->flags & VXLAN_GPE_TUNNEL_IS_IPV4); - - rmp = vl_msg_api_alloc (sizeof (*rmp)); - memset (rmp, 0, sizeof (*rmp)); - rmp->_vl_msg_id = ntohs (VL_API_VXLAN_GPE_TUNNEL_DETAILS); - if (is_ipv6) - { - memcpy (rmp->local, &(t->local.ip6), 16); - memcpy (rmp->remote, &(t->remote.ip6), 16); - rmp->encap_vrf_id = htonl 
(im6->fibs[t->encap_fib_index].ft_table_id); - rmp->decap_vrf_id = htonl (im6->fibs[t->decap_fib_index].ft_table_id); - } - else - { - memcpy (rmp->local, &(t->local.ip4), 4); - memcpy (rmp->remote, &(t->remote.ip4), 4); - rmp->encap_vrf_id = htonl (im4->fibs[t->encap_fib_index].ft_table_id); - rmp->decap_vrf_id = htonl (im4->fibs[t->decap_fib_index].ft_table_id); - } - rmp->vni = htonl (t->vni); - rmp->protocol = t->protocol; - rmp->sw_if_index = htonl (t->sw_if_index); - rmp->is_ipv6 = is_ipv6; - rmp->context = context; - - vl_msg_api_send_shmem (q, (u8 *) & rmp); -} - static void vl_api_interface_name_renumber_t_handler (vl_api_interface_name_renumber_t * mp) @@ -2183,7 +2090,6 @@ vpe_api_hookup (vlib_main_t * vm) am->api_trace_cfg[VL_API_CLASSIFY_ADD_DEL_TABLE].size += 5 * sizeof (u32x4); am->api_trace_cfg[VL_API_CLASSIFY_ADD_DEL_SESSION].size += 5 * sizeof (u32x4); - am->api_trace_cfg[VL_API_VXLAN_ADD_DEL_TUNNEL].size += 16 * sizeof (u32); /* * Thread-safe API messages -- cgit 1.2.3-korg From c83c3b7f117b981b677f646a0e30f44ec70de239 Mon Sep 17 00:00:00 2001 From: Jon Loeliger Date: Thu, 23 Feb 2017 13:57:35 -0600 Subject: Implement a loopback instance allocation scheme. To support creating loopback interfaces with a specific instance number, a new CREATE_LOOPBACK_INSTANCE API call with flag is_specified and value user_instance is introduced. Presumably the existing CREATE_LOOPBACK API message will be obsoleted and removed. The VAT cli commands can now mention and format the new field as 'instance %d' data. If no instance number is named, the old call CREATE_LOOPBACK is used to maintain backward compatibility. However, if the instance is named, the new CREATE_LOOPBACK_INSTANCE message will be used. Both the dynamically allocated and user-requested instance number are tracked in a bitvector. If is_specified is 0, the next free instance will be used. A request for a specific instance number will be granted if it is available. On error, the value ~0 is returned.
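For reference, a condensed sketch of the allocation policy described above. The helper and variable names here are hypothetical; the actual implementation lives in the src/vnet/ethernet/interface.c hunk below, uses the same clib_bitmap primitives, and additionally enforces a LOOPBACK_MAX_INSTANCE upper bound that is omitted here for brevity.

#include <vppinfra/bitmap.h>

/* Hypothetical, simplified sketch of the instance allocation policy. */
static uword *loopback_instances;  /* bitvector of in-use instance numbers */

static u32
sketch_loopback_instance_alloc (u8 is_specified, u32 want)
{
  if (!is_specified)
    {
      /* Dynamic request: hand out the first free instance number. */
      u32 bit = clib_bitmap_first_clear (loopback_instances);
      loopback_instances = clib_bitmap_set (loopback_instances, bit, 1);
      return bit;
    }

  /* Specific request: grant it only if that instance is not yet in use. */
  if (clib_bitmap_get (loopback_instances, want))
    return ~0;                  /* already allocated -> error */

  loopback_instances = clib_bitmap_set (loopback_instances, want, 1);
  return want;
}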
Change-Id: I849815563a5da736dcd6bccd262ef49b963f6643 Signed-off-by: Jon Loeliger --- src/vat/api_format.c | 59 +++++++++++++++++--- src/vnet/ethernet/ethernet.h | 6 ++- src/vnet/ethernet/interface.c | 123 ++++++++++++++++++++++++++++++++++++++---- src/vpp/api/api.c | 23 +++++++- src/vpp/api/custom_dump.c | 13 +++++ src/vpp/api/test_client.c | 8 +++ src/vpp/api/vpe.api | 28 ++++++++++ 7 files changed, 241 insertions(+), 19 deletions(-) (limited to 'src/vpp/api') diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 52436917..993a6e8b 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -725,6 +725,34 @@ static void vl_api_create_loopback_reply_t_handler_json vam->result_ready = 1; } +static void vl_api_create_loopback_instance_reply_t_handler + (vl_api_create_loopback_instance_reply_t * mp) +{ + vat_main_t *vam = &vat_main; + i32 retval = ntohl (mp->retval); + + vam->retval = retval; + vam->regenerate_interface_table = 1; + vam->sw_if_index = ntohl (mp->sw_if_index); + vam->result_ready = 1; +} + +static void vl_api_create_loopback_instance_reply_t_handler_json + (vl_api_create_loopback_instance_reply_t * mp) +{ + vat_main_t *vam = &vat_main; + vat_json_node_t node; + + vat_json_init_object (&node); + vat_json_object_add_int (&node, "retval", ntohl (mp->retval)); + vat_json_object_add_uint (&node, "sw_if_index", ntohl (mp->sw_if_index)); + + vat_json_print (vam->ofp, &node); + vat_json_free (&node); + vam->retval = ntohl (mp->retval); + vam->result_ready = 1; +} + static void vl_api_af_packet_create_reply_t_handler (vl_api_af_packet_create_reply_t * mp) { @@ -4010,6 +4038,7 @@ foreach_standard_reply_retval_handler; #define foreach_vpe_api_reply_msg \ _(CREATE_LOOPBACK_REPLY, create_loopback_reply) \ +_(CREATE_LOOPBACK_INSTANCE_REPLY, create_loopback_instance_reply) \ _(SW_INTERFACE_DETAILS, sw_interface_details) \ _(SW_INTERFACE_SET_FLAGS_REPLY, sw_interface_set_flags_reply) \ _(CONTROL_PING_REPLY, control_ping_reply) \ @@ -4720,8 +4749,11 @@ api_create_loopback (vat_main_t * vam) { unformat_input_t *i = vam->input; vl_api_create_loopback_t *mp; + vl_api_create_loopback_instance_t *mp_lbi; u8 mac_address[6]; u8 mac_set = 0; + u8 is_specified = 0; + u32 user_instance = 0; int ret; memset (mac_address, 0, sizeof (mac_address)); @@ -4730,16 +4762,31 @@ api_create_loopback (vat_main_t * vam) { if (unformat (i, "mac %U", unformat_ethernet_address, mac_address)) mac_set = 1; + if (unformat (i, "instance %d", &user_instance)) + is_specified = 1; else break; } - /* Construct the API message */ - M (CREATE_LOOPBACK, mp); - if (mac_set) - clib_memcpy (mp->mac_address, mac_address, sizeof (mac_address)); + if (is_specified) + { + M (CREATE_LOOPBACK_INSTANCE, mp_lbi); + mp_lbi->is_specified = is_specified; + if (is_specified) + mp_lbi->user_instance = htonl (user_instance); + if (mac_set) + clib_memcpy (mp_lbi->mac_address, mac_address, sizeof (mac_address)); + S (mp_lbi); + } + else + { + /* Construct the API message */ + M (CREATE_LOOPBACK, mp); + if (mac_set) + clib_memcpy (mp->mac_address, mac_address, sizeof (mac_address)); + S (mp); + } - S (mp); W (ret); return ret; } @@ -18021,7 +18068,7 @@ echo (vat_main_t * vam) /* List of API message constructors, CLI names map to api_xxx */ #define foreach_vpe_api_msg \ -_(create_loopback,"[mac ]") \ +_(create_loopback,"[mac ] [instance ]") \ _(sw_interface_dump,"") \ _(sw_interface_set_flags, \ " | sw_if_index admin-up | admin-down link-up | link down") \ diff --git a/src/vnet/ethernet/ethernet.h b/src/vnet/ethernet/ethernet.h index 
3acde421..ba84c69c 100644 --- a/src/vnet/ethernet/ethernet.h +++ b/src/vnet/ethernet/ethernet.h @@ -265,6 +265,9 @@ typedef struct /* Feature arc index */ u8 output_feature_arc_index; + + /* Allocated loopback instances */ + uword *bm_loopback_instances; } ethernet_main_t; ethernet_main_t ethernet_main; @@ -412,7 +415,8 @@ clib_error_t *next_by_ethertype_init (next_by_ethertype_t * l3_next); clib_error_t *next_by_ethertype_register (next_by_ethertype_t * l3_next, u32 ethertype, u32 next_index); -int vnet_create_loopback_interface (u32 * sw_if_indexp, u8 * mac_address); +int vnet_create_loopback_interface (u32 * sw_if_indexp, u8 * mac_address, + u8 is_specified, u32 user_instance); int vnet_delete_loopback_interface (u32 sw_if_index); int vnet_delete_sub_interface (u32 sw_if_index); diff --git a/src/vnet/ethernet/interface.c b/src/vnet/ethernet/interface.c index 95700309..9894e3c8 100644 --- a/src/vnet/ethernet/interface.c +++ b/src/vnet/ethernet/interface.c @@ -453,13 +453,87 @@ VNET_DEVICE_CLASS (ethernet_simulated_device_class) = { }; /* *INDENT-ON* */ + +/* + * Maintain a bitmap of allocated loopback instance numbers. + */ +#define LOOPBACK_MAX_INSTANCE (16 * 1024) + +static u32 +loopback_instance_alloc (u8 is_specified, u32 want) +{ + ethernet_main_t *em = ðernet_main; + + /* + * Check for dynamically allocaetd instance number. + */ + if (!is_specified) + { + u32 bit; + + bit = clib_bitmap_first_clear (em->bm_loopback_instances); + if (bit >= LOOPBACK_MAX_INSTANCE) + { + return ~0; + } + em->bm_loopback_instances = clib_bitmap_set (em->bm_loopback_instances, + bit, 1); + return bit; + } + + /* + * In range? + */ + if (want >= LOOPBACK_MAX_INSTANCE) + { + return ~0; + } + + /* + * Already in use? + */ + if (clib_bitmap_get (em->bm_loopback_instances, want)) + { + return ~0; + } + + /* + * Grant allocation request. + */ + em->bm_loopback_instances = clib_bitmap_set (em->bm_loopback_instances, + want, 1); + + return want; +} + +static int +loopback_instance_free (u32 instance) +{ + ethernet_main_t *em = ðernet_main; + + if (instance >= LOOPBACK_MAX_INSTANCE) + { + return -1; + } + + if (clib_bitmap_get (em->bm_loopback_instances, instance) == 0) + { + return -1; + } + + em->bm_loopback_instances = clib_bitmap_set (em->bm_loopback_instances, + instance, 0); + return 0; +} + int -vnet_create_loopback_interface (u32 * sw_if_indexp, u8 * mac_address) +vnet_create_loopback_interface (u32 * sw_if_indexp, u8 * mac_address, + u8 is_specified, u32 user_instance) { vnet_main_t *vnm = vnet_get_main (); vlib_main_t *vm = vlib_get_main (); clib_error_t *error; - static u32 instance; + u32 instance; u8 address[6]; u32 hw_if_index; vnet_hw_interface_t *hw_if; @@ -472,6 +546,16 @@ vnet_create_loopback_interface (u32 * sw_if_indexp, u8 * mac_address) memset (address, 0, sizeof (address)); + /* + * Allocate a loopback instance. Either select on dynamically + * or try to use the desired user_instance number. + */ + instance = loopback_instance_alloc (is_specified, user_instance); + if (instance == ~0) + { + return VNET_API_ERROR_INVALID_REGISTRATION; + } + /* * Default MAC address (dead:0000:0000 + instance) is allocated * if zero mac_address is configured. 
Otherwise, user-configurable MAC @@ -488,7 +572,7 @@ vnet_create_loopback_interface (u32 * sw_if_indexp, u8 * mac_address) error = ethernet_register_interface (vnm, - ethernet_simulated_device_class.index, instance++, address, &hw_if_index, + ethernet_simulated_device_class.index, instance, address, &hw_if_index, /* flag change */ 0); if (error) @@ -520,6 +604,8 @@ create_simulated_ethernet_interfaces (vlib_main_t * vm, int rv; u32 sw_if_index; u8 mac_address[6]; + u8 is_specified = 0; + u32 user_instance = 0; memset (mac_address, 0, sizeof (mac_address)); @@ -527,11 +613,14 @@ create_simulated_ethernet_interfaces (vlib_main_t * vm, { if (unformat (input, "mac %U", unformat_ethernet_address, mac_address)) ; + if (unformat (input, "instance %d", &user_instance)) + is_specified = 1; else break; } - rv = vnet_create_loopback_interface (&sw_if_index, mac_address); + rv = vnet_create_loopback_interface (&sw_if_index, mac_address, + is_specified, user_instance); if (rv) return clib_error_return (0, "vnet_create_loopback_interface failed"); @@ -547,15 +636,15 @@ create_simulated_ethernet_interfaces (vlib_main_t * vm, * * @cliexpar * The following two command syntaxes are equivalent: - * @cliexcmd{loopback create-interface [mac ]} - * @cliexcmd{create loopback interface [mac ]} + * @cliexcmd{loopback create-interface [mac ] [instance ]} + * @cliexcmd{create loopback interface [mac ] [instance ]} * Example of how to create a loopback interface: * @cliexcmd{loopback create-interface} ?*/ /* *INDENT-OFF* */ VLIB_CLI_COMMAND (create_simulated_ethernet_interface_command, static) = { .path = "loopback create-interface", - .short_help = "loopback create-interface [mac ]", + .short_help = "loopback create-interface [mac ] [instance ]", .function = create_simulated_ethernet_interfaces, }; /* *INDENT-ON* */ @@ -566,15 +655,15 @@ VLIB_CLI_COMMAND (create_simulated_ethernet_interface_command, static) = { * * @cliexpar * The following two command syntaxes are equivalent: - * @cliexcmd{loopback create-interface [mac ]} - * @cliexcmd{create loopback interface [mac ]} + * @cliexcmd{loopback create-interface [mac ] [instance ]} + * @cliexcmd{create loopback interface [mac ] [instance ]} * Example of how to create a loopback interface: * @cliexcmd{create loopback interface} ?*/ /* *INDENT-OFF* */ VLIB_CLI_COMMAND (create_loopback_interface_command, static) = { .path = "create loopback interface", - .short_help = "create loopback interface [mac ]", + .short_help = "create loopback interface [mac ] [instance ]", .function = create_simulated_ethernet_interfaces, }; /* *INDENT-ON* */ @@ -594,12 +683,24 @@ vnet_delete_loopback_interface (u32 sw_if_index) { vnet_main_t *vnm = vnet_get_main (); vnet_sw_interface_t *si; + u32 hw_if_index; + vnet_hw_interface_t *hw; + u32 instance; if (pool_is_free_index (vnm->interface_main.sw_interfaces, sw_if_index)) return VNET_API_ERROR_INVALID_SW_IF_INDEX; si = vnet_get_sw_interface (vnm, sw_if_index); - ethernet_delete_interface (vnm, si->hw_if_index); + hw_if_index = si->hw_if_index; + hw = vnet_get_hw_interface (vnm, hw_if_index); + instance = hw->dev_instance; + + if (loopback_instance_free (instance) < 0) + { + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + } + + ethernet_delete_interface (vnm, hw_if_index); return 0; } diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index e028fad4..f06894e8 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -116,6 +116,7 @@ _(PROXY_ARP_INTFC_ENABLE_DISABLE, proxy_arp_intfc_enable_disable) \ _(VNET_GET_SUMMARY_STATS, 
vnet_get_summary_stats) \ _(RESET_FIB, reset_fib) \ _(CREATE_LOOPBACK, create_loopback) \ +_(CREATE_LOOPBACK_INSTANCE, create_loopback_instance) \ _(CONTROL_PING, control_ping) \ _(CLI_REQUEST, cli_request) \ _(CLI_INBAND, cli_inband) \ @@ -1026,7 +1027,7 @@ vl_api_create_loopback_t_handler (vl_api_create_loopback_t * mp) u32 sw_if_index; int rv; - rv = vnet_create_loopback_interface (&sw_if_index, mp->mac_address); + rv = vnet_create_loopback_interface (&sw_if_index, mp->mac_address, 0, 0); /* *INDENT-OFF* */ REPLY_MACRO2(VL_API_CREATE_LOOPBACK_REPLY, @@ -1036,6 +1037,26 @@ vl_api_create_loopback_t_handler (vl_api_create_loopback_t * mp) /* *INDENT-ON* */ } +static void vl_api_create_loopback_instance_t_handler + (vl_api_create_loopback_instance_t * mp) +{ + vl_api_create_loopback_instance_reply_t *rmp; + u32 sw_if_index; + u8 is_specified = mp->is_specified; + u32 user_instance = ntohl (mp->user_instance); + int rv; + + rv = vnet_create_loopback_interface (&sw_if_index, mp->mac_address, + is_specified, user_instance); + + /* *INDENT-OFF* */ + REPLY_MACRO2(VL_API_CREATE_LOOPBACK_INSTANCE_REPLY, + ({ + rmp->sw_if_index = ntohl (sw_if_index); + })); + /* *INDENT-ON* */ +} + static void vl_api_delete_loopback_t_handler (vl_api_delete_loopback_t * mp) { diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c index c61e31bb..ee0c4629 100644 --- a/src/vpp/api/custom_dump.c +++ b/src/vpp/api/custom_dump.c @@ -72,6 +72,18 @@ static void *vl_api_create_loopback_t_print FINISH; } +static void *vl_api_create_loopback_instance_t_print + (vl_api_create_loopback_instance_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: create_loopback "); + s = format (s, "mac %U ", format_ethernet_address, &mp->mac_address); + s = format (s, "instance %d ", ntohl (mp->user_instance)); + + FINISH; +} + static void *vl_api_delete_loopback_t_print (vl_api_delete_loopback_t * mp, void *handle) { @@ -2821,6 +2833,7 @@ foreach_custom_print_no_arg_function #undef _ #define foreach_custom_print_function \ _(CREATE_LOOPBACK, create_loopback) \ +_(CREATE_LOOPBACK_INSTANCE, create_loopback_instance) \ _(SW_INTERFACE_SET_FLAGS, sw_interface_set_flags) \ _(SW_INTERFACE_ADD_DEL_ADDRESS, sw_interface_add_del_address) \ _(SW_INTERFACE_SET_TABLE, sw_interface_set_table) \ diff --git a/src/vpp/api/test_client.c b/src/vpp/api/test_client.c index ceafc357..8b61f4f3 100644 --- a/src/vpp/api/test_client.c +++ b/src/vpp/api/test_client.c @@ -534,6 +534,13 @@ static void vl_api_create_loopback_reply_t_handler ntohl (mp->retval), ntohl (mp->sw_if_index)); } +static void vl_api_create_loopback_instance_reply_t_handler + (vl_api_create_loopback_instance_reply_t * mp) +{ + fformat (stdout, "create loopback status %d, sw_if_index %d\n", + ntohl (mp->retval), ntohl (mp->sw_if_index)); +} + static void vl_api_sr_tunnel_add_del_reply_t_handler (vl_api_sr_tunnel_add_del_reply_t * mp) @@ -598,6 +605,7 @@ _(SW_INTERFACE_IP6ND_RA_PREFIX_REPLY, sw_interface_ip6nd_ra_prefix_reply) \ _(SW_INTERFACE_IP6_ENABLE_DISABLE_REPLY, sw_interface_ip6_enable_disable_reply) \ _(SW_INTERFACE_IP6_SET_LINK_LOCAL_ADDRESS_REPLY, sw_interface_ip6_set_link_local_address_reply) \ _(CREATE_LOOPBACK_REPLY, create_loopback_reply) \ + _(CREATE_LOOPBACK_INSTANCE_REPLY, create_loopback_instance_reply) \ _(L2_PATCH_ADD_DEL_REPLY, l2_patch_add_del_reply) \ _(SR_TUNNEL_ADD_DEL_REPLY,sr_tunnel_add_del_reply) \ _(SW_INTERFACE_SET_L2_XCONNECT_REPLY, sw_interface_set_l2_xconnect_reply) \ diff --git a/src/vpp/api/vpe.api b/src/vpp/api/vpe.api index 
7f9c2038..a4ba180d 100644 --- a/src/vpp/api/vpe.api +++ b/src/vpp/api/vpe.api @@ -425,6 +425,34 @@ define create_loopback_reply u32 sw_if_index; }; +/** \brief Create loopback interface instance request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param mac_address - mac addr to assign to the interface if none-zero + @param is_specified - if non-0, a specific user_instance is being requested + @param user_instance - requested instance, ~0 => dynamically allocate +*/ +define create_loopback_instance +{ + u32 client_index; + u32 context; + u8 mac_address[6]; + u8 is_specified; + u32 user_instance; +}; + +/** \brief Create loopback interface instance response + @param context - sender context, to match reply w/ request + @param sw_if_index - sw index of the interface that was created + @param retval - return code for the request +*/ +define create_loopback_instance_reply +{ + u32 context; + i32 retval; + u32 sw_if_index; +}; + /** \brief Delete loopback interface request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request -- cgit 1.2.3-korg From fb38095d1c9d1b84850f345f0344f82b9ae2c375 Mon Sep 17 00:00:00 2001 From: Pablo Camarillo Date: Wed, 7 Dec 2016 18:34:18 +0100 Subject: Evolving SRv6 (Segment Routing for IPv6) Implements: 1.- SR Policies with several (weighted) SID lists 2.- Binding SID 3.- SR LocalSIDs with support for the following functions - End - End.X - End.DX6 - End.DX4 - End.DX2 - End.DT6 - End.DT2 - End.B6 - End.B6.Encaps 4.- SR Steering policies (to steer a traffic through an SR Policy) - Support for IPv6 traffic (IPv6 Encapsulation / SRH insertion) - Support for IPv4 traffic (IPv6 Encapsulation) - Support for L2 traffic (T.Insert / T.Encaps) 5.- Doxygen documentation 6.- Framework (APIs) to allow the definition of new SR LocalSID behaviors by means of plugins 7.- Sample SRv6 LocalSID plugin Change-Id: I2de3d126699d4f11f54c0f7f3b71420ea41fd389 Signed-off-by: Pablo Camarillo --- src/configure.ac | 1 + src/plugins/Makefile.am | 4 + src/plugins/sample_srv6_localsid.am | 23 + src/plugins/srv6-localsid/node.c | 200 ++ src/plugins/srv6-localsid/srv6_localsid_sample.c | 179 ++ src/plugins/srv6-localsid/srv6_localsid_sample.h | 61 + .../srv6-localsid/srv6_sample_localsid_doc.md | 38 + src/vat/api_format.c | 357 +- src/vnet.am | 6 +- src/vnet/ip/ip6_packet.h | 33 +- src/vnet/sr/dir.dox | 4 +- src/vnet/sr/examples/sr_multicastmap.script | 4 - src/vnet/sr/ietf_draft_05.txt | 1564 +++++++++ src/vnet/sr/rfc_draft_05.txt | 1265 -------- src/vnet/sr/sr.api | 203 +- src/vnet/sr/sr.c | 3397 +------------------- src/vnet/sr/sr.h | 361 ++- src/vnet/sr/sr_api.c | 250 +- src/vnet/sr/sr_doc.md | 161 + src/vnet/sr/sr_error.def | 20 - src/vnet/sr/sr_fix_dst_error.def | 17 - src/vnet/sr/sr_localsid.c | 1478 +++++++++ src/vnet/sr/sr_packet.h | 248 +- src/vnet/sr/sr_policy_rewrite.c | 3253 +++++++++++++++++++ src/vnet/sr/sr_steering.c | 568 ++++ src/vpp/api/custom_dump.c | 270 +- src/vpp/api/test_client.c | 1 - 27 files changed, 8352 insertions(+), 5614 deletions(-) create mode 100644 src/plugins/sample_srv6_localsid.am create mode 100644 src/plugins/srv6-localsid/node.c create mode 100755 src/plugins/srv6-localsid/srv6_localsid_sample.c create mode 100644 src/plugins/srv6-localsid/srv6_localsid_sample.h create mode 100644 src/plugins/srv6-localsid/srv6_sample_localsid_doc.md mode change 100644 => 100755 src/vnet/sr/dir.dox delete mode 100644 
src/vnet/sr/examples/sr_multicastmap.script create mode 100755 src/vnet/sr/ietf_draft_05.txt delete mode 100644 src/vnet/sr/rfc_draft_05.txt mode change 100644 => 100755 src/vnet/sr/sr.c mode change 100644 => 100755 src/vnet/sr/sr.h create mode 100644 src/vnet/sr/sr_doc.md delete mode 100644 src/vnet/sr/sr_error.def delete mode 100644 src/vnet/sr/sr_fix_dst_error.def create mode 100755 src/vnet/sr/sr_localsid.c mode change 100644 => 100755 src/vnet/sr/sr_packet.h create mode 100755 src/vnet/sr/sr_policy_rewrite.c create mode 100755 src/vnet/sr/sr_steering.c (limited to 'src/vpp/api') diff --git a/src/configure.ac b/src/configure.ac index 813fe067..c22d152e 100644 --- a/src/configure.ac +++ b/src/configure.ac @@ -153,6 +153,7 @@ PLUGIN_ENABLED(ioam) PLUGIN_ENABLED(lb) PLUGIN_ENABLED(sixrd) PLUGIN_ENABLED(snat) +PLUGIN_DISABLED(srv6sample) ############################################################################### # Dependency checks diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am index c8877899..7b36049e 100644 --- a/src/plugins/Makefile.am +++ b/src/plugins/Makefile.am @@ -61,6 +61,10 @@ if ENABLE_SNAT_PLUGIN include snat.am endif +if ENABLE_SRV6SAMPLE_PLUGIN +include sample_srv6_localsid.am +endif + include ../suffix-rules.mk # Remove *.la files diff --git a/src/plugins/sample_srv6_localsid.am b/src/plugins/sample_srv6_localsid.am new file mode 100644 index 00000000..a820ab25 --- /dev/null +++ b/src/plugins/sample_srv6_localsid.am @@ -0,0 +1,23 @@ + +# Copyright (c) +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +vppplugins_LTLIBRARIES += sample_srv6_localsid_plugin.la + +sample_srv6_localsid_plugin_la_SOURCES = \ + srv6-localsid/node.c \ + srv6-localsid/srv6_localsid_sample.c + +noinst_HEADERS += srv6-localsid/srv6_localsid_sample.h + +# vi:syntax=automake diff --git a/src/plugins/srv6-localsid/node.c b/src/plugins/srv6-localsid/node.c new file mode 100644 index 00000000..7bae9cd7 --- /dev/null +++ b/src/plugins/srv6-localsid/node.c @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include +#include +#include +#include + +typedef struct { + u32 localsid_index; +} srv6_localsid_sample_trace_t; + +/* packet trace format function */ +static u8 * format_srv6_localsid_sample_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + srv6_localsid_sample_trace_t * t = va_arg (*args, srv6_localsid_sample_trace_t *); + s = format (s, "SRv6-sample-localsid: localsid_index %d\n", + t->localsid_index); + return s; +} + +vlib_node_registration_t srv6_localsid_sample_node; + +#define foreach_srv6_localsid_counter \ +_(PROCESSED, "srv6-sample-localsid processed packets") \ +_(NO_SRH, "(Error) No SRH.") + +typedef enum { +#define _(sym,str) SRV6_LOCALSID_COUNTER_##sym, + foreach_srv6_localsid_counter +#undef _ + SRV6_LOCALSID_N_COUNTERS, +} srv6_localsid_sample_counters; + +static char * srv6_localsid_counter_strings[] = { +#define _(sym,string) string, + foreach_srv6_localsid_counter +#undef _ +}; + +typedef enum { + SRV6_SAMPLE_LOCALSID_NEXT_ERROR, + SRV6_SAMPLE_LOCALSID_NEXT_IP6LOOKUP, + SRV6_SAMPLE_LOCALSID_N_NEXT, +} srv6_localsid_sample_next_t; + +/** + * @brief Function doing End processing. + */ +//Fixme: support OAM (hop-by-hop header) here! +static_always_inline void +end_srh_processing (vlib_node_runtime_t * node, + vlib_buffer_t * b0, + ip6_header_t * ip0, + ip6_sr_header_t * sr0, + u32 * next0) +{ + ip6_address_t *new_dst0; + + if(PREDICT_TRUE(ip0->protocol == IP_PROTOCOL_IPV6_ROUTE)) + { + if(PREDICT_TRUE(sr0->type == ROUTING_HEADER_TYPE_SR)) + { + if(PREDICT_TRUE(sr0->segments_left != 0)) + { + sr0->segments_left -= 1; + new_dst0 = (ip6_address_t *)(sr0->segments); + new_dst0 += sr0->segments_left; + ip0->dst_address.as_u64[0] = new_dst0->as_u64[0]; + ip0->dst_address.as_u64[1] = new_dst0->as_u64[1]; + } + else + { + *next0 = SRV6_SAMPLE_LOCALSID_NEXT_ERROR; + b0->error = node->errors[SRV6_LOCALSID_COUNTER_NO_SRH]; + } + } + else + { + /* Error. 
Routing header of type != SR */ + *next0 = SRV6_SAMPLE_LOCALSID_NEXT_ERROR; + b0->error = node->errors[SRV6_LOCALSID_COUNTER_NO_SRH]; + } + } +} + +/* + * @brief SRv6 Sample Localsid graph node + * WARNING: YOU MUST DO THE DUAL LOOP + */ +static uword +srv6_localsid_sample_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + u32 next_index; + u32 pkts_swapped = 0; + + ip6_sr_main_t * sm = &sr_main; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + u32 cpu_index = os_get_cpu_number (); + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + ip6_header_t * ip0 = 0; + ip6_sr_header_t * sr0; + u32 next0 = SRV6_SAMPLE_LOCALSID_NEXT_IP6LOOKUP; + ip6_sr_localsid_t *ls0; + srv6_localsid_sample_per_sid_memory_t *ls0_mem; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + ip0 = vlib_buffer_get_current (b0); + sr0 = (ip6_sr_header_t *)(ip0+1); + + /* Lookup the SR End behavior based on IP DA (adj) */ + ls0 = pool_elt_at_index (sm->localsids, vnet_buffer(b0)->ip.adj_index[VLIB_TX]); + ls0_mem = ls0->plugin_mem; + + /* SRH processing */ + end_srh_processing (node, b0, ip0, sr0, &next0); + + /* ==================================================================== */ + /* INSERT CODE HERE */ + /* Example starts here */ + //In this example we are changing the next VRF table by the one in CLI + vnet_buffer(b0)->sw_if_index[VLIB_TX] = ls0_mem->fib_table; + /* Example finishes here */ + /* ==================================================================== */ + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + srv6_localsid_sample_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->localsid_index = ls0 - sm->localsids; + } + + /* This increments the SRv6 per LocalSID counters.*/ + vlib_increment_combined_counter + (((next0 == SRV6_SAMPLE_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : &(sm->sr_ls_valid_counters)), + cpu_index, + ls0 - sm->localsids, + 1, vlib_buffer_length_in_chain (vm, b0)); + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + + pkts_swapped ++; + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + + } + + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (srv6_localsid_sample_node) = { + .function = srv6_localsid_sample_fn, + .name = "srv6-localsid-sample", + .vector_size = sizeof (u32), + .format_trace = format_srv6_localsid_sample_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = SRV6_LOCALSID_N_COUNTERS, + .error_strings = srv6_localsid_counter_strings, + .n_next_nodes = SRV6_SAMPLE_LOCALSID_N_NEXT, + .next_nodes = { + [SRV6_SAMPLE_LOCALSID_NEXT_IP6LOOKUP] = "ip6-lookup", + [SRV6_SAMPLE_LOCALSID_NEXT_ERROR] = "error-drop", + }, +}; diff --git a/src/plugins/srv6-localsid/srv6_localsid_sample.c b/src/plugins/srv6-localsid/srv6_localsid_sample.c new file mode 100755 index 00000000..ec16547e --- /dev/null +++ b/src/plugins/srv6-localsid/srv6_localsid_sample.c @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + *------------------------------------------------------------------ + * srv6_localsid_sample.c - Simple SRv6 LocalSID + *------------------------------------------------------------------ + */ + +#include +#include +#include + +#include +#include +#include + +unsigned char srv6_localsid_name[32] = "Sample-SRv6-LocalSID-plugin"; +unsigned char keyword_str[32] = "new_srv6_localsid"; +unsigned char def_str[64] = "This is a definition of a sample new_srv6_localsid"; +unsigned char params_str[32] = ""; + +/*****************************************/ +/* SRv6 LocalSID instantiation and removal functions */ +static int +srv6_localsid_creation_fn (ip6_sr_localsid_t *localsid) +{ + /* + * Do you want to do anything fancy upon localsid instantiation? + * You can do it here + * (If return != 0 the localsid creation will be cancelled.) + */ + /* As an example Im going to do a +1 to the fib table inserted by the user */ + srv6_localsid_sample_per_sid_memory_t *ls_mem = localsid->plugin_mem; + ls_mem->fib_table += 1; + return 0; +} + +static int +srv6_localsid_removal_fn (ip6_sr_localsid_t *localsid) +{ + /* Do you want to do anything fancy upon localsid removal? + * You can do it here + * (If return != 0 the localsid removal will be cancelled.) + */ + /* + * BTW if you stored something in localsid->plugin_mem you should clean it now + */ + + //In this example we are only cleaning the memory allocated per localsid + clib_mem_free(localsid->plugin_mem); + return 0; +} + +/**********************************/ +/* SRv6 LocalSID format functions */ +/* + * Prints nicely the parameters of a localsid + * Example: print "Table 5" + */ +u8 * +format_srv6_localsid_sample (u8 * s, va_list * args) +{ + srv6_localsid_sample_per_sid_memory_t *ls_mem = va_arg (*args, void *); + return (format (s, "Table: %u", ls_mem->fib_table)); +} + +/* + * Process the parameters of a localsid + * Example: process from: + * sr localsid address cafe::1 behavior new_srv6_localsid 5 + * everything from behavior on... so in this case 'new_srv6_localsid 5' + * Notice that it MUST match the keyword_str and params_str defined above. 
+ */ +uword +unformat_srv6_localsid_sample (unformat_input_t * input, va_list * args) +{ + void **plugin_mem = va_arg (*args, void **); + srv6_localsid_sample_per_sid_memory_t *ls_mem; + u32 table_id; + if (unformat (input, "new_srv6_localsid %u", &table_id)) + { + /* Allocate a portion of memory */ + ls_mem = clib_mem_alloc_aligned_at_offset ( + sizeof(srv6_localsid_sample_per_sid_memory_t), 0, 0, 1); + + /* Set to zero the memory */ + memset (ls_mem, 0, sizeof(srv6_localsid_sample_per_sid_memory_t)); + + /* Our brand-new car is ready */ + ls_mem->fib_table = table_id; + + /* Dont forget to add it to the localsid */ + *plugin_mem = ls_mem; + return 1; + } + return 0; +} + +/*************************/ +/* SRv6 LocalSID FIB DPO */ +static u8 * +format_srv6_localsid_sample_dpo (u8 * s, va_list * args) +{ + index_t index = va_arg (*args, index_t); + CLIB_UNUSED (u32 indent) = va_arg (*args, u32); + + return (format (s, "SR: localsid_sample_index:[%u]", index)); +} + +void +srv6_localsid_sample_dpo_lock (dpo_id_t * dpo) +{ +} + +void +srv6_localsid_sample_dpo_unlock (dpo_id_t * dpo) +{ +} + +const static dpo_vft_t srv6_localsid_sample_vft = { + .dv_lock = srv6_localsid_sample_dpo_lock, + .dv_unlock = srv6_localsid_sample_dpo_unlock, + .dv_format = format_srv6_localsid_sample_dpo, +}; + +const static char *const srv6_localsid_sample_ip6_nodes[] = { + "srv6-localsid-sample", + NULL, +}; + +const static char *const *const srv6_localsid_sample_nodes[DPO_PROTO_NUM] = { + [DPO_PROTO_IP6] = srv6_localsid_sample_ip6_nodes, +}; + +/**********************/ +static clib_error_t * srv6_localsid_sample_init (vlib_main_t * vm) +{ + srv6_localsid_sample_main_t * sm = &srv6_localsid_sample_main; + int rv = 0; + /* Create DPO */ + sm->srv6_localsid_sample_dpo_type = dpo_register_new_type ( + &srv6_localsid_sample_vft, srv6_localsid_sample_nodes); + + /* Register SRv6 LocalSID */ + rv = sr_localsid_register_function (vm, + srv6_localsid_name, + keyword_str, + def_str, + params_str, + &sm->srv6_localsid_sample_dpo_type, + format_srv6_localsid_sample, + unformat_srv6_localsid_sample, + srv6_localsid_creation_fn, + srv6_localsid_removal_fn); + if (rv < 0) + clib_error_return (0, "SRv6 LocalSID function could not be registered."); + else + sm->srv6_localsid_behavior_id = rv; + + return 0; +} + +VLIB_INIT_FUNCTION (srv6_localsid_sample_init); + +VLIB_PLUGIN_REGISTER () = { + .version = "1.0", +}; diff --git a/src/plugins/srv6-localsid/srv6_localsid_sample.h b/src/plugins/srv6-localsid/srv6_localsid_sample.h new file mode 100644 index 00000000..474b5de2 --- /dev/null +++ b/src/plugins/srv6-localsid/srv6_localsid_sample.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __included_srv6_localsid_sample_h__ +#define __included_srv6_localsid_sample_h__ + +#include +#include +#include +#include + +#include +#include +#include + +typedef struct { + /* API message ID base */ + u16 msg_id_base; + + /* convenience */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; + + /* DPO type */ + dpo_type_t srv6_localsid_sample_dpo_type; + + /* SRv6 LocalSID behavior number */ + u32 srv6_localsid_behavior_id; + +} srv6_localsid_sample_main_t; + +/* + * This is the memory that will be stored per each localsid + * the user instantiates + */ +typedef struct { + u32 fib_table; /* Stupid index used as an example.. */ +} srv6_localsid_sample_per_sid_memory_t ; + +srv6_localsid_sample_main_t srv6_localsid_sample_main; + +format_function_t format_srv6_localsid_sample; +unformat_function_t unformat_srv6_localsid_sample; + +void srv6_localsid_sample_dpo_lock (dpo_id_t * dpo); +void srv6_localsid_sample_dpo_unlock (dpo_id_t * dpo); + +extern vlib_node_registration_t srv6_localsid_sample_node; + +#endif /* __included_sample_h__ */ diff --git a/src/plugins/srv6-localsid/srv6_sample_localsid_doc.md b/src/plugins/srv6-localsid/srv6_sample_localsid_doc.md new file mode 100644 index 00000000..b60ab5da --- /dev/null +++ b/src/plugins/srv6-localsid/srv6_sample_localsid_doc.md @@ -0,0 +1,38 @@ +# SRv6 Sample LocalSID documentation {#srv6_plugin_doc} + +## Disclaimer + +This is a memo intended to contain documentation for the sample SRv6 LocalSID behavior plugin. +Everything that is not directly obvious should come here. +For any feedback on content that should be explained please mailto:pcamaril@cisco.com + +This plugin refers to Segment Routing. Please read the SR documentation first. + +## Introduction + +This plugin is an example of how a user can create a new SRv6 LocalSID behavior by using VPP plugins with the appropriate API calls to the existing SR code. + +This **example** plugin registers a new localsid behavior, with cli keyword 'new_srv6_localsid' which only takes one parameter, a fib-table. Upon receipt of a packet, this plugin will enforce the next IP6 lookup in the specific fib-table specified by the user. (Indeed, it will do the lookup in fib_table n+1, since for the sake of the example we increment the fib-table.) + +Notice that the plugin only 'defines' a new SRv6 LocalSID behavior, but the existing SR code in VNET is the one actually instantiating new LocalSIDs. Notice that there are callback functions such that when you create or remove a LocalSID you can actually set up specific parameters through the functions in this plugin. + +## Variables to watch for + +* srv6_localsid_name: This variable is the name (used as a unique key) identifying this SR LocalSID plugin. +* keyword_str: This is the CLI keyword to be used for the plugin. In this example 'new_srv6_localsid'. (i.e. sr localsid address cafe::1 behavior new_srv6_localsid ) +* def_str: This is a definition of this SR behavior. This is printed when you do 'show sr localsid behaviors'. +* params_str: This is a definition of the parameters of this localsid. This is printed when you do 'show sr localsid behaviors'. + +## Functions to watch for + +* srv6_localsid_creation_fn: This function will be called every time a new SR LocalSID is instantiated with the behavior defined in this plugin. +* srv6_localsid_removal_fn: This function will be called every time a new SR LocalSID is removed with the behavior defined in this plugin.
This function tends to be used for freeing up all the memory created in the previous function. +* format_srv6_localsid_sample: This function prints nicely the parameters of every SR LocalSID using this behavior. +* unformat_srv6_localsid_sample: This function parses the CLI command when initialising a new SR LocalSID using this behavior. It parses all the parameters and ensures that the parameters are correct. +* format_srv6_localsid_sample_dpo: This function formats the 'show ip6 fib' message for the SR LocalSIDs created with this plugin behavior. + +## Graph node + +The current graph node uses the function 'end_srh_processing' to do the Segment Routing Endpoint behavior. Notice that it does not allow the cleanup of a Segment Routing header (as per the SRv6 behavior specs). +This function is identical to the one found in /src/vnet/sr/sr_localsid.c +In case that by some other reason you want to do decapsulation, or SRH clean_up you can use the functions 'end_decaps_srh_processing' or 'end_psp_srh_processing' respectively. diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 993a6e8b..53e9ca1f 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include @@ -3916,9 +3915,11 @@ _(sw_interface_ip6nd_ra_prefix_reply) \ _(sw_interface_ip6nd_ra_config_reply) \ _(set_arp_neighbor_limit_reply) \ _(l2_patch_add_del_reply) \ -_(sr_tunnel_add_del_reply) \ -_(sr_policy_add_del_reply) \ -_(sr_multicast_map_add_del_reply) \ +_(sr_policy_add_reply) \ +_(sr_policy_mod_reply) \ +_(sr_policy_del_reply) \ +_(sr_localsid_add_del_reply) \ +_(sr_steering_add_del_reply) \ _(classify_add_del_session_reply) \ _(classify_set_interface_ip_table_reply) \ _(classify_set_interface_l2_tables_reply) \ @@ -4095,9 +4096,11 @@ _(SW_INTERFACE_IP6ND_RA_CONFIG_REPLY, \ sw_interface_ip6nd_ra_config_reply) \ _(SET_ARP_NEIGHBOR_LIMIT_REPLY, set_arp_neighbor_limit_reply) \ _(L2_PATCH_ADD_DEL_REPLY, l2_patch_add_del_reply) \ -_(SR_TUNNEL_ADD_DEL_REPLY, sr_tunnel_add_del_reply) \ -_(SR_POLICY_ADD_DEL_REPLY, sr_policy_add_del_reply) \ -_(SR_MULTICAST_MAP_ADD_DEL_REPLY, sr_multicast_map_add_del_reply) \ +_(SR_POLICY_ADD_REPLY, sr_policy_add_reply) \ +_(SR_POLICY_MOD_REPLY, sr_policy_mod_reply) \ +_(SR_POLICY_DEL_REPLY, sr_policy_del_reply) \ +_(SR_LOCALSID_ADD_DEL_REPLY, sr_localsid_add_del_reply) \ +_(SR_STEERING_ADD_DEL_REPLY, sr_steering_add_del_reply) \ _(CLASSIFY_ADD_DEL_TABLE_REPLY, classify_add_del_table_reply) \ _(CLASSIFY_ADD_DEL_SESSION_REPLY, classify_add_del_session_reply) \ _(CLASSIFY_SET_INTERFACE_IP_TABLE_REPLY, \ @@ -8227,6 +8230,64 @@ api_l2_patch_add_del (vat_main_t * vam) return ret; } +u8 is_del; +u8 localsid_addr[16]; +u8 end_psp; +u8 behavior; +u32 sw_if_index; +u32 vlan_index; +u32 fib_table; +u8 nh_addr[16]; + +static int +api_sr_localsid_add_del (vat_main_t * vam) +{ + unformat_input_t *i = vam->input; + vl_api_sr_localsid_add_del_t *mp; + + u8 is_del; + ip6_address_t localsid; + u8 end_psp = 0; + u8 behavior = ~0; + u32 sw_if_index; + u32 fib_table = ~(u32) 0; + ip6_address_t next_hop; + + bool nexthop_set = 0; + + int ret; + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "del")) + is_del = 1; + else if (unformat (i, "address %U", unformat_ip6_address, &localsid)); + else if (unformat (i, "next-hop %U", unformat_ip6_address, &next_hop)) + nexthop_set = 1; + else if (unformat (i, "behavior %u", &behavior)); + else if (unformat (i, "sw_if_index %u", &sw_if_index)); + else if (unformat 
(i, "fib-table %u", &fib_table)); + else if (unformat (i, "end.psp %u", &behavior)); + else + break; + } + + M (SR_LOCALSID_ADD_DEL, mp); + + clib_memcpy (mp->localsid_addr, &localsid, sizeof (mp->localsid_addr)); + if (nexthop_set) + clib_memcpy (mp->nh_addr, &next_hop, sizeof (mp->nh_addr)); + mp->behavior = behavior; + mp->sw_if_index = ntohl (sw_if_index); + mp->fib_table = ntohl (fib_table); + mp->end_psp = end_psp; + mp->is_del = is_del; + + S (mp); + W (ret); + return ret; +} + static int api_ioam_enable (vat_main_t * vam) { @@ -8277,277 +8338,6 @@ api_ioam_disable (vat_main_t * vam) return ret; } -static int -api_sr_tunnel_add_del (vat_main_t * vam) -{ - unformat_input_t *i = vam->input; - vl_api_sr_tunnel_add_del_t *mp; - int is_del = 0; - int pl_index; - ip6_address_t src_address; - int src_address_set = 0; - ip6_address_t dst_address; - u32 dst_mask_width; - int dst_address_set = 0; - u16 flags = 0; - u32 rx_table_id = 0; - u32 tx_table_id = 0; - ip6_address_t *segments = 0; - ip6_address_t *this_seg; - ip6_address_t *tags = 0; - ip6_address_t *this_tag; - ip6_address_t next_address, tag; - u8 *name = 0; - u8 *policy_name = 0; - int ret; - - while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) - { - if (unformat (i, "del")) - is_del = 1; - else if (unformat (i, "name %s", &name)) - ; - else if (unformat (i, "policy %s", &policy_name)) - ; - else if (unformat (i, "rx_fib_id %d", &rx_table_id)) - ; - else if (unformat (i, "tx_fib_id %d", &tx_table_id)) - ; - else if (unformat (i, "src %U", unformat_ip6_address, &src_address)) - src_address_set = 1; - else if (unformat (i, "dst %U/%d", - unformat_ip6_address, &dst_address, &dst_mask_width)) - dst_address_set = 1; - else if (unformat (i, "next %U", unformat_ip6_address, &next_address)) - { - vec_add2 (segments, this_seg, 1); - clib_memcpy (this_seg->as_u8, next_address.as_u8, - sizeof (*this_seg)); - } - else if (unformat (i, "tag %U", unformat_ip6_address, &tag)) - { - vec_add2 (tags, this_tag, 1); - clib_memcpy (this_tag->as_u8, tag.as_u8, sizeof (*this_tag)); - } - else if (unformat (i, "clean")) - flags |= IP6_SR_HEADER_FLAG_CLEANUP; - else if (unformat (i, "protected")) - flags |= IP6_SR_HEADER_FLAG_PROTECTED; - else if (unformat (i, "InPE %d", &pl_index)) - { - if (pl_index <= 0 || pl_index > 4) - { - pl_index_range_error: - errmsg ("pl index %d out of range", pl_index); - return -99; - } - flags |= - IP6_SR_HEADER_FLAG_PL_ELT_INGRESS_PE << (3 * (pl_index - 1)); - } - else if (unformat (i, "EgPE %d", &pl_index)) - { - if (pl_index <= 0 || pl_index > 4) - goto pl_index_range_error; - flags |= - IP6_SR_HEADER_FLAG_PL_ELT_EGRESS_PE << (3 * (pl_index - 1)); - } - else if (unformat (i, "OrgSrc %d", &pl_index)) - { - if (pl_index <= 0 || pl_index > 4) - goto pl_index_range_error; - flags |= - IP6_SR_HEADER_FLAG_PL_ELT_ORIG_SRC_ADDR << (3 * (pl_index - 1)); - } - else - break; - } - - if (!src_address_set) - { - errmsg ("src address required"); - return -99; - } - - if (!dst_address_set) - { - errmsg ("dst address required"); - return -99; - } - - if (!segments) - { - errmsg ("at least one sr segment required"); - return -99; - } - - M2 (SR_TUNNEL_ADD_DEL, mp, - vec_len (segments) * sizeof (ip6_address_t) - + vec_len (tags) * sizeof (ip6_address_t)); - - clib_memcpy (mp->src_address, &src_address, sizeof (mp->src_address)); - clib_memcpy (mp->dst_address, &dst_address, sizeof (mp->dst_address)); - mp->dst_mask_width = dst_mask_width; - mp->flags_net_byte_order = clib_host_to_net_u16 (flags); - mp->n_segments = vec_len 
(segments); - mp->n_tags = vec_len (tags); - mp->is_add = is_del == 0; - clib_memcpy (mp->segs_and_tags, segments, - vec_len (segments) * sizeof (ip6_address_t)); - clib_memcpy (mp->segs_and_tags + - vec_len (segments) * sizeof (ip6_address_t), tags, - vec_len (tags) * sizeof (ip6_address_t)); - - mp->outer_vrf_id = ntohl (rx_table_id); - mp->inner_vrf_id = ntohl (tx_table_id); - memcpy (mp->name, name, vec_len (name)); - memcpy (mp->policy_name, policy_name, vec_len (policy_name)); - - vec_free (segments); - vec_free (tags); - - S (mp); - W (ret); - return ret; -} - -static int -api_sr_policy_add_del (vat_main_t * vam) -{ - unformat_input_t *input = vam->input; - vl_api_sr_policy_add_del_t *mp; - int is_del = 0; - u8 *name = 0; - u8 *tunnel_name = 0; - u8 **tunnel_names = 0; - - int name_set = 0; - int tunnel_set = 0; - int j = 0; - int tunnel_names_length = 1; // Init to 1 to offset the #tunnel_names counter byte - int tun_name_len = 0; // Different naming convention used as confusing these would be "bad" (TM) - int ret; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "del")) - is_del = 1; - else if (unformat (input, "name %s", &name)) - name_set = 1; - else if (unformat (input, "tunnel %s", &tunnel_name)) - { - if (tunnel_name) - { - vec_add1 (tunnel_names, tunnel_name); - /* For serializer: - - length = #bytes to store in serial vector - - +1 = byte to store that length - */ - tunnel_names_length += (vec_len (tunnel_name) + 1); - tunnel_set = 1; - tunnel_name = 0; - } - } - else - break; - } - - if (!name_set) - { - errmsg ("policy name required"); - return -99; - } - - if ((!tunnel_set) && (!is_del)) - { - errmsg ("tunnel name required"); - return -99; - } - - M2 (SR_POLICY_ADD_DEL, mp, tunnel_names_length); - - - - mp->is_add = !is_del; - - memcpy (mp->name, name, vec_len (name)); - // Since mp->tunnel_names is of type u8[0] and not a u8 *, u8 ** needs to be serialized - u8 *serial_orig = 0; - vec_validate (serial_orig, tunnel_names_length); - *serial_orig = vec_len (tunnel_names); // Store the number of tunnels as length in first byte of serialized vector - serial_orig += 1; // Move along one byte to store the length of first tunnel_name - - for (j = 0; j < vec_len (tunnel_names); j++) - { - tun_name_len = vec_len (tunnel_names[j]); - *serial_orig = tun_name_len; // Store length of tunnel name in first byte of Length/Value pair - serial_orig += 1; // Move along one byte to store the actual tunnel name - memcpy (serial_orig, tunnel_names[j], tun_name_len); - serial_orig += tun_name_len; // Advance past the copy - } - memcpy (mp->tunnel_names, serial_orig - tunnel_names_length, tunnel_names_length); // Regress serial_orig to head then copy fwd - - vec_free (tunnel_names); - vec_free (tunnel_name); - - S (mp); - W (ret); - return ret; -} - -static int -api_sr_multicast_map_add_del (vat_main_t * vam) -{ - unformat_input_t *input = vam->input; - vl_api_sr_multicast_map_add_del_t *mp; - int is_del = 0; - ip6_address_t multicast_address; - u8 *policy_name = 0; - int multicast_address_set = 0; - int ret; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "del")) - is_del = 1; - else - if (unformat - (input, "address %U", unformat_ip6_address, &multicast_address)) - multicast_address_set = 1; - else if (unformat (input, "sr-policy %s", &policy_name)) - ; - else - break; - } - - if (!is_del && !policy_name) - { - errmsg ("sr-policy name required"); - return -99; - } - - - if (!multicast_address_set) - { - 
errmsg ("address required"); - return -99; - } - - M (SR_MULTICAST_MAP_ADD_DEL, mp); - - mp->is_add = !is_del; - memcpy (mp->policy_name, policy_name, vec_len (policy_name)); - clib_memcpy (mp->multicast_address, &multicast_address, - sizeof (mp->multicast_address)); - - - vec_free (policy_name); - - S (mp); - W (ret); - return ret; -} - - #define foreach_tcp_proto_field \ _(src_port) \ _(dst_port) @@ -18166,14 +17956,9 @@ _(set_arp_neighbor_limit, "arp_nbr_limit [ipv6]") \ _(l2_patch_add_del, \ "rx | rx_sw_if_index tx | tx_sw_if_index \n" \ "enable | disable") \ -_(sr_tunnel_add_del, \ - "[name ] src dst / \n" \ - "(next )+ [tag ]* [clean] [reroute] \n" \ - "[policy ]") \ -_(sr_policy_add_del, \ - "name tunnel [tunnel ]* [del]") \ -_(sr_multicast_map_add_del, \ - "address [ip6 multicast address] sr-policy [policy name] [del]") \ +_(sr_localsid_add_del, \ + "(del) address next_hop behavior \n" \ + "fib-table (end.psp) sw_if_index ") \ _(classify_add_del_table, \ "buckets [skip ] [match ] [memory_size ]\n" \ " [del] [del-chain] mask \n" \ diff --git a/src/vnet.am b/src/vnet.am index d89d516e..7125a122 100644 --- a/src/vnet.am +++ b/src/vnet.am @@ -710,13 +710,15 @@ nobase_include_HEADERS += \ if WITH_LIBSSL libvnet_la_SOURCES += \ - vnet/sr/sr.c \ + vnet/sr/sr.c \ + vnet/sr/sr_localsid.c \ + vnet/sr/sr_policy_rewrite.c \ + vnet/sr/sr_steering.c \ vnet/sr/sr_api.c endif nobase_include_HEADERS += \ vnet/sr/sr_packet.h \ - vnet/sr/sr_error.def \ vnet/sr/sr.h \ vnet/sr/sr.api.h diff --git a/src/vnet/ip/ip6_packet.h b/src/vnet/ip/ip6_packet.h index 4fd14b96..6eabeef1 100644 --- a/src/vnet/ip/ip6_packet.h +++ b/src/vnet/ip/ip6_packet.h @@ -448,20 +448,47 @@ always_inline u8 ip6_ext_hdr(u8 nexthdr) * find out if nexthdr is an extension header or a protocol */ return (nexthdr == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) || - (nexthdr == IP_PROTOCOL_IP6_NONXT) || (nexthdr == IP_PROTOCOL_IPV6_FRAGMENTATION) || (nexthdr == IP_PROTOCOL_IPSEC_AH) || (nexthdr == IP_PROTOCOL_IPV6_ROUTE) || (nexthdr == IP_PROTOCOL_IP6_DESTINATION_OPTIONS); } -#define ip6_ext_header_len(p) (((p)->n_data_u64s+1) << 3) -#define ip6_ext_authhdr_len(p) (((p)->n_data_u64s+2) << 2) +#define ip6_ext_header_len(p) ((((ip6_ext_header_t *)(p))->n_data_u64s+1) << 3) +#define ip6_ext_authhdr_len(p) ((((ip6_ext_header_t *)(p))->n_data_u64s+2) << 2) always_inline void * ip6_ext_next_header (ip6_ext_header_t *ext_hdr ) { return (void *)((u8 *) ext_hdr + ip6_ext_header_len(ext_hdr)); } +/* + * Macro to find the IPv6 ext header of type t + * I is the IPv6 header + * P is the previous IPv6 ext header (NULL if none) + * M is the matched IPv6 ext header of type t + */ +#define ip6_ext_header_find_t(i, p, m, t) \ +if ((i)->protocol == t) \ +{ \ + (m) = (void *)((i)+1); \ + (p) = NULL; \ +} \ +else \ +{ \ + (m) = NULL; \ + (p) = (void *)((i)+1); \ + while (ip6_ext_hdr((p)->next_hdr) && \ + ((ip6_ext_header_t *)(p))->next_hdr != (t)) \ + { \ + (p) = ip6_ext_next_header((p)); \ + } \ + if ( ip6_ext_hdr((p)->next_hdr) == (t)) \ + { \ + (m) = (void *)(ip6_ext_next_header((p))); \ + } \ +} + + typedef CLIB_PACKED (struct { u8 next_hdr; /* Length of this header plus option data in 8 byte units. 
*/ diff --git a/src/vnet/sr/dir.dox b/src/vnet/sr/dir.dox old mode 100644 new mode 100755 index a98b202c..3f539a58 --- a/src/vnet/sr/dir.dox +++ b/src/vnet/sr/dir.dox @@ -18,8 +18,8 @@ @brief Segment Routing code An implementation of Segment Routing as per: - draft-previdi-6man-segment-routing-header-05 + draft-ietf-6man-segment-routing-header-05 - See file: rfc_draft_05.txt + @see ietf_draft_05.txt */ \ No newline at end of file diff --git a/src/vnet/sr/examples/sr_multicastmap.script b/src/vnet/sr/examples/sr_multicastmap.script deleted file mode 100644 index 20bf7dc0..00000000 --- a/src/vnet/sr/examples/sr_multicastmap.script +++ /dev/null @@ -1,4 +0,0 @@ -sr_tunnel_add_del name sr2 src ::a:1:1:0:6 dst ff15::2/128 next ::a:1:1:0:f next ::a:1:1:0:1a next ff15::1 tag ::a:1:1:0:7 clean -sr_tunnel_add_del name sr3 src ::b:1:1:0:6 dst ff16::2/128 next ::a:1:1:0:13 next ::a:1:1:0:1a next ff15::1 tag ::a:1:1:0:7 clean -sr_policy_add_del name pol1 tunnel sr2 tunnel sr3 -sr_multicast_map_add_del address ff15::1 sr-policy pol1 diff --git a/src/vnet/sr/ietf_draft_05.txt b/src/vnet/sr/ietf_draft_05.txt new file mode 100755 index 00000000..e9bff04f --- /dev/null +++ b/src/vnet/sr/ietf_draft_05.txt @@ -0,0 +1,1564 @@ +Network Working Group S. Previdi, Ed. +Internet-Draft C. Filsfils +Intended status: Standards Track Cisco Systems, Inc. +Expires: August 5, 2017 B. Field + Comcast + I. Leung + Rogers Communications + J. Linkova + Google + E. Aries + Facebook + T. Kosugi + NTT + E. Vyncke + Cisco Systems, Inc. + D. Lebrun + Universite Catholique de Louvain + February 1, 2017 + + + IPv6 Segment Routing Header (SRH) + draft-ietf-6man-segment-routing-header-05 + +Abstract + + Segment Routing (SR) allows a node to steer a packet through a + controlled set of instructions, called segments, by prepending an SR + header to the packet. A segment can represent any instruction, + topological or service-based. SR allows to enforce a flow through + any path (topological, or application/service based) while + maintaining per-flow state only at the ingress node to the SR domain. + + Segment Routing can be applied to the IPv6 data plane with the + addition of a new type of Routing Extension Header. This draft + describes the Segment Routing Extension Header Type and how it is + used by SR capable nodes. + +Requirements Language + + The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", + "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this + document are to be interpreted as described in RFC 2119 [RFC2119]. + +Status of This Memo + + This Internet-Draft is submitted in full conformance with the + provisions of BCP 78 and BCP 79. + + + + +Previdi, et al. Expires August 5, 2017 [Page 1] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + Internet-Drafts are working documents of the Internet Engineering + Task Force (IETF). Note that other groups may also distribute + working documents as Internet-Drafts. The list of current Internet- + Drafts is at http://datatracker.ietf.org/drafts/current/. + + Internet-Drafts are draft documents valid for a maximum of six months + and may be updated, replaced, or obsoleted by other documents at any + time. It is inappropriate to use Internet-Drafts as reference + material or to cite them other than as "work in progress." + + This Internet-Draft will expire on August 5, 2017. + +Copyright Notice + + Copyright (c) 2017 IETF Trust and the persons identified as the + document authors. All rights reserved. 
+ + This document is subject to BCP 78 and the IETF Trust's Legal + Provisions Relating to IETF Documents + (http://trustee.ietf.org/license-info) in effect on the date of + publication of this document. Please review these documents + carefully, as they describe your rights and restrictions with respect + to this document. Code Components extracted from this document must + include Simplified BSD License text as described in Section 4.e of + the Trust Legal Provisions and are provided without warranty as + described in the Simplified BSD License. + +Table of Contents + + 1. Segment Routing Documents . . . . . . . . . . . . . . . . . . 3 + 2. Introduction . . . . . . . . . . . . . . . . . . . . . . . . 3 + 2.1. Data Planes supporting Segment Routing . . . . . . . . . 4 + 2.2. Segment Routing (SR) Domain . . . . . . . . . . . . . . . 4 + 2.2.1. SR Domain in a Service Provider Network . . . . . . . 5 + 2.2.2. SR Domain in a Overlay Network . . . . . . . . . . . 6 + 3. Segment Routing Extension Header (SRH) . . . . . . . . . . . 7 + 3.1. SRH TLVs . . . . . . . . . . . . . . . . . . . . . . . . 9 + 3.1.1. Ingress Node TLV . . . . . . . . . . . . . . . . . . 10 + 3.1.2. Egress Node TLV . . . . . . . . . . . . . . . . . . . 11 + 3.1.3. Opaque Container TLV . . . . . . . . . . . . . . . . 11 + 3.1.4. Padding TLV . . . . . . . . . . . . . . . . . . . . . 12 + 3.1.5. HMAC TLV . . . . . . . . . . . . . . . . . . . . . . 13 + 3.2. SRH and RFC2460 behavior . . . . . . . . . . . . . . . . 14 + 4. SRH Procedures . . . . . . . . . . . . . . . . . . . . . . . 14 + 4.1. Source SR Node . . . . . . . . . . . . . . . . . . . . . 14 + 4.2. Transit Node . . . . . . . . . . . . . . . . . . . . . . 15 + 4.3. SR Segment Endpoint Node . . . . . . . . . . . . . . . . 16 + 5. Security Considerations . . . . . . . . . . . . . . . . . . . 16 + + + +Previdi, et al. Expires August 5, 2017 [Page 2] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + 5.1. Threat model . . . . . . . . . . . . . . . . . . . . . . 17 + 5.1.1. Source routing threats . . . . . . . . . . . . . . . 17 + 5.1.2. Applicability of RFC 5095 to SRH . . . . . . . . . . 17 + 5.1.3. Service stealing threat . . . . . . . . . . . . . . . 18 + 5.1.4. Topology disclosure . . . . . . . . . . . . . . . . . 18 + 5.1.5. ICMP Generation . . . . . . . . . . . . . . . . . . . 18 + 5.2. Security fields in SRH . . . . . . . . . . . . . . . . . 19 + 5.2.1. Selecting a hash algorithm . . . . . . . . . . . . . 20 + 5.2.2. Performance impact of HMAC . . . . . . . . . . . . . 21 + 5.2.3. Pre-shared key management . . . . . . . . . . . . . . 21 + 5.3. Deployment Models . . . . . . . . . . . . . . . . . . . . 22 + 5.3.1. Nodes within the SR domain . . . . . . . . . . . . . 22 + 5.3.2. Nodes outside of the SR domain . . . . . . . . . . . 22 + 5.3.3. SR path exposure . . . . . . . . . . . . . . . . . . 23 + 5.3.4. Impact of BCP-38 . . . . . . . . . . . . . . . . . . 23 + 6. IANA Considerations . . . . . . . . . . . . . . . . . . . . . 24 + 7. Manageability Considerations . . . . . . . . . . . . . . . . 24 + 8. Contributors . . . . . . . . . . . . . . . . . . . . . . . . 24 + 9. Acknowledgements . . . . . . . . . . . . . . . . . . . . . . 24 + 10. References . . . . . . . . . . . . . . . . . . . . . . . . . 25 + 10.1. Normative References . . . . . . . . . . . . . . . . . . 25 + 10.2. Informative References . . . . . . . . . . . . . . . . . 25 + Authors' Addresses . . . . . . . . . . . . . . . . . . . . . . . 27 + +1. 
Segment Routing Documents + + Segment Routing terminology is defined in + [I-D.ietf-spring-segment-routing]. + + Segment Routing use cases are described in [RFC7855] and + [I-D.ietf-spring-ipv6-use-cases]. + + Segment Routing protocol extensions are defined in + [I-D.ietf-isis-segment-routing-extensions], and + [I-D.ietf-ospf-ospfv3-segment-routing-extensions]. + +2. Introduction + + Segment Routing (SR), defined in [I-D.ietf-spring-segment-routing], + allows a node to steer a packet through a controlled set of + instructions, called segments, by prepending an SR header to the + packet. A segment can represent any instruction, topological or + service-based. SR allows to enforce a flow through any path + (topological or service/application based) while maintaining per-flow + state only at the ingress node to the SR domain. Segments can be + derived from different components: IGP, BGP, Services, Contexts, + Locators, etc. The list of segment forming the path is called the + Segment List and is encoded in the packet header. + + + +Previdi, et al. Expires August 5, 2017 [Page 3] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + SR allows the use of strict and loose source based routing paradigms + without requiring any additional signaling protocols in the + infrastructure hence delivering an excellent scalability property. + + The source based routing model described in + [I-D.ietf-spring-segment-routing] is inherited from the ones proposed + by [RFC1940] and [RFC2460]. The source based routing model offers + the support for explicit routing capability. + +2.1. Data Planes supporting Segment Routing + + Segment Routing (SR), can be instantiated over MPLS + ([I-D.ietf-spring-segment-routing-mpls]) and IPv6. This document + defines its instantiation over the IPv6 data-plane based on the use- + cases defined in [I-D.ietf-spring-ipv6-use-cases]. + + This document defines a new type of Routing Header (originally + defined in [RFC2460]) called the Segment Routing Header (SRH) in + order to convey the Segment List in the packet header as defined in + [I-D.ietf-spring-segment-routing]. Mechanisms through which segment + are known and advertised are outside the scope of this document. + + A segment is materialized by an IPv6 address. A segment identifies a + topological instruction or a service instruction. A segment can be + either: + + o global: a global segment represents an instruction supported by + all nodes in the SR domain and it is instantiated through an IPv6 + address globally known in the SR domain. + + o local: a local segment represents an instruction supported only by + the node who originates it and it is instantiated through an IPv6 + address that is known only by the local node. + +2.2. Segment Routing (SR) Domain + + We define the concept of the Segment Routing Domain (SR Domain) as + the set of nodes participating into the source based routing model. + These nodes may be connected to the same physical infrastructure + (e.g.: a Service Provider's network) as well as nodes remotely + connected to each other (e.g.: an enterprise VPN or an overlay). + + A non-exhaustive list of examples of SR Domains is: + + o The network of an operator, service provider, content provider, + enterprise including nodes, links and Autonomous Systems. + + + + + +Previdi, et al. Expires August 5, 2017 [Page 4] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + o A set of nodes connected as an overlay over one or more transit + providers. 
The overlay nodes exchange SR-enabled traffic with + segments belonging solely to the overlay routers (the SR domain). + None of the segments in the SR-enabled packets exchanged by the + overlay belong to the transit networks + + The source based routing model through its instantiation of the + Segment Routing Header (SRH) defined in this document equally applies + to all the above examples. + + It is assumed in this document that the SRH is added to the packet by + its source, consistently with the source routing model defined in + [RFC2460]. For example: + + o At the node originating the packet (host, server). + + o At the ingress node of an SR domain where the ingress node + receives an IPv6 packet and encapsulates it into an outer IPv6 + header followed by a Segment Routing header. + +2.2.1. SR Domain in a Service Provider Network + + The following figure illustrates an SR domain consisting of an + operator's network infrastructure. + + (-------------------------- Operator 1 -----------------------) + ( ) + ( (-----AS 1-----) (-------AS 2-------) (----AS 3-------) ) + ( ( ) ( ) ( ) ) + A1--(--(--11---13--14-)--(-21---22---23--24-)--(-31---32---34--)--)--Z1 + ( ( /|\ /|\ /| ) ( |\ /|\ /|\ /| ) ( |\ /|\ /| \ ) ) + A2--(--(/ | \/ | \/ | ) ( | \/ | \/ | \/ | ) ( | \/ | \/ | \)--)--Z2 + ( ( | /\ | /\ | ) ( | /\ | /\ | /\ | ) ( | /\ | /\ | ) ) + ( ( |/ \|/ \| ) ( |/ \|/ \|/ \| ) ( |/ \|/ \| ) ) + A3--(--(--15---17--18-)--(-25---26---27--28-)--(-35---36---38--)--)--Z3 + ( ( ) ( ) ( ) ) + ( (--------------) (------------------) (---------------) ) + ( ) + (-------------------------------------------------------------) + + Figure 1: Service Provider SR Domain + + Figure 1 describes an operator network including several ASes and + delivering connectivity between endpoints. In this scenario, Segment + Routing is used within the operator networks and across the ASes + boundaries (all being under the control of the same operator). In + this case segment routing can be used in order to address use cases + such as end-to-end traffic engineering, fast re-route, egress peer + + + +Previdi, et al. Expires August 5, 2017 [Page 5] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + engineering, data-center traffic engineering as described in + [RFC7855], [I-D.ietf-spring-ipv6-use-cases] and + [I-D.ietf-spring-resiliency-use-cases]. + + Typically, an IPv6 packet received at ingress (i.e.: from outside the + SR domain), is classified according to network operator policies and + such classification results into an outer header with an SRH applied + to the incoming packet. The SRH contains the list of segment + representing the path the packet must take inside the SR domain. + Thus, the SA of the packet is the ingress node, the DA (due to SRH + procedures described in Section 4) is set as the first segment of the + path and the last segment of the path is the egress node of the SR + domain. + + The path may include intra-AS as well as inter-AS segments. It has + to be noted that all nodes within the SR domain are under control of + the same administration. When the packet reaches the egress point of + the SR domain, the outer header and its SRH are removed so that the + destination of the packet is unaware of the SR domain the packet has + traversed. + + The outer header with the SRH is no different from any other + tunneling encapsulation mechanism and allows a network operator to + implement traffic engineering mechanisms so to efficiently steer + traffic across his infrastructure. 
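To make the ingress behaviour described in Section 2.2.1 above concrete, the following is a minimal C sketch of the outer-header addressing rule only: SA = SR domain ingress node, DA = first segment, with the egress node carried as the last segment of the path. The type and function names (outer_addr_sketch_t, sr_ingress_addr_sketch) are illustrative assumptions; they are not taken from the draft or from the VPP sources added by this patch, and the sketch deliberately omits the SRH itself, which the later sections define.

#include <netinet/in.h>          /* struct in6_addr */

/* Stripped-down view of the outer IPv6 addressing applied at the
   SR domain ingress: SA = ingress node, DA = first segment; the
   egress node travels as the last segment of the path.            */
typedef struct
{
  struct in6_addr sa;
  struct in6_addr da;
} outer_addr_sketch_t;

static void
sr_ingress_addr_sketch (outer_addr_sketch_t * o,
                        const struct in6_addr * ingress_node,
                        const struct in6_addr * segments, int n_segments)
{
  o->sa = *ingress_node;          /* SA: SR domain ingress node          */
  o->da = segments[0];            /* DA: first segment of the path       */
  /* segments[n_segments - 1] is the SR domain egress node; it becomes
     the DA only after every preceding segment has been completed.      */
  (void) n_segments;
}
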
+ +2.2.2. SR Domain in a Overlay Network + + The following figure illustrates an SR domain consisting of an + overlay network over multiple operator's networks. + + (--Operator 1---) (-----Operator 2-----) (--Operator 3---) + ( ) ( ) ( ) + A1--(--11---13--14--)--(--21---22---23--24--)--(-31---32---34--)--C1 + ( /|\ /|\ /| ) ( |\ /|\ /|\ /| ) ( |\ /|\ /| \ ) + A2--(/ | \/ | \/ | ) ( | \/ | \/ | \/ | ) ( | \/ | \/ | \)--C2 + ( | /\ | /\ | ) ( | /\ | /\ | /\ | ) ( | /\ | /\ | ) + ( |/ \|/ \| ) ( |/ \|/ \|/ \| ) ( |/ \|/ \| ) + A3--(--15---17--18--)--(--25---26---27--28--)--(-35---36---38--)--C3 + ( ) ( | | | ) ( ) + (---------------) (--|----|---------|--) (---------------) + | | | + B1 B2 B3 + + Figure 2: Overlay SR Domain + + + + + + +Previdi, et al. Expires August 5, 2017 [Page 6] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + Figure 2 describes an overlay consisting of nodes connected to three + different network operators and forming a single overlay network + where Segment routing packets are exchanged. + + The overlay consists of nodes A1, A2, A3, B1, B2, B3, C1, C2 and C3. + These nodes are connected to their respective network operator and + form an overlay network. + + Each node may originate packets with an SRH which contains, in the + segment list of the SRH or in the DA, segments identifying other + overlay nodes. This implies that packets with an SRH may traverse + operator's networks but, obviously, these SRHs cannot contain an + address/segment of the transit operators 1, 2 and 3. The SRH + originated by the overlay can only contain address/segment under the + administration of the overlay (e.g. address/segments supported by A1, + A2, A3, B1, B2, B3, C1,C2 or C3). + + In this model, the operator network nodes are transit nodes and, + according to [RFC2460], MUST NOT inspect the routing extension header + since they are not the DA of the packet. + + It is a common practice in operators networks to filter out, at + ingress, any packet whose DA is the address of an internal node and + it is also possible that an operator would filter out any packet + destined to an internal address and having an extension header in it. + + This common practice does not impact the SR-enabled traffic between + the overlay nodes as the intermediate transit networks never see a + destination address belonging to their infrastructure. These SR- + enabled overlay packets will thus never be filtered by the transit + operators. + + In all cases, transit packets (i.e.: packets whose DA is outside the + domain of the operator's network) will be forwarded accordingly + without introducing any security concern in the operator's network. + This is similar to tunneled packets. + +3. Segment Routing Extension Header (SRH) + + A new type of the Routing Header (originally defined in [RFC2460]) is + defined: the Segment Routing Header (SRH) which has a new Routing + Type, (suggested value 4) to be assigned by IANA. + + The Segment Routing Header (SRH) is defined as follows: + + + + + + + +Previdi, et al. 
Expires August 5, 2017 [Page 7] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Next Header | Hdr Ext Len | Routing Type | Segments Left | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | First Segment | Flags | RESERVED | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + | Segment List[0] (128 bits IPv6 address) | + | | + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + | | + ... + | | + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + | Segment List[n] (128 bits IPv6 address) | + | | + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + // // + // Optional Type Length Value objects (variable) // + // // + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + where: + + o Next Header: 8-bit selector. Identifies the type of header + immediately following the SRH. + + o Hdr Ext Len: 8-bit unsigned integer, is the length of the SRH + header in 8-octet units, not including the first 8 octets. + + o Routing Type: TBD, to be assigned by IANA (suggested value: 4). + + o Segments Left. Defined in [RFC2460], it contains the index, in + the Segment List, of the next segment to inspect. Segments Left + is decremented at each segment. + + o First Segment: contains the index, in the Segment List, of the + first segment of the path which is in fact the last element of the + Segment List. + + o Flags: 8 bits of flags. Following flags are defined: + + + + +Previdi, et al. Expires August 5, 2017 [Page 8] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + 0 1 2 3 4 5 6 7 + +-+-+-+-+-+-+-+-+ + |U|P|O|A|H| U | + +-+-+-+-+-+-+-+-+ + + U: Unused and for future use. SHOULD be unset on transmission + and MUST be ignored on receipt. + + P-flag: Protected flag. Set when the packet has been rerouted + through FRR mechanism by an SR endpoint node. + + O-flag: OAM flag. When set, it indicates that this packet is + an operations and management (OAM) packet. + + A-flag: Alert flag. If present, it means important Type Length + Value (TLV) objects are present. See Section 3.1 for details + on TLVs objects. + + H-flag: HMAC flag. If set, the HMAC TLV is present and is + encoded as the last TLV of the SRH. In other words, the last + 36 octets of the SRH represent the HMAC information. See + Section 3.1.5 for details on the HMAC TLV. + + o RESERVED: SHOULD be unset on transmission and MUST be ignored on + receipt. + + o Segment List[n]: 128 bit IPv6 addresses representing the nth + segment in the Segment List. The Segment List is encoded starting + from the last segment of the path. I.e., the first element of the + segment list (Segment List [0]) contains the last segment of the + path while the last segment of the Segment List (Segment List[n]) + contains the first segment of the path. The index contained in + "Segments Left" identifies the current active segment. + + o Type Length Value (TLV) are described in Section 3.1. + +3.1. SRH TLVs + + This section defines TLVs of the Segment Routing Header. + + Type Length Value (TLV) contain optional information that may be used + by the node identified in the DA of the packet. It has to be noted + that the information carried in the TLVs is not intended to be used + by the routing layer. 
Typically, TLVs carry information that is + consumed by other components (e.g.: OAM) than the routing function. + + Each TLV has its own length, format and semantic. The code-point + allocated (by IANA) to each TLV defines both the format and the + + + +Previdi, et al. Expires August 5, 2017 [Page 9] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + semantic of the information carried in the TLV. Multiple TLVs may be + encoded in the same SRH. + + The "Length" field of the TLV is primarily used to skip the TLV while + inspecting the SRH in case the node doesn't support or recognize the + TLV codepoint. The "Length" defines the TLV length in octets and not + including the "Type" and "Length" fields. + + The primary scope of TLVs is to give the receiver of the packet + information related to the source routed path (e.g.: where the packet + entered in the SR domain and where it is expected to exit). + + Additional TLVs may be defined in the future. + +3.1.1. Ingress Node TLV + + The Ingress Node TLV is optional and identifies the node this packet + traversed when entered the SR domain. The Ingress Node TLV has + following format: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type | Length | RESERVED | Flags | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + | Ingress Node (16 octets) | + | | + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + where: + + o Type: to be assigned by IANA (suggested value 1). + + o Length: 18. + + o RESERVED: 8 bits. SHOULD be unset on transmission and MUST be + ignored on receipt. + + o Flags: 8 bits. No flags are defined in this document. + + o Ingress Node: 128 bits. Defines the node where the packet is + expected to enter the SR domain. In the encapsulation case + described in Section 2.2.1, this information corresponds to the SA + of the encapsulating header. + + + + + +Previdi, et al. Expires August 5, 2017 [Page 10] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + +3.1.2. Egress Node TLV + + The Egress Node TLV is optional and identifies the node this packet + is expected to traverse when exiting the SR domain. The Egress Node + TLV has following format: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type | Length | RESERVED | Flags | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + | Egress Node (16 octets) | + | | + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + where: + + o Type: to be assigned by IANA (suggested value 2). + + o Length: 18. + + o RESERVED: 8 bits. SHOULD be unset on transmission and MUST be + ignored on receipt. + + o Flags: 8 bits. No flags are defined in this document. + + o Egress Node: 128 bits. Defines the node where the packet is + expected to exit the SR domain. In the encapsulation case + described in Section 2.2.1, this information corresponds to the + last segment of the SRH in the encapsulating header. + +3.1.3. Opaque Container TLV + + The Opaque Container TLV is optional and has the following format: + + + + + + + + + + + + + + + +Previdi, et al. 
Expires August 5, 2017 [Page 11] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type | Length | RESERVED | Flags | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + | Opaque Container (16 octets) | + | | + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + where: + + o Type: to be assigned by IANA (suggested value 3). + + o Length: 18. + + o RESERVED: 8 bits. SHOULD be unset on transmission and MUST be + ignored on receipt. + + o Flags: 8 bits. No flags are defined in this document. + + o Opaque Container: 128 bits of opaque data not relevant for the + routing layer. Typically, this information is consumed by a non- + routing component of the node receiving the packet (i.e.: the node + in the DA). + +3.1.4. Padding TLV + + The Padding TLV is optional and with the purpose of aligning the SRH + on a 8 octet boundary. The Padding TLV has the following format: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type | Length | Padding (variable) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + // Padding (variable) // + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + where: + + o Type: to be assigned by IANA (suggested value 4). + + o Length: 1 to 7 + + + + + + +Previdi, et al. Expires August 5, 2017 [Page 12] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + o Padding: from 1 to 7 octets of padding. Padding bits have no + semantic. They SHOULD be set to 0 on transmission and MUST be + ignored on receipt. + + The following applies to the Padding TLV: + + o Padding TLV is optional and MAY only appear once in the SRH. If + present, it MUST have a length between 1 and 7 octets. + + o The Padding TLV is used in order to align the SRH total length on + the 8 octet boundary. + + o When present, the Padding TLV MUST appear as the last TLV before + the HMAC TLV (if HMAC TLV is present). + + o When present, the Padding TLV MUST have a length from 1 to 7 in + order to align the SRH total lenght on a 8-octet boundary. + + o When a router inspecting the SRH encounters the Padding TLV, it + MUST assume that no other TLV (other than the HMAC) follow the + Padding TLV. + +3.1.5. HMAC TLV + + HMAC TLV is optional and contains the HMAC information. The HMAC TLV + has the following format: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type | Length | RESERVED | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | HMAC Key ID (4 octets) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | // + | HMAC (32 octets) // + | // + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + where: + + o Type: to be assigned by IANA (suggested value 5). + + o Length: 38. + + o RESERVED: 2 octets. SHOULD be unset on transmission and MUST be + ignored on receipt. + + + + +Previdi, et al. Expires August 5, 2017 [Page 13] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + o HMAC Key ID: 4 octets. + + o HMAC: 32 octets. 
+ + o HMAC and HMAC Key ID usage is described in Section 5 + + The Following applies to the HMAC TLV: + + o When present, the HMAC TLV MUST be encoded as the last TLV of the + SRH. + + o If the HMAC TLV is present, the SRH H-Flag (Figure 4) MUST be set. + + o When the H-flag is set in the SRH, the router inspecting the SRH + MUST find the HMAC TLV in the last 38 octets of the SRH. + +3.2. SRH and RFC2460 behavior + + The SRH being a new type of the Routing Header, it also has the same + properties: + + SHOULD only appear once in the packet. + + Only the router whose address is in the DA field of the packet + header MUST inspect the SRH. + + Therefore, Segment Routing in IPv6 networks implies that the segment + identifier (i.e.: the IPv6 address of the segment) is moved into the + DA of the packet. + + The DA of the packet changes at each segment termination/completion + and therefore the final DA of the packet MUST be encoded as the last + segment of the path. + +4. SRH Procedures + + In this section we describe the different procedures on the SRH. + +4.1. Source SR Node + + A Source SR Node can be any node originating an IPv6 packet with its + IPv6 and Segment Routing Headers. This include either: + + A host originating an IPv6 packet. + + An SR domain ingress router encapsulating a received IPv6 packet + into an outer IPv6 header followed by an SRH. + + + + +Previdi, et al. Expires August 5, 2017 [Page 14] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + The mechanism through which a Segment List is derived is outside of + the scope of this document. As an example, the Segment List may be + obtained through: + + Local path computation. + + Local configuration. + + Interaction with a centralized controller delivering the path. + + Any other mechanism. + + The following are the steps of the creation of the SRH: + + Next Header and Hdr Ext Len fields are set according to [RFC2460]. + + Routing Type field is set as TBD (to be allocated by IANA, + suggested value 4). + + The Segment List is built with the FIRST segment of the path + encoded in the LAST element of the Segment List. Subsequent + segments are encoded on top of the first segment. Finally, the + LAST segment of the path is encoded in the FIRST element of the + Segment List. In other words, the Segment List is encoded in the + reverse order of the path. + + The final DA of the packet is encoded as the last segment of the + path (encoded in the first element of the Segment List). + + The DA of the packet is set with the value of the first segment + (found in the last element of the segment list). + + The Segments Left field is set to n-1 where n is the number of + elements in the Segment List. + + The First Segment field is set to n-1 where n is the number of + elements in the Segment List. + + The packet is sent out towards the first segment (i.e.: + represented in the packet DA). + + HMAC TLV may be set according to Section 5. + +4.2. Transit Node + + According to [RFC2460], the only node who is allowed to inspect the + Routing Extension Header (and therefore the SRH), is the node + corresponding to the DA of the packet. Any other transit node MUST + + + +Previdi, et al. Expires August 5, 2017 [Page 15] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + NOT inspect the underneath routing header and MUST forward the packet + towards the DA and according to the IPv6 routing table. 
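The Segment List ordering and the two index fields described in the Source SR Node steps of Section 4.1 above are easy to get wrong, so a small C sketch of a source node building them follows. It is only a sketch under stated assumptions: the names (srh_sketch_t, build_srh_sketch), the fixed-size list, and the caller contract (1 <= n <= SRH_SKETCH_MAX_SEGS) are hypothetical and do not reflect the encoding used by the VPP code in this patch.

#include <netinet/in.h>          /* struct in6_addr */

#define SRH_SKETCH_MAX_SEGS 16

/* Only the SRH fields relevant to list construction (see Section 3). */
typedef struct
{
  unsigned char segments_left;   /* index of the next segment to inspect */
  unsigned char first_segment;   /* index of the first segment           */
  struct in6_addr seg_list[SRH_SKETCH_MAX_SEGS];
} srh_sketch_t;

/* path[0] .. path[n-1] is the path in forward order; path[n-1] is the
   final DA.  Returns the address to place in the packet's DA field.   */
static struct in6_addr
build_srh_sketch (srh_sketch_t * srh, const struct in6_addr * path, int n)
{
  int i;

  /* The Segment List is encoded in reverse path order (Section 4.1):
     the LAST element holds the first segment, the FIRST element holds
     the final DA.                                                      */
  for (i = 0; i < n; i++)
    srh->seg_list[i] = path[n - 1 - i];

  srh->segments_left = (unsigned char) (n - 1);   /* per Section 4.1 */
  srh->first_segment = (unsigned char) (n - 1);   /* per Section 4.1 */

  /* The packet DA is the first segment, i.e. the last list element.   */
  return srh->seg_list[n - 1];
}

Because the list is reverse-encoded, an SR segment endpoint that simply decrements Segments Left and copies Segment List[Segments Left] into the DA (the procedure of Section 4.3 below) walks the path in forward order until the final DA, stored in Segment List[0], is reached.
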
+ + In the example case described in Section 2.2.2, when SR capable nodes + are connected through an overlay spanning multiple third-party + infrastructure, it is safe to send SRH packets (i.e.: packet having a + Segment Routing Header) between each other overlay/SR-capable nodes + as long as the segment list does not include any of the transit + provider nodes. In addition, as a generic security measure, any + service provider will block any packet destined to one of its + internal routers, especially if these packets have an extended header + in it. + +4.3. SR Segment Endpoint Node + + The SR segment endpoint node is the node whose address is in the DA. + The segment endpoint node inspects the SRH and does: + + 1. IF DA = myself (segment endpoint) + 2. IF Segments Left > 0 THEN + decrement Segments Left + update DA with Segment List[Segments Left] + 3. ELSE continue IPv6 processing of the packet + End of processing. + 4. Forward the packet out + +5. Security Considerations + + This section analyzes the security threat model, the security issues + and proposed solutions related to the new Segment Routing Header. + + The Segment Routing Header (SRH) is simply another type of the + routing header as described in RFC 2460 [RFC2460] and is: + + o Added by an SR edge router when entering the segment routing + domain or by the originating host itself. The source host can + even be outside the SR domain; + + o inspected and acted upon when reaching the destination address of + the IP header per RFC 2460 [RFC2460]. + + Per RFC2460 [RFC2460], routers on the path that simply forward an + IPv6 packet (i.e. the IPv6 destination address is none of theirs) + will never inspect and process the content of the SRH. Routers whose + one interface IPv6 address equals the destination address field of + the IPv6 packet MUST parse the SRH and, if supported and if the local + configuration allows it, MUST act accordingly to the SRH content. + + + + +Previdi, et al. Expires August 5, 2017 [Page 16] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + According to RFC2460 [RFC2460], the default behavior of a non SR- + capable router upon receipt of an IPv6 packet with SRH destined to an + address of its, is to: + + o ignore the SRH completely if the Segment Left field is 0 and + proceed to process the next header in the IPv6 packet; + + o discard the IPv6 packet if Segment Left field is greater than 0, + it MAY send a Parameter Problem ICMP message back to the Source + Address. + +5.1. Threat model + +5.1.1. Source routing threats + + Using an SRH is similar to source routing, therefore it has some + well-known security issues as described in RFC4942 [RFC4942] section + 2.1.1 and RFC5095 [RFC5095]: + + o amplification attacks: where a packet could be forged in such a + way to cause looping among a set of SR-enabled routers causing + unnecessary traffic, hence a Denial of Service (DoS) against + bandwidth; + + o reflection attack: where a hacker could force an intermediate node + to appear as the immediate attacker, hence hiding the real + attacker from naive forensic; + + o bypass attack: where an intermediate node could be used as a + stepping stone (for example in a De-Militarized Zone) to attack + another host (for example in the datacenter or any back-end + server). + +5.1.2. 
Applicability of RFC 5095 to SRH + + First of all, the reader must remember this specific part of section + 1 of RFC5095 [RFC5095], "A side effect is that this also eliminates + benign RH0 use-cases; however, such applications may be facilitated + by future Routing Header specifications.". In short, it is not + forbidden to create new secure type of Routing Header; for example, + RFC 6554 (RPL) [RFC6554] also creates a new Routing Header type for a + specific application confined in a single network. + + In the segment routing architecture described in + [I-D.ietf-spring-segment-routing] there are basically two kinds of + nodes (routers and hosts): + + + + + +Previdi, et al. Expires August 5, 2017 [Page 17] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + o nodes within the SR domain, which is within one single + administrative domain, i.e., where all nodes are trusted anyway + else the damage caused by those nodes could be worse than + amplification attacks: traffic interception, man-in-the-middle + attacks, more server DoS by dropping packets, and so on. + + o nodes outside of the SR domain, which is outside of the + administrative segment routing domain hence they cannot be trusted + because there is no physical security for those nodes, i.e., they + can be replaced by hostile nodes or can be coerced in wrong + behaviors. + + The main use case for SR consists of the single administrative domain + where only trusted nodes with SR enabled and configured participate + in SR: this is the same model as in RFC6554 [RFC6554]. All non- + trusted nodes do not participate as either SR processing is not + enabled by default or because they only process SRH from nodes within + their domain. + + Moreover, all SR nodes ignore SRH created by outsiders based on + topology information (received on a peering or internal interface) or + on presence and validity of the HMAC field. Therefore, if + intermediate nodes ONLY act on valid and authorized SRH (such as + within a single administrative domain), then there is no security + threat similar to RH-0. Hence, the RFC 5095 [RFC5095] attacks are + not applicable. + +5.1.3. Service stealing threat + + Segment routing is used for added value services, there is also a + need to prevent non-participating nodes to use those services; this + is called 'service stealing prevention'. + +5.1.4. Topology disclosure + + The SRH may also contains IPv6 addresses of some intermediate SR- + nodes in the path towards the destination, this obviously reveals + those addresses to the potentially hostile attackers if those + attackers are able to intercept packets containing SRH. On the other + hand, if the attacker can do a traceroute whose probes will be + forwarded along the SR path, then there is little learned by + intercepting the SRH itself. + +5.1.5. ICMP Generation + + Per section 4.4 of RFC2460 [RFC2460], when destination nodes (i.e. + where the destination address is one of theirs) receive a Routing + Header with unsupported Routing Type, the required behavior is: + + + +Previdi, et al. Expires August 5, 2017 [Page 18] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + o If Segments Left is zero, the node must ignore the Routing header + and proceed to process the next header in the packet. + + o If Segments Left is non-zero, the node must discard the packet and + send an ICMP Parameter Problem, Code 0, message to the packet's + Source Address, pointing to the unrecognized Routing Type. 
+ + This required behavior could be used by an attacker to force the + generation of ICMP message by any node. The attacker could send + packets with SRH (with Segment Left set to 0) destined to a node not + supporting SRH. Per RFC2460 [RFC2460], the destination node could + generate an ICMP message, causing a local CPU utilization and if the + source of the offending packet with SRH was spoofed could lead to a + reflection attack without any amplification. + + It must be noted that this is a required behavior for any unsupported + Routing Type and not limited to SRH packets. So, it is not specific + to SRH and the usual rate limiting for ICMP generation is required + anyway for any IPv6 implementation and has been implemented and + deployed for many years. + +5.2. Security fields in SRH + + This section summarizes the use of specific fields in the SRH. They + are based on a key-hashed message authentication code (HMAC). + + The security-related fields in the SRH are instantiated by the HMAC + TLV, containing: + + o HMAC Key-id, 32 bits wide; + + o HMAC, 256 bits wide (optional, exists only if HMAC Key-id is not + 0). + + The HMAC field is the output of the HMAC computation (per RFC 2104 + [RFC2104]) using a pre-shared key identified by HMAC Key-id and of + the text which consists of the concatenation of: + + o the source IPv6 address; + + o First Segment field; + + o an octet of bit flags; + + o HMAC Key-id; + + o all addresses in the Segment List. + + + + +Previdi, et al. Expires August 5, 2017 [Page 19] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + The purpose of the HMAC TLV is to verify the validity, the integrity + and the authorization of the SRH itself. If an outsider of the SR + domain does not have access to a current pre-shared secret, then it + cannot compute the right HMAC field and the first SR router on the + path processing the SRH and configured to check the validity of the + HMAC will simply reject the packet. + + The HMAC TLV is located at the end of the SRH simply because only the + router on the ingress of the SR domain needs to process it, then all + other SR nodes can ignore it (based on local policy) because they + trust the upstream router. This is to speed up forwarding operations + because SR routers which do not validate the SRH do not need to parse + the SRH until the end. + + The HMAC Key-id field allows for the simultaneous existence of + several hash algorithms (SHA-256, SHA3-256 ... or future ones) as + well as pre-shared keys. The HMAC Key-id field is opaque, i.e., it + has neither syntax nor semantic except as an index to the right + combination of pre-shared key and hash algorithm and except that a + value of 0 means that there is no HMAC field. Having an HMAC Key-id + field allows for pre-shared key roll-over when two pre-shared keys + are supported for a while when all SR nodes converged to a fresher + pre-shared key. It could also allow for interoperation among + different SR domains if allowed by local policy and assuming a + collision-free HMAC Key Id allocation. + + When a specific SRH is linked to a time-related service (such as + turbo-QoS for a 1-hour period) where the DA, Segment ID (SID) are + identical, then it is important to refresh the shared-secret + frequently as the HMAC validity period expires only when the HMAC + Key-id and its associated shared-secret expires. + +5.2.1. Selecting a hash algorithm + + The HMAC field in the HMAC TLV is 256 bit wide. 
Therefore, the HMAC + MUST be based on a hash function whose output is at least 256 bits. + If the output of the hash function is 256, then this output is simply + inserted in the HMAC field. If the output of the hash function is + larger than 256 bits, then the output value is truncated to 256 by + taking the least-significant 256 bits and inserting them in the HMAC + field. + + SRH implementations can support multiple hash functions but MUST + implement SHA-2 [FIPS180-4] in its SHA-256 variant. + + NOTE: SHA-1 is currently used by some early implementations used for + quick interoperations testing, the 160-bit hash value must then be + + + + +Previdi, et al. Expires August 5, 2017 [Page 20] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + right-hand padded with 96 bits set to 0. The authors understand that + this is not secure but is ok for limited tests. + +5.2.2. Performance impact of HMAC + + While adding an HMAC to each and every SR packet increases the + security, it has a performance impact. Nevertheless, it must be + noted that: + + o the HMAC field is used only when SRH is added by a device (such as + a home set-up box) which is outside of the segment routing domain. + If the SRH is added by a router in the trusted segment routing + domain, then, there is no need for an HMAC field, hence no + performance impact. + + o when present, the HMAC field MUST only be checked and validated by + the first router of the segment routing domain, this router is + named 'validating SR router'. Downstream routers may not inspect + the HMAC field. + + o this validating router can also have a cache of to improve the performance. It is not the + same use case as in IPsec where HMAC value was unique per packet, + in SRH, the HMAC value is unique per flow. + + o Last point, hash functions such as SHA-2 have been optimized for + security and performance and there are multiple implementations + with good performance. + + With the above points in mind, the performance impact of using HMAC + is minimized. + +5.2.3. Pre-shared key management + + The field HMAC Key-id allows for: + + o key roll-over: when there is a need to change the key (the hash + pre-shared secret), then multiple pre-shared keys can be used + simultaneously. The validating routing can have a table of for the currently active and future + keys. + + o different algorithms: by extending the previous table to , the validating router + can also support simultaneously several hash algorithms (see + section Section 5.2.1) + + The pre-shared secret distribution can be done: + + + +Previdi, et al. Expires August 5, 2017 [Page 21] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + o in the configuration of the validating routers, either by static + configuration or any SDN oriented approach; + + o dynamically using a trusted key distribution such as [RFC6407] + + The intent of this document is NOT to define yet-another-key- + distribution-protocol. + +5.3. Deployment Models + +5.3.1. Nodes within the SR domain + + An SR domain is defined as a set of interconnected routers where all + routers at the perimeter are configured to add and act on SRH. Some + routers inside the SR domain can also act on SRH or simply forward + IPv6 packets. + + The routers inside an SR domain can be trusted to generate SRH and to + process SRH received on interfaces that are part of the SR domain. 
+ These nodes MUST drop all SRH packets received on an interface that + is not part of the SR domain and containing an SRH whose HMAC field + cannot be validated by local policies. This includes obviously + packet with an SRH generated by a non-cooperative SR domain. + + If the validation fails, then these packets MUST be dropped, ICMP + error messages (parameter problem) SHOULD be generated (but rate + limited) and SHOULD be logged. + +5.3.2. Nodes outside of the SR domain + + Nodes outside of the SR domain cannot be trusted for physical + security; hence, they need to request by some trusted means (outside + of the scope of this document) a complete SRH for each new connection + (i.e. new destination address). The received SRH MUST include an + HMAC TLV which is computed correctly (see Section 5.2). + + When an outside node sends a packet with an SRH and towards an SR + domain ingress node, the packet MUST contain the HMAC TLV (with a + Key-id and HMAC fields) and the the destination address MUST be an + address of an SR domain ingress node . + + The ingress SR router, i.e., the router with an interface address + equals to the destination address, MUST verify the HMAC TLV. + + If the validation is successful, then the packet is simply forwarded + as usual for an SR packet. As long as the packet travels within the + SR domain, no further HMAC check needs to be done. Subsequent + + + + +Previdi, et al. Expires August 5, 2017 [Page 22] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + routers in the SR domain MAY verify the HMAC TLV when they process + the SRH (i.e. when they are the destination). + + If the validation fails, then this packet MUST be dropped, an ICMP + error message (parameter problem) SHOULD be generated (but rate + limited) and SHOULD be logged. + +5.3.3. SR path exposure + + As the intermediate SR nodes addresses appears in the SRH, if this + SRH is visible to an outsider then he/she could reuse this knowledge + to launch an attack on the intermediate SR nodes or get some insider + knowledge on the topology. This is especially applicable when the + path between the source node and the first SR domain ingress router + is on the public Internet. + + The first remark is to state that 'security by obscurity' is never + enough; in other words, the security policy of the SR domain MUST + assume that the internal topology and addressing is known by the + attacker. A simple traceroute will also give the same information + (with even more information as all intermediate nodes between SID + will also be exposed). IPsec Encapsulating Security Payload + [RFC4303] cannot be use to protect the SRH as per RFC4303 the ESP + header must appear after any routing header (including SRH). + + To prevent a user to leverage the gained knowledge by intercepting + SRH, it it recommended to apply an infrastructure Access Control List + (iACL) at the edge of the SR domain. This iACL will drop all packets + from outside the SR-domain whose destination is any address of any + router inside the domain. This security policy should be tuned for + local operations. + +5.3.4. Impact of BCP-38 + + BCP-38 [RFC2827], also known as "Network Ingress Filtering", checks + whether the source address of packets received on an interface is + valid for this interface. The use of loose source routing such as + SRH forces packets to follow a path which differs from the expected + routing. 
Therefore, if BCP-38 was implemented in all routers inside + the SR domain, then SR packets could be received by an interface + which is not expected one and the packets could be dropped. + + As an SR domain is usually a subset of one administrative domain, and + as BCP-38 is only deployed at the ingress routers of this + administrative domain and as packets arriving at those ingress + routers have been normally forwarded using the normal routing + information, then there is no reason why this ingress router should + + + + +Previdi, et al. Expires August 5, 2017 [Page 23] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + drop the SRH packet based on BCP-38. Routers inside the domain + commonly do not apply BCP-38; so, this is not a problem. + +6. IANA Considerations + + This document makes the following registrations in the Internet + Protocol Version 6 (IPv6) Parameters "Routing Type" registry + maintained by IANA: + + Suggested Description Reference + Value + ---------------------------------------------------------- + 4 Segment Routing Header (SRH) This document + + In addition, this document request IANA to create and maintain a new + Registry: "Segment Routing Header Type-Value Objects". The following + code-points are requested from the registry: + + Registry: Segment Routing Header Type-Value Objects + + Suggested Description Reference + Value + ----------------------------------------------------- + 1 Ingress Node TLV This document + 2 Egress Node TLV This document + 3 Opaque Container TLV This document + 4 Padding TLV This document + 5 HMAC TLV This document + +7. Manageability Considerations + + TBD + +8. Contributors + + Dave Barach, John Leddy, John Brzozowski, Pierre Francois, Nagendra + Kumar, Mark Townsley, Christian Martin, Roberta Maglione, James + Connolly, Aloys Augustin contributed to the content of this document. + +9. Acknowledgements + + The authors would like to thank Ole Troan, Bob Hinden, Fred Baker, + Brian Carpenter, Alexandru Petrescu and Punit Kumar Jaiswal for their + comments to this document. + + + + + + + +Previdi, et al. Expires August 5, 2017 [Page 24] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + +10. References + +10.1. Normative References + + [FIPS180-4] + National Institute of Standards and Technology, "FIPS + 180-4 Secure Hash Standard (SHS)", March 2012, + . + + [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate + Requirement Levels", BCP 14, RFC 2119, + DOI 10.17487/RFC2119, March 1997, + . + + [RFC2460] Deering, S. and R. Hinden, "Internet Protocol, Version 6 + (IPv6) Specification", RFC 2460, DOI 10.17487/RFC2460, + December 1998, . + + [RFC4303] Kent, S., "IP Encapsulating Security Payload (ESP)", + RFC 4303, DOI 10.17487/RFC4303, December 2005, + . + + [RFC5095] Abley, J., Savola, P., and G. Neville-Neil, "Deprecation + of Type 0 Routing Headers in IPv6", RFC 5095, + DOI 10.17487/RFC5095, December 2007, + . + + [RFC6407] Weis, B., Rowles, S., and T. Hardjono, "The Group Domain + of Interpretation", RFC 6407, DOI 10.17487/RFC6407, + October 2011, . + +10.2. Informative References + + [I-D.ietf-isis-segment-routing-extensions] + Previdi, S., Filsfils, C., Bashandy, A., Gredler, H., + Litkowski, S., Decraene, B., and j. jefftant@gmail.com, + "IS-IS Extensions for Segment Routing", draft-ietf-isis- + segment-routing-extensions-09 (work in progress), October + 2016. 
+ + [I-D.ietf-ospf-ospfv3-segment-routing-extensions] + Psenak, P., Previdi, S., Filsfils, C., Gredler, H., + Shakir, R., Henderickx, W., and J. Tantsura, "OSPFv3 + Extensions for Segment Routing", draft-ietf-ospf-ospfv3- + segment-routing-extensions-07 (work in progress), October + 2016. + + + + +Previdi, et al. Expires August 5, 2017 [Page 25] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + [I-D.ietf-spring-ipv6-use-cases] + Brzozowski, J., Leddy, J., Townsley, W., Filsfils, C., and + R. Maglione, "IPv6 SPRING Use Cases", draft-ietf-spring- + ipv6-use-cases-08 (work in progress), January 2017. + + [I-D.ietf-spring-resiliency-use-cases] + Filsfils, C., Previdi, S., Decraene, B., and R. Shakir, + "Resiliency use cases in SPRING networks", draft-ietf- + spring-resiliency-use-cases-08 (work in progress), October + 2016. + + [I-D.ietf-spring-segment-routing] + Filsfils, C., Previdi, S., Decraene, B., Litkowski, S., + and R. Shakir, "Segment Routing Architecture", draft-ietf- + spring-segment-routing-10 (work in progress), November + 2016. + + [I-D.ietf-spring-segment-routing-mpls] + Filsfils, C., Previdi, S., Bashandy, A., Decraene, B., + Litkowski, S., Horneffer, M., Shakir, R., + jefftant@gmail.com, j., and E. Crabbe, "Segment Routing + with MPLS data plane", draft-ietf-spring-segment-routing- + mpls-06 (work in progress), January 2017. + + [RFC1940] Estrin, D., Li, T., Rekhter, Y., Varadhan, K., and D. + Zappala, "Source Demand Routing: Packet Format and + Forwarding Specification (Version 1)", RFC 1940, + DOI 10.17487/RFC1940, May 1996, + . + + [RFC2104] Krawczyk, H., Bellare, M., and R. Canetti, "HMAC: Keyed- + Hashing for Message Authentication", RFC 2104, + DOI 10.17487/RFC2104, February 1997, + . + + [RFC2827] Ferguson, P. and D. Senie, "Network Ingress Filtering: + Defeating Denial of Service Attacks which employ IP Source + Address Spoofing", BCP 38, RFC 2827, DOI 10.17487/RFC2827, + May 2000, . + + [RFC4942] Davies, E., Krishnan, S., and P. Savola, "IPv6 Transition/ + Co-existence Security Considerations", RFC 4942, + DOI 10.17487/RFC4942, September 2007, + . + + + + + + + +Previdi, et al. Expires August 5, 2017 [Page 26] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + [RFC6554] Hui, J., Vasseur, JP., Culler, D., and V. Manral, "An IPv6 + Routing Header for Source Routes with the Routing Protocol + for Low-Power and Lossy Networks (RPL)", RFC 6554, + DOI 10.17487/RFC6554, March 2012, + . + + [RFC7855] Previdi, S., Ed., Filsfils, C., Ed., Decraene, B., + Litkowski, S., Horneffer, M., and R. Shakir, "Source + Packet Routing in Networking (SPRING) Problem Statement + and Requirements", RFC 7855, DOI 10.17487/RFC7855, May + 2016, . + +Authors' Addresses + + Stefano Previdi (editor) + Cisco Systems, Inc. + Via Del Serafico, 200 + Rome 00142 + Italy + + Email: sprevidi@cisco.com + + + Clarence Filsfils + Cisco Systems, Inc. + Brussels + BE + + Email: cfilsfil@cisco.com + + + Brian Field + Comcast + 4100 East Dry Creek Road + Centennial, CO 80122 + US + + Email: Brian_Field@cable.comcast.com + + + Ida Leung + Rogers Communications + 8200 Dixie Road + Brampton, ON L6T 0C1 + CA + + Email: Ida.Leung@rci.rogers.com + + + + +Previdi, et al. 
Expires August 5, 2017 [Page 27] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + Jen Linkova + Google + 1600 Amphitheatre Parkway + Mountain View, CA 94043 + US + + Email: furry@google.com + + + Ebben Aries + Facebook + US + + Email: exa@fb.com + + + Tomoya Kosugi + NTT + 3-9-11, Midori-Cho Musashino-Shi, + Tokyo 180-8585 + JP + + Email: kosugi.tomoya@lab.ntt.co.jp + + + Eric Vyncke + Cisco Systems, Inc. + De Kleetlaann 6A + Diegem 1831 + Belgium + + Email: evyncke@cisco.com + + + David Lebrun + Universite Catholique de Louvain + Place Ste Barbe, 2 + Louvain-la-Neuve, 1348 + Belgium + + Email: david.lebrun@uclouvain.be + + + + + + + + + + +Previdi, et al. Expires August 5, 2017 [Page 28] \ No newline at end of file diff --git a/src/vnet/sr/rfc_draft_05.txt b/src/vnet/sr/rfc_draft_05.txt deleted file mode 100644 index bc41c181..00000000 --- a/src/vnet/sr/rfc_draft_05.txt +++ /dev/null @@ -1,1265 +0,0 @@ -Network Working Group S. Previdi, Ed. -Internet-Draft C. Filsfils -Intended status: Standards Track Cisco Systems, Inc. -Expires: June 12, 2015 B. Field - Comcast - I. Leung - Rogers Communications - December 9, 2014 - - - IPv6 Segment Routing Header (SRH) - draft-previdi-6man-segment-routing-header-05 - -Abstract - - Segment Routing (SR) allows a node to steer a packet through a - controlled set of instructions, called segments, by prepending a SR - header to the packet. A segment can represent any instruction, - topological or service-based. SR allows to enforce a flow through - any path (topological, or application/service based) while - maintaining per-flow state only at the ingress node to the SR domain. - - Segment Routing can be applied to the IPv6 data plane with the - addition of a new type of Routing Extension Header. This draft - describes the Segment Routing Extension Header Type and how it is - used by SR capable nodes. - -Requirements Language - - The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", - "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this - document are to be interpreted as described in RFC 2119 [RFC2119]. - -Status of This Memo - - This Internet-Draft is submitted in full conformance with the - provisions of BCP 78 and BCP 79. - - Internet-Drafts are working documents of the Internet Engineering - Task Force (IETF). Note that other groups may also distribute - working documents as Internet-Drafts. The list of current Internet- - Drafts is at http://datatracker.ietf.org/drafts/current/. - - Internet-Drafts are draft documents valid for a maximum of six months - and may be updated, replaced, or obsoleted by other documents at any - time. It is inappropriate to use Internet-Drafts as reference - material or to cite them other than as "work in progress." - - - - -Previdi, et al. Expires June 12, 2015 [Page 1] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - - This Internet-Draft will expire on June 12, 2015. - -Copyright Notice - - Copyright (c) 2014 IETF Trust and the persons identified as the - document authors. All rights reserved. - - This document is subject to BCP 78 and the IETF Trust's Legal - Provisions Relating to IETF Documents - (http://trustee.ietf.org/license-info) in effect on the date of - publication of this document. Please review these documents - carefully, as they describe your rights and restrictions with respect - to this document. 
Code Components extracted from this document must - include Simplified BSD License text as described in Section 4.e of - the Trust Legal Provisions and are provided without warranty as - described in the Simplified BSD License. - -Table of Contents - - 1. Structure of this document . . . . . . . . . . . . . . . . . 3 - 2. Segment Routing Documents . . . . . . . . . . . . . . . . . . 3 - 3. Introduction . . . . . . . . . . . . . . . . . . . . . . . . 3 - 3.1. Data Planes supporting Segment Routing . . . . . . . . . 4 - 3.2. Illustration . . . . . . . . . . . . . . . . . . . . . . 4 - 4. Abstract Routing Model . . . . . . . . . . . . . . . . . . . 7 - 4.1. Segment Routing Global Block (SRGB) . . . . . . . . . . . 8 - 4.2. Traffic Engineering with SR . . . . . . . . . . . . . . . 9 - 4.3. Segment Routing Database . . . . . . . . . . . . . . . . 10 - 5. IPv6 Instantiation of Segment Routing . . . . . . . . . . . . 10 - 5.1. Segment Identifiers (SIDs) and SRGB . . . . . . . . . . . 10 - 5.1.1. Node-SID . . . . . . . . . . . . . . . . . . . . . . 11 - 5.1.2. Adjacency-SID . . . . . . . . . . . . . . . . . . . . 11 - 5.2. Segment Routing Extension Header (SRH) . . . . . . . . . 11 - 5.2.1. SRH and RFC2460 behavior . . . . . . . . . . . . . . 15 - 6. SRH Procedures . . . . . . . . . . . . . . . . . . . . . . . 15 - 6.1. Segment Routing Operations . . . . . . . . . . . . . . . 15 - 6.2. Segment Routing Node Functions . . . . . . . . . . . . . 16 - 6.2.1. Ingress SR Node . . . . . . . . . . . . . . . . . . . 16 - 6.2.2. Transit Non-SR Capable Node . . . . . . . . . . . . . 18 - 6.2.3. SR Intra Segment Transit Node . . . . . . . . . . . . 18 - 6.2.4. SR Segment Endpoint Node . . . . . . . . . . . . . . 18 - 6.3. FRR Flag Settings . . . . . . . . . . . . . . . . . . . . 18 - 7. SR and Tunneling . . . . . . . . . . . . . . . . . . . . . . 18 - 8. Example Use Case . . . . . . . . . . . . . . . . . . . . . . 19 - 9. IANA Considerations . . . . . . . . . . . . . . . . . . . . . 21 - 10. Manageability Considerations . . . . . . . . . . . . . . . . 21 - 11. Security Considerations . . . . . . . . . . . . . . . . . . . 21 - 12. Contributors . . . . . . . . . . . . . . . . . . . . . . . . 21 - - - -Previdi, et al. Expires June 12, 2015 [Page 2] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - - 13. Acknowledgements . . . . . . . . . . . . . . . . . . . . . . 21 - 14. References . . . . . . . . . . . . . . . . . . . . . . . . . 21 - 14.1. Normative References . . . . . . . . . . . . . . . . . . 21 - 14.2. Informative References . . . . . . . . . . . . . . . . . 21 - Authors' Addresses . . . . . . . . . . . . . . . . . . . . . . . 22 - -1. Structure of this document - - Section 3 gives an introduction on SR for IPv6 networks. - - Section 4 describes the Segment Routing abstract model. - - Section 5 defines the Segment Routing Header (SRH) allowing - instantiation of SR over IPv6 dataplane. - - Section 6 details the procedures of the Segment Routing Header. - -2. Segment Routing Documents - - Segment Routing terminology is defined in - [I-D.filsfils-spring-segment-routing]. - - Segment Routing use cases are described in - [I-D.filsfils-spring-segment-routing-use-cases]. - - Segment Routing IPv6 use cases are described in - [I-D.ietf-spring-ipv6-use-cases]. - - Segment Routing protocol extensions are defined in - [I-D.ietf-isis-segment-routing-extensions], and - [I-D.psenak-ospf-segment-routing-ospfv3-extension]. 
- - The security mechanisms of the Segment Routing Header (SRH) are - described in [I-D.vyncke-6man-segment-routing-security]. - -3. Introduction - - Segment Routing (SR), defined in - [I-D.filsfils-spring-segment-routing], allows a node to steer a - packet through a controlled set of instructions, called segments, by - prepending a SR header to the packet. A segment can represent any - instruction, topological or service-based. SR allows to enforce a - flow through any path (topological or service/application based) - while maintaining per-flow state only at the ingress node to the SR - domain. Segments can be derived from different components: IGP, BGP, - Services, Contexts, Locators, etc. The list of segment forming the - path is called the Segment List and is encoded in the packet header. - - - - -Previdi, et al. Expires June 12, 2015 [Page 3] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - - SR allows the use of strict and loose source based routing paradigms - without requiring any additional signaling protocols in the - infrastructure hence delivering an excellent scalability property. - - The source based routing model described in - [I-D.filsfils-spring-segment-routing] is inherited from the ones - proposed by [RFC1940] and [RFC2460]. The source based routing model - offers the support for explicit routing capability. - -3.1. Data Planes supporting Segment Routing - - Segment Routing (SR), can be instantiated over MPLS - ([I-D.filsfils-spring-segment-routing-mpls]) and IPv6. This document - defines its instantiation over the IPv6 data-plane based on the use- - cases defined in [I-D.ietf-spring-ipv6-use-cases]. - - Segment Routing for IPv6 (SR-IPv6) is required in networks where MPLS - data-plane is not used or, when combined with SR-MPLS, in networks - where MPLS is used in the core and IPv6 is used at the edge (home - networks, datacenters). - - This document defines a new type of Routing Header (originally - defined in [RFC2460]) called the Segment Routing Header (SRH) in - order to convey the Segment List in the packet header as defined in - [I-D.filsfils-spring-segment-routing]. Mechanisms through which - segment are known and advertised are outside the scope of this - document. - -3.2. Illustration - - In the context of Figure 1 where all the links have the same IGP - cost, let us assume that a packet P enters the SR domain at an - ingress edge router I and that the operator requests the following - requirements for packet P: - - The local service S offered by node B must be applied to packet P. - - The links AB and CE cannot be used to transport the packet P. - - Any node N along the journey of the packet should be able to - determine where the packet P entered the SR domain and where it - will exit. The intermediate node should be able to determine the - paths from the ingress edge router to itself, and from itself to - the egress edge router. - - Per-flow State for packet P should only be created at the ingress - edge router. - - - - -Previdi, et al. Expires June 12, 2015 [Page 4] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - - The operator can forbid, for security reasons, anyone outside the - operator domain to exploit its intra-domain SR capabilities. - - I---A---B---C---E - \ | / \ / - \ | / F - \|/ - D - - Figure 1: An illustration of SR properties - - All these properties may be realized by instructing the ingress SR - edge router I to push the following abstract SR header on the packet - P. 
- - +---------------------------------------------------------------+ - | | | - | Abstract SR Header | | - | | | - | {SD, SB, SS, SF, SE}, Ptr, SI, SE | Transported | - | ^ | | Packet | - | | | | P | - | +---------------------+ | | - | | | - +---------------------------------------------------------------+ - - Figure 2: Packet P at node I - - The abstract SR header contains a source route encoded as a list of - segments {SD, SB, SS, SF, SE}, a pointer (Ptr) and the identification - of the ingress and egress SR edge routers (segments SI and SE). - - A segment identifies a topological instruction or a service - instruction. A segment can either be global or local. The - instruction associated with a global segment is recognized and - executed by any SR-capable node in the domain. The instruction - associated with a local segment is only supported by the specific - node that originates it. - - Let us assume some IGP (i.e.: ISIS and OSPF) extensions to define a - "Node Segment" as a global instruction within the IGP domain to - forward a packet along the shortest path to the specified node. Let - us further assume that within the SR domain illustrated in Figure 1, - segments SI, SD, SB, SE and SF respectively identify IGP node - segments to I, D, B, E and F. - - Let us assume that node B identifies its local service S with local - segment SS. - - - -Previdi, et al. Expires June 12, 2015 [Page 5] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - - With all of this in mind, let us describe the journey of the packet - P. - - The packet P reaches the ingress SR edge router. I pushes the SR - header illustrated in Figure 2 and sets the pointer to the first - segment of the list (SD). - - SD is an instruction recognized by all the nodes in the SR domain - which causes the packet to be forwarded along the shortest path to D. - - Once at D, the pointer is incremented and the next segment is - executed (SB). - - SB is an instruction recognized by all the nodes in the SR domain - which causes the packet to be forwarded along the shortest path to B. - - Once at B, the pointer is incremented and the next segment is - executed (SS). - - SS is an instruction only recognized by node B which causes the - packet to receive service S. - - Once the service applied, the next segment is executed (SF) which - causes the packet to be forwarded along the shortest path to F. - - Once at F, the pointer is incremented and the next segment is - executed (SE). - - SE is an instruction recognized by all the nodes in the SR domain - which causes the packet to be forwarded along the shortest path to E. - - E then removes the SR header and the packet continues its journey - outside the SR domain. - - All of the requirements are met. - - First, the packet P has not used links AB and CE: the shortest-path - from I to D is I-A-D, the shortest-path from D to B is D-B, the - shortest-path from B to F is B-C-F and the shortest-path from F to E - is F-E, hence the packet path through the SR domain is I-A-D-B-C-F-E - and the links AB and CE have been avoided. - - Second, the service S supported by B has been applied on packet P. - - Third, any node along the packet path is able to identify the service - and topological journey of the packet within the SR domain. For - example, node C receives the packet illustrated in Figure 3 and hence - is able to infer where the packet entered the SR domain (SI), how it - - - -Previdi, et al. 
Expires June 12, 2015 [Page 6] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - - got up to itself {SD, SB, SS, SE}, where it will exit the SR domain - (SE) and how it will do so {SF, SE}. - - +---------------------------------------------------------------+ - | | | - | SR Header | | - | | | - | {SD, SB, SS, SF, SE}, Ptr, SI, SE | Transported | - | ^ | | Packet | - | | | | P | - | +--------+ | | - | | | - +---------------------------------------------------------------+ - - Figure 3: Packet P at node C - - Fourth, only node I maintains per-flow state for packet P. The - entire program of topological and service instructions to be executed - by the SR domain on packet P is encoded by the ingress edge router I - in the SR header in the form of a list of segments where each segment - identifies a specific instruction. No further per-flow state is - required along the packet path. The per-flow state is in the SR - header and travels with the packet. Intermediate nodes only hold - states related to the IGP global node segments and the local IGP - adjacency segments. These segments are not per-flow specific and - hence scale very well. Typically, an intermediate node would - maintain in the order of 100's to 1000's global node segments and in - the order of 10's to 100 of local adjacency segments. Typically the - SR IGP forwarding table is expected to be much less than 10000 - entries. - - Fifth, the SR header is inserted at the entrance to the domain and - removed at the exit of the operator domain. For security reasons, - the operator can forbid anyone outside its domain to use its intra- - domain SR capability. - -4. Abstract Routing Model - - At the entrance of the SR domain, the ingress SR edge router pushes - the SR header on top of the packet. At the exit of the SR domain, - the egress SR edge router removes the SR header. - - The abstract SR header contains an ordered list of segments, a - pointer identifying the next segment to process and the - identifications of the ingress and egress SR edge routers on the path - of this packet. The pointer identifies the segment that MUST be used - by the receiving router to process the packet. This segment is - called the active segment. - - - -Previdi, et al. Expires June 12, 2015 [Page 7] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - - A property of SR is that the entire source route of the packet, - including the identity of the ingress and egress edge routers is - always available with the packet. This allows for interesting - accounting and service applications. - - We define three SR-header operations: - - "PUSH": an SR header is pushed on an IP packet, or additional - segments are added at the head of the segment list. The pointer - is moved to the first entry of the added segments. - - "NEXT": the active segment is completed, the pointer is moved to - the next segment in the list. - - "CONTINUE": the active segment is not completed, the pointer is - left unchanged. - - In the future, other SR-header management operations may be defined. - - As the packet travels through the SR domain, the pointer is - incremented through the ordered list of segments and the source route - encoded by the SR ingress edge node is executed. - - A node processes an incoming packet according to the instruction - associated with the active segment. - - Any instruction might be associated with a segment: for example, an - intra-domain topological strict or loose forwarding instruction, a - service instruction, etc. 
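   [Editor's illustration, not part of the draft text: the three abstract
   SR-header operations defined above can be sketched in C as simple
   manipulations of an ordered segment list and a pointer. All type and
   function names below are invented for this sketch only.]

      /* Illustrative sketch only; hypothetical names, not from the draft
         or from any implementation. */
      #include <string.h>

      #define SR_MAX_SEGMENTS 16

      typedef struct
      {
        unsigned char segments[SR_MAX_SEGMENTS][16]; /* ordered segment list    */
        int n_segments;                              /* segments currently set  */
        int ptr;                                     /* index of active segment */
      } abstract_sr_header_t;

      /* NEXT: the active segment is completed; move the pointer forward. */
      static void
      sr_op_next (abstract_sr_header_t *h)
      {
        if (h->ptr + 1 < h->n_segments)
          h->ptr++;
      }

      /* CONTINUE: the active segment is not completed; the pointer is
         left unchanged, so there is nothing to do. */
      static void
      sr_op_continue (abstract_sr_header_t *h)
      {
        (void) h;
      }

      /* PUSH: add a segment at the head of the list and move the pointer
         to the newly added entry. */
      static void
      sr_op_push (abstract_sr_header_t *h, const unsigned char seg[16])
      {
        if (h->n_segments >= SR_MAX_SEGMENTS)
          return;               /* no room; a real implementation would signal an error */
        memmove (&h->segments[1], &h->segments[0], (size_t) h->n_segments * 16);
        memcpy (&h->segments[0], seg, 16);
        h->n_segments++;
        h->ptr = 0;
      }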
- - At minimum, a segment instruction must define two elements: the - identity of the next-hop to forward the packet to (this could be the - same node or a context within the node) and which SR-header - management operation to execute. - - Each segment is known in the network through a Segment Identifier - (SID). The terms "segment" and "SID" are interchangeable. - -4.1. Segment Routing Global Block (SRGB) - - In the SR abstract model, a segment is identified by a Segment - Routing Identifier (SID). The SR abstract model doesn't mandate a - specific format for the SID (IPv6 address or other formats). - - In Segment Routing IPv6 the SID is an IPv6 address. Therefore, the - SRGB is materialized by the global IPv6 address space which - represents the set of IPv6 routable addresses in the SR domain. The - following rules apply: - - - -Previdi, et al. Expires June 12, 2015 [Page 8] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - - o Each node of the SR domain MUST be configured with the Segment - Routing Global Block (SRGB). - - o All global segments must be allocated from the SRGB. Any SR - capable node MUST be able to process any global segment advertised - by any other node within the SR domain. - - o Any segment outside the SRGB has a local significance and is - called a "local segment". An SR-capable node MUST be able to - process the local segments it originates. An SR-capable node MUST - NOT support the instruction associated with a local segment - originated by a remote node. - -4.2. Traffic Engineering with SR - - An SR Traffic Engineering policy is composed of two elements: a flow - classification and a segment-list to prepend on the packets of the - flow. - - In SR, this per-flow state only exists at the ingress edge node where - the policy is defined and the SR header is pushed. - - It is outside the scope of the document to define the process that - leads to the instantiation at a node N of an SR Traffic Engineering - policy. - - [I-D.filsfils-spring-segment-routing-use-cases] illustrates various - alternatives: - - N is deriving this policy automatically (e.g. FRR). - - N is provisioned explicitly by the operator. - - N is provisioned by a controller or server (e.g.: SDN Controller). - - N is provisioned by the operator with a high-level policy which is - mapped into a path thanks to a local CSPF-based computation (e.g. - affinity/SRLG exclusion). - - N could also be provisioned by other means. - - [I-D.filsfils-spring-segment-routing-use-cases] explains why the - majority of use-cases require very short segment-lists, hence - minimizing the performance impact, if any, of inserting and - transporting the segment list. - - - - - - -Previdi, et al. Expires June 12, 2015 [Page 9] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - - A SDN controller, which desires to instantiate at node N an SR - Traffic Engineering policy, collects the SR capability of node N such - as to ensure that the policy meets its capability. - -4.3. Segment Routing Database - - The Segment routing Database (SRDB) is a set of entries where each - entry is identified by a SID. The instruction associated with each - entry at least defines the identity of the next-hop to which the - packet should be forwarded and what operation should be performed on - the SR header (PUSH, CONTINUE, NEXT). 
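   [Editor's illustration, not part of the draft text: one possible way to
   model such an SRDB entry in C, keyed by the SID, with a naive linear
   lookup of the active segment. The names are invented for this sketch;
   Figure 4 below gives the draft's abstract view of the same table.]

      /* Hypothetical sketch of an SRDB entry and lookup; names are
         invented and not taken from the draft or from VPP. */
      #include <string.h>

      typedef enum { SR_OP_PUSH, SR_OP_CONTINUE, SR_OP_NEXT } sr_hdr_op_t;

      typedef struct
      {
        unsigned char sid[16];      /* segment identifier (an IPv6 address)  */
        unsigned char next_hop[16]; /* identity of the next-hop for this SID */
        sr_hdr_op_t op;             /* operation to perform on the SR header */
      } srdb_entry_t;

      /* Return the entry for the active segment, or NULL if the SID is
         unknown to this node's local SRDB. */
      static const srdb_entry_t *
      srdb_lookup (const srdb_entry_t *db, int n_entries,
                   const unsigned char sid[16])
      {
        int i;
        for (i = 0; i < n_entries; i++)
          if (memcmp (db[i].sid, sid, 16) == 0)
            return &db[i];
        return 0;
      }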
- - +---------+-----------+---------------------------------+ - | Segment | Next-Hop | SR Header operation | - +---------+-----------+---------------------------------+ - | Sk | M | CONTINUE | - | Sj | N | NEXT | - | Sl | NAT Srvc | NEXT | - | Sm | FW srvc | NEXT | - | Sn | Q | NEXT | - | etc. | etc. | etc. | - +---------+-----------+---------------------------------+ - - Figure 4: SR Database - - Each SR-capable node maintains its local SRDB. SRDB entries can - either derive from local policy or from protocol segment - advertisement. - -5. IPv6 Instantiation of Segment Routing - -5.1. Segment Identifiers (SIDs) and SRGB - - Segment Routing, as described in - [I-D.filsfils-spring-segment-routing], defines Node-SID and - Adjacency-SID. When SR is used over IPv6 data-plane the following - applies. - - The SRGB is the global IPv6 address space which represents the set of - IPv6 routable addresses in the SR domain. - - Node SIDs are IPv6 addresses part of the SRGB (i.e.: routable - addresses). Adjacency-SIDs are IPv6 addresses which may not be part - of the global IPv6 address space. - - - - - - - -Previdi, et al. Expires June 12, 2015 [Page 10] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - -5.1.1. Node-SID - - The Node-SID identifies a node. With SR-IPv6 the Node-SID is an IPv6 - prefix that the operator configured on the node and that is used as - the node identifier. Typically, in case of a router, this is the - IPv6 address of the node loopback interface. Therefore, SR-IPv6 does - not require any additional SID advertisement for the Node Segment. - The Node-SID is in fact the IPv6 address of the node. - -5.1.2. Adjacency-SID - - In the SR architecture defined in - [I-D.filsfils-spring-segment-routing] the Adjacency-SID (or Adj-SID) - identifies a given interface and may be local or global (depending on - how it is advertised). A node may advertise one (or more) Adj-SIDs - allocated to a given interface so to force the forwarding of the - packet (when received with that particular Adj-SID) into the - interface regardless the routing entry for the packet destination. - The semantic of the Adj-SID is: - - Send out the packet to the interface this prefix is allocated to. - - When SR is applied to IPv6, any SID is in a global IPv6 address and - therefore, an Adj-SID has a global significance (i.e.: the IPv6 - address representing the SID is a global address). In other words, a - node that advertises the Adj-SID in the form of a global IPv6 address - representing the link/adjacency the packet has to be forwarded to, - will apply to the Adj-SID a global significance. - - Advertisement of Adj-SID may be done using multiple mechanisms among - which the ones described in ISIS and OSPF protocol extensions: - [I-D.ietf-isis-segment-routing-extensions] and - [I-D.psenak-ospf-segment-routing-ospfv3-extension]. The distinction - between local and global significance of the Adj-SID is given in the - encoding of the Adj-SID advertisement. - -5.2. Segment Routing Extension Header (SRH) - - A new type of the Routing Header (originally defined in [RFC2460]) is - defined: the Segment Routing Header (SRH) which has a new Routing - Type, (suggested value 4) to be assigned by IANA. - - As an example, if an explicit path is to be constructed across a core - network running ISIS or OSPF, the segment list will contain SIDs - representing the nodes across the path (loose or strict) which, - usually, are the IPv6 loopback interface address of each node. 
If - the path is across service or application entities, the segment list - - - - -Previdi, et al. Expires June 12, 2015 [Page 11] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - - contains the IPv6 addresses of these services or application - instances. - - The Segment Routing Header (SRH) is defined as follows: - - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Next Header | Hdr Ext Len | Routing Type | Segments Left | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | First Segment | Flags | HMAC Key ID | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | - | Segment List[0] (128 bits ipv6 address) | - | | - | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | - | | - ... - | | - | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | - | Segment List[n] (128 bits ipv6 address) | - | | - | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | - | Policy List[0] (optional) | - | | - | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | - | Policy List[1] (optional) | - | | - | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | - | Policy List[2] (optional) | - | | - | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | - | | - | | - | HMAC (256 bits) | - - - -Previdi, et al. Expires June 12, 2015 [Page 12] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - - | (optional) | - | | - | | - | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - where: - - o Next Header: 8-bit selector. Identifies the type of header - immediately following the SRH. - - o Hdr Ext Len: 8-bit unsigned integer, is the length of the SRH - header in 8-octet units, not including the first 8 octets. - - o Routing Type: TBD, to be assigned by IANA (suggested value: 4). - - o Segments Left. Defined in [RFC2460], it contains the index, in - the Segment List, of the next segment to inspect. Segments Left - is decremented at each segment and it is used as an index in the - segment list. - - o First Segment: offset in the SRH, not including the first 8 octets - and expressed in 16-octet units, pointing to the last element of - the segment list, which is in fact the first segment of the - segment routing path. - - o Flags: 16 bits of flags. Following flags are defined: - - 1 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - |C|P|R|R| Policy Flags | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - C-flag: Clean-up flag. Set when the SRH has to be removed from - the packet when packet reaches the last segment. - - P-flag: Protected flag. Set when the packet has been rerouted - through FRR mechanism by a SR endpoint node. See Section 6.3 - for more details. - - R-flags. Reserved and for future use. - - Policy Flags. Define the type of the IPv6 addresses encoded - into the Policy List (see below). The following have been - defined: - - - - - -Previdi, et al. Expires June 12, 2015 [Page 13] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - - Bits 4-6: determine the type of the first element after the - segment list. - - Bits 7-9: determine the type of the second element. - - Bits 10-12: determine the type of the third element. - - Bits 13-15: determine the type of the fourth element. - - The following values are used for the type: - - 0x0: Not present. 
If value is set to 0x0, it means the - element represented by these bits is not present. - - 0x1: SR Ingress. - - 0x2: SR Egress. - - 0x3: Original Source Address. - - o HMAC Key ID and HMAC field, and their use are defined in - [I-D.vyncke-6man-segment-routing-security]. - - o Segment List[n]: 128 bit IPv6 addresses representing the nth - segment in the Segment List. The Segment List is encoded starting - from the last segment of the path. I.e., the first element of the - segment list (Segment List [0]) contains the last segment of the - path while the last segment of the Segment List (Segment List[n]) - contains the first segment of the path. The index contained in - "Segments Left" identifies the current active segment. - - o Policy List. Optional addresses representing specific nodes in - the SR path such as: - - SR Ingress: a 128 bit generic identifier representing the - ingress in the SR domain (i.e.: it needs not to be a valid IPv6 - address). - - SR Egress: a 128 bit generic identifier representing the egress - in the SR domain (i.e.: it needs not to be a valid IPv6 - address). - - Original Source Address: IPv6 address originally present in the - SA field of the packet. - - The segments in the Policy List are encoded after the segment list - and they are optional. If none are in the SRH, all bits of the - Policy List Flags MUST be set to 0x0. - - - -Previdi, et al. Expires June 12, 2015 [Page 14] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - -5.2.1. SRH and RFC2460 behavior - - The SRH being a new type of the Routing Header, it also has the same - properties: - - SHOULD only appear once in the packet. - - Only the router whose address is in the DA field of the packet - header MUST inspect the SRH. - - Therefore, Segment Routing in IPv6 networks implies that the segment - identifier (i.e.: the IPv6 address of the segment) is moved into the - DA of the packet. - - The DA of the packet changes at each segment termination/completion - and therefore the original DA of the packet MUST be encoded as the - last segment of the path. - - As illustrated in Section 3.2, nodes that are within the path of a - segment will forward packets based on the DA of the packet without - inspecting the SRH. This ensures full interoperability between SR- - capable and non-SR-capable nodes. - -6. SRH Procedures - - In this section we describe the different procedures on the SRH. - -6.1. Segment Routing Operations - - When Segment Routing is instantiated over the IPv6 data plane the - following applies: - - o The segment list is encoded in the SRH. - - o The active segment is in the destination address of the packet. - - o The Segment Routing CONTINUE operation (as described in - [I-D.filsfils-spring-segment-routing]) is implemented as a - regular/plain IPv6 operation consisting of DA based forwarding. - - o The NEXT operation is implemented through the update of the DA - with the value represented by the Next Segment field in the SRH. - - o The PUSH operation is implemented through the insertion of the SRH - or the insertion of additional segments in the SRH segment list. - - - - - - -Previdi, et al. Expires June 12, 2015 [Page 15] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - -6.2. Segment Routing Node Functions - - SR packets are forwarded to segments endpoints (i.e.: nodes whose - address is in the DA field of the packet). The segment endpoint, - when receiving a SR packet destined to itself, does: - - o Inspect the SRH. 
- - o Determine the next active segment. - - o Update the Segments Left field (or, if requested, remove the SRH - from the packet). - - o Update the DA. - - o Send the packet to the next segment. - - The procedures applied to the SRH are related to the node function. - Following nodes functions are defined: - - Ingress SR Node. - - Transit Non-SR Node. - - Transit SR Intra Segment Node. - - SR Endpoint Node. - -6.2.1. Ingress SR Node - - Ingress Node can be a router at the edge of the SR domain or a SR- - capable host. The ingress SR node may obtain the segment list by - either: - - Local path computation. - - Local configuration. - - Interaction with an SDN controller delivering the path as a - complete SRH. - - Any other mechanism (mechanisms through which the path is acquired - are outside the scope of this document). - - When creating the SRH (either at ingress node or in the SDN - controller) the following is done: - - Next Header and Hdr Ext Len fields are set according to [RFC2460]. - - - -Previdi, et al. Expires June 12, 2015 [Page 16] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - - Routing Type field is set as TBD (SRH). - - The Segment List is built with the FIRST segment of the path - encoded in the LAST element of the Segment List. Subsequent - segments are encoded on top of the first segment. Finally, the - LAST segment of the path is encoded in the FIRST element of the - Segment List. In other words, the Segment List is encoded in the - reverse order of the path. - - The original DA of the packet is encoded as the last segment of - the path (encoded in the first element of the Segment List). - - the DA of the packet is set with the value of the first segment - (found in the last element of the segment list). - - the Segments Left field is set to n-1 where n is the number of - elements in the Segment List. - - The packet is sent out towards the first segment (i.e.: - represented in the packet DA). - -6.2.1.1. Security at Ingress - - The procedures related to the Segment Routing security are detailed - in [I-D.vyncke-6man-segment-routing-security]. - - In the case where the SR domain boundaries are not under control of - the network operator (e.g.: when the SR domain edge is in a home - network), it is important to authenticate and validate the content of - any SRH being received by the network operator. In such case, the - security procedure described in - [I-D.vyncke-6man-segment-routing-security] is to be used. - - The ingress node (e.g.: the host in the home network) requests the - SRH from a control system (e.g.: an SDN controller) which delivers - the SRH with its HMAC signature on it. - - Then, the home network host can send out SR packets (with an SRH on - it) that will be validated at the ingress of the network operator - infrastructure. - - The ingress node of the network operator infrastructure, is - configured in order to validate the incoming SRH HMACs in order to - allow only packets having correct SRH according to their SA/DA - addresses. - - - - - - -Previdi, et al. Expires June 12, 2015 [Page 17] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - -6.2.2. Transit Non-SR Capable Node - - SR is interoperable with plain IPv6 forwarding. Any non SR-capable - node will forward SR packets solely based on the DA. There's no SRH - inspection. This ensures full interoperability between SR and non-SR - nodes. - -6.2.3. 
SR Intra Segment Transit Node - - Only the node whose address is in DA inspects and processes the SRH - (according to [RFC2460]). An intra segment transit node is not in - the DA and its forwarding is based on DA and its SR-IPv6 FIB. - -6.2.4. SR Segment Endpoint Node - - The SR segment endpoint node is the node whose address is in the DA. - The segment endpoint node inspects the SRH and does: - - 1. IF DA = myself (segment endpoint) - 2. IF Segments Left > 0 THEN - decrement Segments Left - update DA with Segment List[Segments Left] - 3. ELSE IF Segments List[Segments Left] <> DA THEN - update DA with Segments List[Segments Left] - IF Clean-up bit is set THEN remove the SRH - 4. ELSE give the packet to next PID (application) - End of processing. - 5. Forward the packet out - -6.3. FRR Flag Settings - - A node supporting SR and doing Fast Reroute (as described in - [I-D.filsfils-spring-segment-routing-use-cases], when rerouting - packets through FRR mechanisms, SHOULD inspect the rerouted packet - header and look for the SRH. If the SRH is present, the rerouting - node SHOULD set the Protected bit on all rerouted packets. - -7. SR and Tunneling - - Encapsulation can be realized in two different ways with SR-IPv6: - - Outer encapsulation. - - SRH with SA/DA original addresses. - - Outer encapsulation tunneling is the traditional method where an - additional IPv6 header is prepended to the packet. The original IPv6 - header being encapsulated, everything is preserved and the packet is - - - -Previdi, et al. Expires June 12, 2015 [Page 18] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - - switched/routed according to the outer header (that could contain a - SRH). - - SRH allows encoding both original SA and DA, hence an operator may - decide to change the SA/DA at ingress and restore them at egress. - This can be achieved without outer encapsulation, by changing SA/DA - and encoding the original SA in the Policy List and in the original - DA in the Segment List. - -8. Example Use Case - - A more detailed description of use cases are available in - [I-D.ietf-spring-ipv6-use-cases]. In this section, a simple SR-IPv6 - example is illustrated. - - In the topology described in Figure 6 it is assumed an end-to-end SR - deployment. Therefore SR is supported by all nodes from A to J. - - Home Network | Backbone | Datacenter - | | - | +---+ +---+ +---+ | +---+ | - +---|---| C |---| D |---| E |---|---| I |---| - | | +---+ +---+ +---+ | +---+ | - | | | | | | | | +---+ - +---+ +---+ | | | | | | |--| X | - | A |---| B | | +---+ +---+ +---+ | +---+ | +---+ - +---+ +---+ | | F |---| G |---| H |---|---| J |---| - | +---+ +---+ +---+ | +---+ | - | | - | +-----------+ - | SDN | - | Orch/Ctlr | - +-----------+ - - Figure 6: Sample SR topology - - The following workflow applies to packets sent by host A and destined - to server X. - - - - - - - - - - - - - -Previdi, et al. Expires June 12, 2015 [Page 19] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - - . Host A sends a request for a path to server X to the SDN - controller or orchestration system. - - . The SDN controller/orchestrator builds a SRH with: - . Segment List: C, F, J, X - . HMAC - that satisfies the requirements expressed in the request - by host A and based on policies applicable to host A. - - . Host A receives the SRH and insert it into the packet. - The packet has now: - . SA: A - . DA: C - . SRH with - . SL: X, J, F, C - . Segments Left: 3 (i.e.: Segment List size - 1) - . 
PL: C (ingress), J (egress) - Note that X is the last segment and C is the - first segment (i.e.: the SL is encoded in the reverse - path order). - . HMAC - - . When packet arrives in C (first segment), C does: - . Validate the HMAC of the SRH. - . Decrement Segments Left by one: 2 - . Update the DA with the next segment found in - Segment List[2]. DA is set to F. - . Forward the packet to F. - - . When packet arrives in F (second segment), F does: - . Decrement Segments Left by one: 1 - . Update the DA with the next segment found in - Segment List[1]. DA is set to J. - . Forward the packet to J. - - . Packet travels across G and H nodes which do plain - IPv6 forwarding based on DA. No inspection of SRH needs - to be done in these nodes. However, any SR capable node - is allowed to set the Protected bit in case of FRR - protection. - - . When packet arrives in J (third segment), J does: - . Decrement Segments Left by one: 0 - . Update the DA with the next segment found in - Segment List[0]. DA is set to X. - . If the cleanup bit is set, then node J will strip out - the SRH from the packet. - . Forward the packet to X. - - - -Previdi, et al. Expires June 12, 2015 [Page 20] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - - The packet arrives in the server that may or may not support SR. The - return traffic, from server to host, may be sent using the same - procedures. - -9. IANA Considerations - - TBD - -10. Manageability Considerations - - TBD - -11. Security Considerations - - Security mechanisms applied to Segment Routing over IPv6 networks are - detailed in [I-D.vyncke-6man-segment-routing-security]. - -12. Contributors - - The authors would like to thank Dave Barach, John Leddy, John - Brzozowski, Pierre Francois, Nagendra Kumar, Mark Townsley, Christian - Martin, Roberta Maglione, Eric Vyncke, James Connolly, David Lebrun - and Fred Baker for their contribution to this document. - -13. Acknowledgements - - TBD - -14. References - -14.1. Normative References - - [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate - Requirement Levels", BCP 14, RFC 2119, March 1997. - - [RFC2460] Deering, S. and R. Hinden, "Internet Protocol, Version 6 - (IPv6) Specification", RFC 2460, December 1998. - -14.2. Informative References - - [I-D.filsfils-spring-segment-routing] - Filsfils, C., Previdi, S., Bashandy, A., Decraene, B., - Litkowski, S., Horneffer, M., Milojevic, I., Shakir, R., - Ytti, S., Henderickx, W., Tantsura, J., and E. Crabbe, - "Segment Routing Architecture", draft-filsfils-spring- - segment-routing-04 (work in progress), July 2014. - - - - - -Previdi, et al. Expires June 12, 2015 [Page 21] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - - [I-D.filsfils-spring-segment-routing-mpls] - Filsfils, C., Previdi, S., Bashandy, A., Decraene, B., - Litkowski, S., Horneffer, M., Milojevic, I., Shakir, R., - Ytti, S., Henderickx, W., Tantsura, J., and E. Crabbe, - "Segment Routing with MPLS data plane", draft-filsfils- - spring-segment-routing-mpls-03 (work in progress), August - 2014. - - [I-D.filsfils-spring-segment-routing-use-cases] - Filsfils, C., Francois, P., Previdi, S., Decraene, B., - Litkowski, S., Horneffer, M., Milojevic, I., Shakir, R., - Ytti, S., Henderickx, W., Tantsura, J., Kini, S., and E. - Crabbe, "Segment Routing Use Cases", draft-filsfils- - spring-segment-routing-use-cases-01 (work in progress), - October 2014. 
- - [I-D.ietf-isis-segment-routing-extensions] - Previdi, S., Filsfils, C., Bashandy, A., Gredler, H., - Litkowski, S., Decraene, B., and J. Tantsura, "IS-IS - Extensions for Segment Routing", draft-ietf-isis-segment- - routing-extensions-03 (work in progress), October 2014. - - [I-D.ietf-spring-ipv6-use-cases] - Brzozowski, J., Leddy, J., Leung, I., Previdi, S., - Townsley, W., Martin, C., Filsfils, C., and R. Maglione, - "IPv6 SPRING Use Cases", draft-ietf-spring-ipv6-use- - cases-03 (work in progress), November 2014. - - [I-D.psenak-ospf-segment-routing-ospfv3-extension] - Psenak, P., Previdi, S., Filsfils, C., Gredler, H., - Shakir, R., Henderickx, W., and J. Tantsura, "OSPFv3 - Extensions for Segment Routing", draft-psenak-ospf- - segment-routing-ospfv3-extension-02 (work in progress), - July 2014. - - [I-D.vyncke-6man-segment-routing-security] - Vyncke, E. and S. Previdi, "IPv6 Segment Routing Header - (SRH) Security Considerations", July 2014. - - [RFC1940] Estrin, D., Li, T., Rekhter, Y., Varadhan, K., and D. - Zappala, "Source Demand Routing: Packet Format and - Forwarding Specification (Version 1)", RFC 1940, May 1996. - -Authors' Addresses - - - - - - - -Previdi, et al. Expires June 12, 2015 [Page 22] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - - Stefano Previdi (editor) - Cisco Systems, Inc. - Via Del Serafico, 200 - Rome 00142 - Italy - - Email: sprevidi@cisco.com - - - Clarence Filsfils - Cisco Systems, Inc. - Brussels - BE - - Email: cfilsfil@cisco.com - - - Brian Field - Comcast - 4100 East Dry Creek Road - Centennial, CO 80122 - US - - Email: Brian_Field@cable.comcast.com - - - Ida Leung - Rogers Communications - 8200 Dixie Road - Brampton, ON L6T 0C1 - CA - - Email: Ida.Leung@rci.rogers.com diff --git a/src/vnet/sr/sr.api b/src/vnet/sr/sr.api index 3d017ce5..5feadcb0 100644 --- a/src/vnet/sr/sr.api +++ b/src/vnet/sr/sr.api @@ -12,108 +12,207 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - -/** \brief IPv6 segment routing tunnel add / del request + +/** \brief IPv6 SR LocalSID add/del request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_del Boolean of whether its a delete instruction + @param localsid_addr IPv6 address of the localsid + @param end_psp Boolean of whether decapsulation is allowed in this function + @param behavior Type of behavior (function) for this localsid + @param sw_if_index Only for L2/L3 xconnect. OIF. In VRF variant the fib_table. + @param vlan_index Only for L2 xconnect. Outgoing VLAN tag. + @param fib_table FIB table in which we should install the localsid entry + @param nh_addr Next Hop IPv4/IPv6 address. Only for L2/L3 xconnect. +*/ +define sr_localsid_add_del +{ + u32 client_index; + u32 context; + u8 is_del; + u8 localsid_addr[16]; + u8 end_psp; + u8 behavior; + u32 sw_if_index; + u32 vlan_index; + u32 fib_table; + u8 nh_addr[16]; +}; + +/** \brief IPv6 SR LocalSID add/del request response + @param context - sender context, to match reply w/ request + @param retval - return value for request +*/ +define sr_localsid_add_del_reply +{ + u32 context; + i32 retval; +}; + +/** \brief IPv6 SR policy add + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param bsid is the bindingSID of the SR Policy + @param weight is the weight of the sid list. optional. 
+ @param is_encap is the behavior of the SR policy. (0.SRH insert // 1.Encapsulation) + @param type is the type of the SR policy. (0.Default // 1.Spray) + @param fib_table is the VRF where to install the FIB entry for the BSID + @param segments is a vector of IPv6 address composing the segment list +*/ +define sr_policy_add +{ + u32 client_index; + u32 context; + u8 bsid_addr[16]; + u32 weight; + u8 is_encap; + u8 type; + u32 fib_table; + u8 n_segments; + u8 segments[0]; +}; + +/** \brief IPv6 SR Policy add request response + @param context - sender context, to match reply w/ request + @param retval - return value for request +*/ +define sr_policy_add_reply +{ + u32 context; + i32 retval; +}; + +/** \brief IPv6 SR policy modification @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request - @param is_add - add the tunnel if non-zero, else delete it - @param name[] - tunnel name (len. 64) - @param src_address[] - - @param dst_address[] - - @param dst_mask_width - - @param inner_vrf_id - - @param outer_vrf_id - - @param flags_net_byte_order - - @param n_segments - - @param n_tags - - @param segs_and_tags[] - - @param policy_name[] - name of policy to associate this tunnel to (len. 64) + @param bsid is the bindingSID of the SR Policy + @param sr_policy_index is the index of the SR policy + @param fib_table is the VRF where to install the FIB entry for the BSID + @param operation is the operation to perform (among the top ones) + @param segments is a vector of IPv6 address composing the segment list + @param sl_index is the index of the Segment List to modify/delete + @param weight is the weight of the sid list. optional. + @param is_encap Mode. Encapsulation or SRH insertion. */ -define sr_tunnel_add_del +define sr_policy_mod { u32 client_index; u32 context; - u8 is_add; - u8 name[64]; - u8 src_address[16]; - u8 dst_address[16]; - u8 dst_mask_width; - u32 inner_vrf_id; - u32 outer_vrf_id; - u16 flags_net_byte_order; + u8 bsid_addr[16]; + u32 sr_policy_index; + u32 fib_table; + u8 operation; + u32 sl_index; + u32 weight; u8 n_segments; - u8 n_tags; - u8 policy_name[64]; - u8 segs_and_tags[0]; + u8 segments[0]; }; -/** \brief IPv6 segment routing tunnel add / del response +/** \brief IPv6 SR Policy modification request response @param context - sender context, to match reply w/ request @param retval - return value for request */ -define sr_tunnel_add_del_reply +define sr_policy_mod_reply { u32 context; i32 retval; }; -/** \brief IPv6 segment routing policy add / del request +/** \brief IPv6 SR policy deletion @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request - @param is_add - add the tunnel if non-zero, else delete it - @param name[] - policy name (len. 
64) - @param tunnel_names[] - + @param bsid is the bindingSID of the SR Policy + @param index is the index of the SR policy */ -define sr_policy_add_del +define sr_policy_del { u32 client_index; u32 context; - u8 is_add; - u8 name[64]; - u8 tunnel_names[0]; + u8 bsid_addr[16]; + u32 sr_policy_index; }; -/** \brief IPv6 segment routing policy add / del response +/** \brief IPv6 SR Policy deletion request response @param context - sender context, to match reply w/ request - @param retval - return value for request - - + @param retval - return value for request */ -define sr_policy_add_del_reply +define sr_policy_del_reply { u32 context; i32 retval; }; -/** \brief IPv6 segment routing multicast map to policy add / del request +/** \brief IPv6 SR steering add/del @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request - @param is_add - add the tunnel if non-zero, else delete it - @param multicast_address[] - IP6 multicast address - @param policy_name[] = policy name (len.64) + @param is_del + @param bsid is the bindingSID of the SR Policy (alt to sr_policy_index) + @param sr_policy is the index of the SR Policy (alt to bsid) + @param table_id is the VRF where to install the FIB entry for the BSID + @param prefix is the IPv4/v6 address for L3 traffic type + @param mask_width is the mask for L3 traffic type + @param sw_if_index is the incoming interface for L2 traffic + @param traffic_type describes the type of traffic */ -define sr_multicast_map_add_del +define sr_steering_add_del { u32 client_index; u32 context; - u8 is_add; - u8 multicast_address[16]; - u8 policy_name[64]; + u8 is_del; + u8 bsid_addr[16]; + u32 sr_policy_index; + u32 table_id; + u8 prefix_addr[16]; + u32 mask_width; + u32 sw_if_index; + u8 traffic_type; }; -/** \brief IPv6 segment routing multicast map to policy add / del response +/** \brief IPv6 SR steering add/del request response @param context - sender context, to match reply w/ request @param retval - return value for request */ -define sr_multicast_map_add_del_reply +define sr_steering_add_del_reply { u32 context; i32 retval; }; +/** \brief Dump the list of SR LocalSIDs + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +/**define sr_localsids_dump +{ + u32 client_index; + u32 context; +};*/ + +/** \brief Details about a single SR LocalSID + @param context - returned sender context, to match reply w/ request + @param localsid_addr IPv6 address of the localsid + @param behavior Type of behavior (function) for this localsid + @param end_psp Boolean of whether decapsulation is allowed in this function + @param sw_if_index Only for L2/L3 xconnect. OIF. In VRF variant the fib_table. + @param vlan_index Only for L2 xconnect. Outgoing VLAN tag. + @param fib_table FIB table in which we should install the localsid entry + @param nh_addr Next Hop IPv4/IPv6 address. Only for L2/L3 xconnect. 
+*/ +/**manual_endian define sr_localsid_details +{ + u32 context; + u8 localsid_addr[16]; + u8 behavior; + u8 end_psp; + u32 sw_if_index; + u32 vlan_index; + u32 fib_table; + u8 nh_addr[16]; +};*/ + /* + * fd.io coding-style-patch-verification: ON * Local Variables: * eval: (c-set-style "gnu") * End: */ - \ No newline at end of file diff --git a/src/vnet/sr/sr.c b/src/vnet/sr/sr.c old mode 100644 new mode 100755 index 012d4542..34344fce --- a/src/vnet/sr/sr.c +++ b/src/vnet/sr/sr.c @@ -17,9 +17,10 @@ /** * @file - * @brief Segment Routing main functions + * @brief Segment Routing initialization * */ + #include #include #include @@ -27,1179 +28,13 @@ #include #include -#include - ip6_sr_main_t sr_main; -static vlib_node_registration_t sr_local_node; - -/** - * @brief Dynamically added SR DPO type - */ -static dpo_type_t sr_dpo_type; - -/** - * @brief Dynamically added SR FIB Node type - */ -static fib_node_type_t sr_fib_node_type; - -/** - * @brief Use passed HMAC key in ip6_sr_header_t in OpenSSL HMAC routines - * - * @param sm ip6_sr_main_t * - * @param ip ip6_header_t * - * @param sr ip6_sr_header_t * - */ -void -sr_fix_hmac (ip6_sr_main_t * sm, ip6_header_t * ip, ip6_sr_header_t * sr) -{ - u32 key_index; - static u8 *keybuf; - u8 *copy_target; - int first_segment; - ip6_address_t *addrp; - int i; - ip6_sr_hmac_key_t *hmac_key; - u32 sig_len; - - key_index = sr->hmac_key; - - /* No signature? Pass... */ - if (key_index == 0) - return; - - /* We don't know about this key? Fail... */ - if (key_index >= vec_len (sm->hmac_keys)) - return; - - hmac_key = sm->hmac_keys + key_index; - - vec_reset_length (keybuf); - - /* pkt ip6 src address */ - vec_add2 (keybuf, copy_target, sizeof (ip6_address_t)); - clib_memcpy (copy_target, ip->src_address.as_u8, sizeof (ip6_address_t)); - - /* first segment */ - vec_add2 (keybuf, copy_target, 1); - copy_target[0] = sr->first_segment; - - /* octet w/ bit 0 = "clean" flag */ - vec_add2 (keybuf, copy_target, 1); - copy_target[0] - = (sr->flags & clib_host_to_net_u16 (IP6_SR_HEADER_FLAG_CLEANUP)) - ? 
0x80 : 0; - - /* hmac key id */ - vec_add2 (keybuf, copy_target, 1); - copy_target[0] = sr->hmac_key; - - first_segment = sr->first_segment; - - addrp = sr->segments; - - /* segments */ - for (i = 0; i <= first_segment; i++) - { - vec_add2 (keybuf, copy_target, sizeof (ip6_address_t)); - clib_memcpy (copy_target, addrp->as_u8, sizeof (ip6_address_t)); - addrp++; - } - - addrp++; - - HMAC_CTX_init (sm->hmac_ctx); - if (!HMAC_Init (sm->hmac_ctx, hmac_key->shared_secret, - vec_len (hmac_key->shared_secret), sm->md)) - clib_warning ("barf1"); - if (!HMAC_Update (sm->hmac_ctx, keybuf, vec_len (keybuf))) - clib_warning ("barf2"); - if (!HMAC_Final (sm->hmac_ctx, (unsigned char *) addrp, &sig_len)) - clib_warning ("barf3"); - HMAC_CTX_cleanup (sm->hmac_ctx); -} - -/** - * @brief Format function for decoding various SR flags - * - * @param s u8 * - formatted string - * @param args va_list * - u16 flags - * - * @return formatted output string u8 * - */ -u8 * -format_ip6_sr_header_flags (u8 * s, va_list * args) -{ - u16 flags = (u16) va_arg (*args, int); - u8 pl_flag; - int bswap_needed = va_arg (*args, int); - int i; - - if (bswap_needed) - flags = clib_host_to_net_u16 (flags); - - if (flags & IP6_SR_HEADER_FLAG_CLEANUP) - s = format (s, "cleanup "); - - if (flags & IP6_SR_HEADER_FLAG_PROTECTED) - s = format (s, "reroute "); - - s = format (s, "pl: "); - for (i = 1; i <= 4; i++) - { - pl_flag = ip6_sr_policy_list_flags (flags, i); - s = format (s, "[%d] ", i); - - switch (pl_flag) - { - case IP6_SR_HEADER_FLAG_PL_ELT_NOT_PRESENT: - s = format (s, "NotPr "); - break; - case IP6_SR_HEADER_FLAG_PL_ELT_INGRESS_PE: - s = format (s, "InPE "); - break; - case IP6_SR_HEADER_FLAG_PL_ELT_EGRESS_PE: - s = format (s, "EgPE "); - break; - - case IP6_SR_HEADER_FLAG_PL_ELT_ORIG_SRC_ADDR: - s = format (s, "OrgSrc "); - break; - } - } - return s; -} - -/** - * @brief Format function for decoding ip6_sr_header_t - * - * @param s u8 * - formatted string - * @param args va_list * - ip6_sr_header_t - * - * @return formatted output string u8 * - */ -u8 * -format_ip6_sr_header (u8 * s, va_list * args) -{ - ip6_sr_header_t *h = va_arg (*args, ip6_sr_header_t *); - ip6_address_t placeholder_addr = - { {254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, - 254, 254} - }; - int print_hmac = va_arg (*args, int); - int i, pl_index, max_segs; - int flags_host_byte_order = clib_net_to_host_u16 (h->flags); - - s = format (s, "next proto %d, len %d, type %d", - h->protocol, (h->length << 3) + 8, h->type); - s = format (s, "\n segs left %d, first_segment %d, hmac key %d", - h->segments_left, h->first_segment, h->hmac_key); - s = format (s, "\n flags %U", format_ip6_sr_header_flags, - flags_host_byte_order, 0 /* bswap needed */ ); - - /* - * Header length is in 8-byte units (minus one), so - * divide by 2 to ascertain the number of ip6 addresses in the - * segment list - */ - max_segs = (h->length >> 1); - - if (!print_hmac && h->hmac_key) - max_segs -= 2; - - s = format (s, "\n Segments (in processing order):"); - - for (i = h->first_segment; i >= 1; i--) - s = format (s, "\n %U", format_ip6_address, h->segments + i); - if (ip6_address_is_equal (&placeholder_addr, h->segments)) - s = format (s, "\n (empty placeholder)"); - else - s = format (s, "\n %U", format_ip6_address, h->segments); - - s = format (s, "\n Policy List:"); - - pl_index = 1; /* to match the RFC text */ - for (i = (h->first_segment + 1); i < max_segs; i++, pl_index++) - { - char *tag; - char *tags[] = { " ", "InPE: ", "EgPE: ", "OrgSrc: " }; - - 
tag = tags[0]; - if (pl_index >= 1 && pl_index <= 4) - { - int this_pl_flag = ip6_sr_policy_list_flags - (flags_host_byte_order, pl_index); - tag = tags[this_pl_flag]; - } - - s = format (s, "\n %s%U", tag, format_ip6_address, h->segments + i); - } - - return s; -} - -/** - * @brief Format function for decoding ip6_sr_header_t with length - * - * @param s u8 * - formatted string - * @param args va_list * - ip6_header_t + ip6_sr_header_t - * - * @return formatted output string u8 * - */ -u8 * -format_ip6_sr_header_with_length (u8 * s, va_list * args) -{ - ip6_header_t *h = va_arg (*args, ip6_header_t *); - u32 max_header_bytes = va_arg (*args, u32); - uword header_bytes; - - header_bytes = sizeof (h[0]) + sizeof (ip6_sr_header_t); - if (max_header_bytes != 0 && header_bytes > max_header_bytes) - return format (s, "ip6_sr header truncated"); - - s = format (s, "IP6: %U\n", format_ip6_header, h, max_header_bytes); - s = - format (s, "SR: %U\n", format_ip6_sr_header, (ip6_sr_header_t *) (h + 1), - 0 /* print_hmac */ , max_header_bytes); - return s; -} - -/** - * @brief Defined valid next nodes -*/ -#define foreach_sr_rewrite_next \ -_(ERROR, "error-drop") \ -_(SR_LOCAL, "sr-local") - -/** - * @brief Struct for defined valid next nodes -*/ -typedef enum -{ -#define _(s,n) SR_REWRITE_NEXT_##s, - foreach_sr_rewrite_next -#undef _ - SR_REWRITE_N_NEXT, -} sr_rewrite_next_t; - -/** - * @brief Struct for data for SR rewrite packet trace - */ -typedef struct -{ - ip6_address_t src, dst; - u16 length; - u32 next_index; - u32 tunnel_index; - u8 sr[256]; -} sr_rewrite_trace_t; - -/** - * @brief Error strings for SR rewrite - */ -static char *sr_rewrite_error_strings[] = { -#define sr_error(n,s) s, -#include "sr_error.def" -#undef sr_error -}; - -/** - * @brief Struct for SR rewrite error strings - */ -typedef enum -{ -#define sr_error(n,s) SR_REWRITE_ERROR_##n, -#include "sr_error.def" -#undef sr_error - SR_REWRITE_N_ERROR, -} sr_rewrite_error_t; - - -/** - * @brief Format function for SR rewrite trace. - */ -u8 * -format_sr_rewrite_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - sr_rewrite_trace_t *t = va_arg (*args, sr_rewrite_trace_t *); - ip6_sr_main_t *sm = &sr_main; - ip6_sr_tunnel_t *tun = pool_elt_at_index (sm->tunnels, t->tunnel_index); - ip6_fib_t *rx_fib, *tx_fib; - - rx_fib = ip6_fib_get (tun->rx_fib_index); - tx_fib = ip6_fib_get (tun->tx_fib_index); - - s = format - (s, "SR-REWRITE: next %s ip6 src %U dst %U len %u\n" - " rx-fib-id %d tx-fib-id %d\n%U", - (t->next_index == SR_REWRITE_NEXT_SR_LOCAL) - ? 
"sr-local" : "ip6-lookup", - format_ip6_address, &t->src, - format_ip6_address, &t->dst, t->length, - rx_fib->table_id, tx_fib->table_id, - format_ip6_sr_header, t->sr, 0 /* print_hmac */ ); - return s; -} - -/** - * @brief Main processing dual-loop for Segment Routing Rewrite - * @node sr-rewrite - * - * @param vm vlib_main_t * - * @param node vlib_node_runtime_t * - * @param from_frame vlib_frame_t * - * - * @return from_frame->n_vectors uword - */ -static uword -sr_rewrite (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * from_frame) -{ - u32 n_left_from, next_index, *from, *to_next; - ip6_sr_main_t *sm = &sr_main; - u32 (*sr_local_cb) (vlib_main_t *, vlib_node_runtime_t *, - vlib_buffer_t *, ip6_header_t *, ip6_sr_header_t *); - sr_local_cb = sm->sr_local_cb; - - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - - next_index = node->cached_next_index; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - /* Note 2x loop disabled */ - while (0 && n_left_from >= 4 && n_left_to_next >= 2) - { - u32 bi0, bi1; - vlib_buffer_t *b0, *b1; - ip6_header_t *ip0, *ip1; - ip6_sr_header_t *sr0, *sr1; - ip6_sr_tunnel_t *t0, *t1; - u32 next0; - u32 next1; - u16 new_l0 = 0; - u16 new_l1 = 0; - - /* Prefetch next iteration. */ - { - vlib_buffer_t *p2, *p3; - - p2 = vlib_get_buffer (vm, from[2]); - p3 = vlib_get_buffer (vm, from[3]); - - vlib_prefetch_buffer_header (p2, LOAD); - vlib_prefetch_buffer_header (p3, LOAD); - } - - bi0 = from[0]; - bi1 = from[1]; - to_next[0] = bi0; - to_next[1] = bi1; - from += 2; - to_next += 2; - n_left_to_next -= 2; - n_left_from -= 2; - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - - /* - * $$$ parse through header(s) to pick the point - * where we punch in the SR extention header - */ - t0 = - pool_elt_at_index (sm->tunnels, - vnet_buffer (b0)->ip.adj_index[VLIB_TX]); - t1 = - pool_elt_at_index (sm->tunnels, - vnet_buffer (b1)->ip.adj_index[VLIB_TX]); - - ASSERT (VLIB_BUFFER_PRE_DATA_SIZE - >= ((word) vec_len (t0->rewrite)) + b0->current_data); - ASSERT (VLIB_BUFFER_PRE_DATA_SIZE - >= ((word) vec_len (t1->rewrite)) + b1->current_data); - - vnet_buffer (b0)->sw_if_index[VLIB_TX] = t0->tx_fib_index; - vnet_buffer (b1)->sw_if_index[VLIB_TX] = t1->tx_fib_index; - - ip0 = vlib_buffer_get_current (b0); - ip1 = vlib_buffer_get_current (b1); - - /* - * SR-unaware service chaining case: pkt coming back from - * service has the original dst address, and will already - * have an SR header. If so, send it to sr-local - */ - if (PREDICT_FALSE (ip0->protocol == IPPROTO_IPV6_ROUTE)) - { - vlib_buffer_advance (b0, sizeof (ip0)); - sr0 = (ip6_sr_header_t *) (ip0 + 1); - new_l0 = clib_net_to_host_u16 (ip0->payload_length); - next0 = SR_REWRITE_NEXT_SR_LOCAL; - } - else - { - u32 len_bytes = sizeof (ip6_header_t); - u8 next_hdr = ip0->protocol; - - /* HBH must immediately follow ipv6 header */ - if (PREDICT_FALSE - (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) - { - ip6_hop_by_hop_ext_t *ext_hdr = - (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0); - len_bytes += - ip6_ext_header_len ((ip6_ext_header_t *) ext_hdr); - /* Ignoring the sr_local for now, if RH follows HBH here */ - next_hdr = ext_hdr->next_hdr; - ext_hdr->next_hdr = IPPROTO_IPV6_ROUTE; - } - else - { - ip0->protocol = IPPROTO_IPV6_ROUTE; /* routing extension header */ - } - /* - * Copy data before the punch-in point left by the - * required amount. 
Assume (for the moment) that only - * the main packet header needs to be copied. - */ - clib_memcpy (((u8 *) ip0) - vec_len (t0->rewrite), - ip0, len_bytes); - vlib_buffer_advance (b0, -(word) vec_len (t0->rewrite)); - ip0 = vlib_buffer_get_current (b0); - sr0 = (ip6_sr_header_t *) ((u8 *) ip0 + len_bytes); - /* $$$ tune */ - clib_memcpy (sr0, t0->rewrite, vec_len (t0->rewrite)); - - /* Fix the next header chain */ - sr0->protocol = next_hdr; - - new_l0 = clib_net_to_host_u16 (ip0->payload_length) + - vec_len (t0->rewrite); - ip0->payload_length = clib_host_to_net_u16 (new_l0); - - /* Copy dst address into the DA slot in the segment list */ - clib_memcpy (sr0->segments, ip0->dst_address.as_u64, - sizeof (ip6_address_t)); - /* Rewrite the ip6 dst address with the first hop */ - clib_memcpy (ip0->dst_address.as_u64, t0->first_hop.as_u64, - sizeof (ip6_address_t)); - - sr_fix_hmac (sm, ip0, sr0); - - vnet_buffer (b0)->ip.adj_index[VLIB_TX] = - t0->first_hop_dpo.dpoi_index; - next0 = t0->first_hop_dpo.dpoi_next_node; - next0 = (sr_local_cb ? - sr_local_cb (vm, node, b0, ip0, sr0) : next0); - - /* - * Ignore "do not rewrite" shtik in this path - */ - if (PREDICT_FALSE (next0 & 0x80000000)) - { - next0 ^= 0xFFFFFFFF; - if (PREDICT_FALSE (next0 == SR_REWRITE_NEXT_ERROR)) - b0->error = node->errors[SR_REWRITE_ERROR_APP_CALLBACK]; - } - } - - if (PREDICT_FALSE (ip1->protocol == IPPROTO_IPV6_ROUTE)) - { - vlib_buffer_advance (b1, sizeof (ip1)); - sr1 = (ip6_sr_header_t *) (ip1 + 1); - new_l1 = clib_net_to_host_u16 (ip1->payload_length); - next1 = SR_REWRITE_NEXT_SR_LOCAL; - } - else - { - u32 len_bytes = sizeof (ip6_header_t); - u8 next_hdr = ip1->protocol; - - /* HBH must immediately follow ipv6 header */ - if (PREDICT_FALSE - (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) - { - ip6_hop_by_hop_ext_t *ext_hdr = - (ip6_hop_by_hop_ext_t *) ip6_next_header (ip1); - len_bytes += - ip6_ext_header_len ((ip6_ext_header_t *) ext_hdr); - /* Ignoring the sr_local for now, if RH follows HBH here */ - next_hdr = ext_hdr->next_hdr; - ext_hdr->next_hdr = IPPROTO_IPV6_ROUTE; - } - else - { - ip1->protocol = IPPROTO_IPV6_ROUTE; - } - /* - * Copy data before the punch-in point left by the - * required amount. Assume (for the moment) that only - * the main packet header needs to be copied. - */ - clib_memcpy (((u8 *) ip1) - vec_len (t1->rewrite), - ip1, len_bytes); - vlib_buffer_advance (b1, -(word) vec_len (t1->rewrite)); - ip1 = vlib_buffer_get_current (b1); - sr1 = (ip6_sr_header_t *) ((u8 *) ip1 + len_bytes); - clib_memcpy (sr1, t1->rewrite, vec_len (t1->rewrite)); - - sr1->protocol = next_hdr; - new_l1 = clib_net_to_host_u16 (ip1->payload_length) + - vec_len (t1->rewrite); - ip1->payload_length = clib_host_to_net_u16 (new_l1); - - /* Copy dst address into the DA slot in the segment list */ - clib_memcpy (sr1->segments, ip1->dst_address.as_u64, - sizeof (ip6_address_t)); - /* Rewrite the ip6 dst address with the first hop */ - clib_memcpy (ip1->dst_address.as_u64, t1->first_hop.as_u64, - sizeof (ip6_address_t)); - - sr_fix_hmac (sm, ip1, sr1); - - vnet_buffer (b1)->ip.adj_index[VLIB_TX] = - t1->first_hop_dpo.dpoi_index; - next1 = t1->first_hop_dpo.dpoi_next_node; - next1 = (sr_local_cb ? 
- sr_local_cb (vm, node, b1, ip1, sr1) : next1); - - /* - * Ignore "do not rewrite" shtik in this path - */ - if (PREDICT_FALSE (next1 & 0x80000000)) - { - next1 ^= 0xFFFFFFFF; - if (PREDICT_FALSE (next1 == SR_REWRITE_NEXT_ERROR)) - b1->error = node->errors[SR_REWRITE_ERROR_APP_CALLBACK]; - } - } - - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_rewrite_trace_t *tr = vlib_add_trace (vm, node, - b0, sizeof (*tr)); - tr->tunnel_index = t0 - sm->tunnels; - clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, - sizeof (tr->src.as_u8)); - clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - tr->length = new_l0; - tr->next_index = next0; - if (sr0) - clib_memcpy (tr->sr, sr0, sizeof (tr->sr)); - } - if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_rewrite_trace_t *tr = vlib_add_trace (vm, node, - b1, sizeof (*tr)); - tr->tunnel_index = t1 - sm->tunnels; - clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8, - sizeof (tr->src.as_u8)); - clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - tr->length = new_l1; - tr->next_index = next1; - if (sr1) - clib_memcpy (tr->sr, sr1, sizeof (tr->sr)); - } - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, - to_next, n_left_to_next, - bi0, bi1, next0, next1); - } - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0; - vlib_buffer_t *b0; - ip6_header_t *ip0 = 0; - ip6_sr_header_t *sr0 = 0; - ip6_sr_tunnel_t *t0; - u32 next0; - u16 new_l0 = 0; - - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - - - /* - * $$$ parse through header(s) to pick the point - * where we punch in the SR extention header - */ - t0 = - pool_elt_at_index (sm->tunnels, - vnet_buffer (b0)->ip.adj_index[VLIB_TX]); - - ASSERT (VLIB_BUFFER_PRE_DATA_SIZE - >= ((word) vec_len (t0->rewrite)) + b0->current_data); - - vnet_buffer (b0)->sw_if_index[VLIB_TX] = t0->tx_fib_index; - - ip0 = vlib_buffer_get_current (b0); - - /* - * SR-unaware service chaining case: pkt coming back from - * service has the original dst address, and will already - * have an SR header. If so, send it to sr-local - */ - if (PREDICT_FALSE (ip0->protocol == IPPROTO_IPV6_ROUTE)) - { - vlib_buffer_advance (b0, sizeof (ip0)); - sr0 = (ip6_sr_header_t *) (ip0 + 1); - new_l0 = clib_net_to_host_u16 (ip0->payload_length); - next0 = SR_REWRITE_NEXT_SR_LOCAL; - } - else - { - u32 len_bytes = sizeof (ip6_header_t); - u8 next_hdr = ip0->protocol; - - /* HBH must immediately follow ipv6 header */ - if (PREDICT_FALSE - (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) - { - ip6_hop_by_hop_ext_t *ext_hdr = - (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0); - len_bytes += - ip6_ext_header_len ((ip6_ext_header_t *) ext_hdr); - next_hdr = ext_hdr->next_hdr; - ext_hdr->next_hdr = IPPROTO_IPV6_ROUTE; - /* Ignoring the sr_local for now, if RH follows HBH here */ - } - else - { - ip0->protocol = IPPROTO_IPV6_ROUTE; /* routing extension header */ - } - /* - * Copy data before the punch-in point left by the - * required amount. Assume (for the moment) that only - * the main packet header needs to be copied. 
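 * In sketch form, with R = vec_len (t0->rewrite) bytes of prebuilt SRH:
 * the header block of len_bytes is slid left by R, the buffer start is
 * rewound by R, and the IPv6 payload length grows by R, i.e.
 *
 *   new_l0 = clib_net_to_host_u16 (ip0->payload_length) + R;
 *   ip0->payload_length = clib_host_to_net_u16 (new_l0);
 *
 * exactly as the code below does.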
- */ - clib_memcpy (((u8 *) ip0) - vec_len (t0->rewrite), - ip0, len_bytes); - vlib_buffer_advance (b0, -(word) vec_len (t0->rewrite)); - ip0 = vlib_buffer_get_current (b0); - sr0 = (ip6_sr_header_t *) ((u8 *) ip0 + len_bytes); - /* $$$ tune */ - clib_memcpy (sr0, t0->rewrite, vec_len (t0->rewrite)); - - /* Fix the next header chain */ - sr0->protocol = next_hdr; - new_l0 = clib_net_to_host_u16 (ip0->payload_length) + - vec_len (t0->rewrite); - ip0->payload_length = clib_host_to_net_u16 (new_l0); - - /* Copy dst address into the DA slot in the segment list */ - clib_memcpy (sr0->segments, ip0->dst_address.as_u64, - sizeof (ip6_address_t)); - /* Rewrite the ip6 dst address with the first hop */ - clib_memcpy (ip0->dst_address.as_u64, t0->first_hop.as_u64, - sizeof (ip6_address_t)); - - sr_fix_hmac (sm, ip0, sr0); - - vnet_buffer (b0)->ip.adj_index[VLIB_TX] = - t0->first_hop_dpo.dpoi_index; - next0 = t0->first_hop_dpo.dpoi_next_node; - next0 = (sr_local_cb ? - sr_local_cb (vm, node, b0, ip0, sr0) : next0); - - /* - * Ignore "do not rewrite" shtik in this path - */ - if (PREDICT_FALSE (next0 & 0x80000000)) - { - next0 ^= 0xFFFFFFFF; - if (PREDICT_FALSE (next0 == SR_REWRITE_NEXT_ERROR)) - b0->error = node->errors[SR_REWRITE_ERROR_APP_CALLBACK]; - } - } - - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_rewrite_trace_t *tr = vlib_add_trace (vm, node, - b0, sizeof (*tr)); - tr->tunnel_index = t0 - sm->tunnels; - if (ip0) - { - memcpy (tr->src.as_u8, ip0->src_address.as_u8, - sizeof (tr->src.as_u8)); - memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - } - tr->length = new_l0; - tr->next_index = next0; - if (sr0) - clib_memcpy (tr->sr, sr0, sizeof (tr->sr)); - } - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - return from_frame->n_vectors; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (sr_rewrite_node) = { - .function = sr_rewrite, - .name = "sr-rewrite", - /* Takes a vector of packets. 
*/ - .vector_size = sizeof (u32), - .format_trace = format_sr_rewrite_trace, - .format_buffer = format_ip6_sr_header_with_length, - - .n_errors = SR_REWRITE_N_ERROR, - .error_strings = sr_rewrite_error_strings, - - .runtime_data_bytes = 0, - - .n_next_nodes = SR_REWRITE_N_NEXT, - .next_nodes = { -#define _(s,n) [SR_REWRITE_NEXT_##s] = n, - foreach_sr_rewrite_next -#undef _ - }, -}; - -VLIB_NODE_FUNCTION_MULTIARCH (sr_rewrite_node, sr_rewrite) -/* *INDENT-ON* */ - -static int -ip6_routes_add_del (ip6_sr_tunnel_t * t, int is_del) -{ - ip6_sr_main_t *sm = &sr_main; - - /* - * the prefix for the tunnel's destination - */ - /* *INDENT-OFF* */ - fib_prefix_t pfx = { - .fp_proto = FIB_PROTOCOL_IP6, - .fp_len = t->dst_mask_width, - .fp_addr = { - .ip6 = t->key.dst, - } - }; - /* *INDENT-ON* */ - - if (is_del) - { - fib_table_entry_delete (t->rx_fib_index, &pfx, FIB_SOURCE_SR); - } - else - { - dpo_id_t dpo = DPO_INVALID; - - dpo_set (&dpo, sr_dpo_type, DPO_PROTO_IP6, t - sm->tunnels); - fib_table_entry_special_dpo_add (t->rx_fib_index, - &pfx, - FIB_SOURCE_SR, - FIB_ENTRY_FLAG_EXCLUSIVE, &dpo); - dpo_reset (&dpo); - } - - /* - * Track the first hop address so we don't need to perform an extra - * lookup in the data-path - */ - /* *INDENT-OFF* */ - const fib_prefix_t first_hop_pfx = { - .fp_len = 128, - .fp_proto = FIB_PROTOCOL_IP6, - .fp_addr = { - .ip6 = t->first_hop, - } - }; - /* *INDENT-ON* */ - - if (is_del) - { - fib_entry_child_remove (t->fib_entry_index, t->sibling_index); - fib_table_entry_delete_index (t->fib_entry_index, FIB_SOURCE_RR); - } - else - { - t->fib_entry_index = - fib_table_entry_special_add (t->rx_fib_index, - &first_hop_pfx, - FIB_SOURCE_RR, - FIB_ENTRY_FLAG_NONE, ADJ_INDEX_INVALID); - t->sibling_index = - fib_entry_child_add (t->fib_entry_index, - sr_fib_node_type, t - sm->tunnels); - } - - return 0; -} - -/** - * @brief Find or add if not found - HMAC shared secret - * - * @param sm ip6_sr_main_t * - * @param secret u8 * - * @param indexp u32 * - * - * @return ip6_sr_hmac_key_t * - */ -static ip6_sr_hmac_key_t * -find_or_add_shared_secret (ip6_sr_main_t * sm, u8 * secret, u32 * indexp) -{ - uword *p; - ip6_sr_hmac_key_t *key = 0; - int i; - - p = hash_get_mem (sm->hmac_key_by_shared_secret, secret); - - if (p) - { - key = vec_elt_at_index (sm->hmac_keys, p[0]); - if (indexp) - *indexp = p[0]; - return (key); - } - - /* Specific key ID? */ - if (indexp && *indexp) - { - vec_validate (sm->hmac_keys, *indexp); - key = sm->hmac_keys + *indexp; - } - else - { - for (i = 0; i < vec_len (sm->hmac_keys); i++) - { - if (sm->hmac_keys[i].shared_secret == 0) - { - key = sm->hmac_keys + i; - goto found; - } - } - vec_validate (sm->hmac_keys, i); - key = sm->hmac_keys + i; - found: - ; - } - - key->shared_secret = vec_dup (secret); - - hash_set_mem (sm->hmac_key_by_shared_secret, key->shared_secret, - key - sm->hmac_keys); - - if (indexp) - *indexp = key - sm->hmac_keys; - return (key); -} - -/** - * @brief Stack a tunnel on the forwarding chain of the first-hop - */ -static void -sr_tunnel_stack (ip6_sr_tunnel_t * st) -{ - dpo_stack (sr_dpo_type, - DPO_PROTO_IP6, - &st->first_hop_dpo, - fib_entry_contribute_ip_forwarding (st->fib_entry_index)); -} - -/** - * @brief Add or Delete a Segment Routing tunnel. 
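 *
 * Sketch of a minimal "add" call (field names are those of
 * ip6_sr_add_del_tunnel_args_t; the addresses and table ids shown are
 * placeholders, not values from this patch):
 *
 *   ip6_sr_add_del_tunnel_args_t a = { 0 };
 *   a.src_address = &src;              // ip6_address_t, e.g. parsed from CLI
 *   a.dst_address = &dst;
 *   a.dst_mask_width = 128;
 *   vec_add1 (a.segments, first_hop);  // at least one segment is required
 *   a.rx_table_id = 0;
 *   a.tx_table_id = 0;
 *   int rv = ip6_sr_add_del_tunnel (&a);  // 0 on success, negative on error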
- * - * @param a ip6_sr_add_del_tunnel_args_t * - * - * @return retval int - */ -int -ip6_sr_add_del_tunnel (ip6_sr_add_del_tunnel_args_t * a) -{ - ip6_main_t *im = &ip6_main; - ip6_sr_tunnel_key_t key; - ip6_sr_tunnel_t *t; - uword *p, *n; - ip6_sr_header_t *h = 0; - u32 header_length; - ip6_address_t *addrp, *this_address; - ip6_sr_main_t *sm = &sr_main; - u8 *key_copy; - u32 rx_fib_index, tx_fib_index; - u32 hmac_key_index_u32; - u8 hmac_key_index = 0; - ip6_sr_policy_t *pt; - int i; - - /* Make sure that the rx FIB exists */ - p = hash_get (im->fib_index_by_table_id, a->rx_table_id); - - if (p == 0) - return -3; - - /* remember the FIB index */ - rx_fib_index = p[0]; - - /* Make sure that the supplied FIB exists */ - p = hash_get (im->fib_index_by_table_id, a->tx_table_id); - - if (p == 0) - return -4; - - /* remember the FIB index */ - tx_fib_index = p[0]; - - clib_memcpy (key.src.as_u8, a->src_address->as_u8, sizeof (key.src)); - clib_memcpy (key.dst.as_u8, a->dst_address->as_u8, sizeof (key.dst)); - - /* When adding a tunnel: - * - If a "name" is given, it must not exist. - * - The "key" is always checked, and must not exist. - * When deleting a tunnel: - * - If the "name" is given, and it exists, then use it. - * - If the "name" is not given, use the "key". - * - If the "name" and the "key" are given, then both must point to the same - * thing. - */ - - /* Lookup the key */ - p = hash_get_mem (sm->tunnel_index_by_key, &key); - - /* If the name is given, look it up */ - if (a->name) - n = hash_get_mem (sm->tunnel_index_by_name, a->name); - else - n = 0; - - /* validate key/name parameters */ - if (!a->is_del) /* adding a tunnel */ - { - if (a->name && n) /* name given & exists already */ - return -1; - if (p) /* key exists already */ - return -1; - } - else /* deleting a tunnel */ - { - if (!p) /* key doesn't exist */ - return -2; - if (a->name && !n) /* name given & it doesn't exist */ - return -2; - - if (n) /* name given & found */ - { - if (n[0] != p[0]) /* name and key do not point to the same thing */ - return -2; - } - } - - - if (a->is_del) /* delete the tunnel */ - { - hash_pair_t *hp; - - /* Delete existing tunnel */ - t = pool_elt_at_index (sm->tunnels, p[0]); - - ip6_routes_add_del (t, 1); - - vec_free (t->rewrite); - /* Remove tunnel from any policy if associated */ - if (t->policy_index != ~0) - { - pt = pool_elt_at_index (sm->policies, t->policy_index); - for (i = 0; i < vec_len (pt->tunnel_indices); i++) - { - if (pt->tunnel_indices[i] == t - sm->tunnels) - { - vec_delete (pt->tunnel_indices, 1, i); - goto found; - } - } - clib_warning ("Tunnel index %d not found in policy_index %d", - t - sm->tunnels, pt - sm->policies); - found: - /* If this is last tunnel in the policy, clean up the policy too */ - if (vec_len (pt->tunnel_indices) == 0) - { - hash_unset_mem (sm->policy_index_by_policy_name, pt->name); - vec_free (pt->name); - pool_put (sm->policies, pt); - } - } - - /* Clean up the tunnel by name */ - if (t->name) - { - hash_unset_mem (sm->tunnel_index_by_name, t->name); - vec_free (t->name); - } - dpo_reset (&t->first_hop_dpo); - pool_put (sm->tunnels, t); - hp = hash_get_pair (sm->tunnel_index_by_key, &key); - key_copy = (void *) (hp->key); - hash_unset_mem (sm->tunnel_index_by_key, &key); - vec_free (key_copy); - return 0; - } - - /* create a new tunnel */ - pool_get (sm->tunnels, t); - memset (t, 0, sizeof (*t)); - t->policy_index = ~0; - fib_node_init (&t->node, sr_fib_node_type); - - clib_memcpy (&t->key, &key, sizeof (t->key)); - t->dst_mask_width = 
a->dst_mask_width; - t->rx_fib_index = rx_fib_index; - t->tx_fib_index = tx_fib_index; - - if (!vec_len (a->segments)) - /* there must be at least one segment... */ - return -4; - - /* The first specified hop goes right into the dst address */ - clib_memcpy (&t->first_hop, &a->segments[0], sizeof (ip6_address_t)); - - /* - * Create the sr header rewrite string - * The list of segments needs an extra slot for the ultimate destination - * which is taken from the packet we add the SRH to. - */ - header_length = sizeof (*h) + - sizeof (ip6_address_t) * (vec_len (a->segments) + 1 + vec_len (a->tags)); - - if (a->shared_secret) - { - /* Allocate a new key slot if we don't find the secret key */ - hmac_key_index_u32 = 0; - (void) find_or_add_shared_secret (sm, a->shared_secret, - &hmac_key_index_u32); - - /* Hey Vinz Clortho: Gozzer is pissed.. you're out of keys! */ - if (hmac_key_index_u32 >= 256) - return -5; - hmac_key_index = hmac_key_index_u32; - header_length += SHA256_DIGEST_LENGTH; - } - - vec_validate (t->rewrite, header_length - 1); - - h = (ip6_sr_header_t *) t->rewrite; - - h->protocol = 0xFF; /* we don't know yet */ - - h->length = (header_length / 8) - 1; - h->type = ROUTING_HEADER_TYPE_SR; - - /* first_segment and segments_left need to have the index of the last - * element in the list; a->segments has one element less than ends up - * in the header (it does not have the DA in it), so vec_len(a->segments) - * is the value we want. - */ - h->first_segment = h->segments_left = vec_len (a->segments); - - if (a->shared_secret) - h->hmac_key = hmac_key_index & 0xFF; - - h->flags = a->flags_net_byte_order; - - /* Paint on the segment list, in reverse. - * This is offset by one to leave room at the start for the ultimate - * destination. - */ - addrp = h->segments + vec_len (a->segments); - - vec_foreach (this_address, a->segments) - { - clib_memcpy (addrp->as_u8, this_address->as_u8, sizeof (ip6_address_t)); - addrp--; - } - - /* - * Since the ultimate destination address is not yet known, set that slot - * to a value we will instantly recognize as bogus. - */ - memset (h->segments, 0xfe, sizeof (ip6_address_t)); - - /* Paint on the tag list, not reversed */ - addrp = h->segments + vec_len (a->segments); - - vec_foreach (this_address, a->tags) - { - clib_memcpy (addrp->as_u8, this_address->as_u8, sizeof (ip6_address_t)); - addrp++; - } - - key_copy = vec_new (ip6_sr_tunnel_key_t, 1); - clib_memcpy (key_copy, &key, sizeof (ip6_sr_tunnel_key_t)); - hash_set_mem (sm->tunnel_index_by_key, key_copy, t - sm->tunnels); - - /* - * Stick the tunnel index into the rewrite header. - * - * Unfortunately, inserting an SR header according to the various - * RFC's requires parsing through the ip6 header, perhaps consing a - * buffer onto the head of the vlib_buffer_t, etc. We don't use the - * normal reverse bcopy rewrite code. - * - * We don't handle ugly RFC-related cases yet, but I'm sure PL will complain - * at some point... - */ - - /* - * Add the routes for the tunnel destination and first-hop, then stack - * the tunnel on the appropriate forwarding DPOs. 
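 *
 * In sketch form (mirroring ip6_routes_add_del() earlier in this file,
 * with ti the tunnel's pool index), the destination prefix is bound
 * exclusively to the SR DPO:
 *
 *   dpo_set (&dpo, sr_dpo_type, DPO_PROTO_IP6, ti);
 *   fib_table_entry_special_dpo_add (rx_fib_index, &dst_pfx, FIB_SOURCE_SR,
 *                                    FIB_ENTRY_FLAG_EXCLUSIVE, &dpo);
 *
 * and a FIB_SOURCE_RR entry is added for the first-hop /128, with the
 * tunnel attached as a child so sr_tunnel_stack() can restack the
 * first-hop DPO whenever that prefix's forwarding changes.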
- */ - ip6_routes_add_del (t, 0); - sr_tunnel_stack (t); - - if (a->policy_name) - { - p = hash_get_mem (sm->policy_index_by_policy_name, a->policy_name); - if (p) - { - pt = pool_elt_at_index (sm->policies, p[0]); - } - else /* no policy, lets create one */ - { - pool_get (sm->policies, pt); - memset (pt, 0, sizeof (*pt)); - pt->name = format (0, "%s%c", a->policy_name, 0); - hash_set_mem (sm->policy_index_by_policy_name, pt->name, - pt - sm->policies); - p = hash_get_mem (sm->policy_index_by_policy_name, a->policy_name); - } - vec_add1 (pt->tunnel_indices, t - sm->tunnels); - if (p == 0) - clib_warning ("p is NULL!"); - t->policy_index = p ? p[0] : ~0; /* equiv. to (pt - sm->policies) */ - } - - if (a->name) - { - t->name = format (0, "%s%c", a->name, 0); - hash_set_mem (sm->tunnel_index_by_name, t->name, t - sm->tunnels); - } - - return 0; -} /** * @brief no-op lock function. * The lifetime of the SR entry is managed by the control plane */ -static void +void sr_dpo_lock (dpo_id_t * dpo) { } @@ -1208,2227 +43,15 @@ sr_dpo_lock (dpo_id_t * dpo) * @brief no-op unlock function. * The lifetime of the SR entry is managed by the control plane */ -static void +void sr_dpo_unlock (dpo_id_t * dpo) { } -u8 * -format_sr_dpo (u8 * s, va_list * args) -{ - index_t index = va_arg (*args, index_t); - CLIB_UNUSED (u32 indent) = va_arg (*args, u32); - - return (format (s, "SR: tunnel:[%d]", index)); -} - -const static dpo_vft_t sr_dpo_vft = { - .dv_lock = sr_dpo_lock, - .dv_unlock = sr_dpo_unlock, - .dv_format = format_sr_dpo, -}; - -const static char *const sr_ip6_nodes[] = { - "sr-rewrite", - NULL, -}; - -const static char *const *const sr_nodes[DPO_PROTO_NUM] = { - [DPO_PROTO_IP6] = sr_ip6_nodes, -}; - -static ip6_sr_tunnel_t * -sr_tunnel_from_fib_node (fib_node_t * node) -{ -#if (CLIB_DEBUG > 0) - ASSERT (sr_fib_node_type == node->fn_type); -#endif - return ((ip6_sr_tunnel_t *) (((char *) node) - - STRUCT_OFFSET_OF (ip6_sr_tunnel_t, node))); -} - -/** - * Function definition to backwalk a FIB node - */ -static fib_node_back_walk_rc_t -sr_tunnel_back_walk (fib_node_t * node, fib_node_back_walk_ctx_t * ctx) -{ - sr_tunnel_stack (sr_tunnel_from_fib_node (node)); - - return (FIB_NODE_BACK_WALK_CONTINUE); -} - -/** - * Function definition to get a FIB node from its index - */ -static fib_node_t * -sr_tunnel_fib_node_get (fib_node_index_t index) -{ - ip6_sr_tunnel_t *st; - ip6_sr_main_t *sm; - - sm = &sr_main; - st = pool_elt_at_index (sm->tunnels, index); - - return (&st->node); -} - -/** - * Function definition to inform the FIB node that its last lock has gone. - */ -static void -sr_tunnel_last_lock_gone (fib_node_t * node) -{ - /* - * The SR tunnel is a root of the graph. As such - * it never has children and thus is never locked. - */ - ASSERT (0); -} - /* - * Virtual function table registered by SR tunnels - * for participation in the FIB object graph. - */ -const static fib_node_vft_t sr_fib_vft = { - .fnv_get = sr_tunnel_fib_node_get, - .fnv_last_lock = sr_tunnel_last_lock_gone, - .fnv_back_walk = sr_tunnel_back_walk, -}; - -/** - * @brief CLI parser for Add or Delete a Segment Routing tunnel. 
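 *
 * Example invocation (names and addresses are placeholders):
 *
 *   sr tunnel name t1 src 2001:db8::1 dst 2001:db8::100/128
 *             next 2001:db8::2 next 2001:db8::3 clean key my-shared-secret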
- * - * @param vm vlib_main_t * - * @param input unformat_input_t * - * @param cmd vlib_cli_command_t * - * - * @return error clib_error_t * - */ -static clib_error_t * -sr_add_del_tunnel_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - int is_del = 0; - ip6_address_t src_address; - int src_address_set = 0; - ip6_address_t dst_address; - u32 dst_mask_width; - int dst_address_set = 0; - u16 flags = 0; - u8 *shared_secret = 0; - u8 *name = 0; - u8 *policy_name = 0; - u32 rx_table_id = 0; - u32 tx_table_id = 0; - ip6_address_t *segments = 0; - ip6_address_t *this_seg; - ip6_address_t *tags = 0; - ip6_address_t *this_tag; - ip6_sr_add_del_tunnel_args_t _a, *a = &_a; - ip6_address_t next_address, tag; - int pl_index; - int rv; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "del")) - is_del = 1; - else if (unformat (input, "rx-fib-id %d", &rx_table_id)) - ; - else if (unformat (input, "tx-fib-id %d", &tx_table_id)) - ; - else if (unformat (input, "src %U", unformat_ip6_address, &src_address)) - src_address_set = 1; - else if (unformat (input, "name %s", &name)) - ; - else if (unformat (input, "policy %s", &policy_name)) - ; - else if (unformat (input, "dst %U/%d", - unformat_ip6_address, &dst_address, &dst_mask_width)) - dst_address_set = 1; - else if (unformat (input, "next %U", unformat_ip6_address, - &next_address)) - { - vec_add2 (segments, this_seg, 1); - clib_memcpy (this_seg->as_u8, next_address.as_u8, - sizeof (*this_seg)); - } - else if (unformat (input, "tag %U", unformat_ip6_address, &tag)) - { - vec_add2 (tags, this_tag, 1); - clib_memcpy (this_tag->as_u8, tag.as_u8, sizeof (*this_tag)); - } - else if (unformat (input, "clean")) - flags |= IP6_SR_HEADER_FLAG_CLEANUP; - else if (unformat (input, "protected")) - flags |= IP6_SR_HEADER_FLAG_PROTECTED; - else if (unformat (input, "key %s", &shared_secret)) - /* Do not include the trailing NULL byte. 
Guaranteed interop issue */ - _vec_len (shared_secret) -= 1; - else if (unformat (input, "InPE %d", &pl_index)) - { - if (pl_index <= 0 || pl_index > 4) - { - pl_index_range_error: - return clib_error_return - (0, "Policy List Element Index %d out of range (1-4)", - pl_index); - - } - flags |= IP6_SR_HEADER_FLAG_PL_ELT_INGRESS_PE - << ip6_sr_policy_list_shift_from_index (pl_index); - } - else if (unformat (input, "EgPE %d", &pl_index)) - { - if (pl_index <= 0 || pl_index > 4) - goto pl_index_range_error; - flags |= IP6_SR_HEADER_FLAG_PL_ELT_EGRESS_PE - << ip6_sr_policy_list_shift_from_index (pl_index); - } - else if (unformat (input, "OrgSrc %d", &pl_index)) - { - if (pl_index <= 0 || pl_index > 4) - goto pl_index_range_error; - flags |= IP6_SR_HEADER_FLAG_PL_ELT_ORIG_SRC_ADDR - << ip6_sr_policy_list_shift_from_index (pl_index); - } - else - break; - } - - if (!src_address_set) - return clib_error_return (0, "src address required"); - - if (!dst_address_set) - return clib_error_return (0, "dst address required"); - - if (!segments) - return clib_error_return (0, "at least one sr segment required"); - - memset (a, 0, sizeof (*a)); - a->src_address = &src_address; - a->dst_address = &dst_address; - a->dst_mask_width = dst_mask_width; - a->segments = segments; - a->tags = tags; - a->flags_net_byte_order = clib_host_to_net_u16 (flags); - a->is_del = is_del; - a->rx_table_id = rx_table_id; - a->tx_table_id = tx_table_id; - a->shared_secret = shared_secret; - - if (vec_len (name)) - a->name = name; - else - a->name = 0; - - if (vec_len (policy_name)) - a->policy_name = policy_name; - else - a->policy_name = 0; - - rv = ip6_sr_add_del_tunnel (a); - - vec_free (segments); - vec_free (tags); - vec_free (shared_secret); - - switch (rv) - { - case 0: - break; - - case -1: - return clib_error_return (0, "SR tunnel src %U dst %U already exists", - format_ip6_address, &src_address, - format_ip6_address, &dst_address); - - case -2: - return clib_error_return (0, "SR tunnel src %U dst %U does not exist", - format_ip6_address, &src_address, - format_ip6_address, &dst_address); - - case -3: - return clib_error_return (0, "FIB table %d does not exist", - rx_table_id); - - case -4: - return clib_error_return (0, "At least one segment is required"); - - default: - return clib_error_return (0, "BUG: ip6_sr_add_del_tunnel returns %d", - rv); - } - - return 0; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (sr_tunnel_command, static) = { - .path = "sr tunnel", - .short_help = - "sr tunnel [del] [name ] src dst [next ] " - "[clean] [reroute] [key ] [policy ]" - "[rx-fib-id ] [tx-fib-id ]", - .function = sr_add_del_tunnel_command_fn, -}; -/* *INDENT-ON* */ - -/** - * @brief Display Segment Routing tunnel - * - * @param vm vlib_main_t * - * @param t ip6_sr_tunnel_t * - * - */ -void -ip6_sr_tunnel_display (vlib_main_t * vm, ip6_sr_tunnel_t * t) -{ - ip6_sr_main_t *sm = &sr_main; - ip6_fib_t *rx_fib, *tx_fib; - ip6_sr_policy_t *pt; - - rx_fib = ip6_fib_get (t->rx_fib_index); - tx_fib = ip6_fib_get (t->tx_fib_index); - - if (t->name) - vlib_cli_output (vm, "sr tunnel name: %s", (char *) t->name); - - vlib_cli_output (vm, "src %U dst %U first hop %U", - format_ip6_address, &t->key.src, - format_ip6_address, &t->key.dst, - format_ip6_address, &t->first_hop); - vlib_cli_output (vm, " rx-fib-id %d tx-fib-id %d", - rx_fib->table_id, tx_fib->table_id); - vlib_cli_output (vm, " sr: %U", format_ip6_sr_header, t->rewrite, - 0 /* print_hmac */ ); - - if (t->policy_index != ~0) - { - pt = pool_elt_at_index (sm->policies, 
t->policy_index); - vlib_cli_output (vm, "sr policy: %s", (char *) pt->name); - } - vlib_cli_output (vm, "-------"); - - return; -} - -/** - * @brief CLI Parser for Display Segment Routing tunnel - * - * @param vm vlib_main_t * - * @param input unformat_input_t * - * @param cmd vlib_cli_command_t * - * - * @return error clib_error_t * - */ -static clib_error_t * -show_sr_tunnel_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - static ip6_sr_tunnel_t **tunnels; - ip6_sr_tunnel_t *t; - ip6_sr_main_t *sm = &sr_main; - int i; - uword *p = 0; - u8 *name = 0; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "name %s", &name)) - { - p = hash_get_mem (sm->tunnel_index_by_name, name); - if (!p) - vlib_cli_output (vm, "No SR tunnel with name: %s. Showing all.", - name); - } - else - break; - } - - vec_reset_length (tunnels); - - if (!p) /* Either name parm not passed or no tunnel with that name found, show all */ - { - /* *INDENT-OFF* */ - pool_foreach (t, sm->tunnels, - ({ - vec_add1 (tunnels, t); - })); - /* *INDENT-ON* */ - } - else /* Just show the one tunnel by name */ - vec_add1 (tunnels, &sm->tunnels[p[0]]); - - if (vec_len (tunnels) == 0) - vlib_cli_output (vm, "No SR tunnels configured"); - - for (i = 0; i < vec_len (tunnels); i++) - { - t = tunnels[i]; - ip6_sr_tunnel_display (vm, t); - } - - return 0; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (show_sr_tunnel_command, static) = { - .path = "show sr tunnel", - .short_help = "show sr tunnel [name ]", - .function = show_sr_tunnel_fn, -}; -/* *INDENT-ON* */ - -/** - * @brief Add or Delete a Segment Routing policy - * - * @param a ip6_sr_add_del_policy_args_t * - * - * @return retval int - */ -int -ip6_sr_add_del_policy (ip6_sr_add_del_policy_args_t * a) -{ - ip6_sr_main_t *sm = &sr_main; - uword *p; - ip6_sr_tunnel_t *t = 0; - ip6_sr_policy_t *policy; - u32 *tunnel_indices = 0; - int i; - - - - if (a->is_del) - { - p = hash_get_mem (sm->policy_index_by_policy_name, a->name); - if (!p) - return -6; /* policy name not found */ - - policy = pool_elt_at_index (sm->policies, p[0]); - - vec_foreach_index (i, policy->tunnel_indices) - { - t = pool_elt_at_index (sm->tunnels, policy->tunnel_indices[i]); - t->policy_index = ~0; - } - hash_unset_mem (sm->policy_index_by_policy_name, a->name); - pool_put (sm->policies, policy); - return 0; - } - - - if (!vec_len (a->tunnel_names)) - return -3; /*tunnel name is required case */ - - vec_reset_length (tunnel_indices); - /* Check tunnel names, add tunnel_index to policy */ - for (i = 0; i < vec_len (a->tunnel_names); i++) - { - p = hash_get_mem (sm->tunnel_index_by_name, a->tunnel_names[i]); - if (!p) - return -4; /* tunnel name not found case */ - - t = pool_elt_at_index (sm->tunnels, p[0]); - /* - No need to check t==0. -3 condition above ensures name - */ - if (t->policy_index != ~0) - return -5; /* tunnel name already associated with a policy */ - - /* Add to tunnel indicies */ - vec_add1 (tunnel_indices, p[0]); - } - - /* Add policy to ip6_sr_main_t */ - pool_get (sm->policies, policy); - policy->name = a->name; - policy->tunnel_indices = tunnel_indices; - hash_set_mem (sm->policy_index_by_policy_name, policy->name, - policy - sm->policies); - - /* Yes, this could be construed as overkill but the last thing you should do is set - the policy_index on the tunnel after everything is set in ip6_sr_main_t. - If this is deemed overly cautious, could set this in the vec_len(tunnel_names) loop. 
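 *
 * Sketch of a typical "add" call (the tunnel names are placeholders and
 * must refer to tunnels that already exist):
 *
 *   ip6_sr_add_del_policy_args_t a = { 0 };
 *   a.name = (u8 *) "pol1";
 *   vec_add1 (a.tunnel_names, (u8 *) "t1");
 *   vec_add1 (a.tunnel_names, (u8 *) "t2");
 *   int rv = ip6_sr_add_del_policy (&a);
 *   // rv == -4: unknown tunnel name, -5: tunnel already in a policy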
- */ - for (i = 0; i < vec_len (policy->tunnel_indices); i++) - { - t = pool_elt_at_index (sm->tunnels, policy->tunnel_indices[i]); - t->policy_index = policy - sm->policies; - } - - return 0; -} - -/** - * @brief CLI Parser for Add or Delete a Segment Routing policy - * - * @param vm vlib_main_t * - * @param input unformat_input_t * - * @param cmd vlib_cli_command_t * - * - * @return error clib_error_t * - */ -static clib_error_t * -sr_add_del_policy_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - int is_del = 0; - u8 **tunnel_names = 0; - u8 *tunnel_name = 0; - u8 *name = 0; - ip6_sr_add_del_policy_args_t _a, *a = &_a; - int rv; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "del")) - is_del = 1; - else if (unformat (input, "name %s", &name)) - ; - else if (unformat (input, "tunnel %s", &tunnel_name)) - { - if (tunnel_name) - { - vec_add1 (tunnel_names, tunnel_name); - tunnel_name = 0; - } - } - else - break; - } - - if (!name) - return clib_error_return (0, "name of SR policy required"); - - - memset (a, 0, sizeof (*a)); - - a->is_del = is_del; - a->name = name; - a->tunnel_names = tunnel_names; - - rv = ip6_sr_add_del_policy (a); - - vec_free (tunnel_names); - - switch (rv) - { - case 0: - break; - - case -3: - return clib_error_return (0, - "tunnel name to associate to SR policy is required"); - - case -4: - return clib_error_return (0, "tunnel name not found"); - - case -5: - return clib_error_return (0, "tunnel already associated with policy"); - - case -6: - return clib_error_return (0, "policy name %s not found", name); - - case -7: - return clib_error_return (0, "TODO: deleting policy name %s", name); - - default: - return clib_error_return (0, "BUG: ip6_sr_add_del_policy returns %d", - rv); - - } - return 0; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (sr_policy_command, static) = { - .path = "sr policy", - .short_help = - "sr policy [del] name tunnel [tunnel ]*", - .function = sr_add_del_policy_command_fn, -}; -/* *INDENT-ON* */ - -/** - * @brief CLI Parser for Displaying Segment Routing policy - * - * @param vm vlib_main_t * - * @param input unformat_input_t * - * @param cmd vlib_cli_command_t * - * - * @return error clib_error_t * - */ -static clib_error_t * -show_sr_policy_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - static ip6_sr_policy_t **policies; - ip6_sr_policy_t *policy; - ip6_sr_tunnel_t *t; - ip6_sr_main_t *sm = &sr_main; - int i, j; - uword *p = 0; - u8 *name = 0; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "name %s", &name)) - { - p = hash_get_mem (sm->policy_index_by_policy_name, name); - if (!p) - vlib_cli_output (vm, - "policy with name %s not found. 
Showing all.", - name); - } - else - break; - } - - vec_reset_length (policies); - - if (!p) /* Either name parm not passed or no policy with that name found, show all */ - { - /* *INDENT-OFF* */ - pool_foreach (policy, sm->policies, - ({ - vec_add1 (policies, policy); - })); - /* *INDENT-ON* */ - } - else /* Just show the one policy by name and a summary of tunnel names */ - { - policy = pool_elt_at_index (sm->policies, p[0]); - vec_add1 (policies, policy); - } - - if (vec_len (policies) == 0) - vlib_cli_output (vm, "No SR policies configured"); - - for (i = 0; i < vec_len (policies); i++) - { - policy = policies[i]; - - if (policy->name) - vlib_cli_output (vm, "SR policy name: %s", (char *) policy->name); - for (j = 0; j < vec_len (policy->tunnel_indices); j++) - { - t = pool_elt_at_index (sm->tunnels, policy->tunnel_indices[j]); - ip6_sr_tunnel_display (vm, t); - } - } - - return 0; - -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (show_sr_policy_command, static) = { - .path = "show sr policy", - .short_help = "show sr policy [name ]", - .function = show_sr_policy_fn, -}; -/* *INDENT-ON* */ - -/** - * @brief Add or Delete a mapping of IP6 multicast address - * to Segment Routing policy. - * - * @param a ip6_sr_add_del_multicastmap_args_t * - * - * @return retval int - */ -int -ip6_sr_add_del_multicastmap (ip6_sr_add_del_multicastmap_args_t * a) -{ - uword *p; - ip6_sr_tunnel_t *t; - ip6_sr_main_t *sm = &sr_main; - ip6_sr_policy_t *pt; - index_t rep; - u32 ii; - - if (a->is_del) - { - /* clean up the adjacency */ - p = - hash_get_mem (sm->policy_index_by_multicast_address, - a->multicast_address); - } - else - { - /* Get our policy by policy_name */ - p = hash_get_mem (sm->policy_index_by_policy_name, a->policy_name); - - } - if (!p) - return -1; - - pt = pool_elt_at_index (sm->policies, p[0]); - - /* - Get the first tunnel associated with policy populate the fib adjacency. - From there, since this tunnel will have it's policy_index != ~0 it will - be the trigger in the dual_loop to pull up the policy and make a copy-rewrite - for each tunnel in the policy - */ - - t = pool_elt_at_index (sm->tunnels, pt->tunnel_indices[0]); - - /* - * Stick the tunnel index into the rewrite header. - * - * Unfortunately, inserting an SR header according to the various - * RFC's requires parsing through the ip6 header, perhaps consing a - * buffer onto the head of the vlib_buffer_t, etc. We don't use the - * normal reverse bcopy rewrite code. - * - * We don't handle ugly RFC-related cases yet, but I'm sure PL will complain - * at some point... - */ - - /* - * Construct an mFIB entry for the multicast address, - * using the rx/tx fib from the first tunnel. - * There is no RPF information for this address (I need to discuss this with - * Pablo), so for now accept from anywhere... 
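 *
 * The entry added below resolves through a replicate DPO carrying one
 * bucket per tunnel in the policy, so each packet sent to the group is
 * fanned out over every tunnel. Typically configured as (the group
 * address and policy name are placeholders):
 *
 *   sr multicast-map address ff0e::1 sr-policy pol1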
- */ - /* *INDENT-OFF* */ - mfib_prefix_t pfx = { - .fp_proto = FIB_PROTOCOL_IP6, - .fp_len = 128, - .fp_grp_addr = { - .ip6 = *a->multicast_address, - } - }; - /* *INDENT-ON* */ - - if (a->is_del) - mfib_table_entry_delete (t->rx_fib_index, &pfx, MFIB_SOURCE_SRv6); - else - { - /* - * Construct a replicate DPO that will replicate received packets over - * each tunnel in the policy - */ - dpo_id_t dpo = DPO_INVALID; - - rep = replicate_create (vec_len (pt->tunnel_indices), DPO_PROTO_IP6); - - vec_foreach_index (ii, pt->tunnel_indices) - { - dpo_set (&dpo, sr_dpo_type, DPO_PROTO_IP6, pt->tunnel_indices[ii]); - - replicate_set_bucket (rep, ii, &dpo); - } - - mfib_table_entry_special_add (t->rx_fib_index, - &pfx, - MFIB_SOURCE_SRv6, - MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF, rep); - - dpo_reset (&dpo); - } - - u8 *mcast_copy = 0; - mcast_copy = vec_new (ip6_address_t, 1); - memcpy (mcast_copy, a->multicast_address, sizeof (ip6_address_t)); - - if (a->is_del) - { - hash_unset_mem (sm->policy_index_by_multicast_address, mcast_copy); - vec_free (mcast_copy); - } - else - { - hash_set_mem (sm->policy_index_by_multicast_address, mcast_copy, - pt - sm->policies); - } - - return 0; -} - -/** - * @brief CLI Parser for Adding or Delete a mapping of IP6 multicast address - * to Segment Routing policy. - * - * @param vm vlib_main_t * - * @param input unformat_input_t * - * @param cmd vlib_cli_command_t * - * - * @return error clib_error_t * - */ -static clib_error_t * -sr_add_del_multicast_map_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - int is_del = 0; - ip6_address_t multicast_address; - u8 *policy_name = 0; - int multicast_address_set = 0; - ip6_sr_add_del_multicastmap_args_t _a, *a = &_a; - int rv; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "del")) - is_del = 1; - else - if (unformat - (input, "address %U", unformat_ip6_address, &multicast_address)) - multicast_address_set = 1; - else if (unformat (input, "sr-policy %s", &policy_name)) - ; - else - break; - } - - if (!is_del && !policy_name) - return clib_error_return (0, "name of sr policy required"); - - if (!multicast_address_set) - return clib_error_return (0, "multicast address required"); - - memset (a, 0, sizeof (*a)); - - a->is_del = is_del; - a->multicast_address = &multicast_address; - a->policy_name = policy_name; - - rv = ip6_sr_add_del_multicastmap (a); - - switch (rv) - { - case 0: - break; - case -1: - return clib_error_return (0, "no policy with name: %s", policy_name); - - case -2: - return clib_error_return (0, "multicast map someting "); - - case -3: - return clib_error_return (0, - "tunnel name to associate to SR policy is required"); - - case -7: - return clib_error_return (0, "TODO: deleting policy name %s", - policy_name); - - default: - return clib_error_return (0, "BUG: ip6_sr_add_del_policy returns %d", - rv); - - } - return 0; - -} - - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (sr_multicast_map_command, static) = { - .path = "sr multicast-map", - .short_help = - "sr multicast-map address sr-policy [del]", - .function = sr_add_del_multicast_map_command_fn, -}; -/* *INDENT-ON* */ - -/** - * @brief CLI Parser for Displaying a mapping of IP6 multicast address - * to Segment Routing policy. 
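 *
 * Example output, one line per configured map (values are placeholders):
 *
 *   address: ff0e::1 policy: pol1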
- * - * @param vm vlib_main_t * - * @param input unformat_input_t * - * @param cmd vlib_cli_command_t * - * - * @return error clib_error_t * - */ -static clib_error_t * -show_sr_multicast_map_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - ip6_sr_main_t *sm = &sr_main; - u8 *key = 0; - u32 value; - ip6_address_t multicast_address; - ip6_sr_policy_t *pt; - - /* pull all entries from the hash table into vector for display */ - - /* *INDENT-OFF* */ - hash_foreach_mem (key, value, sm->policy_index_by_multicast_address, - ({ - if (!key) - vlib_cli_output (vm, "no multicast maps configured"); - else - { - multicast_address = *((ip6_address_t *)key); - pt = pool_elt_at_index (sm->policies, value); - if (pt) - { - vlib_cli_output (vm, "address: %U policy: %s", - format_ip6_address, &multicast_address, - pt->name); - } - else - vlib_cli_output (vm, "BUG: policy not found for address: %U with policy index %d", - format_ip6_address, &multicast_address, - value); - - } - - })); - /* *INDENT-ON* */ - - return 0; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (show_sr_multicast_map_command, static) = { - .path = "show sr multicast-map", - .short_help = "show sr multicast-map", - .function = show_sr_multicast_map_fn, -}; -/* *INDENT-ON* */ - - -#define foreach_sr_fix_dst_addr_next \ -_(DROP, "error-drop") - -/** - * @brief Struct for valid next-nodes for SR fix destination address node - */ -typedef enum -{ -#define _(s,n) SR_FIX_DST_ADDR_NEXT_##s, - foreach_sr_fix_dst_addr_next -#undef _ - SR_FIX_DST_ADDR_N_NEXT, -} sr_fix_dst_addr_next_t; - -/** - * @brief Error strings for SR Fix Destination rewrite - */ -static char *sr_fix_dst_error_strings[] = { -#define sr_fix_dst_error(n,s) s, -#include "sr_fix_dst_error.def" -#undef sr_fix_dst_error -}; - -/** - * @brief Struct for errors for SR Fix Destination rewrite - */ -typedef enum -{ -#define sr_fix_dst_error(n,s) SR_FIX_DST_ERROR_##n, -#include "sr_fix_dst_error.def" -#undef sr_fix_dst_error - SR_FIX_DST_N_ERROR, -} sr_fix_dst_error_t; - -/** - * @brief Information for fix address trace - */ -typedef struct -{ - ip6_address_t src, dst; - u32 next_index; - u32 adj_index; - u8 sr[256]; -} sr_fix_addr_trace_t; - -/** - * @brief Formatter for fix address trace - */ -u8 * -format_sr_fix_addr_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - sr_fix_addr_trace_t *t = va_arg (*args, sr_fix_addr_trace_t *); - vnet_hw_interface_t *hi = 0; - ip_adjacency_t *adj; - ip6_main_t *im = &ip6_main; - ip_lookup_main_t *lm = &im->lookup_main; - vnet_main_t *vnm = vnet_get_main (); - - if (t->adj_index != ~0) - { - adj = ip_get_adjacency (lm, t->adj_index); - hi = vnet_get_sup_hw_interface (vnm, adj->rewrite_header.sw_if_index); - } - - s = format (s, "SR-FIX_ADDR: next %s ip6 src %U dst %U\n", - (t->next_index == SR_FIX_DST_ADDR_NEXT_DROP) - ? "drop" : "output", - format_ip6_address, &t->src, format_ip6_address, &t->dst); - if (t->next_index != SR_FIX_DST_ADDR_NEXT_DROP) - { - s = - format (s, "%U\n", format_ip6_sr_header, t->sr, 1 /* print_hmac */ ); - s = - format (s, " output via %s", - hi ? 
(char *) (hi->name) : "Invalid adj"); - } - return s; -} - -/** - * @brief Fix SR destination address - dual-loop - * - * @node sr-fix-dst-addr - * @param vm vlib_main_t * - * @param node vlib_node_runtime_t * - * @param from_frame vlib_frame_t * - * - * @return from_frame->n_vectors uword - */ -static uword -sr_fix_dst_addr (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * from_frame) -{ - u32 n_left_from, next_index, *from, *to_next; - ip6_main_t *im = &ip6_main; - ip_lookup_main_t *lm = &im->lookup_main; - - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - - next_index = node->cached_next_index; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - -#if 0 - while (0 && n_left_from >= 4 && n_left_to_next >= 2) - { - u32 bi0, bi1; - __attribute__ ((unused)) vlib_buffer_t *b0, *b1; - u32 next0 = SR_FIX_DST_ADDR_NEXT_DROP; - u32 next1 = SR_FIX_DST_ADDR_NEXT_DROP; - - /* Prefetch next iteration. */ - { - vlib_buffer_t *p2, *p3; - - p2 = vlib_get_buffer (vm, from[2]); - p3 = vlib_get_buffer (vm, from[3]); - - vlib_prefetch_buffer_header (p2, LOAD); - vlib_prefetch_buffer_header (p3, LOAD); - } - - bi0 = from[0]; - bi1 = from[1]; - to_next[0] = bi0; - to_next[1] = bi1; - from += 2; - to_next += 2; - n_left_to_next -= 2; - n_left_from -= 2; - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - - - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, - to_next, n_left_to_next, - bi0, bi1, next0, next1); - } -#endif - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0; - vlib_buffer_t *b0; - ip6_header_t *ip0; - ip_adjacency_t *adj0; - ip6_sr_header_t *sr0; - u32 next0 = SR_FIX_DST_ADDR_NEXT_DROP; - ip6_address_t *new_dst0; - ethernet_header_t *eh0; - - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - - adj0 = - ip_get_adjacency (lm, vnet_buffer (b0)->ip.adj_index[VLIB_TX]); - next0 = adj0->if_address_index; - - /* We should be pointing at an Ethernet header... */ - eh0 = vlib_buffer_get_current (b0); - ip0 = (ip6_header_t *) (eh0 + 1); - sr0 = (ip6_sr_header_t *) (ip0 + 1); - - /* We'd better find an SR header... */ - if (PREDICT_FALSE (ip0->protocol != IPPROTO_IPV6_ROUTE)) - { - b0->error = node->errors[SR_FIX_DST_ERROR_NO_SR_HEADER]; - goto do_trace0; - } - else - { - /* - * We get here from sr_rewrite or sr_local, with - * sr->segments_left pointing at the (copy of the original) dst - * address. Use it, then increment sr0->segments_left. - */ - - /* Out of segments? Turf the packet */ - if (PREDICT_FALSE (sr0->segments_left == 0)) - { - b0->error = node->errors[SR_FIX_DST_ERROR_NO_MORE_SEGMENTS]; - goto do_trace0; - } - - /* - * Rewrite the packet with the original dst address - * We assume that the last segment (in processing order) contains - * the original dst address. The list is reversed, so sr0->segments - * contains the original dst address. 
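 *
 * Layout reminder, as written by ip6_sr_add_del_tunnel() for three
 * configured hops A, B, C and original destination D:
 *
 *   segments[0] = D   (ultimate-destination slot, filled at rewrite time)
 *   segments[1] = C
 *   segments[2] = B
 *   segments[3] = A   (first hop; first_segment == 3)
 *
 * so reading segments[0] here recovers the original destination.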
- */ - new_dst0 = sr0->segments; - ip0->dst_address.as_u64[0] = new_dst0->as_u64[0]; - ip0->dst_address.as_u64[1] = new_dst0->as_u64[1]; - } - - do_trace0: - - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_fix_addr_trace_t *t = vlib_add_trace (vm, node, - b0, sizeof (*t)); - t->next_index = next0; - t->adj_index = ~0; - - if (next0 != SR_FIX_DST_ADDR_NEXT_DROP) - { - t->adj_index = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; - clib_memcpy (t->src.as_u8, ip0->src_address.as_u8, - sizeof (t->src.as_u8)); - clib_memcpy (t->dst.as_u8, ip0->dst_address.as_u8, - sizeof (t->dst.as_u8)); - clib_memcpy (t->sr, sr0, sizeof (t->sr)); - } - } - - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - } - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - return from_frame->n_vectors; -} - - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (sr_fix_dst_addr_node) = { - .function = sr_fix_dst_addr, - .name = "sr-fix-dst-addr", - /* Takes a vector of packets. */ - .vector_size = sizeof (u32), - .format_trace = format_sr_fix_addr_trace, - .format_buffer = format_ip6_sr_header_with_length, - - .runtime_data_bytes = 0, - - .n_errors = SR_FIX_DST_N_ERROR, - .error_strings = sr_fix_dst_error_strings, - - .n_next_nodes = SR_FIX_DST_ADDR_N_NEXT, - .next_nodes = { -#define _(s,n) [SR_FIX_DST_ADDR_NEXT_##s] = n, - foreach_sr_fix_dst_addr_next -#undef _ - }, -}; - -VLIB_NODE_FUNCTION_MULTIARCH (sr_fix_dst_addr_node, sr_fix_dst_addr) -/* *INDENT-ON* */ - -static clib_error_t * -sr_init (vlib_main_t * vm) -{ - ip6_sr_main_t *sm = &sr_main; - clib_error_t *error = 0; - vlib_node_t *ip6_lookup_node, *ip6_rewrite_node; - - if ((error = vlib_call_init_function (vm, ip_main_init))) - return error; - - if ((error = vlib_call_init_function (vm, ip6_lookup_init))) - return error; - - sm->vlib_main = vm; - sm->vnet_main = vnet_get_main (); - - vec_validate (sm->hmac_keys, 0); - sm->hmac_keys[0].shared_secret = (u8 *) 0xdeadbeef; - - sm->tunnel_index_by_key = - hash_create_mem (0, sizeof (ip6_sr_tunnel_key_t), sizeof (uword)); - - sm->tunnel_index_by_name = hash_create_string (0, sizeof (uword)); - - sm->policy_index_by_policy_name = hash_create_string (0, sizeof (uword)); - - sm->policy_index_by_multicast_address = - hash_create_mem (0, sizeof (ip6_address_t), sizeof (uword)); - - sm->hmac_key_by_shared_secret = hash_create_string (0, sizeof (uword)); - - ip6_register_protocol (IPPROTO_IPV6_ROUTE, sr_local_node.index); - - ip6_lookup_node = vlib_get_node_by_name (vm, (u8 *) "ip6-lookup"); - ASSERT (ip6_lookup_node); - - ip6_rewrite_node = vlib_get_node_by_name (vm, (u8 *) "ip6-rewrite"); - ASSERT (ip6_rewrite_node); - - /* Add a disposition to ip6_rewrite for the sr dst address hack node */ - sm->ip6_rewrite_sr_next_index = - vlib_node_add_next (vm, ip6_rewrite_node->index, - sr_fix_dst_addr_node.index); - - OpenSSL_add_all_digests (); - - sm->md = (void *) EVP_get_digestbyname ("sha1"); - sm->hmac_ctx = clib_mem_alloc (sizeof (HMAC_CTX)); - - sr_dpo_type = dpo_register_new_type (&sr_dpo_vft, sr_nodes); - sr_fib_node_type = fib_node_register_new_type (&sr_fib_vft); - - return error; -} - -VLIB_INIT_FUNCTION (sr_init); - -/** - * @brief Definition of next-nodes for SR local - */ -#define foreach_sr_local_next \ - _ (ERROR, "error-drop") \ - _ (IP6_LOOKUP, "ip6-lookup") - -/** - * @brief Struct for definition of next-nodes for SR local - */ -typedef enum -{ -#define _(s,n) SR_LOCAL_NEXT_##s, - foreach_sr_local_next -#undef _ - SR_LOCAL_N_NEXT, -} 
sr_local_next_t; - -/** - * @brief Struct for packet trace of SR local - */ -typedef struct -{ - u8 next_index; - u8 sr_valid; - ip6_address_t src, dst; - u16 length; - u8 sr[256]; -} sr_local_trace_t; - -/** - * @brief Definition of SR local error-strings - */ -static char *sr_local_error_strings[] = { -#define sr_error(n,s) s, -#include "sr_error.def" -#undef sr_error -}; - -/** - * @brief Struct for definition of SR local error-strings - */ -typedef enum -{ -#define sr_error(n,s) SR_LOCAL_ERROR_##n, -#include "sr_error.def" -#undef sr_error - SR_LOCAL_N_ERROR, -} sr_local_error_t; - -/** - * @brief Format SR local trace - * - * @param s u8 * - * @param args va_list * - * - * @return s u8 * - */ -u8 * -format_sr_local_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - sr_local_trace_t *t = va_arg (*args, sr_local_trace_t *); - - s = format (s, "SR-LOCAL: src %U dst %U len %u next_index %d", - format_ip6_address, &t->src, - format_ip6_address, &t->dst, t->length, t->next_index); - if (t->sr_valid) - s = - format (s, "\n %U", format_ip6_sr_header, t->sr, 1 /* print_hmac */ ); - else - s = format (s, "\n popped SR header"); - - return s; -} - - -/* $$$$ fixme: smp, don't copy data, cache input, output (maybe) */ -/** - * @brief Validate the SR HMAC - * - * @param sm ip6_sr_main_t * - * @param ip ip6_header_t * - * @param sr ip6_sr_header_t * - * - * @return retval int - */ -static int -sr_validate_hmac (ip6_sr_main_t * sm, ip6_header_t * ip, ip6_sr_header_t * sr) -{ - u32 key_index; - static u8 *keybuf; - u8 *copy_target; - int first_segment; - ip6_address_t *addrp; - int i; - ip6_sr_hmac_key_t *hmac_key; - static u8 *signature; - u32 sig_len; - - key_index = sr->hmac_key; - - /* No signature? Pass... */ - if (key_index == 0) - return 0; - - /* We don't know about this key? Fail... */ - if (key_index >= vec_len (sm->hmac_keys)) - return 1; - - vec_validate (signature, SHA256_DIGEST_LENGTH - 1); - - hmac_key = sm->hmac_keys + key_index; - - vec_reset_length (keybuf); - - /* pkt ip6 src address */ - vec_add2 (keybuf, copy_target, sizeof (ip6_address_t)); - clib_memcpy (copy_target, ip->src_address.as_u8, sizeof (ip6_address_t)); - - /* last segment */ - vec_add2 (keybuf, copy_target, 1); - copy_target[0] = sr->first_segment; - - /* octet w/ bit 0 = "clean" flag */ - vec_add2 (keybuf, copy_target, 1); - copy_target[0] - = (sr->flags & clib_host_to_net_u16 (IP6_SR_HEADER_FLAG_CLEANUP)) - ? 
0x80 : 0; - - /* hmac key id */ - vec_add2 (keybuf, copy_target, 1); - copy_target[0] = sr->hmac_key; - - first_segment = sr->first_segment; - - addrp = sr->segments; - - /* segments */ - for (i = 0; i <= first_segment; i++) - { - vec_add2 (keybuf, copy_target, sizeof (ip6_address_t)); - clib_memcpy (copy_target, addrp->as_u8, sizeof (ip6_address_t)); - addrp++; - } - - if (sm->is_debug) - clib_warning ("verify key index %d keybuf: %U", key_index, - format_hex_bytes, keybuf, vec_len (keybuf)); - - /* shared secret */ - - /* SHA1 is shorter than SHA-256 */ - memset (signature, 0, vec_len (signature)); - - HMAC_CTX_init (sm->hmac_ctx); - if (!HMAC_Init (sm->hmac_ctx, hmac_key->shared_secret, - vec_len (hmac_key->shared_secret), sm->md)) - clib_warning ("barf1"); - if (!HMAC_Update (sm->hmac_ctx, keybuf, vec_len (keybuf))) - clib_warning ("barf2"); - if (!HMAC_Final (sm->hmac_ctx, signature, &sig_len)) - clib_warning ("barf3"); - HMAC_CTX_cleanup (sm->hmac_ctx); - - if (sm->is_debug) - clib_warning ("computed signature len %d, value %U", sig_len, - format_hex_bytes, signature, vec_len (signature)); - - /* Point at the SHA signature in the packet */ - addrp++; - if (sm->is_debug) - clib_warning ("read signature %U", format_hex_bytes, addrp, - SHA256_DIGEST_LENGTH); - - return memcmp (signature, addrp, SHA256_DIGEST_LENGTH); -} - -/** - * @brief SR local node - * @node sr-local - * - * @param vm vlib_main_t * - * @param node vlib_node_runtime_t * - * @param from_frame vlib_frame_t * - * - * @return from_frame->n_vectors uword - */ -static uword -sr_local (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * from_frame) -{ - u32 n_left_from, next_index, *from, *to_next; - ip6_sr_main_t *sm = &sr_main; - u32 (*sr_local_cb) (vlib_main_t *, vlib_node_runtime_t *, - vlib_buffer_t *, ip6_header_t *, ip6_sr_header_t *); - sr_local_cb = sm->sr_local_cb; - - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - - next_index = node->cached_next_index; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from >= 4 && n_left_to_next >= 2) - { - u32 bi0, bi1; - vlib_buffer_t *b0, *b1; - ip6_header_t *ip0, *ip1; - ip6_sr_header_t *sr0, *sr1; - ip6_address_t *new_dst0, *new_dst1; - u32 next0 = SR_LOCAL_NEXT_IP6_LOOKUP; - u32 next1 = SR_LOCAL_NEXT_IP6_LOOKUP; - - /* Prefetch next iteration. */ - { - vlib_buffer_t *p2, *p3; - - p2 = vlib_get_buffer (vm, from[2]); - p3 = vlib_get_buffer (vm, from[3]); - - vlib_prefetch_buffer_header (p2, LOAD); - vlib_prefetch_buffer_header (p3, LOAD); - - CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD); - CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD); - } - - bi0 = from[0]; - bi1 = from[1]; - to_next[0] = bi0; - to_next[1] = bi1; - from += 2; - to_next += 2; - n_left_to_next -= 2; - n_left_from -= 2; - - - b0 = vlib_get_buffer (vm, bi0); - ip0 = vlib_buffer_get_current (b0); - sr0 = (ip6_sr_header_t *) (ip0 + 1); - if (PREDICT_FALSE - (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) - { - ip6_hop_by_hop_ext_t *ext_hdr = - (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0); - sr0 = - (ip6_sr_header_t *) ip6_ext_next_header ((ip6_ext_header_t *) - ext_hdr); - } - - if (PREDICT_FALSE (sr0->type != ROUTING_HEADER_TYPE_SR)) - { - next0 = SR_LOCAL_NEXT_ERROR; - b0->error = - node->errors[SR_LOCAL_ERROR_BAD_ROUTING_HEADER_TYPE]; - goto do_trace0; - } - - /* Out of segments? 
Turf the packet */ - if (PREDICT_FALSE (sr0->segments_left == 0)) - { - next0 = SR_LOCAL_NEXT_ERROR; - b0->error = node->errors[SR_LOCAL_ERROR_NO_MORE_SEGMENTS]; - goto do_trace0; - } - - if (PREDICT_FALSE (sm->validate_hmac)) - { - if (sr_validate_hmac (sm, ip0, sr0)) - { - next0 = SR_LOCAL_NEXT_ERROR; - b0->error = node->errors[SR_LOCAL_ERROR_HMAC_INVALID]; - goto do_trace0; - } - } - - next0 = sr_local_cb ? sr_local_cb (vm, node, b0, ip0, sr0) : next0; - - /* - * To suppress rewrite, return ~SR_LOCAL_NEXT_xxx - */ - if (PREDICT_FALSE (next0 & 0x80000000)) - { - next0 ^= 0xFFFFFFFF; - if (PREDICT_FALSE (next0 == SR_LOCAL_NEXT_ERROR)) - b0->error = node->errors[SR_LOCAL_ERROR_APP_CALLBACK]; - } - else - { - u32 segment_index0; - - segment_index0 = sr0->segments_left - 1; - - /* Rewrite the packet */ - new_dst0 = (ip6_address_t *) (sr0->segments + segment_index0); - ip0->dst_address.as_u64[0] = new_dst0->as_u64[0]; - ip0->dst_address.as_u64[1] = new_dst0->as_u64[1]; - - if (PREDICT_TRUE (sr0->segments_left > 0)) - sr0->segments_left -= 1; - } - - /* End of the path. Clean up the SR header, or not */ - if (PREDICT_FALSE - (sr0->segments_left == 0 && - (sr0->flags & - clib_host_to_net_u16 (IP6_SR_HEADER_FLAG_CLEANUP)))) - { - u64 *copy_dst0, *copy_src0; - u16 new_l0; - u32 copy_len_u64s0 = 0; - int i; - - /* - * Copy the ip6 header right by the (real) length of the - * sr header. - */ - if (PREDICT_FALSE - (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) - { - ip6_hop_by_hop_ext_t *ext_hdr = - (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0); - copy_len_u64s0 = - (((ip6_ext_header_t *) ext_hdr)->n_data_u64s) + 1; - ext_hdr->next_hdr = sr0->protocol; - } - else - { - ip0->protocol = sr0->protocol; - } - vlib_buffer_advance (b0, (sr0->length + 1) * 8); - - new_l0 = clib_net_to_host_u16 (ip0->payload_length) - - (sr0->length + 1) * 8; - ip0->payload_length = clib_host_to_net_u16 (new_l0); - - copy_src0 = (u64 *) ip0; - copy_dst0 = copy_src0 + (sr0->length + 1); - - copy_dst0[4 + copy_len_u64s0] = copy_src0[4 + copy_len_u64s0]; - copy_dst0[3 + copy_len_u64s0] = copy_src0[3 + copy_len_u64s0]; - copy_dst0[2 + copy_len_u64s0] = copy_src0[2 + copy_len_u64s0]; - copy_dst0[1 + copy_len_u64s0] = copy_src0[1 + copy_len_u64s0]; - copy_dst0[0 + copy_len_u64s0] = copy_src0[0 + copy_len_u64s0]; - - for (i = copy_len_u64s0 - 1; i >= 0; i--) - { - copy_dst0[i] = copy_src0[i]; - } - - sr0 = 0; - } - - do_trace0: - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_local_trace_t *tr = vlib_add_trace (vm, node, - b0, sizeof (*tr)); - clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, - sizeof (tr->src.as_u8)); - clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - tr->length = vlib_buffer_length_in_chain (vm, b0); - tr->next_index = next0; - tr->sr_valid = sr0 != 0; - if (tr->sr_valid) - clib_memcpy (tr->sr, sr0, sizeof (tr->sr)); - } - - b1 = vlib_get_buffer (vm, bi1); - ip1 = vlib_buffer_get_current (b1); - sr1 = (ip6_sr_header_t *) (ip1 + 1); - if (PREDICT_FALSE - (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) - { - - ip6_hop_by_hop_ext_t *ext_hdr = - (ip6_hop_by_hop_ext_t *) ip6_next_header (ip1); - sr1 = - (ip6_sr_header_t *) ip6_ext_next_header ((ip6_ext_header_t *) - ext_hdr); - } - - if (PREDICT_FALSE (sr1->type != ROUTING_HEADER_TYPE_SR)) - { - next1 = SR_LOCAL_NEXT_ERROR; - b1->error = - node->errors[SR_LOCAL_ERROR_BAD_ROUTING_HEADER_TYPE]; - goto do_trace1; - } - - /* Out of segments? 
Turf the packet */ - if (PREDICT_FALSE (sr1->segments_left == 0)) - { - next1 = SR_LOCAL_NEXT_ERROR; - b1->error = node->errors[SR_LOCAL_ERROR_NO_MORE_SEGMENTS]; - goto do_trace1; - } - - if (PREDICT_FALSE (sm->validate_hmac)) - { - if (sr_validate_hmac (sm, ip1, sr1)) - { - next1 = SR_LOCAL_NEXT_ERROR; - b1->error = node->errors[SR_LOCAL_ERROR_HMAC_INVALID]; - goto do_trace1; - } - } - - next1 = sr_local_cb ? sr_local_cb (vm, node, b1, ip1, sr1) : next1; - - /* - * To suppress rewrite, return ~SR_LOCAL_NEXT_xxx - */ - if (PREDICT_FALSE (next1 & 0x80000000)) - { - next1 ^= 0xFFFFFFFF; - if (PREDICT_FALSE (next1 == SR_LOCAL_NEXT_ERROR)) - b1->error = node->errors[SR_LOCAL_ERROR_APP_CALLBACK]; - } - else - { - u32 segment_index1; - - segment_index1 = sr1->segments_left - 1; - - /* Rewrite the packet */ - new_dst1 = (ip6_address_t *) (sr1->segments + segment_index1); - ip1->dst_address.as_u64[0] = new_dst1->as_u64[0]; - ip1->dst_address.as_u64[1] = new_dst1->as_u64[1]; - - if (PREDICT_TRUE (sr1->segments_left > 0)) - sr1->segments_left -= 1; - } - - /* End of the path. Clean up the SR header, or not */ - if (PREDICT_FALSE - (sr1->segments_left == 0 && - (sr1->flags & - clib_host_to_net_u16 (IP6_SR_HEADER_FLAG_CLEANUP)))) - { - u64 *copy_dst1, *copy_src1; - u16 new_l1; - u32 copy_len_u64s1 = 0; - int i; - - /* - * Copy the ip6 header right by the (real) length of the - * sr header. - */ - if (PREDICT_FALSE - (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) - { - ip6_hop_by_hop_ext_t *ext_hdr = - (ip6_hop_by_hop_ext_t *) ip6_next_header (ip1); - copy_len_u64s1 = - (((ip6_ext_header_t *) ext_hdr)->n_data_u64s) + 1; - ext_hdr->next_hdr = sr1->protocol; - } - else - { - ip1->protocol = sr1->protocol; - } - vlib_buffer_advance (b1, (sr1->length + 1) * 8); - - new_l1 = clib_net_to_host_u16 (ip1->payload_length) - - (sr1->length + 1) * 8; - ip1->payload_length = clib_host_to_net_u16 (new_l1); - - copy_src1 = (u64 *) ip1; - copy_dst1 = copy_src1 + (sr1->length + 1); - - copy_dst1[4 + copy_len_u64s1] = copy_src1[4 + copy_len_u64s1]; - copy_dst1[3 + copy_len_u64s1] = copy_src1[3 + copy_len_u64s1]; - copy_dst1[2 + copy_len_u64s1] = copy_src1[2 + copy_len_u64s1]; - copy_dst1[1 + copy_len_u64s1] = copy_src1[1 + copy_len_u64s1]; - copy_dst1[0 + copy_len_u64s1] = copy_src1[0 + copy_len_u64s1]; - - for (i = copy_len_u64s1 - 1; i >= 0; i--) - { - copy_dst1[i] = copy_src1[i]; - } - - sr1 = 0; - } - - do_trace1: - if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_local_trace_t *tr = vlib_add_trace (vm, node, - b1, sizeof (*tr)); - clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8, - sizeof (tr->src.as_u8)); - clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - tr->length = vlib_buffer_length_in_chain (vm, b1); - tr->next_index = next1; - tr->sr_valid = sr1 != 0; - if (tr->sr_valid) - clib_memcpy (tr->sr, sr1, sizeof (tr->sr)); - } - - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, - to_next, n_left_to_next, - bi0, bi1, next0, next1); - } - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0; - vlib_buffer_t *b0; - ip6_header_t *ip0 = 0; - ip6_sr_header_t *sr0; - ip6_address_t *new_dst0; - u32 next0 = SR_LOCAL_NEXT_IP6_LOOKUP; - - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - ip0 = vlib_buffer_get_current (b0); - sr0 = (ip6_sr_header_t *) (ip0 + 1); - - if (PREDICT_FALSE - (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) - { - 
ip6_hop_by_hop_ext_t *ext_hdr = - (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0); - sr0 = - (ip6_sr_header_t *) ip6_ext_next_header ((ip6_ext_header_t *) - ext_hdr); - } - if (PREDICT_FALSE (sr0->type != ROUTING_HEADER_TYPE_SR)) - { - next0 = SR_LOCAL_NEXT_ERROR; - b0->error = - node->errors[SR_LOCAL_ERROR_BAD_ROUTING_HEADER_TYPE]; - goto do_trace; - } - - /* Out of segments? Turf the packet */ - if (PREDICT_FALSE (sr0->segments_left == 0)) - { - next0 = SR_LOCAL_NEXT_ERROR; - b0->error = node->errors[SR_LOCAL_ERROR_NO_MORE_SEGMENTS]; - goto do_trace; - } - - if (PREDICT_FALSE (sm->validate_hmac)) - { - if (sr_validate_hmac (sm, ip0, sr0)) - { - next0 = SR_LOCAL_NEXT_ERROR; - b0->error = node->errors[SR_LOCAL_ERROR_HMAC_INVALID]; - goto do_trace; - } - } - - next0 = sr_local_cb ? sr_local_cb (vm, node, b0, ip0, sr0) : next0; - - /* - * To suppress rewrite, return ~SR_LOCAL_NEXT_xxx - */ - if (PREDICT_FALSE (next0 & 0x80000000)) - { - next0 ^= 0xFFFFFFFF; - if (PREDICT_FALSE (next0 == SR_LOCAL_NEXT_ERROR)) - b0->error = node->errors[SR_LOCAL_ERROR_APP_CALLBACK]; - } - else - { - u32 segment_index0; - - segment_index0 = sr0->segments_left - 1; - - /* Rewrite the packet */ - new_dst0 = (ip6_address_t *) (sr0->segments + segment_index0); - ip0->dst_address.as_u64[0] = new_dst0->as_u64[0]; - ip0->dst_address.as_u64[1] = new_dst0->as_u64[1]; - - if (PREDICT_TRUE (sr0->segments_left > 0)) - sr0->segments_left -= 1; - } - - /* End of the path. Clean up the SR header, or not */ - if (PREDICT_FALSE - (sr0->segments_left == 0 && - (sr0->flags & - clib_host_to_net_u16 (IP6_SR_HEADER_FLAG_CLEANUP)))) - { - u64 *copy_dst0, *copy_src0; - u16 new_l0; - u32 copy_len_u64s0 = 0; - int i; - - /* - * Copy the ip6 header right by the (real) length of the - * sr header. - */ - if (PREDICT_FALSE - (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) - { - ip6_hop_by_hop_ext_t *ext_hdr = - (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0); - copy_len_u64s0 = - (((ip6_ext_header_t *) ext_hdr)->n_data_u64s) + 1; - ext_hdr->next_hdr = sr0->protocol; - } - else - { - ip0->protocol = sr0->protocol; - } - - vlib_buffer_advance (b0, (sr0->length + 1) * 8); - - new_l0 = clib_net_to_host_u16 (ip0->payload_length) - - (sr0->length + 1) * 8; - ip0->payload_length = clib_host_to_net_u16 (new_l0); - - copy_src0 = (u64 *) ip0; - copy_dst0 = copy_src0 + (sr0->length + 1); - copy_dst0[4 + copy_len_u64s0] = copy_src0[4 + copy_len_u64s0]; - copy_dst0[3 + copy_len_u64s0] = copy_src0[3 + copy_len_u64s0]; - copy_dst0[2 + copy_len_u64s0] = copy_src0[2 + copy_len_u64s0]; - copy_dst0[1 + copy_len_u64s0] = copy_src0[1 + copy_len_u64s0]; - copy_dst0[0 + copy_len_u64s0] = copy_src0[0 + copy_len_u64s0]; - - for (i = copy_len_u64s0 - 1; i >= 0; i--) - { - copy_dst0[i] = copy_src0[i]; - } - - sr0 = 0; - } - - do_trace: - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_local_trace_t *tr = vlib_add_trace (vm, node, - b0, sizeof (*tr)); - clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, - sizeof (tr->src.as_u8)); - clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - tr->length = vlib_buffer_length_in_chain (vm, b0); - tr->next_index = next0; - tr->sr_valid = sr0 != 0; - if (tr->sr_valid) - clib_memcpy (tr->sr, sr0, sizeof (tr->sr)); - } - - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - } - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - vlib_node_increment_counter (vm, sr_local_node.index, - SR_LOCAL_ERROR_PKTS_PROCESSED, - 
from_frame->n_vectors); - return from_frame->n_vectors; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (sr_local_node, static) = { - .function = sr_local, - .name = "sr-local", - /* Takes a vector of packets. */ - .vector_size = sizeof (u32), - .format_trace = format_sr_local_trace, - - .runtime_data_bytes = 0, - - .n_errors = SR_LOCAL_N_ERROR, - .error_strings = sr_local_error_strings, - - .n_next_nodes = SR_LOCAL_N_NEXT, - .next_nodes = { -#define _(s,n) [SR_LOCAL_NEXT_##s] = n, - foreach_sr_local_next -#undef _ - }, -}; - -VLIB_NODE_FUNCTION_MULTIARCH (sr_local_node, sr_local) -/* *INDENT-ON* */ - -ip6_sr_main_t * -sr_get_main (vlib_main_t * vm) -{ - vlib_call_init_function (vm, sr_init); - ASSERT (sr_local_node.index); - return &sr_main; -} - -/** - * @brief CLI parser for SR fix destination rewrite node - * - * @param vm vlib_main_t * - * @param input unformat_input_t * - * @param cmd vlib_cli_command_t * - * - * @return error clib_error_t * - */ -static clib_error_t * -set_ip6_sr_rewrite_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - fib_prefix_t pfx = { - .fp_proto = FIB_PROTOCOL_IP6, - .fp_len = 128, - }; - u32 fib_index = 0; - u32 fib_id = 0; - u32 adj_index; - ip_adjacency_t *adj; - vnet_hw_interface_t *hi; - u32 sw_if_index; - ip6_sr_main_t *sm = &sr_main; - vnet_main_t *vnm = vnet_get_main (); - fib_node_index_t fei; - - if (!unformat (input, "%U", unformat_ip6_address, &pfx.fp_addr.ip6)) - return clib_error_return (0, "ip6 address missing in '%U'", - format_unformat_error, input); - - if (unformat (input, "rx-table-id %d", &fib_id)) - { - fib_index = fib_table_id_find_fib_index (FIB_PROTOCOL_IP6, fib_id); - if (fib_index == ~0) - return clib_error_return (0, "fib-id %d not found", fib_id); - } - - fei = fib_table_lookup_exact_match (fib_index, &pfx); - - if (FIB_NODE_INDEX_INVALID == fei) - return clib_error_return (0, "no match for %U", - format_ip6_address, &pfx.fp_addr.ip6); - - adj_index = fib_entry_get_adj_for_source (fei, FIB_SOURCE_SR); - - if (ADJ_INDEX_INVALID == adj_index) - return clib_error_return (0, "%U not SR sourced", - format_ip6_address, &pfx.fp_addr.ip6); - - adj = adj_get (adj_index); - - if (adj->lookup_next_index != IP_LOOKUP_NEXT_REWRITE) - return clib_error_return (0, "%U unresolved (not a rewrite adj)", - format_ip6_address, &pfx.fp_addr.ip6); - - adj->rewrite_header.next_index = sm->ip6_rewrite_sr_next_index; - - sw_if_index = adj->rewrite_header.sw_if_index; - hi = vnet_get_sup_hw_interface (vnm, sw_if_index); - adj->rewrite_header.node_index = sr_fix_dst_addr_node.index; - - /* $$$$$ hack... 
steal the interface address index */ - adj->if_address_index = - vlib_node_add_next (vm, sr_fix_dst_addr_node.index, - hi->output_node_index); - - return 0; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (set_ip6_sr_rewrite, static) = { - .path = "set ip6 sr rewrite", - .short_help = "set ip6 sr rewrite [fib-id ]", - .function = set_ip6_sr_rewrite_fn, -}; -/* *INDENT-ON* */ - -/** - * @brief Register a callback routine to set next0 in sr_local - * - * @param cb void * - */ -void -vnet_register_sr_app_callback (void *cb) -{ - ip6_sr_main_t *sm = &sr_main; - - sm->sr_local_cb = cb; -} - -/** - * @brief Test routine for validation of HMAC - */ -static clib_error_t * -test_sr_hmac_validate_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - ip6_sr_main_t *sm = &sr_main; - - if (unformat (input, "validate on")) - sm->validate_hmac = 1; - else if (unformat (input, "chunk-offset off")) - sm->validate_hmac = 0; - else - return clib_error_return (0, "expected validate on|off in '%U'", - format_unformat_error, input); - - vlib_cli_output (vm, "hmac signature validation %s", - sm->validate_hmac ? "on" : "off"); - return 0; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (test_sr_hmac_validate, static) = { - .path = "test sr hmac", - .short_help = "test sr hmac validate [on|off]", - .function = test_sr_hmac_validate_fn, -}; -/* *INDENT-ON* */ - -/** - * @brief Add or Delete HMAC key - * - * @param sm ip6_sr_main_t * - * @param key_id u32 - * @param shared_secret u8 * - * @param is_del u8 - * - * @return retval i32 - */ -// $$$ fixme shouldn't return i32 -i32 -sr_hmac_add_del_key (ip6_sr_main_t * sm, u32 key_id, u8 * shared_secret, - u8 is_del) -{ - u32 index; - ip6_sr_hmac_key_t *key; - - if (is_del == 0) - { - /* Specific key in use? Fail. */ - if (key_id && vec_len (sm->hmac_keys) > key_id - && sm->hmac_keys[key_id].shared_secret) - return -2; - - index = key_id; - key = find_or_add_shared_secret (sm, shared_secret, &index); - ASSERT (index == key_id); - return 0; - } - - /* delete */ - - if (key_id) /* delete by key ID */ - { - if (vec_len (sm->hmac_keys) <= key_id) - return -3; - - key = sm->hmac_keys + key_id; - - hash_unset_mem (sm->hmac_key_by_shared_secret, key->shared_secret); - vec_free (key->shared_secret); - return 0; - } - - index = 0; - key = find_or_add_shared_secret (sm, shared_secret, &index); - hash_unset_mem (sm->hmac_key_by_shared_secret, key->shared_secret); - vec_free (key->shared_secret); - return 0; -} - - -static clib_error_t * -sr_hmac_add_del_key_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - ip6_sr_main_t *sm = &sr_main; - u8 is_del = 0; - u32 key_id = 0; - u8 key_id_set = 0; - u8 *shared_secret = 0; - i32 rv; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "del")) - is_del = 1; - else if (unformat (input, "id %d", &key_id)) - key_id_set = 1; - else if (unformat (input, "key %s", &shared_secret)) - { - /* Do not include the trailing NULL byte. 
Guaranteed interop issue */ - _vec_len (shared_secret) -= 1; - } - else - break; - } - - if (is_del == 0 && shared_secret == 0) - return clib_error_return (0, "shared secret must be set to add a key"); - - if (shared_secret == 0 && key_id_set == 0) - return clib_error_return (0, "shared secret and key id both unset"); - - rv = sr_hmac_add_del_key (sm, key_id, shared_secret, is_del); - - vec_free (shared_secret); - - switch (rv) - { - case 0: - break; - - default: - return clib_error_return (0, "sr_hmac_add_del_key returned %d", rv); - } - - return 0; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (sr_hmac, static) = { - .path = "sr hmac", - .short_help = "sr hmac [del] id key ", - .function = sr_hmac_add_del_key_fn, -}; -/* *INDENT-ON* */ - -/** - * @brief CLI parser for show HMAC key shared secrets - * - * @param vm vlib_main_t * - * @param input unformat_input_t * - * @param cmd vlib_cli_command_t * - * - * @return error clib_error_t * - */ -static clib_error_t * -show_sr_hmac_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - ip6_sr_main_t *sm = &sr_main; - int i; - - for (i = 1; i < vec_len (sm->hmac_keys); i++) - { - if (sm->hmac_keys[i].shared_secret) - vlib_cli_output (vm, "[%d]: %v", i, sm->hmac_keys[i].shared_secret); - } - - return 0; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (show_sr_hmac, static) = { - .path = "show sr hmac", - .short_help = "show sr hmac", - .function = show_sr_hmac_fn, -}; -/* *INDENT-ON* */ - -/** - * @brief Test for SR debug flag - * - * @param vm vlib_main_t * - * @param input unformat_input_t * - * @param cmd vlib_cli_command_t * - * - * @return error clib_error_t * - */ -static clib_error_t * -test_sr_debug_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - ip6_sr_main_t *sm = &sr_main; - - if (unformat (input, "on")) - sm->is_debug = 1; - else if (unformat (input, "off")) - sm->is_debug = 0; - else - return clib_error_return (0, "expected on|off in '%U'", - format_unformat_error, input); - - vlib_cli_output (vm, "debug trace now %s", sm->is_debug ? "on" : "off"); - - return 0; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (test_sr_debug, static) = { - .path = "test sr debug", - .short_help = "test sr debug on|off", - .function = test_sr_debug_fn, -}; -/* *INDENT-ON* */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ +* fd.io coding-style-patch-verification: ON +* +* Local Variables: +* eval: (c-set-style "gnu") +* End: +*/ diff --git a/src/vnet/sr/sr.h b/src/vnet/sr/sr.h old mode 100644 new mode 100755 index 3c50b735..eb781e4b --- a/src/vnet/sr/sr.h +++ b/src/vnet/sr/sr.h @@ -12,256 +12,283 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + /** * @file - * @brief Segment Routing header + * @brief Segment Routing data structures definitions + * */ + #ifndef included_vnet_sr_h #define included_vnet_sr_h #include #include #include +#include -#include #include #include -#include -#include -#include -#include +#define IPv6_DEFAULT_HEADER_LENGTH 40 +#define IPv6_DEFAULT_HOP_LIMIT 64 +#define IPv6_DEFAULT_MAX_MASK_WIDTH 128 -/** - * @brief Segment Route tunnel key - */ -typedef struct -{ - ip6_address_t src; - ip6_address_t dst; -} ip6_sr_tunnel_key_t; +#define SR_BEHAVIOR_END 1 +#define SR_BEHAVIOR_X 2 +#define SR_BEHAVIOR_D_FIRST 3 /* Unused. 
Separator in between regular and D */ +#define SR_BEHAVIOR_DX2 4 +#define SR_BEHAVIOR_DX6 5 +#define SR_BEHAVIOR_DX4 6 +#define SR_BEHAVIOR_DT6 7 +#define SR_BEHAVIOR_DT4 8 +#define SR_BEHAVIOR_LAST 9 /* Must always be the last one */ + +#define SR_STEER_L2 2 +#define SR_STEER_IPV4 4 +#define SR_STEER_IPV6 6 + +#define SR_FUNCTION_SIZE 4 +#define SR_ARGUMENT_SIZE 4 + +#define SR_SEGMENT_LIST_WEIGHT_DEFAULT 1 /** - * @brief Segment Route tunnel + * @brief SR Segment List (SID list) */ typedef struct { - /** src, dst address */ - ip6_sr_tunnel_key_t key; - - /** Pptional tunnel name */ - u8 *name; + ip6_address_t *segments; /**< SIDs (key) */ - /** Mask width for FIB entry */ - u32 dst_mask_width; + u32 weight; /**< SID list weight (wECMP / UCMP) */ - /** First hop, to save 1 elt in the segment list */ - ip6_address_t first_hop; + u8 *rewrite; /**< Precomputed rewrite header */ + u8 *rewrite_bsid; /**< Precomputed rewrite header for bindingSID */ - /** RX Fib index */ - u32 rx_fib_index; - /** TX Fib index */ - u32 tx_fib_index; + dpo_id_t bsid_dpo; /**< DPO for Encaps/Insert for BSID */ + dpo_id_t ip6_dpo; /**< DPO for Encaps/Insert IPv6 */ + dpo_id_t ip4_dpo; /**< DPO for Encaps IPv6 */ +} ip6_sr_sl_t; - /** The actual ip6 SR header */ - u8 *rewrite; +/* SR policy types */ +#define SR_POLICY_TYPE_DEFAULT 0 +#define SR_POLICY_TYPE_SPRAY 1 +/** + * @brief SR Policy + */ +typedef struct +{ + u32 *segments_lists; /**< SID lists indexes (vector) */ - /** Indicates that this tunnel is part of a policy comprising - of multiple tunnels. If == ~0 tunnel is not part of a policy */ - u32 policy_index; + ip6_address_t bsid; /**< BindingSID (key) */ - /** - * The FIB node graph linkage - */ - fib_node_t node; + u8 type; /**< Type (default is 0) */ - /** - * The FIB entry index for the first hop. We track this so we - * don't need an extra lookup for it in the data plane - */ - fib_node_index_t fib_entry_index; + /* SR Policy specific DPO */ + /* IF Type = DEFAULT Then Load Balancer DPO among SID lists */ + /* IF Type = SPRAY then Spray DPO with all SID lists */ + dpo_id_t bsid_dpo; /**< SR Policy specific DPO - BSID */ + dpo_id_t ip4_dpo; /**< SR Policy specific DPO - IPv6 */ + dpo_id_t ip6_dpo; /**< SR Policy specific DPO - IPv4 */ - /** - * This tunnel's sibling index in the children of the FIB entry - */ - u32 sibling_index; + u32 fib_table; /**< FIB table */ - /** - * The DPO contributed by the first-hop FIB entry. - */ - dpo_id_t first_hop_dpo; -} ip6_sr_tunnel_t; + u8 is_encap; /**< Mode (0 is SRH insert, 1 Encaps) */ +} ip6_sr_policy_t; /** - * @brief Shared secret for keyed-hash message authentication code (HMAC). + * @brief SR LocalSID */ typedef struct { - u8 *shared_secret; -} ip6_sr_hmac_key_t; + ip6_address_t localsid; /**< LocalSID IPv6 address */ -/** - * @brief Args required for add/del tunnel. - * - * Else we end up passing a LOT of parameters around. - */ -typedef struct -{ - /** Key (header imposition case) */ - ip6_address_t *src_address; - ip6_address_t *dst_address; - u32 dst_mask_width; - u32 rx_table_id; - u32 tx_table_id; + char end_psp; /**< Combined with End.PSP? 
*/ - /** optional name argument - for referencing SR tunnel/policy by name */ - u8 *name; + u16 behavior; /**< Behavior associated to this localsid */ - /** optional policy name */ - u8 *policy_name; + union + { + u32 sw_if_index; /**< xconnect only */ + u32 vrf_index; /**< vrf only */ + }; - /** segment list, when inserting an ip6 SR header */ - ip6_address_t *segments; + u32 fib_table; /**< FIB table where localsid is registered */ - /** - * "Tag" list, aka segments inserted at the end of the list, - * past last_seg - */ - ip6_address_t *tags; + u32 vlan_index; /**< VLAN tag (not an index) */ - /** Shared secret => generate SHA-256 HMAC security fields */ - u8 *shared_secret; + ip46_address_t next_hop; /**< Next_hop for xconnect usage only */ - /** Flags, e.g. cleanup, policy-list flags */ - u16 flags_net_byte_order; + u32 nh_adj; /**< Next_adj for xconnect usage only */ - /** Delete the tunnnel? */ - u8 is_del; -} ip6_sr_add_del_tunnel_args_t; + void *plugin_mem; /**< Memory to be used by the plugin callback functions */ +} ip6_sr_localsid_t; + +typedef int (sr_plugin_callback_t) (ip6_sr_localsid_t * localsid); /** - * @brief Args for creating a policy. - * - * Typically used for multicast replication. - * ie a multicast address can be associated with a policy, - * then replicated across a number of unicast SR tunnels. + * @brief SR LocalSID behavior registration */ typedef struct { - /** policy name */ - u8 *name; + u16 sr_localsid_function_number; /**< SR LocalSID plugin function (>SR_BEHAVIOR_LAST) */ - /** tunnel names */ - u8 **tunnel_names; + u8 *function_name; /**< Function name. (key). */ - /** Delete the policy? */ - u8 is_del; -} ip6_sr_add_del_policy_args_t; + u8 *keyword_str; /**< Behavior keyword (i.e. End.X) */ + + u8 *def_str; /**< Behavior definition (i.e. Endpoint with cross-connect) */ + + u8 *params_str; /**< Behavior parameters (i.e. ) */ + + dpo_type_t dpo; /**< DPO type registration */ + + format_function_t *ls_format; /**< LocalSID format function */ + + unformat_function_t *ls_unformat; /**< LocalSID unformat function */ + + sr_plugin_callback_t *creation; /**< Function within plugin that will be called after localsid creation*/ + + sr_plugin_callback_t *removal; /**< Function within plugin that will be called before localsid removal */ +} sr_localsid_fn_registration_t; /** - * @brief Segment Routing policy. + * @brief Steering db key * - * Typically used for multicast replication. - * ie a multicast address can be associated with a policy, - * then replicated across a number of unicast SR tunnels. + * L3 is IPv4/IPv6 + mask + * L2 is sf_if_index + vlan */ typedef struct { - /** name of policy */ - u8 *name; - - /** vector to SR tunnel index */ - u32 *tunnel_indices; + union + { + struct + { + ip46_address_t prefix; /**< IP address of the prefix */ + u32 mask_width; /**< Mask width of the prefix */ + u32 fib_table; /**< VRF of the prefix */ + } l3; + struct + { + u32 sw_if_index; /**< Incoming software interface */ + } l2; + }; + u8 traffic_type; /**< Traffic type (IPv4, IPv6, L2) */ +} sr_steering_key_t; -} ip6_sr_policy_t; +typedef struct +{ + sr_steering_key_t classify; /**< Traffic classification */ + u32 sr_policy; /**< SR Policy index */ +} ip6_sr_steering_policy_t; /** - * @brief Args for mapping of multicast address to policy name. - * - * Typically used for multicast replication. - * ie a multicast address can be associated with a policy, - * then replicated across a number of unicast SR tunnels. 
+ * @brief Segment Routing main datastructure */ typedef struct { - /** multicast IP6 address */ - ip6_address_t *multicast_address; + /* ip6-lookup next index for imposition FIB entries */ + u32 ip6_lookup_sr_next_index; - /** name of policy to map to */ - u8 *policy_name; + /* ip6-replicate next index for multicast tunnel */ + u32 ip6_lookup_sr_spray_index; - /** Delete the mapping */ - u8 is_del; + /* IP4-lookup -> SR rewrite next index */ + u32 ip4_lookup_sr_policy_rewrite_encaps_index; + u32 ip4_lookup_sr_policy_rewrite_insert_index; -} ip6_sr_add_del_multicastmap_args_t; + /* IP6-lookup -> SR rewrite next index */ + u32 ip6_lookup_sr_policy_rewrite_encaps_index; + u32 ip6_lookup_sr_policy_rewrite_insert_index; -/** - * @brief Segment Routing state. - */ -typedef struct -{ - /** pool of tunnel instances, sr entry only */ - ip6_sr_tunnel_t *tunnels; + /* L2-input -> SR rewrite next index */ + u32 l2_sr_policy_rewrite_index; + + /* IP6-lookup -> SR LocalSID (SR End processing) index */ + u32 ip6_lookup_sr_localsid_index; - /** find an sr "tunnel" by its outer-IP src/dst */ - uword *tunnel_index_by_key; + /* SR SID lists */ + ip6_sr_sl_t *sid_lists; - /** find an sr "tunnel" by its name */ - uword *tunnel_index_by_name; + /* SR policies */ + ip6_sr_policy_t *sr_policies; - /** policy pool */ - ip6_sr_policy_t *policies; + /* Find an SR policy by its BindingSID */ + ip6_address_t *sr_policy_index_by_key; - /** find a policy by name */ - uword *policy_index_by_policy_name; + /* Pool of SR localsid instances */ + ip6_sr_localsid_t *localsids; - /** multicast address to policy mapping */ - uword *policy_index_by_multicast_address; + /* Find a SR localsid instance based on its functionID */ + ip6_address_t *localsids_index_by_key; - /** hmac key id by shared secret */ - uword *hmac_key_by_shared_secret; + /* Pool of SR steer policies instances */ + ip6_sr_steering_policy_t *steer_policies; - /** ip6-rewrite next index for reinstalling the original dst address */ - u32 ip6_rewrite_sr_next_index; + /* Find a steer policy based on its classifier */ + sr_steering_key_t *steer_policies_index_by_key; - /** application API callback */ - void *sr_local_cb; + /* L2 steering ifaces - sr_policies */ + u32 *sw_iface_sr_policies; - /** validate hmac keys */ - u8 validate_hmac; + /* Spray DPO */ + dpo_type_t sr_pr_spray_dpo_type; - /** pool of hmac keys */ - ip6_sr_hmac_key_t *hmac_keys; + /* Plugin functions */ + sr_localsid_fn_registration_t *plugin_functions; - /** Openssl var */ - EVP_MD *md; - /** Openssl var */ - HMAC_CTX *hmac_ctx; + /* Find plugin function by name */ + uword *plugin_functions_by_key; - /** enable debug spew */ - u8 is_debug; + /* Counters */ + vlib_combined_counter_main_t sr_ls_valid_counters; + vlib_combined_counter_main_t sr_ls_invalid_counters; - /** convenience */ + /* SR Policies FIBs */ + u32 fib_table_ip6; + u32 fib_table_ip4; + + /* convenience */ vlib_main_t *vlib_main; - /** convenience */ vnet_main_t *vnet_main; } ip6_sr_main_t; ip6_sr_main_t sr_main; -format_function_t format_ip6_sr_header; -format_function_t format_ip6_sr_header_with_length; - -vlib_node_registration_t ip6_sr_input_node; - -int ip6_sr_add_del_tunnel (ip6_sr_add_del_tunnel_args_t * a); -int ip6_sr_add_del_policy (ip6_sr_add_del_policy_args_t * a); -int ip6_sr_add_del_multicastmap (ip6_sr_add_del_multicastmap_args_t * a); - -void vnet_register_sr_app_callback (void *cb); - -void sr_fix_hmac (ip6_sr_main_t * sm, ip6_header_t * ip, - ip6_sr_header_t * sr); +extern vlib_node_registration_t 
sr_policy_rewrite_encaps_node; +extern vlib_node_registration_t sr_policy_rewrite_insert_node; +extern vlib_node_registration_t sr_localsid_node; +extern vlib_node_registration_t sr_localsid_d_node; + +void sr_dpo_lock (dpo_id_t * dpo); +void sr_dpo_unlock (dpo_id_t * dpo); + +int sr_localsid_register_function (vlib_main_t * vm, u8 * fn_name, + u8 * keyword_str, u8 * def_str, + u8 * params_str, dpo_type_t * dpo, + format_function_t * ls_format, + unformat_function_t * ls_unformat, + sr_plugin_callback_t * creation_fn, + sr_plugin_callback_t * removal_fn); + +int +sr_policy_add (ip6_address_t * bsid, ip6_address_t * segments, + u32 weight, u8 behavior, u32 fib_table, u8 is_encap); +int +sr_policy_mod (ip6_address_t * bsid, u32 index, u32 fib_table, + u8 operation, ip6_address_t * segments, u32 sl_index, + u32 weight); +int sr_policy_del (ip6_address_t * bsid, u32 index); + +int sr_cli_localsid (char is_del, ip6_address_t * localsid_addr, + char end_psp, u8 behavior, u32 sw_if_index, + u32 vlan_index, u32 fib_table, ip46_address_t * nh_addr, + void *ls_plugin_mem); + +int +sr_steering_policy (int is_del, ip6_address_t * bsid, u32 sr_policy_index, + u32 table_id, ip46_address_t * prefix, u32 mask_width, + u32 sw_if_index, u8 traffic_type); #endif /* included_vnet_sr_h */ diff --git a/src/vnet/sr/sr_api.c b/src/vnet/sr/sr_api.c index bab0fc84..f4e1c346 100644 --- a/src/vnet/sr/sr_api.c +++ b/src/vnet/sr/sr_api.c @@ -18,6 +18,7 @@ */ #include +#include #include #include @@ -43,159 +44,129 @@ #include #define foreach_vpe_api_msg \ -_(SR_MULTICAST_MAP_ADD_DEL, sr_multicast_map_add_del) - -static void vl_api_sr_tunnel_add_del_t_handler - (vl_api_sr_tunnel_add_del_t * mp) +_(SR_LOCALSID_ADD_DEL, sr_localsid_add_del) \ +_(SR_POLICY_DEL, sr_policy_del) \ +_(SR_STEERING_ADD_DEL, sr_steering_add_del) +//_(SR_LOCALSIDS, sr_localsids_dump) +//_(SR_LOCALSID_BEHAVIORS, sr_localsid_behaviors_dump) + +static void vl_api_sr_localsid_add_del_t_handler + (vl_api_sr_localsid_add_del_t * mp) { -#if IP6SR == 0 - clib_warning ("unimplemented"); -#else - ip6_sr_add_del_tunnel_args_t _a, *a = &_a; + vl_api_sr_localsid_add_del_reply_t *rmp; int rv = 0; - vl_api_sr_tunnel_add_del_reply_t *rmp; - ip6_address_t *segments = 0, *seg; - ip6_address_t *tags = 0, *tag; - ip6_address_t *this_address; - int i; +/* + * int sr_cli_localsid (char is_del, ip6_address_t *localsid_addr, + * char end_psp, u8 behavior, u32 sw_if_index, u32 vlan_index, u32 fib_table, + * ip46_address_t *nh_addr, void *ls_plugin_mem) + */ + rv = sr_cli_localsid (mp->is_del, + (ip6_address_t *) & mp->localsid_addr, + mp->end_psp, + mp->behavior, + ntohl (mp->sw_if_index), + ntohl (mp->vlan_index), + ntohl (mp->fib_table), + (ip46_address_t *) & mp->nh_addr, NULL); + + REPLY_MACRO (VL_API_SR_LOCALSID_ADD_DEL_REPLY); +} - if (mp->n_segments == 0) - { - rv = -11; - goto out; - } +static void +vl_api_sr_policy_add_t_handler (vl_api_sr_policy_add_t * mp) +{ + vl_api_sr_policy_add_reply_t *rmp; + ip6_address_t *segments = 0, *seg; + ip6_address_t *this_address = (ip6_address_t *) mp->segments; - memset (a, 0, sizeof (*a)); - a->src_address = (ip6_address_t *) & mp->src_address; - a->dst_address = (ip6_address_t *) & mp->dst_address; - a->dst_mask_width = mp->dst_mask_width; - a->flags_net_byte_order = mp->flags_net_byte_order; - a->is_del = (mp->is_add == 0); - a->rx_table_id = ntohl (mp->outer_vrf_id); - a->tx_table_id = ntohl (mp->inner_vrf_id); - - a->name = format (0, "%s", mp->name); - if (!(vec_len (a->name))) - a->name = 0; - - a->policy_name = format 
(0, "%s", mp->policy_name); - if (!(vec_len (a->policy_name))) - a->policy_name = 0; - - /* Yank segments and tags out of the API message */ - this_address = (ip6_address_t *) mp->segs_and_tags; + int i; for (i = 0; i < mp->n_segments; i++) { vec_add2 (segments, seg, 1); clib_memcpy (seg->as_u8, this_address->as_u8, sizeof (*this_address)); this_address++; } - for (i = 0; i < mp->n_tags; i++) - { - vec_add2 (tags, tag, 1); - clib_memcpy (tag->as_u8, this_address->as_u8, sizeof (*this_address)); - this_address++; - } - a->segments = segments; - a->tags = tags; - - rv = ip6_sr_add_del_tunnel (a); - -out: +/* + * sr_policy_add (ip6_address_t *bsid, ip6_address_t *segments, + * u32 weight, u8 behavior, u32 fib_table, u8 is_encap) + */ + int rv = 0; + rv = sr_policy_add ((ip6_address_t *) & mp->bsid_addr, + segments, + ntohl (mp->weight), + mp->type, ntohl (mp->fib_table), mp->is_encap); - REPLY_MACRO (VL_API_SR_TUNNEL_ADD_DEL_REPLY); -#endif + REPLY_MACRO (VL_API_SR_POLICY_ADD_REPLY); } -static void vl_api_sr_policy_add_del_t_handler - (vl_api_sr_policy_add_del_t * mp) +static void +vl_api_sr_policy_mod_t_handler (vl_api_sr_policy_mod_t * mp) { -#if IP6SR == 0 - clib_warning ("unimplemented"); -#else - ip6_sr_add_del_policy_args_t _a, *a = &_a; - int rv = 0; - vl_api_sr_policy_add_del_reply_t *rmp; - int i; - - memset (a, 0, sizeof (*a)); - a->is_del = (mp->is_add == 0); - - a->name = format (0, "%s", mp->name); - if (!(vec_len (a->name))) - { - rv = VNET_API_ERROR_NO_SUCH_NODE2; - goto out; - } - - if (!(mp->tunnel_names[0])) - { - rv = VNET_API_ERROR_NO_SUCH_NODE2; - goto out; - } + vl_api_sr_policy_mod_reply_t *rmp; - // start deserializing tunnel_names - int num_tunnels = mp->tunnel_names[0]; //number of tunnels - u8 *deser_tun_names = mp->tunnel_names; - deser_tun_names += 1; //moving along - - u8 *tun_name = 0; - int tun_name_len = 0; + ip6_address_t *segments = 0, *seg; + ip6_address_t *this_address = (ip6_address_t *) mp->segments; - for (i = 0; i < num_tunnels; i++) + int i; + for (i = 0; i < mp->n_segments; i++) { - tun_name_len = *deser_tun_names; - deser_tun_names += 1; - vec_resize (tun_name, tun_name_len); - memcpy (tun_name, deser_tun_names, tun_name_len); - vec_add1 (a->tunnel_names, tun_name); - deser_tun_names += tun_name_len; - tun_name = 0; + vec_add2 (segments, seg, 1); + clib_memcpy (seg->as_u8, this_address->as_u8, sizeof (*this_address)); + this_address++; } - rv = ip6_sr_add_del_policy (a); - -out: + int rv = 0; +/* + * int + * sr_policy_mod(ip6_address_t *bsid, u32 index, u32 fib_table, + * u8 operation, ip6_address_t *segments, u32 sl_index, + * u32 weight, u8 is_encap) + */ + rv = sr_policy_mod ((ip6_address_t *) & mp->bsid_addr, + ntohl (mp->sr_policy_index), + ntohl (mp->fib_table), + mp->operation, + segments, ntohl (mp->sl_index), ntohl (mp->weight)); - REPLY_MACRO (VL_API_SR_POLICY_ADD_DEL_REPLY); -#endif + REPLY_MACRO (VL_API_SR_POLICY_MOD_REPLY); } -static void vl_api_sr_multicast_map_add_del_t_handler - (vl_api_sr_multicast_map_add_del_t * mp) +static void +vl_api_sr_policy_del_t_handler (vl_api_sr_policy_del_t * mp) { -#if IP6SR == 0 - clib_warning ("unimplemented"); -#else - ip6_sr_add_del_multicastmap_args_t _a, *a = &_a; + vl_api_sr_policy_del_reply_t *rmp; int rv = 0; - vl_api_sr_multicast_map_add_del_reply_t *rmp; - - memset (a, 0, sizeof (*a)); - a->is_del = (mp->is_add == 0); - - a->multicast_address = (ip6_address_t *) & mp->multicast_address; - a->policy_name = format (0, "%s", mp->policy_name); - - if (a->multicast_address == 0) - { - rv = -1; - 
goto out; - } - - if (!(a->policy_name)) - { - rv = -2; - goto out; - } - - rv = ip6_sr_add_del_multicastmap (a); +/* + * int + * sr_policy_del (ip6_address_t *bsid, u32 index) + */ + rv = sr_policy_del ((ip6_address_t *) & mp->bsid_addr, + ntohl (mp->sr_policy_index)); -out: + REPLY_MACRO (VL_API_SR_POLICY_DEL_REPLY); +} - REPLY_MACRO (VL_API_SR_MULTICAST_MAP_ADD_DEL_REPLY); -#endif +static void vl_api_sr_steering_add_del_t_handler + (vl_api_sr_steering_add_del_t * mp) +{ + vl_api_sr_steering_add_del_reply_t *rmp; + int rv = 0; +/* + * int + * sr_steering_policy(int is_del, ip6_address_t *bsid, u32 sr_policy_index, + * u32 table_id, ip46_address_t *prefix, u32 mask_width, u32 sw_if_index, + * u8 traffic_type) + */ + rv = sr_steering_policy (mp->is_del, + (ip6_address_t *) & mp->bsid_addr, + ntohl (mp->sr_policy_index), + ntohl (mp->table_id), + (ip46_address_t *) & mp->prefix_addr, + ntohl (mp->mask_width), + ntohl (mp->sw_if_index), mp->traffic_type); + + REPLY_MACRO (VL_API_SR_STEERING_ADD_DEL_REPLY); } /* @@ -233,27 +204,26 @@ sr_api_hookup (vlib_main_t * vm) #undef _ /* - * Manually register the sr tunnel add del msg, so we trace + * Manually register the sr policy add msg, so we trace * enough bytes to capture a typical segment list */ - vl_msg_api_set_handlers (VL_API_SR_TUNNEL_ADD_DEL, - "sr_tunnel_add_del", - vl_api_sr_tunnel_add_del_t_handler, + vl_msg_api_set_handlers (VL_API_SR_POLICY_ADD, + "sr_policy_add", + vl_api_sr_policy_add_t_handler, vl_noop_handler, - vl_api_sr_tunnel_add_del_t_endian, - vl_api_sr_tunnel_add_del_t_print, 256, 1); - + vl_api_sr_policy_add_t_endian, + vl_api_sr_policy_add_t_print, 256, 1); /* - * Manually register the sr policy add del msg, so we trace - * enough bytes to capture a typical tunnel name list + * Manually register the sr policy mod msg, so we trace + * enough bytes to capture a typical segment list */ - vl_msg_api_set_handlers (VL_API_SR_POLICY_ADD_DEL, - "sr_policy_add_del", - vl_api_sr_policy_add_del_t_handler, + vl_msg_api_set_handlers (VL_API_SR_POLICY_MOD, + "sr_policy_mod", + vl_api_sr_policy_mod_t_handler, vl_noop_handler, - vl_api_sr_policy_add_del_t_endian, - vl_api_sr_policy_add_del_t_print, 256, 1); + vl_api_sr_policy_mod_t_endian, + vl_api_sr_policy_mod_t_print, 256, 1); /* * Set up the (msg_name, crc, message-id) table diff --git a/src/vnet/sr/sr_doc.md b/src/vnet/sr/sr_doc.md new file mode 100644 index 00000000..a7220630 --- /dev/null +++ b/src/vnet/sr/sr_doc.md @@ -0,0 +1,161 @@ +# SRv6: Segment Routing for IPv6 {#sr_doc} + +This is a memo intended to contain documentation of the VPP SRv6 implementation +Everything that is not directly obvious should come here. +For any feedback on content that should be explained please mailto:pcamaril@cisco.com + +## Segment Routing + +Segment routing is a network technology focused on addressing the limitations of existing IP and Multiprotocol Label Switching (MPLS) networks in terms of simplicity, scale, and ease of operation. It is a foundation for application engineered routing as it prepares the networks for new business models where applications can control the network behavior. + +Segment routing seeks the right balance between distributed intelligence and centralized optimization and programming. It was built for the software-defined networking (SDN) era. 
+
+Segment routing enhances packet forwarding behavior by enabling a network to transport unicast packets through a specific forwarding path, different from the normal path that a packet usually takes (IGP shortest path or BGP best path). This capability benefits many use cases, and one can build those specific paths based on application requirements.
+
+Segment routing uses the source routing paradigm. A node, usually a router but also a switch, a trusted server, or a virtual forwarder running on a hypervisor, steers a packet through an ordered list of instructions, called segments. A segment can represent any instruction, topological or service-based. A segment can have a semantic local to a segment-routing node or global within a segment-routing network. Segment routing allows an operator to enforce a flow through any topological path and service chain while maintaining per-flow state only at the ingress node to the segment-routing network. Segment routing also supports equal-cost multipath (ECMP) by design.
+
+Segment routing can operate with either an MPLS or an IPv6 data plane. All the currently available MPLS services, such as Layer 3 VPN (L3VPN), L2VPN (Virtual Private Wire Service [VPWS], Virtual Private LAN Services [VPLS], Ethernet VPN [E-VPN], and Provider Backbone Bridging Ethernet VPN [PBB-EVPN]), can run on top of a segment-routing transport network.
+
+**The implementation of Segment Routing in VPP only covers the IPv6 data plane (SRv6).**
+
+## Segment Routing terminology
+
+* Segment Routing Header (SRH): IPv6 routing extension header of type 'Segment Routing'. (draft-ietf-6man-segment-routing-header-05)
+* SegmentID (SID): an IPv6 address.
+* Segment List (SL) (SID List): the sequence of SIDs that the packet will traverse.
+* SR Policy: defines the SRH that will be applied to a packet. A packet steered into an SR policy may either receive the SRH through IPv6 header encapsulation (as recommended in draft-ietf-6man-rfc2460bis) or have it inserted within an existing IPv6 header. An SR policy is uniquely identified by its Binding SID and associated with a weighted set of Segment Lists. In case several SID lists are defined, traffic steered into the policy is unevenly load-balanced among them according to their respective weights.
+* Local SID: a SID associated with a processing function on the local node, which may range from advancing to the next SID in the SRH to complex user-defined behaviors. When a FIB lookup, either in the main FIB or in a specific VRF, returns a match on a local SID, the associated function is performed.
+* BindingSID: a BindingSID is a SID (only one) associated one-to-one with an SR Policy. If a packet arrives with an IPv6 DA corresponding to a BindingSID, then the SR policy is applied to that packet.
+
+## Creating an SR LocalSID
+
+A local SID is associated with a Segment Routing behavior, or function, on the current node.
+
+The most basic behavior is called END. It simply activates the next SID in the current packet by decrementing the Segments Left value and updating the IPv6 DA.
+
+A local END SID is instantiated using the following CLI:
+
+    sr localsid (del) address XX::YY behavior end
+
+This creates a new entry in the main FIB for IPv6 address XX::YY. All packets whose IPv6 DA matches this FIB entry are redirected to the sr-localsid node, where they are processed as described above.
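+
+As a concrete illustration (c1::1 below is just a placeholder SID chosen for this example), the following instantiates an End LocalSID; a matching packet whose SRH still has segments left gets its Segments Left decremented and its IPv6 DA rewritten to the next SID in its segment list:
+
+    sr localsid address c1::1 behavior end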
+
+Other examples of local SIDs are the following:
+
+    sr localsid (del) address XX::YY behavior end (psp)
+    sr localsid (del) address XX::YY behavior end.x GE0/1/0 2001::a (psp)
+    sr localsid (del) address XX::YY behavior end.dx6 GE0/1/0 2001::a
+    sr localsid (del) address XX::YY behavior end.dx4 GE0/1/0 10.0.0.1
+    sr localsid (del) address XX::YY behavior end.dx2 GigabitE0/11/0
+    sr localsid (del) address XX::YY behavior end.dt6 5
+    sr localsid (del) address XX::YY behavior end.dt4 5
+
+Note that all of these behaviors match the specifications in **TODO REF NET PGM**. Please refer to this document for a detailed description of each behavior.
+
+Help on the available local SID behaviors and their usage can be obtained with:
+
+    help sr localsid
+
+Alternatively, they can be listed with:
+
+    show sr localsids behavior
+
+The difference between those two commands is that the first one only displays the SR LocalSID behaviors built into VPP, while the latter also displays the behaviors added with the SR LocalSID Development Framework.
+
+VPP keeps a 'My LocalSID Table' where it stores all the instantiated SR local SIDs as well as their parameters. Every time a new local SID is instantiated, a new entry is added to this table. In addition, counters for correctly and incorrectly processed traffic are maintained for each local SID. The counters store both the number of packets and bytes.
+
+The contents of the 'My LocalSID Table' are shown with:
+
+    vpp# show sr localsid
+    SRv6 - My LocalSID Table:
+    =========================
+    Address: c3::1
+    Behavior: DX6 (Endpoint with decapsulation and IPv6 cross-connect)
+    Iface: GigabitEthernet0/5/0
+    Next hop: b:c3::b
+    Good traffic: [51277 packets : 5332808 bytes]
+    Bad traffic: [0 packets : 0 bytes]
+    --------------------
+
+The traffic counters can be reset with:
+
+    vpp# clear sr localsid counters
+
+## Creating an SR Policy
+
+An SR Policy is defined by a Binding SID and a weighted set of Segment Lists.
+
+A new SR policy is created with a first SID list using:
+
+    sr policy add bsid 2001::1 next A1:: next B1:: next C1:: (weight 5) (fib-table 3)
+
+* The weight parameter is only used if more than one SID list is associated with the policy.
+* The fib-table parameter specifies in which table (VRF) the Binding SID is to be installed.
+
+An SR policy is deleted with:
+
+    sr policy del bsid 2001::1
+    sr policy del index 1
+
+The existing SR policies are listed with:
+
+    show sr policies
+
+### Adding/Removing SID Lists from an SR policy
+
+An additional SID list is associated with an existing SR policy with:
+
+    sr policy mod bsid 2001::1 add sl next A2:: next B2:: next C2:: (weight 3)
+    sr policy mod index 3 add sl next A2:: next B2:: next C2:: (weight 3)
+
+Conversely, a SID list can be removed from an SR policy with:
+
+    sr policy mod bsid 2001::1 del sl index 1
+    sr policy mod index 3 del sl index 1
+
+Note that this cannot be used to remove the last SID list of a policy.
+
+The weight of a SID list can also be modified with:
+
+    sr policy mod bsid 2001::1 mod sl index 1 weight 4
+    sr policy mod index 3 mod sl index 1 weight 4
+
+### SR Policies: Spray policies
+
+Spray policies are a specific type of SR policy in which the packet is replicated onto all the SID lists, rather than load-balanced among them.
+
+SID list weights are ignored for this type of policy.
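+
+For instance (the BSID and SIDs below are placeholders), a default-type policy with two SID lists weighted 3 and 1 load-balances flows between them in roughly a 3:1 ratio:
+
+    sr policy add bsid cafe::1 next a1:: next b1:: weight 3
+    sr policy mod bsid cafe::1 add sl next a2:: next b2:: weight 1
+
+A spray policy built from the same SID lists would instead replicate every packet onto both of them.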
+
+A Spray policy is instantiated by appending the keyword **spray** to a regular SR policy command, as in:
+
+    sr policy add bsid 2001::1 next A1:: next B1:: next C1:: spray
+
+Spray policies are used to remove multicast state from a network core domain by instead sending a linear unicast copy to every access node. The last SID in each list accesses the multicast tree within the access node.
+
+### Encapsulation SR policies
+
+When creating an SR policy that encapsulates traffic, an IPv6 source address must be specified for the encapsulated traffic. This is done with the following command:
+
+    set sr encaps source addr XXXX::YYYY
+
+## Steering packets into an SR Policy
+
+To steer packets in transit into an SR policy (T.Insert, T.Encaps and T.Encaps.L2 behaviors), the user needs to create an 'sr steering policy'.
+
+    sr steer l3 2001::/64 via sr policy index 1
+    sr steer l3 2001::/64 via sr policy bsid cafe::1
+    sr steer l3 2001::/64 via sr policy bsid cafe::1 fib-table 3
+    sr steer l3 10.0.0.0/16 via sr policy bsid cafe::1
+    sr steer l2 TenGE0/1/0 via sr policy bsid cafe::1
+
+Disclaimer: T.Encaps.L2 steers L2 frames into an SR Policy. Notice that creating an SR steering policy for L2 frames automatically *puts the interface into promiscuous mode*.
+
+## SR LocalSID development framework
+
+One of the *key* concepts of SRv6 is network programmability. This is why an SRv6 LocalSID is associated with a specific function.
+
+However, the true way to enable network programmability is to let any developer **easily** create their own SRv6 LocalSID functions. That is why a set of API calls has been added so that any developer can code their own SRv6 LocalSID behaviors as plugins and add them to the running SRv6 code.
+
+The principle is that the developer only codes the behavior (the graph node); all the FIB handling, SR LocalSID instantiation and so on are done by the VPP SRv6 code.
+
+For more information, please refer to the *SRv6 Sample SR LocalSID plugin* documentation.
diff --git a/src/vnet/sr/sr_error.def b/src/vnet/sr/sr_error.def
deleted file mode 100644
index 62d021fd..00000000
--- a/src/vnet/sr/sr_error.def
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-sr_error (NONE, "no error")
-sr_error (BAD_ROUTING_HEADER_TYPE, "bad routing header type (not 4)")
-sr_error (NO_MORE_SEGMENTS, "out of SR segment drops")
-sr_error (PKTS_PROCESSED, "SR packets processed")
-sr_error (APP_CALLBACK, "SR application callback errors")
-sr_error (HMAC_INVALID, "SR packets with invalid HMAC signatures")
diff --git a/src/vnet/sr/sr_fix_dst_error.def b/src/vnet/sr/sr_fix_dst_error.def
deleted file mode 100644
index 48fe7af6..00000000
--- a/src/vnet/sr/sr_fix_dst_error.def
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-sr_fix_dst_error (NONE, "no error")
-sr_fix_dst_error (NO_SR_HEADER, "no SR header present")
-sr_fix_dst_error (NO_MORE_SEGMENTS, "no more SR segments")
diff --git a/src/vnet/sr/sr_localsid.c b/src/vnet/sr/sr_localsid.c
new file mode 100755
index 00000000..407491ce
--- /dev/null
+++ b/src/vnet/sr/sr_localsid.c
@@ -0,0 +1,1478 @@
+/*
+ * sr_localsid.c: ipv6 segment routing Endpoint behaviors
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ * @brief Processing of packets with an SRH
+ *
+ * CLI to define new Segment Routing End processing functions.
+ * Graph node to support such functions.
+ *
+ * Each function associates an SRv6 segment (IPv6 address) with a specific
+ * Segment Routing function.
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+/**
+ * @brief Dynamically added SR localsid DPO type
+ */
+static dpo_type_t sr_localsid_dpo_type;
+static dpo_type_t sr_localsid_d_dpo_type;
+
+/**
+ * @brief SR localsid add/del
+ *
+ * Function to add or delete SR LocalSIDs.
+ *
+ * @param is_del Boolean of whether it is a delete instruction
+ * @param localsid_addr IPv6 address of the localsid
+ * @param end_psp Boolean of whether the End/End.X PSP (penultimate segment pop) flavor applies
+ * @param behavior Type of behavior (function) for this localsid
+ * @param sw_if_index Only for L2/L3 xconnect: outgoing interface (OIF). In the VRF variants it carries the table id.
+ * @param vlan_index Only for L2 xconnect. Outgoing VLAN tag.
+ * @param fib_table FIB table in which we should install the localsid entry
+ * @param nh_addr Next Hop IPv4/IPv6 address. Only for L2/L3 xconnect.
+ * @param ls_plugin_mem Memory passed to plugin-defined behaviors (NULL otherwise)
+ *
+ * @return 0 on success, error otherwise.
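+ *
+ * A minimal usage sketch (illustrative only; ls_addr and nh below are local
+ * variables invented for this example, not part of this file), creating an
+ * End LocalSID in the default FIB table:
+ *
+ *   ip6_address_t ls_addr;      (e.g. parsed with unformat_ip6_address)
+ *   ip46_address_t nh;
+ *   ip46_address_reset (&nh);   (End needs no next hop nor outgoing iface)
+ *   rv = sr_cli_localsid (0, &ls_addr, 0, SR_BEHAVIOR_END,
+ *                         ~0, ~0, 0, &nh, NULL);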
+ */ +int +sr_cli_localsid (char is_del, ip6_address_t * localsid_addr, + char end_psp, u8 behavior, u32 sw_if_index, u32 vlan_index, + u32 fib_table, ip46_address_t * nh_addr, void *ls_plugin_mem) +{ + ip6_sr_main_t *sm = &sr_main; + uword *p; + int rv; + + ip6_sr_localsid_t *ls = 0; + ip6_address_t *key_copy; + + dpo_id_t dpo = DPO_INVALID; + + /* Search for the item */ + p = hash_get_mem (sm->localsids_index_by_key, localsid_addr); + + if (p) + { + if (is_del) + { + hash_pair_t *hp; + /* Retrieve localsid */ + ls = pool_elt_at_index (sm->localsids, p[0]); + /* Delete FIB entry */ + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP6, + .fp_len = 128, + .fp_addr = { + .ip6 = *localsid_addr, + } + }; + + fib_table_entry_delete (fib_table_id_find_fib_index + (FIB_PROTOCOL_IP6, fib_table), &pfx, + FIB_SOURCE_SR); + + /* In case it is a Xconnect iface remove the (OIF, NHOP) adj */ + if (ls->behavior == SR_BEHAVIOR_X || ls->behavior == SR_BEHAVIOR_DX6 + || ls->behavior == SR_BEHAVIOR_DX4) + adj_unlock (ls->nh_adj); + + if (ls->behavior >= SR_BEHAVIOR_LAST) + { + sr_localsid_fn_registration_t *plugin = 0; + plugin = pool_elt_at_index (sm->plugin_functions, + ls->behavior - SR_BEHAVIOR_LAST); + + /* Callback plugin removal function */ + rv = plugin->removal (ls); + } + + /* Delete localsid registry */ + pool_put (sm->localsids, ls); + hp = hash_get_pair (sm->localsids_index_by_key, localsid_addr); + key_copy = (void *) (hp->key); + hash_unset_mem (sm->localsids_index_by_key, localsid_addr); + vec_free (key_copy); + return 1; + } + else /* create with function already existing; complain */ + return -1; + } + else + /* delete; localsid does not exist; complain */ + if (is_del) + return -2; + + /* Check whether there exists a FIB entry with such address */ + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP6, + .fp_len = 128, + }; + + pfx.fp_addr.as_u64[0] = localsid_addr->as_u64[0]; + pfx.fp_addr.as_u64[1] = localsid_addr->as_u64[1]; + + /* Lookup the FIB index associated to the table id provided */ + u32 fib_index = fib_table_id_find_fib_index (FIB_PROTOCOL_IP6, fib_table); + if (fib_index == ~0) + return -3; + + /* Lookup the localsid in such FIB table */ + fib_node_index_t fei = fib_table_lookup_exact_match (fib_index, &pfx); + if (FIB_NODE_INDEX_INVALID != fei) + return -4; //There is an entry for such address (the localsid addr) + + /* Create a new localsid registry */ + pool_get (sm->localsids, ls); + memset (ls, 0, sizeof (*ls)); + + clib_memcpy (&ls->localsid, localsid_addr, sizeof (ip6_address_t)); + ls->end_psp = end_psp; + ls->behavior = behavior; + ls->nh_adj = (u32) ~ 0; + ls->fib_table = fib_table; + switch (behavior) + { + case SR_BEHAVIOR_END: + break; + case SR_BEHAVIOR_X: + ls->sw_if_index = sw_if_index; + clib_memcpy (&ls->next_hop.ip6, &nh_addr->ip6, sizeof (ip6_address_t)); + break; + case SR_BEHAVIOR_DX4: + ls->sw_if_index = sw_if_index; + clib_memcpy (&ls->next_hop.ip4, &nh_addr->ip4, sizeof (ip4_address_t)); + break; + case SR_BEHAVIOR_DX6: + ls->sw_if_index = sw_if_index; + clib_memcpy (&ls->next_hop.ip6, &nh_addr->ip6, sizeof (ip6_address_t)); + break; + case SR_BEHAVIOR_DT6: + ls->vrf_index = sw_if_index; + break; + case SR_BEHAVIOR_DX2: + ls->sw_if_index = sw_if_index; + ls->vlan_index = vlan_index; + break; + } + + /* Figure out the adjacency magic for Xconnect variants */ + if (ls->behavior == SR_BEHAVIOR_X || ls->behavior == SR_BEHAVIOR_DX4 + || ls->behavior == SR_BEHAVIOR_DX6) + { + adj_index_t nh_adj_index = ADJ_INDEX_INVALID; + + /* Retrieve the adjacency 
corresponding to the (OIF, next_hop) */ + if (ls->behavior == SR_BEHAVIOR_DX6 || ls->behavior == SR_BEHAVIOR_X) + nh_adj_index = adj_nbr_add_or_lock (FIB_PROTOCOL_IP6, VNET_LINK_IP6, + nh_addr, sw_if_index); + + else if (ls->behavior == SR_BEHAVIOR_DX4) + nh_adj_index = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4, VNET_LINK_IP4, + nh_addr, sw_if_index); + + /* Check for ADJ creation error. If so panic */ + if (nh_adj_index == ADJ_INDEX_INVALID) + { + pool_put (sm->localsids, ls); + return -5; + } + + ls->nh_adj = nh_adj_index; + } + + /* Set DPO */ + if (ls->behavior == SR_BEHAVIOR_END || ls->behavior == SR_BEHAVIOR_X) + dpo_set (&dpo, sr_localsid_dpo_type, DPO_PROTO_IP6, ls - sm->localsids); + else if (ls->behavior > SR_BEHAVIOR_D_FIRST + && ls->behavior < SR_BEHAVIOR_LAST) + dpo_set (&dpo, sr_localsid_d_dpo_type, DPO_PROTO_IP6, ls - sm->localsids); + else if (ls->behavior >= SR_BEHAVIOR_LAST) + { + sr_localsid_fn_registration_t *plugin = 0; + plugin = pool_elt_at_index (sm->plugin_functions, + ls->behavior - SR_BEHAVIOR_LAST); + /* Copy the unformat memory result */ + ls->plugin_mem = ls_plugin_mem; + /* Callback plugin creation function */ + rv = plugin->creation (ls); + if (rv) + { + pool_put (sm->localsids, ls); + return -6; + } + dpo_set (&dpo, plugin->dpo, DPO_PROTO_IP6, ls - sm->localsids); + } + + /* Set hash key for searching localsid by address */ + key_copy = vec_new (ip6_address_t, 1); + clib_memcpy (key_copy, localsid_addr, sizeof (ip6_address_t)); + hash_set_mem (sm->localsids_index_by_key, key_copy, ls - sm->localsids); + + fib_table_entry_special_dpo_add (fib_index, &pfx, FIB_SOURCE_SR, + FIB_ENTRY_FLAG_EXCLUSIVE, &dpo); + dpo_reset (&dpo); + + /* Set counter to zero */ + vlib_validate_combined_counter (&(sm->sr_ls_valid_counters), + ls - sm->localsids); + vlib_validate_combined_counter (&(sm->sr_ls_invalid_counters), + ls - sm->localsids); + + vlib_zero_combined_counter (&(sm->sr_ls_valid_counters), + ls - sm->localsids); + vlib_zero_combined_counter (&(sm->sr_ls_invalid_counters), + ls - sm->localsids); + + return 0; +} + +/** + * @brief SR LocalSID CLI function. 
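+ *
+ * Illustrative invocations accepted by this parser (the addresses and the
+ * interface name are placeholders, not defaults):
+ *
+ *   sr localsid address c1::1 behavior end psp
+ *   sr localsid address c2::1 fib-table 3 behavior end.dx6 GigabitEthernet0/5/0 b:c2::b
+ *   sr localsid del address c1::1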
+ * + * @see sr_cli_localsid + */ +static clib_error_t * +sr_cli_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t *vnm = vnet_get_main (); + ip6_sr_main_t *sm = &sr_main; + u32 sw_if_index = (u32) ~ 0, vlan_index = (u32) ~ 0, fib_index = 0; + int is_del = 0; + int end_psp = 0; + ip6_address_t resulting_address; + ip46_address_t next_hop; + char address_set = 0; + char behavior = 0; + void *ls_plugin_mem = 0; + + int rv; + + memset (&resulting_address, 0, sizeof (ip6_address_t)); + ip46_address_reset (&next_hop); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "del")) + is_del = 1; + else if (!address_set + && unformat (input, "address %U", unformat_ip6_address, + &resulting_address)) + address_set = 1; + else if (!address_set + && unformat (input, "addr %U", unformat_ip6_address, + &resulting_address)) + address_set = 1; + else if (unformat (input, "fib-table %u", &fib_index)); + else if (vlan_index == (u32) ~ 0 + && unformat (input, "vlan %u", &vlan_index)); + else if (!behavior && unformat (input, "behavior")) + { + if (unformat (input, "end.x %U %U", + unformat_vnet_sw_interface, vnm, &sw_if_index, + unformat_ip6_address, &next_hop.ip6)) + behavior = SR_BEHAVIOR_X; + else if (unformat (input, "end.dx6 %U %U", + unformat_vnet_sw_interface, vnm, &sw_if_index, + unformat_ip6_address, &next_hop.ip6)) + behavior = SR_BEHAVIOR_DX6; + else if (unformat (input, "end.dx4 %U %U", + unformat_vnet_sw_interface, vnm, &sw_if_index, + unformat_ip4_address, &next_hop.ip4)) + behavior = SR_BEHAVIOR_DX4; + else if (unformat (input, "end.dx2 %U", + unformat_vnet_sw_interface, vnm, &sw_if_index)) + behavior = SR_BEHAVIOR_DX2; + else if (unformat (input, "end.dt6 %u", &sw_if_index)) + behavior = SR_BEHAVIOR_DT6; + else if (unformat (input, "end.dt4 %u", &sw_if_index)) + behavior = SR_BEHAVIOR_DT4; + else + { + /* Loop over all the plugin behavior format functions */ + sr_localsid_fn_registration_t *plugin = 0, **vec_plugins = 0; + sr_localsid_fn_registration_t **plugin_it = 0; + + /* Create a vector out of the plugin pool as recommended */ + /* *INDENT-OFF* */ + pool_foreach (plugin, sm->plugin_functions, + { + vec_add1 (vec_plugins, plugin); + }); + /* *INDENT-ON* */ + + vec_foreach (plugin_it, vec_plugins) + { + if (unformat + (input, "%U", (*plugin_it)->ls_unformat, &ls_plugin_mem)) + { + behavior = (*plugin_it)->sr_localsid_function_number; + break; + } + } + } + + if (!behavior) + { + if (unformat (input, "end")) + behavior = SR_BEHAVIOR_END; + else + break; + } + } + else if (!end_psp && unformat (input, "psp")) + end_psp = 1; + else + break; + } + + if (!behavior && end_psp) + behavior = SR_BEHAVIOR_END; + + if (!address_set) + return clib_error_return (0, + "Error: SRv6 LocalSID address is mandatory."); + if (!is_del && !behavior) + return clib_error_return (0, + "Error: SRv6 LocalSID behavior is mandatory."); + if (vlan_index != (u32) ~ 0) + return clib_error_return (0, + "Error: SRv6 End.DX2 with rewrite VLAN tag not supported by now."); + if (end_psp && !(behavior == SR_BEHAVIOR_END || behavior == SR_BEHAVIOR_X)) + return clib_error_return (0, + "Error: SRv6 PSP only compatible with End and End.X"); + + rv = sr_cli_localsid (is_del, &resulting_address, end_psp, behavior, + sw_if_index, vlan_index, fib_index, &next_hop, + ls_plugin_mem); + + switch (rv) + { + case 0: + break; + case 1: + return 0; + case -1: + return clib_error_return (0, + "Identical localsid already exists. 
Requested localsid not created."); + case -2: + return clib_error_return (0, + "The requested localsid could not be deleted. SR localsid not found"); + case -3: + return clib_error_return (0, "FIB table %u does not exist", fib_index); + case -4: + return clib_error_return (0, "There is already one FIB entry for the" + "requested localsid non segment routing related"); + case -5: + return clib_error_return (0, + "Could not create ARP/ND entry for such next_hop. Internal error."); + case -6: + return clib_error_return (0, + "Error on the plugin based localsid creation."); + default: + return clib_error_return (0, "BUG: sr localsid returns %d", rv); + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (sr_localsid_command, static) = { + .path = "sr localsid", + .short_help = "sr localsid (del) address XX:XX::YY:YY" + "(fib-table 8) behavior STRING", + .long_help = + "Create SR LocalSID and binds it to a particular behavior\n" + "Arguments:\n" + "\tlocalSID IPv6_addr(128b) LocalSID IPv6 address\n" + "\t(fib-table X) Optional. VRF where to install SRv6 localsid\n" + "\tbehavior STRING Specifies the behavior\n" + "\n\tBehaviors:\n" + "\tEnd\t-> Endpoint.\n" + "\tEnd.X\t-> Endpoint with decapsulation and Layer-3 cross-connect.\n" + "\t\tParameters: ' '\n" + "\tEnd.DX2\t-> Endpoint with decapsulation and Layer-2 cross-connect.\n" + "\t\tParameters: ''\n" + "\tEnd.DX6\t-> Endpoint with decapsulation and IPv6 cross-connect.\n" + "\t\tParameters: ' '\n" + "\tEnd.DX4\t-> Endpoint with decapsulation and IPv4 cross-connect.\n" + "\t\tParameters: ' '\n" + "\tEnd.DT6\t-> Endpoint with decapsulation and specific IPv6 table lookup.\n" + "\t\tParameters: ''\n" + "\tEnd.DT4\t-> Endpoint with decapsulation and specific IPv4 table lookup.\n" + "\t\tParameters: ''\n", + .function = sr_cli_localsid_command_fn, +}; +/* *INDENT-ON* */ + +/** + * @brief CLI function to 'show' all SR LocalSIDs on console. 
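/*
 * Editor's note -- example invocations of the "sr localsid" CLI registered
 * above, assuming a running vpp instance; addresses and the interface name
 * are placeholders:
 *
 *   sr localsid address b::1 behavior end
 *   sr localsid address b::2 behavior end psp
 *   sr localsid address b::3 fib-table 8 behavior end.x GigabitEthernet0/8/0 2001:db8::1
 *   sr localsid address b::4 behavior end.dx4 GigabitEthernet0/8/0 10.0.0.1
 *   sr localsid address b::5 behavior end.dt6 5
 *   sr localsid del address b::1
 *
 * Per the checks in sr_cli_localsid_command_fn(), an address is mandatory,
 * "psp" is only accepted together with End/End.X, and End.DX2 with a VLAN
 * rewrite is rejected as unsupported.
 */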
+ */ +static clib_error_t * +show_sr_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t *vnm = vnet_get_main (); + ip6_sr_main_t *sm = &sr_main; + ip6_sr_localsid_t **localsid_list = 0; + ip6_sr_localsid_t *ls; + int i; + + vlib_cli_output (vm, "SRv6 - My LocalSID Table:"); + vlib_cli_output (vm, "========================="); + /* *INDENT-OFF* */ + pool_foreach (ls, sm->localsids, ({ vec_add1 (localsid_list, ls); })); + /* *INDENT-ON* */ + for (i = 0; i < vec_len (localsid_list); i++) + { + ls = localsid_list[i]; + switch (ls->behavior) + { + case SR_BEHAVIOR_END: + vlib_cli_output (vm, "\tAddress: \t%U\n\tBehavior: \tEnd", + format_ip6_address, &ls->localsid); + break; + case SR_BEHAVIOR_X: + vlib_cli_output (vm, + "\tAddress: \t%U\n\tBehavior: \tX (Endpoint with Layer-3 cross-connect)" + "\n\tIface: \t%U\n\tNext hop: \t%U", + format_ip6_address, &ls->localsid, + format_vnet_sw_if_index_name, vnm, ls->sw_if_index, + format_ip6_address, &ls->next_hop.ip6); + break; + case SR_BEHAVIOR_DX4: + vlib_cli_output (vm, + "\tAddress: \t%U\n\tBehavior: \tDX4 (Endpoint with decapsulation and IPv4 cross-connect)" + "\n\tIface: \t%U\n\tNext hop: \t%U", + format_ip6_address, &ls->localsid, + format_vnet_sw_if_index_name, vnm, ls->sw_if_index, + format_ip4_address, &ls->next_hop.ip4); + break; + case SR_BEHAVIOR_DX6: + vlib_cli_output (vm, + "\tAddress: \t%U\n\tBehavior: \tDX6 (Endpoint with decapsulation and IPv6 cross-connect)" + "\n\tIface: \t%U\n\tNext hop: \t%U", + format_ip6_address, &ls->localsid, + format_vnet_sw_if_index_name, vnm, ls->sw_if_index, + format_ip6_address, &ls->next_hop.ip6); + break; + case SR_BEHAVIOR_DX2: + if (ls->vlan_index == (u32) ~ 0) + vlib_cli_output (vm, + "\tAddress: \t%U\n\tBehavior: \tDX2 (Endpoint with decapulation and Layer-2 cross-connect)" + "\n\tIface: \t%U", format_ip6_address, + &ls->localsid, format_vnet_sw_if_index_name, vnm, + ls->sw_if_index); + else + vlib_cli_output (vm, + "Unsupported yet. (DX2 with egress VLAN rewrite)"); + break; + case SR_BEHAVIOR_DT6: + vlib_cli_output (vm, + "\tAddress: \t%U\n\tBehavior: \tDT6 (Endpoint with decapsulation and specific IPv6 table lookup)" + "\n\tTable: %u", format_ip6_address, &ls->localsid, + ls->fib_table); + break; + case SR_BEHAVIOR_DT4: + vlib_cli_output (vm, + "\tAddress: \t%U\n\tBehavior: \tDT4 (Endpoint with decapsulation and specific IPv4 table lookup)" + "\n\tTable: \t%u", format_ip6_address, + &ls->localsid, ls->fib_table); + break; + default: + if (ls->behavior >= SR_BEHAVIOR_LAST) + { + sr_localsid_fn_registration_t *plugin = + pool_elt_at_index (sm->plugin_functions, + ls->behavior - SR_BEHAVIOR_LAST); + + vlib_cli_output (vm, "\tAddress: \t%U\n" + "\tBehavior: \t%s (%s)\n\t%U", + format_ip6_address, &ls->localsid, + plugin->keyword_str, plugin->def_str, + plugin->ls_format, ls->plugin_mem); + } + else + //Should never get here... 
+ vlib_cli_output (vm, "Internal error"); + break; + } + if (ls->end_psp) + vlib_cli_output (vm, "\tPSP: \tTrue\n"); + + /* Print counters */ + vlib_counter_t valid, invalid; + vlib_get_combined_counter (&(sm->sr_ls_valid_counters), i, &valid); + vlib_get_combined_counter (&(sm->sr_ls_invalid_counters), i, &invalid); + vlib_cli_output (vm, "\tGood traffic: \t[%Ld packets : %Ld bytes]\n", + valid.packets, valid.bytes); + vlib_cli_output (vm, "\tBad traffic: \t[%Ld packets : %Ld bytes]\n", + invalid.packets, invalid.bytes); + vlib_cli_output (vm, "--------------------"); + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_sr_localsid_command, static) = { + .path = "show sr localsids", + .short_help = "show sr localsids", + .function = show_sr_localsid_command_fn, +}; +/* *INDENT-ON* */ + +/** + * @brief Function to 'clear' ALL SR localsid counters + */ +static clib_error_t * +clear_sr_localsid_counters_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ip6_sr_main_t *sm = &sr_main; + + vlib_clear_combined_counters (&(sm->sr_ls_valid_counters)); + vlib_clear_combined_counters (&(sm->sr_ls_invalid_counters)); + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (clear_sr_localsid_counters_command, static) = { + .path = "clear sr localsid counters", + .short_help = "clear sr localsid counters", + .function = clear_sr_localsid_counters_command_fn, +}; +/* *INDENT-ON* */ + +/************************ SR LocalSID graphs node ****************************/ +/** + * @brief SR localsid node trace + */ +typedef struct +{ + u32 localsid_index; + ip6_address_t src, out_dst; + u8 sr[256]; + u8 num_segments; + u8 segments_left; + //With SRv6 header update include flags here. +} sr_localsid_trace_t; + +#define foreach_sr_localsid_error \ +_(NO_INNER_HEADER, "(SR-Error) No inner IP header") \ +_(NO_MORE_SEGMENTS, "(SR-Error) No more segments") \ +_(NO_SRH, "(SR-Error) No SR header") \ +_(NO_PSP, "(SR-Error) PSP Not available (segments left > 0)") \ +_(NOT_LS, "(SR-Error) Decaps not available (segments left > 0)") \ +_(L2, "(SR-Error) SRv6 decapsulated a L2 frame without dest") + +typedef enum +{ +#define _(sym,str) SR_LOCALSID_ERROR_##sym, + foreach_sr_localsid_error +#undef _ + SR_LOCALSID_N_ERROR, +} sr_localsid_error_t; + +static char *sr_localsid_error_strings[] = { +#define _(sym,string) string, + foreach_sr_localsid_error +#undef _ +}; + +#define foreach_sr_localsid_next \ +_(ERROR, "error-drop") \ +_(IP6_LOOKUP, "ip6-lookup") \ +_(IP4_LOOKUP, "ip4-lookup") \ +_(IP6_REWRITE, "ip6-rewrite") \ +_(IP4_REWRITE, "ip4-rewrite") \ +_(INTERFACE_OUTPUT, "interface-output") + +typedef enum +{ +#define _(s,n) SR_LOCALSID_NEXT_##s, + foreach_sr_localsid_next +#undef _ + SR_LOCALSID_N_NEXT, +} sr_localsid_next_t; + +/** + * @brief SR LocalSID graph node trace function + * + * @see sr_localsid + */ +u8 * +format_sr_localsid_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + ip6_sr_main_t *sm = &sr_main; + sr_localsid_trace_t *t = va_arg (*args, sr_localsid_trace_t *); + + ip6_sr_localsid_t *ls = + pool_elt_at_index (sm->localsids, t->localsid_index); + + s = + format (s, "SR-LOCALSID:\n\tLocalsid: %U\n", format_ip6_address, + &ls->localsid); + switch (ls->behavior) + { + case SR_BEHAVIOR_END: + s = format (s, "\tBehavior: End\n"); + break; + case SR_BEHAVIOR_DX6: + s = format (s, "\tBehavior: Decapsulation with IPv6 L3 xconnect\n"); + break; + 
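/*
 * Editor's illustrative sketch -- not part of the original commit. The
 * foreach_sr_localsid_error / foreach_sr_localsid_next lists above are
 * classic X-macros: the same list is expanded twice with different
 * definitions of _() so the enum and its companion string table stay in
 * sync. For example, foreach_sr_localsid_next expands to
 *
 *   typedef enum {
 *     SR_LOCALSID_NEXT_ERROR,              // "error-drop"
 *     SR_LOCALSID_NEXT_IP6_LOOKUP,         // "ip6-lookup"
 *     SR_LOCALSID_NEXT_IP4_LOOKUP,         // "ip4-lookup"
 *     SR_LOCALSID_NEXT_IP6_REWRITE,        // "ip6-rewrite"
 *     SR_LOCALSID_NEXT_IP4_REWRITE,        // "ip4-rewrite"
 *     SR_LOCALSID_NEXT_INTERFACE_OUTPUT,   // "interface-output"
 *     SR_LOCALSID_N_NEXT,
 *   } sr_localsid_next_t;
 *
 * while the string expansion of the same list fills .next_nodes in the
 * VLIB_REGISTER_NODE blocks further down, so each enum value indexes the
 * graph arc with the matching node name.
 */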
case SR_BEHAVIOR_DX4: + s = format (s, "\tBehavior: Decapsulation with IPv4 L3 xconnect\n"); + break; + case SR_BEHAVIOR_X: + s = format (s, "\tBehavior: IPv6 L3 xconnect\n"); + break; + case SR_BEHAVIOR_DT6: + s = format (s, "\tBehavior: Decapsulation with IPv6 Table lookup\n"); + break; + case SR_BEHAVIOR_DT4: + s = format (s, "\tBehavior: Decapsulation with IPv4 Table lookup\n"); + break; + case SR_BEHAVIOR_DX2: + s = format (s, "\tBehavior: Decapsulation with L2 xconnect\n"); + break; + default: + s = format (s, "\tBehavior: defined in plugin\n"); //TODO + break; + } + if (t->num_segments != 0xFF) + { + if (t->num_segments > 0) + { + s = format (s, "\tSegments left: %d\n", t->num_segments); + s = format (s, "\tSID list: [in ietf order]"); + int i = 0; + for (i = 0; i < t->num_segments; i++) + { + s = format (s, "\n\t-> %U", format_ip6_address, + (ip6_address_t *) & t->sr[i * + sizeof (ip6_address_t)]); + } + } + } + return s; +} + +/** + * @brief Function doing End processing. + */ +static_always_inline void +end_srh_processing (vlib_node_runtime_t * node, + vlib_buffer_t * b0, + ip6_header_t * ip0, + ip6_sr_header_t * sr0, + ip6_sr_localsid_t * ls0, u32 * next0) +{ + ip6_address_t *new_dst0; + + if (PREDICT_TRUE (sr0->type == ROUTING_HEADER_TYPE_SR)) + { + if (PREDICT_TRUE (sr0->segments_left != 0)) + { + sr0->segments_left -= 1; + new_dst0 = (ip6_address_t *) (sr0->segments); + new_dst0 += sr0->segments_left; + ip0->dst_address.as_u64[0] = new_dst0->as_u64[0]; + ip0->dst_address.as_u64[1] = new_dst0->as_u64[1]; + + if (ls0->behavior == SR_BEHAVIOR_X) + { + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ls0->nh_adj; + *next0 = SR_LOCALSID_NEXT_IP6_REWRITE; + } + } + else + { + *next0 = SR_LOCALSID_NEXT_ERROR; + b0->error = node->errors[SR_LOCALSID_ERROR_NO_MORE_SEGMENTS]; + } + } + else + { + /* Error. Routing header of type != SR */ + *next0 = SR_LOCALSID_NEXT_ERROR; + b0->error = node->errors[SR_LOCALSID_ERROR_NO_SRH]; + } +} + +/* + * @brief Function doing SRH processing for D* variants + */ +//FixME. I must crosscheck that next_proto matches the localsid +static_always_inline void +end_decaps_srh_processing (vlib_node_runtime_t * node, + vlib_buffer_t * b0, + ip6_header_t * ip0, + ip6_sr_header_t * sr0, + ip6_sr_localsid_t * ls0, u32 * next0) +{ + /* Compute the size of the IPv6 header with all Ext. headers */ + u8 next_proto; + ip6_ext_header_t *next_ext_header; + u16 total_size = 0; + + next_proto = ip0->protocol; + next_ext_header = (void *) (ip0 + 1); + total_size = sizeof (ip6_header_t); + while (ip6_ext_hdr (next_proto)) + { + total_size += ip6_ext_header_len (next_ext_header); + next_proto = next_ext_header->next_hdr; + next_ext_header = ip6_ext_next_header (next_ext_header); + } + + /* Ensure this is the last segment. Otherwise drop. */ + if (sr0 && sr0->segments_left != 0) + { + *next0 = SR_LOCALSID_NEXT_ERROR; + b0->error = node->errors[SR_LOCALSID_ERROR_NOT_LS]; + return; + } + + switch (next_proto) + { + case IP_PROTOCOL_IPV6: + /* Encap-End IPv6. Pop outer IPv6 header. */ + if (ls0->behavior == SR_BEHAVIOR_DX6) + { + vlib_buffer_advance (b0, total_size); + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ls0->nh_adj; + *next0 = SR_LOCALSID_NEXT_IP6_REWRITE; + return; + } + else if (ls0->behavior == SR_BEHAVIOR_DT6) + { + vlib_buffer_advance (b0, total_size); + vnet_buffer (b0)->sw_if_index[VLIB_TX] = ls0->fib_table; + return; + } + break; + case IP_PROTOCOL_IP_IN_IP: + /* Encap-End IPv4. 
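/*
 * Editor's illustrative sketch -- standalone, not part of the original
 * commit. The core of the End behavior implemented by end_srh_processing()
 * above, reduced to plain C so the index arithmetic is visible; the types
 * are simplified stand-ins for the ip6_header_t / ip6_sr_header_t fields
 * actually used.
 */
#include <stdint.h>
#include <string.h>

typedef struct { uint8_t as_u8[16]; } toy_ip6_addr_t;

static void
toy_end_processing (toy_ip6_addr_t * dst_address, uint8_t * segments_left,
                    const toy_ip6_addr_t * segment_list)
{
  if (*segments_left == 0)
    return;                     /* dropped with "No more segments" */
  *segments_left -= 1;
  /* the new IPv6 destination is Segment List[segments_left] */
  memcpy (dst_address, &segment_list[*segments_left],
          sizeof (toy_ip6_addr_t));
}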
Pop outer IPv6 header */ + if (ls0->behavior == SR_BEHAVIOR_DX4) + { + vlib_buffer_advance (b0, total_size); + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ls0->nh_adj; + *next0 = SR_LOCALSID_NEXT_IP4_REWRITE; + return; + } + else if (ls0->behavior == SR_BEHAVIOR_DT4) + { + vlib_buffer_advance (b0, total_size); + vnet_buffer (b0)->sw_if_index[VLIB_TX] = ls0->fib_table; + *next0 = SR_LOCALSID_NEXT_IP4_LOOKUP; + return; + } + break; + case IP_PROTOCOL_IP6_NONXT: + /* L2 encaps */ + if (ls0->behavior == SR_BEHAVIOR_DX2) + { + vlib_buffer_advance (b0, total_size); + vnet_buffer (b0)->sw_if_index[VLIB_TX] = ls0->sw_if_index; + *next0 = SR_LOCALSID_NEXT_INTERFACE_OUTPUT; + return; + } + break; + } + *next0 = SR_LOCALSID_NEXT_ERROR; + b0->error = node->errors[SR_LOCALSID_ERROR_NO_INNER_HEADER]; + return; +} + +/** + * @brief Function doing End processing with PSP + */ +static_always_inline void +end_psp_srh_processing (vlib_node_runtime_t * node, + vlib_buffer_t * b0, + ip6_header_t * ip0, + ip6_ext_header_t * prev0, + ip6_sr_header_t * sr0, + ip6_sr_localsid_t * ls0, u32 * next0) +{ + u32 new_l0, sr_len; + + if (PREDICT_TRUE (sr0->type == ROUTING_HEADER_TYPE_SR)) + { + if (PREDICT_TRUE (sr0->segments_left == 1)) + { + ip0->dst_address.as_u64[0] = sr0->segments->as_u64[0]; + ip0->dst_address.as_u64[1] = sr0->segments->as_u64[1]; + + /* Remove the SRH taking care of the rest of IPv6 ext header */ + if (prev0) + prev0->next_hdr = sr0->protocol; + else + ip0->protocol = sr0->protocol; + + sr_len = ip6_ext_header_len (sr0); + vlib_buffer_advance (b0, sr_len); + new_l0 = clib_net_to_host_u16 (ip0->payload_length) - sr_len; + ip0->payload_length = clib_host_to_net_u16 (new_l0); + clib_memcpy ((void *) ip0 + sr_len, ip0, + (void *) sr0 - (void *) ip0); + + if (ls0->behavior == SR_BEHAVIOR_X) + { + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ls0->nh_adj; + *next0 = SR_LOCALSID_NEXT_IP6_REWRITE; + } + return; + } + } + /* Error. Routing header of type != SR */ + *next0 = SR_LOCALSID_NEXT_ERROR; + b0->error = node->errors[SR_LOCALSID_ERROR_NO_PSP]; +} + +/** + * @brief SR LocalSID graph node. Supports all default SR Endpoint variants + */ +static uword +sr_localsid_d_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, *from, *to_next; + ip6_sr_main_t *sm = &sr_main; + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + next_index = node->cached_next_index; + u32 cpu_index = os_get_cpu_number (); + + while (n_left_from > 0) + { + u32 n_left_to_next; + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* Quad - Loop */ + while (n_left_from >= 8 && n_left_to_next >= 4) + { + u32 bi0, bi1, bi2, bi3; + vlib_buffer_t *b0, *b1, *b2, *b3; + ip6_header_t *ip0, *ip1, *ip2, *ip3; + ip6_ext_header_t *prev0, *prev1, *prev2, *prev3; + ip6_sr_header_t *sr0, *sr1, *sr2, *sr3; + u32 next0, next1, next2, next3; + next0 = next1 = next2 = next3 = SR_LOCALSID_NEXT_IP6_LOOKUP; + ip6_sr_localsid_t *ls0, *ls1, *ls2, *ls3; + + /* Prefetch next iteration. 
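/*
 * Editor's note -- a worked example (not from the original commit) of the
 * PSP bookkeeping done by end_psp_srh_processing() above, with concrete
 * numbers instead of buffer pointers. For an SRH carrying two segments:
 *
 *   sr_len = ip6_ext_header_len (sr0) = (sr0->length + 1) * 8
 *          = (((8 + 2 * 16) / 8 - 1) + 1) * 8 = 40 bytes
 *
 * ip0->payload_length shrinks by those 40 bytes, and
 * clib_memcpy ((void *) ip0 + sr_len, ip0, (void *) sr0 - (void *) ip0)
 * slides the IPv6 header (plus any extension headers that precede the SRH)
 * forward over the SRH before the buffer is advanced by the same sr_len,
 * so the penultimate segment forwards the packet without its SRH.
 */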
*/ + { + vlib_buffer_t *p4, *p5, *p6, *p7; + + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + p6 = vlib_get_buffer (vm, from[6]); + p7 = vlib_get_buffer (vm, from[7]); + + /* Prefetch the buffer header and packet for the N+4 loop iteration */ + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + vlib_prefetch_buffer_header (p6, LOAD); + vlib_prefetch_buffer_header (p7, LOAD); + + CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + to_next[2] = bi2 = from[2]; + to_next[3] = bi3 = from[3]; + from += 4; + to_next += 4; + n_left_from -= 4; + n_left_to_next -= 4; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); + + ls0 = + pool_elt_at_index (sm->localsids, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + ls1 = + pool_elt_at_index (sm->localsids, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + ls2 = + pool_elt_at_index (sm->localsids, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + ls3 = + pool_elt_at_index (sm->localsids, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + ip2 = vlib_buffer_get_current (b2); + ip3 = vlib_buffer_get_current (b3); + + ip6_ext_header_find_t (ip0, prev0, sr0, IP_PROTOCOL_IPV6_ROUTE); + ip6_ext_header_find_t (ip1, prev1, sr1, IP_PROTOCOL_IPV6_ROUTE); + ip6_ext_header_find_t (ip2, prev2, sr2, IP_PROTOCOL_IPV6_ROUTE); + ip6_ext_header_find_t (ip3, prev3, sr3, IP_PROTOCOL_IPV6_ROUTE); + + end_decaps_srh_processing (node, b0, ip0, sr0, ls0, &next0); + end_decaps_srh_processing (node, b1, ip1, sr1, ls1, &next1); + end_decaps_srh_processing (node, b2, ip2, sr2, ls2, &next2); + end_decaps_srh_processing (node, b3, ip3, sr3, ls3, &next3); + + //TODO: trace. + + vlib_increment_combined_counter + (((next0 == + SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : + &(sm->sr_ls_valid_counters)), cpu_index, ls0 - sm->localsids, 1, + vlib_buffer_length_in_chain (vm, b0)); + + vlib_increment_combined_counter + (((next1 == + SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : + &(sm->sr_ls_valid_counters)), cpu_index, ls1 - sm->localsids, 1, + vlib_buffer_length_in_chain (vm, b1)); + + vlib_increment_combined_counter + (((next2 == + SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : + &(sm->sr_ls_valid_counters)), cpu_index, ls2 - sm->localsids, 1, + vlib_buffer_length_in_chain (vm, b2)); + + vlib_increment_combined_counter + (((next3 == + SR_LOCALSID_NEXT_ERROR) ? 
&(sm->sr_ls_invalid_counters) : + &(sm->sr_ls_valid_counters)), cpu_index, ls3 - sm->localsids, 1, + vlib_buffer_length_in_chain (vm, b3)); + + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, + n_left_to_next, bi0, bi1, bi2, bi3, + next0, next1, next2, next3); + } + + /* Single loop for potentially the last three packets */ + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + ip6_header_t *ip0; + ip6_ext_header_t *prev0; + ip6_sr_header_t *sr0; + u32 next0 = SR_LOCALSID_NEXT_IP6_LOOKUP; + ip6_sr_localsid_t *ls0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + ip0 = vlib_buffer_get_current (b0); + + /* Lookup the SR End behavior based on IP DA (adj) */ + ls0 = + pool_elt_at_index (sm->localsids, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + + /* Find SRH as well as previous header */ + ip6_ext_header_find_t (ip0, prev0, sr0, IP_PROTOCOL_IPV6_ROUTE); + + /* SRH processing and End variants */ + end_decaps_srh_processing (node, b0, ip0, sr0, ls0, &next0); + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_localsid_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->num_segments = 0; + tr->localsid_index = ls0 - sm->localsids; + + if (ip0 == vlib_buffer_get_current (b0)) + { + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->out_dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->out_dst.as_u8)); + if (ip0->protocol == IP_PROTOCOL_IPV6_ROUTE + && sr0->type == ROUTING_HEADER_TYPE_SR) + { + clib_memcpy (tr->sr, sr0->segments, sr0->length * 8); + tr->num_segments = + sr0->length * 8 / sizeof (ip6_address_t); + tr->segments_left = sr0->segments_left; + } + } + else + tr->num_segments = 0xFF; + } + + /* Increase the counters */ + vlib_increment_combined_counter + (((next0 == + SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : + &(sm->sr_ls_valid_counters)), cpu_index, ls0 - sm->localsids, 1, + vlib_buffer_length_in_chain (vm, b0)); + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return from_frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sr_localsid_d_node) = { + .function = sr_localsid_d_fn, + .name = "sr-localsid-d", + .vector_size = sizeof (u32), + .format_trace = format_sr_localsid_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = SR_LOCALSID_N_ERROR, + .error_strings = sr_localsid_error_strings, + .n_next_nodes = SR_LOCALSID_N_NEXT, + .next_nodes = { +#define _(s,n) [SR_LOCALSID_NEXT_##s] = n, + foreach_sr_localsid_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +/** + * @brief SR LocalSID graph node. 
Supports all default SR Endpoint variants + */ +static uword +sr_localsid_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, *from, *to_next; + ip6_sr_main_t *sm = &sr_main; + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + next_index = node->cached_next_index; + u32 cpu_index = os_get_cpu_number (); + + while (n_left_from > 0) + { + u32 n_left_to_next; + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* Quad - Loop */ + while (n_left_from >= 8 && n_left_to_next >= 4) + { + u32 bi0, bi1, bi2, bi3; + vlib_buffer_t *b0, *b1, *b2, *b3; + ip6_header_t *ip0, *ip1, *ip2, *ip3; + ip6_sr_header_t *sr0, *sr1, *sr2, *sr3; + ip6_ext_header_t *prev0, *prev1, *prev2, *prev3; + u32 next0, next1, next2, next3; + next0 = next1 = next2 = next3 = SR_LOCALSID_NEXT_IP6_LOOKUP; + ip6_sr_localsid_t *ls0, *ls1, *ls2, *ls3; + + /* Prefetch next iteration. */ + { + vlib_buffer_t *p4, *p5, *p6, *p7; + + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + p6 = vlib_get_buffer (vm, from[6]); + p7 = vlib_get_buffer (vm, from[7]); + + /* Prefetch the buffer header and packet for the N+2 loop iteration */ + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + vlib_prefetch_buffer_header (p6, LOAD); + vlib_prefetch_buffer_header (p7, LOAD); + + CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + to_next[2] = bi2 = from[2]; + to_next[3] = bi3 = from[3]; + from += 4; + to_next += 4; + n_left_from -= 4; + n_left_to_next -= 4; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + ip2 = vlib_buffer_get_current (b2); + ip3 = vlib_buffer_get_current (b3); + + ip6_ext_header_find_t (ip0, prev0, sr0, IP_PROTOCOL_IPV6_ROUTE); + ip6_ext_header_find_t (ip1, prev1, sr1, IP_PROTOCOL_IPV6_ROUTE); + ip6_ext_header_find_t (ip2, prev2, sr2, IP_PROTOCOL_IPV6_ROUTE); + ip6_ext_header_find_t (ip3, prev3, sr3, IP_PROTOCOL_IPV6_ROUTE); + + ls0 = + pool_elt_at_index (sm->localsids, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + ls1 = + pool_elt_at_index (sm->localsids, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + ls2 = + pool_elt_at_index (sm->localsids, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + ls3 = + pool_elt_at_index (sm->localsids, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + + if (ls0->end_psp) + end_psp_srh_processing (node, b0, ip0, prev0, sr0, ls0, &next0); + else + end_srh_processing (node, b0, ip0, sr0, ls0, &next0); + + if (ls1->end_psp) + end_psp_srh_processing (node, b1, ip1, prev1, sr1, ls1, &next1); + else + end_srh_processing (node, b1, ip1, sr1, ls1, &next1); + + if (ls2->end_psp) + end_psp_srh_processing (node, b2, ip2, prev2, sr2, ls2, &next2); + else + end_srh_processing (node, b2, ip2, sr2, ls2, &next2); + + if (ls3->end_psp) + end_psp_srh_processing (node, b3, ip3, prev3, sr3, ls3, &next3); + else + end_srh_processing (node, b3, ip3, sr3, ls3, &next3); + + //TODO: proper trace. + + vlib_increment_combined_counter + (((next0 == + SR_LOCALSID_NEXT_ERROR) ? 
&(sm->sr_ls_invalid_counters) : + &(sm->sr_ls_valid_counters)), cpu_index, ls0 - sm->localsids, 1, + vlib_buffer_length_in_chain (vm, b0)); + + vlib_increment_combined_counter + (((next1 == + SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : + &(sm->sr_ls_valid_counters)), cpu_index, ls1 - sm->localsids, 1, + vlib_buffer_length_in_chain (vm, b1)); + + vlib_increment_combined_counter + (((next2 == + SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : + &(sm->sr_ls_valid_counters)), cpu_index, ls2 - sm->localsids, 1, + vlib_buffer_length_in_chain (vm, b2)); + + vlib_increment_combined_counter + (((next3 == + SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : + &(sm->sr_ls_valid_counters)), cpu_index, ls3 - sm->localsids, 1, + vlib_buffer_length_in_chain (vm, b3)); + + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, + n_left_to_next, bi0, bi1, bi2, bi3, + next0, next1, next2, next3); + } + + /* Single loop for potentially the last three packets */ + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + ip6_header_t *ip0 = 0; + ip6_ext_header_t *prev0; + ip6_sr_header_t *sr0; + u32 next0 = SR_LOCALSID_NEXT_IP6_LOOKUP; + ip6_sr_localsid_t *ls0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + ip0 = vlib_buffer_get_current (b0); + ip6_ext_header_find_t (ip0, prev0, sr0, IP_PROTOCOL_IPV6_ROUTE); + + /* Lookup the SR End behavior based on IP DA (adj) */ + ls0 = + pool_elt_at_index (sm->localsids, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + + /* SRH processing */ + if (ls0->end_psp) + end_psp_srh_processing (node, b0, ip0, prev0, sr0, ls0, &next0); + else + end_srh_processing (node, b0, ip0, sr0, ls0, &next0); + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_localsid_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->num_segments = 0; + tr->localsid_index = ls0 - sm->localsids; + + if (ip0 == vlib_buffer_get_current (b0)) + { + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->out_dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->out_dst.as_u8)); + if (ip0->protocol == IP_PROTOCOL_IPV6_ROUTE + && sr0->type == ROUTING_HEADER_TYPE_SR) + { + clib_memcpy (tr->sr, sr0->segments, sr0->length * 8); + tr->num_segments = + sr0->length * 8 / sizeof (ip6_address_t); + tr->segments_left = sr0->segments_left; + } + } + else + { + tr->num_segments = 0xFF; + } + } + + vlib_increment_combined_counter + (((next0 == + SR_LOCALSID_NEXT_ERROR) ? 
&(sm->sr_ls_invalid_counters) : + &(sm->sr_ls_valid_counters)), cpu_index, ls0 - sm->localsids, 1, + vlib_buffer_length_in_chain (vm, b0)); + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return from_frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sr_localsid_node) = { + .function = sr_localsid_fn, + .name = "sr-localsid", + .vector_size = sizeof (u32), + .format_trace = format_sr_localsid_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = SR_LOCALSID_N_ERROR, + .error_strings = sr_localsid_error_strings, + .n_next_nodes = SR_LOCALSID_N_NEXT, + .next_nodes = { +#define _(s,n) [SR_LOCALSID_NEXT_##s] = n, + foreach_sr_localsid_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +static u8 * +format_sr_dpo (u8 * s, va_list * args) +{ + index_t index = va_arg (*args, index_t); + CLIB_UNUSED (u32 indent) = va_arg (*args, u32); + + return (format (s, "SR: localsid_index:[%d]", index)); +} + +const static dpo_vft_t sr_loc_vft = { + .dv_lock = sr_dpo_lock, + .dv_unlock = sr_dpo_unlock, + .dv_format = format_sr_dpo, +}; + +const static char *const sr_loc_ip6_nodes[] = { + "sr-localsid", + NULL, +}; + +const static char *const *const sr_loc_nodes[DPO_PROTO_NUM] = { + [DPO_PROTO_IP6] = sr_loc_ip6_nodes, +}; + +const static char *const sr_loc_d_ip6_nodes[] = { + "sr-localsid-d", + NULL, +}; + +const static char *const *const sr_loc_d_nodes[DPO_PROTO_NUM] = { + [DPO_PROTO_IP6] = sr_loc_d_ip6_nodes, +}; + + +/*************************** SR LocalSID plugins ******************************/ +/** + * @brief SR LocalSID plugin registry + */ +int +sr_localsid_register_function (vlib_main_t * vm, u8 * fn_name, + u8 * keyword_str, u8 * def_str, + u8 * params_str, dpo_type_t * dpo, + format_function_t * ls_format, + unformat_function_t * ls_unformat, + sr_plugin_callback_t * creation_fn, + sr_plugin_callback_t * removal_fn) +{ + ip6_sr_main_t *sm = &sr_main; + uword *p; + + sr_localsid_fn_registration_t *plugin; + + /* Did this function exist? 
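/*
 * Editor's note -- not part of the original commit. The dpo_vft_t and
 * node-name tables above are what tie the localsid DPOs to the graph nodes:
 * sr_localsids_init() below registers them with
 *
 *   sr_localsid_dpo_type   = dpo_register_new_type (&sr_loc_vft, sr_loc_nodes);
 *   sr_localsid_d_dpo_type = dpo_register_new_type (&sr_loc_vft, sr_loc_d_nodes);
 *
 * so an ip6-lookup hit on a localsid prefix (installed with
 * fib_table_entry_special_dpo_add() in sr_cli_localsid()) hands the packet
 * to "sr-localsid" for End/End.X or to "sr-localsid-d" for the
 * decapsulation variants, carrying the DPO index (ls - sm->localsids) in
 * vnet_buffer (b)->ip.adj_index[VLIB_TX], which is the index the nodes use
 * to recover their ip6_sr_localsid_t.
 */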
If so update it */ + p = hash_get_mem (sm->plugin_functions_by_key, fn_name); + if (p) + { + plugin = pool_elt_at_index (sm->plugin_functions, p[0]); + } + /* Else create a new one and set hash key */ + else + { + pool_get (sm->plugin_functions, plugin); + hash_set_mem (sm->plugin_functions_by_key, fn_name, + plugin - sm->plugin_functions); + } + + memset (plugin, 0, sizeof (*plugin)); + + plugin->sr_localsid_function_number = (plugin - sm->plugin_functions); + plugin->sr_localsid_function_number += SR_BEHAVIOR_LAST; + plugin->ls_format = ls_format; + plugin->ls_unformat = ls_unformat; + plugin->creation = creation_fn; + plugin->removal = removal_fn; + clib_memcpy (&plugin->dpo, dpo, sizeof (dpo_type_t)); + plugin->function_name = format (0, "%s%c", fn_name, 0); + plugin->keyword_str = format (0, "%s%c", keyword_str, 0); + plugin->def_str = format (0, "%s%c", def_str, 0); + plugin->params_str = format (0, "%s%c", params_str, 0); + + return plugin->sr_localsid_function_number; +} + +/** + * @brief CLI function to 'show' all available SR LocalSID behaviors + */ +static clib_error_t * +show_sr_localsid_behaviors_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ip6_sr_main_t *sm = &sr_main; + sr_localsid_fn_registration_t *plugin; + sr_localsid_fn_registration_t **plugins_vec = 0; + int i; + + vlib_cli_output (vm, + "SR LocalSIDs behaviors:\n-----------------------\n\n"); + + /* *INDENT-OFF* */ + pool_foreach (plugin, sm->plugin_functions, + ({ vec_add1 (plugins_vec, plugin); })); + /* *INDENT-ON* */ + + /* Print static behaviors */ + vlib_cli_output (vm, "Default behaviors:\n" + "\tEnd\t-> Endpoint.\n" + "\tEnd.X\t-> Endpoint with decapsulation and Layer-3 cross-connect.\n" + "\t\tParameters: ' '\n" + "\tEnd.DX2\t-> Endpoint with decapsulation and Layer-2 cross-connect.\n" + "\t\tParameters: ''\n" + "\tEnd.DX6\t-> Endpoint with decapsulation and IPv6 cross-connect.\n" + "\t\tParameters: ' '\n" + "\tEnd.DX4\t-> Endpoint with decapsulation and IPv4 cross-connect.\n" + "\t\tParameters: ' '\n" + "\tEnd.DT6\t-> Endpoint with decapsulation and specific IPv6 table lookup.\n" + "\t\tParameters: ''\n" + "\tEnd.DT4\t-> Endpoint with decapsulation and specific IPv4 table lookup.\n" + "\t\tParameters: ''\n"); + vlib_cli_output (vm, "Plugin behaviors:\n"); + for (i = 0; i < vec_len (plugins_vec); i++) + { + plugin = plugins_vec[i]; + vlib_cli_output (vm, "\t%s\t-> %s.\n", plugin->keyword_str, + plugin->def_str); + vlib_cli_output (vm, "\t\tParameters: '%s'\n", plugin->params_str); + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_sr_localsid_behaviors_command, static) = { + .path = "show sr localsids behaviors", + .short_help = "show sr localsids behaviors", + .function = show_sr_localsid_behaviors_command_fn, +}; +/* *INDENT-ON* */ + +/** + * @brief SR LocalSID initialization + */ +clib_error_t * +sr_localsids_init (vlib_main_t * vm) +{ + /* Init memory for function keys */ + ip6_sr_main_t *sm = &sr_main; + sm->localsids_index_by_key = + hash_create_mem (0, sizeof (ip6_address_t), sizeof (uword)); + /* Init SR behaviors DPO type */ + sr_localsid_dpo_type = dpo_register_new_type (&sr_loc_vft, sr_loc_nodes); + /* Init SR behaviors DPO type */ + sr_localsid_d_dpo_type = + dpo_register_new_type (&sr_loc_vft, sr_loc_d_nodes); + /* Init memory for localsid plugins */ + sm->plugin_functions_by_key = hash_create_string (0, sizeof (uword)); + return 0; +} + +VLIB_INIT_FUNCTION (sr_localsids_init); +/* +* fd.io coding-style-patch-verification: ON +* +* Local 
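/*
 * Editor's illustrative sketch -- not part of the original commit. How an
 * out-of-tree behavior could register itself through
 * sr_localsid_register_function() above. Every my_* symbol is a
 * hypothetical placeholder that such a plugin would have to provide; only
 * the registration call itself comes from this file.
 */
static int
example_register_plugin_behavior (vlib_main_t * vm)
{
  extern dpo_type_t my_dpo_type;
  extern format_function_t my_format_fn;
  extern unformat_function_t my_unformat_fn;
  extern sr_plugin_callback_t my_creation_fn, my_removal_fn;

  /* The returned behavior number is >= SR_BEHAVIOR_LAST and is what ends
   * up in ls->behavior for localsids created with this behavior. */
  return sr_localsid_register_function
    (vm, (u8 *) "End.EXAMPLE", (u8 *) "end.example",
     (u8 *) "Endpoint with example processing", (u8 *) "<params>",
     &my_dpo_type, &my_format_fn, &my_unformat_fn,
     &my_creation_fn, &my_removal_fn);
}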
Variables: +* eval: (c-set-style "gnu") +* End: +*/ diff --git a/src/vnet/sr/sr_packet.h b/src/vnet/sr/sr_packet.h old mode 100644 new mode 100755 index 179b94c2..7af4ad4d --- a/src/vnet/sr/sr_packet.h +++ b/src/vnet/sr/sr_packet.h @@ -20,60 +20,36 @@ * limitations under the License. */ -/** - * @file - * @brief The Segment Routing Header (SRH). - * - * The Segment Routing Header (SRH) is defined in the diagram below. - * - * - * 0 1 2 3 - * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * | Next Header | Hdr Ext Len | Routing Type | Segments Left | - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * | First Segment | Flags | HMAC Key ID | - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * | | - * | Segment List[0] (128 bits ipv6 address) | - * | | - * | | - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * | | - * | | - * ... - * | | - * | | - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * | | - * | Segment List[n] (128 bits ipv6 address) | - * | | - * | | - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * | | - * | Policy List[0] (optional) | - * | | - * | | - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * | | - * | Policy List[1] (optional) | - * | | - * | | - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * | | - * | Policy List[2] (optional) | - * | | - * | | - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * | | - * | | - * | | - * | HMAC (256 bits) | - * | (optional) | - * | | - * | | - * | | - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +/* + * The Segment Routing Header (SRH) is defined as follows: + * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Next Header | Hdr Ext Len | Routing Type | Segments Left | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | First Segment | Flags | RESERVED | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | + * | Segment List[0] (128 bits IPv6 address) | + * | | + * | | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | + * | | + * ... + * | | + * | | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | + * | Segment List[n] (128 bits IPv6 address) | + * | | + * | | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * // // + * // Optional Type Length Value objects (variable) // + * // // + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * * where: * @@ -87,57 +63,39 @@ * * o Segments Left. Defined in [RFC2460], it contains the index, in * the Segment List, of the next segment to inspect. Segments Left - * is decremented at each segment and it is used as an index in the - * segment list. - * - * o First Segment: offset in the SRH, not including the first 8 octets - * and expressed in 16-octet units, pointing to the last element of - * the segment list, which is in fact the first segment of the - * segment routing path. - * - * o Flags: 16 bits of flags. Following flags are defined: - * - * 1 - * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * |C|P|R|R| Policy Flags | - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * - * C-flag: Clean-up flag. 
Set when the SRH has to be removed from - * the packet when packet reaches the last segment. - * - * P-flag: Protected flag. Set when the packet has been rerouted - * through FRR mechanism by a SR endpoint node. See Section 6.3 - * for more details. - * - * R-flags. Reserved and for future use. - * - * Policy Flags. Define the type of the IPv6 addresses encoded - * into the Policy List (see below). The following have been - * defined: + * is decremented at each segment. * - * Bits 4-6: determine the type of the first element after the - * segment list. + * o First Segment: contains the index, in the Segment List, of the + * first segment of the path which is in fact the last element of the + * Segment List. * - * Bits 7-9: determine the type of the second element. + * o Flags: 8 bits of flags. Following flags are defined: * - * Bits 10-12: determine the type of the third element. + * 0 1 2 3 4 5 6 7 + * +-+-+-+-+-+-+-+-+ + * |U|P|O|A|H| U | + * +-+-+-+-+-+-+-+-+ * - * Bits 13-15: determine the type of the fourth element. + * U: Unused and for future use. SHOULD be unset on transmission + * and MUST be ignored on receipt. * - * The following values are used for the type: + * P-flag: Protected flag. Set when the packet has been rerouted + * through FRR mechanism by an SR endpoint node. * - * 0x0: Not present. If value is set to 0x0, it means the - * element represented by these bits is not present. + * O-flag: OAM flag. When set, it indicates that this packet is + * an operations and management (OAM) packet. * - * 0x1: SR Ingress. + * A-flag: Alert flag. If present, it means important Type Length + * Value (TLV) objects are present. See Section 3.1 for details + * on TLVs objects. * - * 0x2: SR Egress. + * H-flag: HMAC flag. If set, the HMAC TLV is present and is + * encoded as the last TLV of the SRH. In other words, the last + * 36 octets of the SRH represent the HMAC information. See + * Section 3.1.5 for details on the HMAC TLV. * - * 0x3: Original Source Address. - * - * o HMAC Key ID and HMAC field, and their use are defined in - * [I-D.vyncke-6man-segment-routing-security]. + * o RESERVED: SHOULD be unset on transmission and MUST be ignored on + * receipt. * * o Segment List[n]: 128 bit IPv6 addresses representing the nth * segment in the Segment List. The Segment List is encoded starting @@ -147,23 +105,8 @@ * contains the first segment of the path. The index contained in * "Segments Left" identifies the current active segment. * - * o Policy List. Optional addresses representing specific nodes in - * the SR path such as: - * - * SR Ingress: a 128 bit generic identifier representing the - * ingress in the SR domain (i.e.: it needs not to be a valid IPv6 - * address). - * - * SR Egress: a 128 bit generic identifier representing the egress - * in the SR domain (i.e.: it needs not to be a valid IPv6 - * address). - * - * Original Source Address: IPv6 address originally present in the - * SA field of the packet. + * o Type Length Value (TLV) are described in Section 3.1. * - * The segments in the Policy List are encoded after the segment list - * and they are optional. If none are in the SRH, all bits of the - * Policy List Flags MUST be set to 0x0. */ #ifndef IPPROTO_IPV6_ROUTE @@ -171,81 +114,46 @@ #endif #define ROUTING_HEADER_TYPE_SR 4 -/** - @brief SR header struct. -*/ + typedef struct { - /** Protocol for next header. */ + /* Protocol for next header. 
*/ u8 protocol; - - /** + /* * Length of routing header in 8 octet units, * not including the first 8 octets */ u8 length; - /** Type of routing header; type 4 = segement routing */ + /* Type of routing header; type 4 = segement routing */ u8 type; - /** Next segment in the segment list */ + /* Next segment in the segment list */ u8 segments_left; - /** - * Policy list pointer: offset in the SRH of the policy - * list - in 16-octet units - not including the first 8 octets. - */ + /* Pointer to the first segment in the header */ u8 first_segment; - /** Flag bits */ -#define IP6_SR_HEADER_FLAG_CLEANUP (0x8000) - /** Flag bits */ -#define IP6_SR_HEADER_FLAG_PROTECTED (0x4000) - /** Flag bits */ -#define IP6_SR_HEADER_FLAG_RESERVED (0x3000) - /** Flag bits */ -#define IP6_SR_HEADER_FLAG_PL_ELT_NOT_PRESENT (0x0) - /** Flag bits */ -#define IP6_SR_HEADER_FLAG_PL_ELT_INGRESS_PE (0x1) - /** Flag bits */ -#define IP6_SR_HEADER_FLAG_PL_ELT_EGRESS_PE (0x2) - /** Flag bits */ -#define IP6_SR_HEADER_FLAG_PL_ELT_ORIG_SRC_ADDR (0x3) - /** values 0x4 - 0x7 are reserved */ - u16 flags; - u8 hmac_key; + /* Flag bits */ +#define IP6_SR_HEADER_FLAG_PROTECTED (0x40) +#define IP6_SR_HEADER_FLAG_OAM (0x20) +#define IP6_SR_HEADER_FLAG_ALERT (0x10) +#define IP6_SR_HEADER_FLAG_HMAC (0x80) + + /* values 0x0, 0x4 - 0x7 are reserved */ + u8 flags; + u16 reserved; - /** The segment + policy list elts */ + /* The segment elts */ ip6_address_t segments[0]; } __attribute__ ((packed)) ip6_sr_header_t; -static inline int -ip6_sr_policy_list_shift_from_index (int pl_index) -{ - return (-3 * pl_index) + 12; -} - -/** pl_index is one-origined */ -static inline int -ip6_sr_policy_list_flags (u16 flags_host_byte_order, int pl_index) -{ - int shift; - - if (pl_index <= 0 || pl_index > 4) - return 0; - - shift = (-3 * pl_index) + 12; - flags_host_byte_order >>= shift; - - return (flags_host_byte_order & 7); -} +/* +* fd.io coding-style-patch-verification: ON +* +* Local Variables: +* eval: (c-set-style "gnu") +* End: +*/ #endif /* included_vnet_sr_packet_h */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/sr/sr_policy_rewrite.c b/src/vnet/sr/sr_policy_rewrite.c new file mode 100755 index 00000000..1f8bdca5 --- /dev/null +++ b/src/vnet/sr/sr_policy_rewrite.c @@ -0,0 +1,3253 @@ +/* + * sr_policy_rewrite.c: ipv6 sr policy creation + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file + * @brief SR policy creation and application + * + * Create an SR policy. + * An SR policy can be either of 'default' type or 'spray' type + * An SR policy has attached a list of SID lists. + * In case the SR policy is a default one it will load balance among them. + * An SR policy has associated a BindingSID. + * In case any packet arrives with IPv6 DA == BindingSID then the SR policy + * associated to such bindingSID will be applied to such packet. 
+ * + * SR policies can be applied either by using IPv6 encapsulation or + * SRH insertion. Both methods can be found on this file. + * + * Traffic input usually is IPv6 packets. However it is possible to have + * IPv4 packets or L2 frames. (that are encapsulated into IPv6 with SRH) + * + * This file provides the appropiates VPP graph nodes to do any of these + * methods. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/** + * @brief SR policy rewrite trace + */ +typedef struct +{ + ip6_address_t src, dst; +} sr_policy_rewrite_trace_t; + +/* Graph arcs */ +#define foreach_sr_policy_rewrite_next \ +_(IP6_LOOKUP, "ip6-lookup") \ +_(ERROR, "error-drop") + +typedef enum +{ +#define _(s,n) SR_POLICY_REWRITE_NEXT_##s, + foreach_sr_policy_rewrite_next +#undef _ + SR_POLICY_REWRITE_N_NEXT, +} sr_policy_rewrite_next_t; + +/* SR rewrite errors */ +#define foreach_sr_policy_rewrite_error \ +_(INTERNAL_ERROR, "Segment Routing undefined error") \ +_(BSID_ZERO, "BSID with SL = 0") \ +_(COUNTER_TOTAL, "SR steered IPv6 packets") \ +_(COUNTER_ENCAP, "SR: Encaps packets") \ +_(COUNTER_INSERT, "SR: SRH inserted packets") \ +_(COUNTER_BSID, "SR: BindingSID steered packets") + +typedef enum +{ +#define _(sym,str) SR_POLICY_REWRITE_ERROR_##sym, + foreach_sr_policy_rewrite_error +#undef _ + SR_POLICY_REWRITE_N_ERROR, +} sr_policy_rewrite_error_t; + +static char *sr_policy_rewrite_error_strings[] = { +#define _(sym,string) string, + foreach_sr_policy_rewrite_error +#undef _ +}; + +/** + * @brief Dynamically added SR SL DPO type + */ +static dpo_type_t sr_pr_encaps_dpo_type; +static dpo_type_t sr_pr_insert_dpo_type; +static dpo_type_t sr_pr_bsid_encaps_dpo_type; +static dpo_type_t sr_pr_bsid_insert_dpo_type; + +/** + * @brief IPv6 SA for encapsulated packets + */ +static ip6_address_t sr_pr_encaps_src; + +/******************* SR rewrite set encaps IPv6 source addr *******************/ +/* Note: This is temporal. 
We don't know whether to follow this path or + take the ip address of a loopback interface or even the OIF */ + +static clib_error_t * +set_sr_src_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (input, "addr %U", unformat_ip6_address, &sr_pr_encaps_src)) + return 0; + else + return clib_error_return (0, "No address specified"); + } + return clib_error_return (0, "No address specified"); +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (set_sr_src_command, static) = { + .path = "set sr encaps source", + .short_help = "set sr encaps source addr ", + .function = set_sr_src_command_fn, +}; +/* *INDENT-ON* */ + +/*********************** SR rewrite string computation ************************/ +/** + * @brief SR rewrite string computation for IPv6 encapsulation (inline) + * + * @param sl is a vector of IPv6 addresses composing the Segment List + * + * @return precomputed rewrite string for encapsulation + */ +static inline u8 * +compute_rewrite_encaps (ip6_address_t * sl) +{ + ip6_header_t *iph; + ip6_sr_header_t *srh; + ip6_address_t *addrp, *this_address; + u32 header_length = 0; + u8 *rs = NULL; + + header_length = 0; + header_length += IPv6_DEFAULT_HEADER_LENGTH; + if (vec_len (sl) > 1) + { + header_length += sizeof (ip6_sr_header_t); + header_length += vec_len (sl) * sizeof (ip6_address_t); + } + + vec_validate (rs, header_length - 1); + + iph = (ip6_header_t *) rs; + iph->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 (0 | ((6 & 0xF) << 28)); + iph->src_address.as_u64[0] = sr_pr_encaps_src.as_u64[0]; + iph->src_address.as_u64[1] = sr_pr_encaps_src.as_u64[1]; + iph->payload_length = header_length - IPv6_DEFAULT_HEADER_LENGTH; + iph->protocol = IP_PROTOCOL_IPV6; + iph->hop_limit = IPv6_DEFAULT_HOP_LIMIT; + + srh = (ip6_sr_header_t *) (iph + 1); + iph->protocol = IP_PROTOCOL_IPV6_ROUTE; + srh->protocol = IP_PROTOCOL_IPV6; + srh->type = ROUTING_HEADER_TYPE_SR; + srh->segments_left = vec_len (sl) - 1; + srh->first_segment = vec_len (sl) - 1; + srh->length = ((sizeof (ip6_sr_header_t) + + (vec_len (sl) * sizeof (ip6_address_t))) / 8) - 1; + srh->flags = 0x00; + srh->reserved = 0x00; + addrp = srh->segments + vec_len (sl) - 1; + vec_foreach (this_address, sl) + { + clib_memcpy (addrp->as_u8, this_address->as_u8, sizeof (ip6_address_t)); + addrp--; + } + iph->dst_address.as_u64[0] = sl->as_u64[0]; + iph->dst_address.as_u64[1] = sl->as_u64[1]; + return rs; +} + +/** + * @brief SR rewrite string computation for SRH insertion (inline) + * + * @param sl is a vector of IPv6 addresses composing the Segment List + * + * @return precomputed rewrite string for SRH insertion + */ +static inline u8 * +compute_rewrite_insert (ip6_address_t * sl) +{ + ip6_sr_header_t *srh; + ip6_address_t *addrp, *this_address; + u32 header_length = 0; + u8 *rs = NULL; + + header_length = 0; + header_length += sizeof (ip6_sr_header_t); + header_length += (vec_len (sl) + 1) * sizeof (ip6_address_t); + + vec_validate (rs, header_length - 1); + + srh = (ip6_sr_header_t *) rs; + srh->type = ROUTING_HEADER_TYPE_SR; + srh->segments_left = vec_len (sl); + srh->first_segment = vec_len (sl); + srh->length = ((sizeof (ip6_sr_header_t) + + ((vec_len (sl) + 1) * sizeof (ip6_address_t))) / 8) - 1; + srh->flags = 0x00; + srh->reserved = 0x0000; + addrp = srh->segments + vec_len (sl); + vec_foreach (this_address, sl) + { + clib_memcpy (addrp->as_u8, this_address->as_u8, sizeof (ip6_address_t)); + addrp--; + 
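/*
 * Editor's note -- a worked example (not from the original commit) for
 * compute_rewrite_encaps() above. For a segment list <S1, S2, S3>:
 *
 *   header_length      = 40 (IPv6) + 8 (SRH fixed part) + 3 * 16 = 96 bytes
 *   srh->length        = ((8 + 48) / 8) - 1 = 6
 *   srh->segments_left = srh->first_segment = 2
 *
 * The vec_foreach fills the segment array from the end, so on the wire
 * Segment List[0] holds S3 and Segment List[2] holds S1, while the outer
 * IPv6 destination address is set to S1, the first segment of the path.
 * compute_rewrite_insert() uses the same reversed layout but sizes the SRH
 * for one extra address and leaves Segment List[0] unwritten, to be filled
 * when the SRH is actually inserted into a transit packet.
 */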
} + return rs; +} + +/** + * @brief SR rewrite string computation for SRH insertion with BSID (inline) + * + * @param sl is a vector of IPv6 addresses composing the Segment List + * + * @return precomputed rewrite string for SRH insertion with BSID + */ +static inline u8 * +compute_rewrite_bsid (ip6_address_t * sl) +{ + ip6_sr_header_t *srh; + ip6_address_t *addrp, *this_address; + u32 header_length = 0; + u8 *rs = NULL; + + header_length = 0; + header_length += sizeof (ip6_sr_header_t); + header_length += vec_len (sl) * sizeof (ip6_address_t); + + vec_validate (rs, header_length - 1); + + srh = (ip6_sr_header_t *) rs; + srh->type = ROUTING_HEADER_TYPE_SR; + srh->segments_left = vec_len (sl) - 1; + srh->first_segment = vec_len (sl) - 1; + srh->length = ((sizeof (ip6_sr_header_t) + + (vec_len (sl) * sizeof (ip6_address_t))) / 8) - 1; + srh->flags = 0x00; + srh->reserved = 0x0000; + addrp = srh->segments + vec_len (sl) - 1; + vec_foreach (this_address, sl) + { + clib_memcpy (addrp->as_u8, this_address->as_u8, sizeof (ip6_address_t)); + addrp--; + } + return rs; +} + +/*************************** SR LB helper functions **************************/ +/** + * @brief Creates a Segment List and adds it to an SR policy + * + * Creates a Segment List and adds it to the SR policy. Notice that the SL are + * not necessarily unique. Hence there might be two Segment List within the + * same SR Policy with exactly the same segments and same weight. + * + * @param sr_policy is the SR policy where the SL will be added + * @param sl is a vector of IPv6 addresses composing the Segment List + * @param weight is the weight of the SegmentList (for load-balancing purposes) + * @param is_encap represents the mode (SRH insertion vs Encapsulation) + * + * @return pointer to the just created segment list + */ +static inline ip6_sr_sl_t * +create_sl (ip6_sr_policy_t * sr_policy, ip6_address_t * sl, u32 weight, + u8 is_encap) +{ + ip6_sr_main_t *sm = &sr_main; + ip6_sr_sl_t *segment_list; + + pool_get (sm->sid_lists, segment_list); + memset (segment_list, 0, sizeof (*segment_list)); + + vec_add1 (sr_policy->segments_lists, segment_list - sm->sid_lists); + + /* Fill in segment list */ + segment_list->weight = + (weight != (u32) ~ 0 ? 
weight : SR_SEGMENT_LIST_WEIGHT_DEFAULT); + segment_list->segments = vec_dup (sl); + + if (is_encap) + { + segment_list->rewrite = compute_rewrite_encaps (sl); + segment_list->rewrite_bsid = segment_list->rewrite; + } + else + { + segment_list->rewrite = compute_rewrite_insert (sl); + segment_list->rewrite_bsid = compute_rewrite_bsid (sl); + } + + /* Create DPO */ + dpo_reset (&segment_list->bsid_dpo); + dpo_reset (&segment_list->ip6_dpo); + dpo_reset (&segment_list->ip4_dpo); + + if (is_encap) + { + dpo_set (&segment_list->ip6_dpo, sr_pr_encaps_dpo_type, DPO_PROTO_IP6, + segment_list - sm->sid_lists); + dpo_set (&segment_list->ip4_dpo, sr_pr_encaps_dpo_type, DPO_PROTO_IP4, + segment_list - sm->sid_lists); + dpo_set (&segment_list->bsid_dpo, sr_pr_bsid_encaps_dpo_type, + DPO_PROTO_IP6, segment_list - sm->sid_lists); + } + else + { + dpo_set (&segment_list->ip6_dpo, sr_pr_insert_dpo_type, DPO_PROTO_IP6, + segment_list - sm->sid_lists); + dpo_set (&segment_list->bsid_dpo, sr_pr_bsid_insert_dpo_type, + DPO_PROTO_IP6, segment_list - sm->sid_lists); + } + + return segment_list; +} + +/** + * @brief Updates the Load Balancer after an SR Policy change + * + * @param sr_policy is the modified SR Policy + */ +static inline void +update_lb (ip6_sr_policy_t * sr_policy) +{ + flow_hash_config_t fhc; + u32 *sl_index; + ip6_sr_sl_t *segment_list; + ip6_sr_main_t *sm = &sr_main; + load_balance_path_t path; + load_balance_path_t *ip4_path_vector = 0; + load_balance_path_t *ip6_path_vector = 0; + load_balance_path_t *b_path_vector = 0; + + /* In case LB does not exist, create it */ + if (!dpo_id_is_valid (&sr_policy->bsid_dpo)) + { + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP6, + .fp_len = 128, + .fp_addr = { + .ip6 = sr_policy->bsid, + } + }; + + /* Add FIB entry for BSID */ + fhc = fib_table_get_flow_hash_config (sr_policy->fib_table, + dpo_proto_to_fib (DPO_PROTO_IP6)); + + dpo_set (&sr_policy->bsid_dpo, DPO_LOAD_BALANCE, DPO_PROTO_IP6, + load_balance_create (0, DPO_PROTO_IP6, fhc)); + + dpo_set (&sr_policy->ip6_dpo, DPO_LOAD_BALANCE, DPO_PROTO_IP6, + load_balance_create (0, DPO_PROTO_IP6, fhc)); + + /* Update FIB entry's to point to the LB DPO in the main FIB and hidden one */ + fib_table_entry_special_dpo_update (fib_table_id_find_fib_index + (FIB_PROTOCOL_IP6, + sr_policy->fib_table), &pfx, + FIB_SOURCE_SR, + FIB_ENTRY_FLAG_EXCLUSIVE, + &sr_policy->bsid_dpo); + + fib_table_entry_special_dpo_update (sm->fib_table_ip6, + &pfx, + FIB_SOURCE_SR, + FIB_ENTRY_FLAG_EXCLUSIVE, + &sr_policy->ip6_dpo); + + if (sr_policy->is_encap) + { + dpo_set (&sr_policy->ip4_dpo, DPO_LOAD_BALANCE, DPO_PROTO_IP4, + load_balance_create (0, DPO_PROTO_IP4, fhc)); + + fib_table_entry_special_dpo_update (sm->fib_table_ip4, + &pfx, + FIB_SOURCE_SR, + FIB_ENTRY_FLAG_EXCLUSIVE, + &sr_policy->ip4_dpo); + } + + } + + /* Create the LB path vector */ + //path_vector = vec_new(load_balance_path_t, vec_len(sr_policy->segments_lists)); + vec_foreach (sl_index, sr_policy->segments_lists) + { + segment_list = pool_elt_at_index (sm->sid_lists, *sl_index); + path.path_dpo = segment_list->bsid_dpo; + path.path_weight = segment_list->weight; + vec_add1 (b_path_vector, path); + path.path_dpo = segment_list->ip6_dpo; + vec_add1 (ip6_path_vector, path); + if (sr_policy->is_encap) + { + path.path_dpo = segment_list->ip4_dpo; + vec_add1 (ip4_path_vector, path); + } + } + + /* Update LB multipath */ + load_balance_multipath_update (&sr_policy->bsid_dpo, b_path_vector, + LOAD_BALANCE_FLAG_NONE); + load_balance_multipath_update 
(&sr_policy->ip6_dpo, ip6_path_vector, + LOAD_BALANCE_FLAG_NONE); + if (sr_policy->is_encap) + load_balance_multipath_update (&sr_policy->ip4_dpo, ip4_path_vector, + LOAD_BALANCE_FLAG_NONE); + + /* Cleanup */ + vec_free (b_path_vector); + vec_free (ip6_path_vector); + vec_free (ip4_path_vector); + +} + +/** + * @brief Updates the Replicate DPO after an SR Policy change + * + * @param sr_policy is the modified SR Policy (type spray) + */ +static inline void +update_replicate (ip6_sr_policy_t * sr_policy) +{ + u32 *sl_index; + ip6_sr_sl_t *segment_list; + ip6_sr_main_t *sm = &sr_main; + load_balance_path_t path; + load_balance_path_t *b_path_vector = 0; + load_balance_path_t *ip6_path_vector = 0; + load_balance_path_t *ip4_path_vector = 0; + + /* In case LB does not exist, create it */ + if (!dpo_id_is_valid (&sr_policy->bsid_dpo)) + { + dpo_set (&sr_policy->bsid_dpo, DPO_REPLICATE, + DPO_PROTO_IP6, replicate_create (0, DPO_PROTO_IP6)); + + dpo_set (&sr_policy->ip6_dpo, DPO_REPLICATE, + DPO_PROTO_IP6, replicate_create (0, DPO_PROTO_IP6)); + + /* Update FIB entry's DPO to point to SR without LB */ + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP6, + .fp_len = 128, + .fp_addr = { + .ip6 = sr_policy->bsid, + } + }; + fib_table_entry_special_dpo_update (fib_table_id_find_fib_index + (FIB_PROTOCOL_IP6, + sr_policy->fib_table), &pfx, + FIB_SOURCE_SR, + FIB_ENTRY_FLAG_EXCLUSIVE, + &sr_policy->bsid_dpo); + + fib_table_entry_special_dpo_update (sm->fib_table_ip6, + &pfx, + FIB_SOURCE_SR, + FIB_ENTRY_FLAG_EXCLUSIVE, + &sr_policy->ip6_dpo); + + if (sr_policy->is_encap) + { + dpo_set (&sr_policy->ip4_dpo, DPO_REPLICATE, DPO_PROTO_IP4, + replicate_create (0, DPO_PROTO_IP4)); + + fib_table_entry_special_dpo_update (sm->fib_table_ip4, + &pfx, + FIB_SOURCE_SR, + FIB_ENTRY_FLAG_EXCLUSIVE, + &sr_policy->ip4_dpo); + } + + } + + /* Create the replicate path vector */ + path.path_weight = 1; + vec_foreach (sl_index, sr_policy->segments_lists) + { + segment_list = pool_elt_at_index (sm->sid_lists, *sl_index); + path.path_dpo = segment_list->bsid_dpo; + vec_add1 (b_path_vector, path); + path.path_dpo = segment_list->ip6_dpo; + vec_add1 (ip6_path_vector, path); + if (sr_policy->is_encap) + { + path.path_dpo = segment_list->ip4_dpo; + vec_add1 (ip4_path_vector, path); + } + } + + /* Update replicate multipath */ + replicate_multipath_update (&sr_policy->bsid_dpo, b_path_vector); + replicate_multipath_update (&sr_policy->ip6_dpo, ip6_path_vector); + if (sr_policy->is_encap) + replicate_multipath_update (&sr_policy->ip4_dpo, ip4_path_vector); + + /* Cleanup */ + vec_free (b_path_vector); + vec_free (ip6_path_vector); + vec_free (ip4_path_vector); +} + +/******************************* SR rewrite API *******************************/ +/* Three functions for handling sr policies: + * -> sr_policy_add + * -> sr_policy_del + * -> sr_policy_mod + * All of them are API. CLI function on sr_policy_command_fn */ + +/** + * @brief Create a new SR policy + * + * @param bsid is the bindingSID of the SR Policy + * @param segments is a vector of IPv6 address composing the segment list + * @param weight is the weight of the sid list. optional. + * @param behavior is the behavior of the SR policy. 
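The per-segment-list weights stored above become the path weights handed to the load-balance DPO, so for a non-spray policy the share of flows carried by each SID list is roughly proportional to its weight (weighted ECMP); for a spray policy each packet is instead replicated through every SID list. A small standalone sketch of that proportion, with example weights:

#include <stdio.h>

int
main (void)
{
  /* example: one SR policy with two segment lists, weights 1 and 3 */
  unsigned weights[] = { 1, 3 };
  unsigned total = weights[0] + weights[1];
  unsigned i;

  for (i = 0; i < 2; i++)
    printf ("SL %u carries ~%u%% of the flows\n", i, 100 * weights[i] / total);
  return 0;
}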
(default//spray) + * @param fib_table is the VRF where to install the FIB entry for the BSID + * @param is_encap (bool) whether SR policy should behave as Encap/SRH Insertion + * + * @return 0 if correct, else error + */ +int +sr_policy_add (ip6_address_t * bsid, ip6_address_t * segments, + u32 weight, u8 behavior, u32 fib_table, u8 is_encap) +{ + ip6_sr_main_t *sm = &sr_main; + ip6_sr_policy_t *sr_policy = 0; + ip6_address_t *key_copy; + uword *p; + + /* Search for existing keys (BSID) */ + p = hash_get_mem (sm->sr_policy_index_by_key, bsid); + if (p) + { + /* Add SR policy that already exists; complain */ + return -12; + } + + /* Search collision in FIB entries */ + /* Explanation: It might be possible that some other entity has already + * created a route for the BSID. This in theory is impossible, but in + * practise we could see it. Assert it and scream if needed */ + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP6, + .fp_len = 128, + .fp_addr = { + .ip6 = *bsid, + } + }; + + /* Lookup the FIB index associated to the table selected */ + u32 fib_index = fib_table_id_find_fib_index (FIB_PROTOCOL_IP6, + (fib_table != + (u32) ~ 0 ? fib_table : 0)); + if (fib_index == ~0) + return -13; + + /* Lookup whether there exists an entry for the BSID */ + fib_node_index_t fei = fib_table_lookup_exact_match (fib_index, &pfx); + if (FIB_NODE_INDEX_INVALID != fei) + return -12; //There is an entry for such lookup + + /* Add an SR policy object */ + pool_get (sm->sr_policies, sr_policy); + memset (sr_policy, 0, sizeof (*sr_policy)); + clib_memcpy (&sr_policy->bsid, bsid, sizeof (ip6_address_t)); + sr_policy->type = behavior; + sr_policy->fib_table = (fib_table != (u32) ~ 0 ? fib_table : 0); //Is default FIB 0 ? + sr_policy->is_encap = is_encap; + + /* Copy the key */ + key_copy = vec_new (ip6_address_t, 1); + clib_memcpy (key_copy, bsid, sizeof (ip6_address_t)); + hash_set_mem (sm->sr_policy_index_by_key, key_copy, + sr_policy - sm->sr_policies); + + /* Create a segment list and add the index to the SR policy */ + create_sl (sr_policy, segments, weight, is_encap); + + /* If FIB doesnt exist, create them */ + if (sm->fib_table_ip6 == (u32) ~ 0) + { + sm->fib_table_ip6 = fib_table_create_and_lock (FIB_PROTOCOL_IP6, + "SRv6 steering of IP6 prefixes through BSIDs"); + sm->fib_table_ip4 = fib_table_create_and_lock (FIB_PROTOCOL_IP6, + "SRv6 steering of IP4 prefixes through BSIDs"); + fib_table_flush (sm->fib_table_ip6, FIB_PROTOCOL_IP6, + FIB_SOURCE_SPECIAL); + fib_table_flush (sm->fib_table_ip4, FIB_PROTOCOL_IP6, + FIB_SOURCE_SPECIAL); + } + + /* Create IPv6 FIB for the BindingSID attached to the DPO of the only SL */ + if (sr_policy->type == SR_POLICY_TYPE_DEFAULT) + update_lb (sr_policy); + else if (sr_policy->type == SR_POLICY_TYPE_SPRAY) + update_replicate (sr_policy); + return 0; +} + +/** + * @brief Delete a SR policy + * + * @param bsid is the bindingSID of the SR Policy + * @param index is the index of the SR policy + * + * @return 0 if correct, else error + */ +int +sr_policy_del (ip6_address_t * bsid, u32 index) +{ + ip6_sr_main_t *sm = &sr_main; + ip6_sr_policy_t *sr_policy = 0; + ip6_sr_sl_t *segment_list; + ip6_address_t *key_copy; + u32 *sl_index; + uword *p; + + hash_pair_t *hp; + if (bsid) + { + p = hash_get_mem (sm->sr_policy_index_by_key, bsid); + if (p) + sr_policy = pool_elt_at_index (sm->sr_policies, p[0]); + else + return -1; + } + else + { + sr_policy = pool_elt_at_index (sm->sr_policies, index); + if (!sr_policy) + return -1; + } + + /* Remove BindingSID FIB entry */ + 
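For reference, a minimal fragment (not a standalone program; it assumes the SR headers declaring these functions and the usual vppinfra vector macros are already included, and the addresses are examples from 2001:db8::/32) showing how sr_policy_add() and sr_policy_del() might be driven programmatically:

  ip6_address_t bsid, seg;
  ip6_address_t *segments = 0;
  int rv;

  bsid.as_u64[0] = clib_host_to_net_u64 (0x20010db800000000ULL);
  bsid.as_u64[1] = clib_host_to_net_u64 (0x1);
  seg.as_u64[0] = clib_host_to_net_u64 (0x20010db800000000ULL);
  seg.as_u64[1] = clib_host_to_net_u64 (0x2);
  vec_add1 (segments, seg);

  /* weight ~0 selects SR_SEGMENT_LIST_WEIGHT_DEFAULT, fib_table ~0 means table 0,
   * the last argument selects encapsulation rather than SRH insertion */
  rv = sr_policy_add (&bsid, segments, (u32) ~ 0,
                      SR_POLICY_TYPE_DEFAULT, (u32) ~ 0, 1 /* is_encap */);

  /* ... later, tear the policy down again by BSID (the index argument is ignored) */
  rv = sr_policy_del (&bsid, (u32) ~ 0);
  vec_free (segments);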
fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP6, + .fp_len = 128, + .fp_addr = { + .ip6 = sr_policy->bsid, + } + , + }; + + fib_table_entry_special_remove (fib_table_id_find_fib_index + (FIB_PROTOCOL_IP6, sr_policy->fib_table), + &pfx, FIB_SOURCE_SR); + + fib_table_entry_special_remove (sm->fib_table_ip6, &pfx, FIB_SOURCE_SR); + + if (sr_policy->is_encap) + fib_table_entry_special_remove (sm->fib_table_ip4, &pfx, FIB_SOURCE_SR); + + if (dpo_id_is_valid (&sr_policy->bsid_dpo)) + { + dpo_reset (&sr_policy->bsid_dpo); + dpo_reset (&sr_policy->ip4_dpo); + dpo_reset (&sr_policy->ip6_dpo); + } + + /* Clean SID Lists */ + vec_foreach (sl_index, sr_policy->segments_lists) + { + segment_list = pool_elt_at_index (sm->sid_lists, *sl_index); + vec_free (segment_list->segments); + vec_free (segment_list->rewrite); + vec_free (segment_list->rewrite_bsid); + pool_put_index (sm->sid_lists, *sl_index); + } + + /* Remove SR policy entry */ + hp = hash_get_pair (sm->sr_policy_index_by_key, &sr_policy->bsid); + key_copy = (void *) (hp->key); + hash_unset_mem (sm->sr_policy_index_by_key, &sr_policy->bsid); + vec_free (key_copy); + pool_put (sm->sr_policies, sr_policy); + + /* If FIB empty unlock it */ + if (!pool_elts (sm->sr_policies)) + { + fib_table_unlock (sm->fib_table_ip6, FIB_PROTOCOL_IP6); + fib_table_unlock (sm->fib_table_ip4, FIB_PROTOCOL_IP6); + sm->fib_table_ip6 = (u32) ~ 0; + sm->fib_table_ip4 = (u32) ~ 0; + } + + return 0; +} + +/** + * @brief Modify an existing SR policy + * + * The possible modifications are adding a new Segment List, modifying an + * existing Segment List (modify the weight only) and delete a given + * Segment List from the SR Policy. + * + * @param bsid is the bindingSID of the SR Policy + * @param index is the index of the SR policy + * @param fib_table is the VRF where to install the FIB entry for the BSID + * @param operation is the operation to perform (among the top ones) + * @param segments is a vector of IPv6 address composing the segment list + * @param sl_index is the index of the Segment List to modify/delete + * @param weight is the weight of the sid list. optional. + * @param is_encap Mode. Encapsulation or SRH insertion. 
+ * + * @return 0 if correct, else error + */ +int +sr_policy_mod (ip6_address_t * bsid, u32 index, u32 fib_table, + u8 operation, ip6_address_t * segments, u32 sl_index, + u32 weight) +{ + ip6_sr_main_t *sm = &sr_main; + ip6_sr_policy_t *sr_policy = 0; + ip6_sr_sl_t *segment_list; + u32 *sl_index_iterate; + uword *p; + + if (bsid) + { + p = hash_get_mem (sm->sr_policy_index_by_key, bsid); + if (p) + sr_policy = pool_elt_at_index (sm->sr_policies, p[0]); + else + return -1; + } + else + { + sr_policy = pool_elt_at_index (sm->sr_policies, index); + if (!sr_policy) + return -1; + } + + if (operation == 1) /* Add SR List to an existing SR policy */ + { + /* Create the new SL */ + segment_list = + create_sl (sr_policy, segments, weight, sr_policy->is_encap); + + /* Create a new LB DPO */ + if (sr_policy->type == SR_POLICY_TYPE_DEFAULT) + update_lb (sr_policy); + else if (sr_policy->type == SR_POLICY_TYPE_SPRAY) + update_replicate (sr_policy); + } + else if (operation == 2) /* Delete SR List from an existing SR policy */ + { + /* Check that currently there are more than one SID list */ + if (vec_len (sr_policy->segments_lists) == 1) + return -21; + + /* Check that the SR list does exist and is assigned to the sr policy */ + vec_foreach (sl_index_iterate, sr_policy->segments_lists) + if (*sl_index_iterate == sl_index) + break; + + if (*sl_index_iterate != sl_index) + return -22; + + /* Remove the lucky SR list that is being kicked out */ + segment_list = pool_elt_at_index (sm->sid_lists, sl_index); + vec_free (segment_list->segments); + vec_free (segment_list->rewrite); + vec_free (segment_list->rewrite_bsid); + pool_put_index (sm->sid_lists, sl_index); + vec_del1 (sr_policy->segments_lists, + sl_index_iterate - sr_policy->segments_lists); + + /* Create a new LB DPO */ + if (sr_policy->type == SR_POLICY_TYPE_DEFAULT) + update_lb (sr_policy); + else if (sr_policy->type == SR_POLICY_TYPE_SPRAY) + update_replicate (sr_policy); + } + else if (operation == 3) /* Modify the weight of an existing SR List */ + { + /* Find the corresponding SL */ + vec_foreach (sl_index_iterate, sr_policy->segments_lists) + if (*sl_index_iterate == sl_index) + break; + + if (*sl_index_iterate != sl_index) + return -32; + + /* Change the weight */ + segment_list = pool_elt_at_index (sm->sid_lists, sl_index); + segment_list->weight = weight; + + /* Update LB */ + if (sr_policy->type == SR_POLICY_TYPE_DEFAULT) + update_lb (sr_policy); + } + else /* Incorrect op. 
*/ + return -1; + + return 0; +} + +/** + * @brief CLI for 'sr policies' command family + */ +static clib_error_t * +sr_policy_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + int rv = -1; + char is_del = 0, is_add = 0, is_mod = 0; + char policy_set = 0; + ip6_address_t bsid, next_address; + u32 sr_policy_index = (u32) ~ 0, sl_index = (u32) ~ 0; + u32 weight = (u32) ~ 0, fib_table = (u32) ~ 0; + ip6_address_t *segments = 0, *this_seg; + u8 operation = 0; + char is_encap = 1; + char is_spray = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (!is_add && !is_mod && !is_del && unformat (input, "add")) + is_add = 1; + else if (!is_add && !is_mod && !is_del && unformat (input, "del")) + is_del = 1; + else if (!is_add && !is_mod && !is_del && unformat (input, "mod")) + is_mod = 1; + else if (!policy_set + && unformat (input, "bsid %U", unformat_ip6_address, &bsid)) + policy_set = 1; + else if (!is_add && !policy_set + && unformat (input, "index %d", &sr_policy_index)) + policy_set = 1; + else if (unformat (input, "weight %d", &weight)); + else + if (unformat (input, "next %U", unformat_ip6_address, &next_address)) + { + vec_add2 (segments, this_seg, 1); + clib_memcpy (this_seg->as_u8, next_address.as_u8, + sizeof (*this_seg)); + } + else if (unformat (input, "add sl")) + operation = 1; + else if (unformat (input, "del sl index %d", &sl_index)) + operation = 2; + else if (unformat (input, "mod sl index %d", &sl_index)) + operation = 3; + else if (fib_table == (u32) ~ 0 + && unformat (input, "fib-table %d", &fib_table)); + else if (unformat (input, "encap")) + is_encap = 1; + else if (unformat (input, "insert")) + is_encap = 0; + else if (unformat (input, "spray")) + is_spray = 1; + else + break; + } + + if (!is_add && !is_mod && !is_del) + return clib_error_return (0, "Incorrect CLI"); + + if (!policy_set) + return clib_error_return (0, "No SR policy BSID or index specified"); + + if (is_add) + { + if (vec_len (segments) == 0) + return clib_error_return (0, "No Segment List specified"); + rv = sr_policy_add (&bsid, segments, weight, + (is_spray ? SR_POLICY_TYPE_SPRAY : + SR_POLICY_TYPE_DEFAULT), fib_table, is_encap); + } + else if (is_del) + rv = sr_policy_del ((sr_policy_index != (u32) ~ 0 ? NULL : &bsid), + sr_policy_index); + else if (is_mod) + { + if (!operation) + return clib_error_return (0, "No SL modification specified"); + if (operation != 1 && sl_index == (u32) ~ 0) + return clib_error_return (0, "No Segment List index specified"); + if (operation == 1 && vec_len (segments) == 0) + return clib_error_return (0, "No Segment List specified"); + if (operation == 3 && weight == (u32) ~ 0) + return clib_error_return (0, "No new weight for the SL specified"); + rv = sr_policy_mod ((sr_policy_index != (u32) ~ 0 ? NULL : &bsid), + sr_policy_index, fib_table, operation, segments, + sl_index, weight); + } + + switch (rv) + { + case 0: + break; + case 1: + return 0; + case -12: + return clib_error_return (0, + "There is already a FIB entry for the BindingSID address.\n" + "The SR policy could not be created."); + case -13: + return clib_error_return (0, "The specified FIB table does not exist."); + case -21: + return clib_error_return (0, + "The selected SR policy only contains ONE segment list. " + "Please remove the SR policy instead"); + case -22: + return clib_error_return (0, + "Could not delete the segment list. 
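Continuing the earlier sr_policy_add() fragment, the three sr_policy_mod() operations map to: 1 = add a segment list, 2 = delete a segment list, 3 = change a segment list's weight. sl_index is the SID-list pool index printed by 'show sr policies', and the -21/-22/-32 return codes correspond to the error messages handled in the CLI switch above. A hedged sketch under the same assumptions:

  /* operation 1: append another segment list (weight 2) to the same policy */
  rv = sr_policy_mod (&bsid, (u32) ~ 0, (u32) ~ 0, 1, segments, (u32) ~ 0, 2);

  /* operation 3: set the weight of the SID list with pool index 0 to 5 */
  rv = sr_policy_mod (&bsid, (u32) ~ 0, (u32) ~ 0, 3, NULL, 0, 5);

  /* operation 2: remove that SID list again (-21 if it is the only one left) */
  rv = sr_policy_mod (&bsid, (u32) ~ 0, (u32) ~ 0, 2, NULL, 0, (u32) ~ 0);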
" + "It is not associated with that SR policy."); + case -32: + return clib_error_return (0, + "Could not modify the segment list. " + "The given SL is not associated with such SR policy."); + default: + return clib_error_return (0, "BUG: sr policy returns %d", rv); + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (sr_policy_command, static) = { + .path = "sr policy", + .short_help = "sr policy [add||del||mod] [bsid 2001::1||index 5] " + "next A:: next B:: next C:: (weight 1) (fib-table 2) (encap|insert)", + .long_help = + "Manipulation of SR policies.\n" + "A Segment Routing policy may contain several SID lists. Each SID list has\n" + "an associated weight (default 1), which will result in wECMP (uECMP).\n" + "Segment Routing policies might be of type encapsulation or srh insertion\n" + "Each SR policy will be associated with a unique BindingSID.\n" + "A BindingSID is a locally allocated SegmentID. For every packet that arrives\n" + "with IPv6_DA:BSID such traffic will be steered into the SR policy.\n" + "The add command will create a SR policy with its first segment list (sl)\n" + "The mod command allows you to add, remove, or modify the existing segment lists\n" + "within an SR policy.\n" + "The del command allows you to delete a SR policy along with all its associated\n" + "SID lists.\n", + .function = sr_policy_command_fn, +}; +/* *INDENT-ON* */ + +/** + * @brief CLI to display onscreen all the SR policies + */ +static clib_error_t * +show_sr_policies_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ip6_sr_main_t *sm = &sr_main; + u32 *sl_index; + ip6_sr_sl_t *segment_list = 0; + ip6_sr_policy_t *sr_policy = 0; + ip6_sr_policy_t **vec_policies = 0; + ip6_address_t *addr; + u8 *s; + int i = 0; + + vlib_cli_output (vm, "SR policies:"); + + /* *INDENT-OFF* */ + pool_foreach (sr_policy, sm->sr_policies, + {vec_add1 (vec_policies, sr_policy); } ); + /* *INDENT-ON* */ + + vec_foreach_index (i, vec_policies) + { + sr_policy = vec_policies[i]; + vlib_cli_output (vm, "[%u].-\tBSID: %U", + (u32) (sr_policy - sm->sr_policies), + format_ip6_address, &sr_policy->bsid); + vlib_cli_output (vm, "\tBehavior: %s", + (sr_policy->is_encap ? "Encapsulation" : + "SRH insertion")); + vlib_cli_output (vm, "\tType: %s", + (sr_policy->type == + SR_POLICY_TYPE_DEFAULT ? "Default" : "Spray")); + vlib_cli_output (vm, "\tFIB table: %u", + (sr_policy->fib_table != + (u32) ~ 0 ? 
sr_policy->fib_table : 0)); + vlib_cli_output (vm, "\tSegment Lists:"); + vec_foreach (sl_index, sr_policy->segments_lists) + { + s = NULL; + s = format (s, "\t[%u].- ", *sl_index); + segment_list = pool_elt_at_index (sm->sid_lists, *sl_index); + s = format (s, "< "); + vec_foreach (addr, segment_list->segments) + { + s = format (s, "%U, ", format_ip6_address, addr); + } + s = format (s, "\b\b > "); + s = format (s, "weight: %u", segment_list->weight); + vlib_cli_output (vm, " %s", s); + } + vlib_cli_output (vm, "-----------"); + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_sr_policies_command, static) = { + .path = "show sr policies", + .short_help = "show sr policies", + .function = show_sr_policies_command_fn, +}; +/* *INDENT-ON* */ + +/*************************** SR rewrite graph node ****************************/ +/** + * @brief Trace for the SR Policy Rewrite graph node + */ +static u8 * +format_sr_policy_rewrite_trace (u8 * s, va_list * args) +{ + //TODO + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + sr_policy_rewrite_trace_t *t = va_arg (*args, sr_policy_rewrite_trace_t *); + + s = format + (s, "SR-policy-rewrite: src %U dst %U", + format_ip6_address, &t->src, format_ip6_address, &t->dst); + + return s; +} + +/** + * @brief IPv6 encapsulation processing as per RFC2473 + */ +static_always_inline void +encaps_processing_v6 (vlib_node_runtime_t * node, + vlib_buffer_t * b0, + ip6_header_t * ip0, ip6_header_t * ip0_encap) +{ + u32 new_l0; + + ip0_encap->hop_limit -= 1; + new_l0 = + ip0->payload_length + sizeof (ip6_header_t) + + clib_net_to_host_u16 (ip0_encap->payload_length); + ip0->payload_length = clib_host_to_net_u16 (new_l0); + ip0->ip_version_traffic_class_and_flow_label = + ip0_encap->ip_version_traffic_class_and_flow_label; +} + +/** + * @brief Graph node for applying a SR policy into an IPv6 packet. Encapsulation + */ +static uword +sr_policy_rewrite_encaps (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + ip6_sr_main_t *sm = &sr_main; + u32 n_left_from, next_index, *from, *to_next; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + int encap_pkts = 0, bsid_pkts = 0; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* Quad - Loop */ + while (n_left_from >= 8 && n_left_to_next >= 4) + { + u32 bi0, bi1, bi2, bi3; + vlib_buffer_t *b0, *b1, *b2, *b3; + u32 next0, next1, next2, next3; + next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; + ip6_header_t *ip0, *ip1, *ip2, *ip3; + ip6_header_t *ip0_encap, *ip1_encap, *ip2_encap, *ip3_encap; + ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3; + + /* Prefetch next iteration. 
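The net effect of encaps_processing_v6() above is that the outer payload length becomes the SRH size plus the entire inner packet (inner IPv6 header included), the inner hop limit is decremented, and the inner traffic class / flow label are propagated into the outer header, in the spirit of RFC 2473. A standalone arithmetic sketch with example sizes:

#include <stdio.h>

int
main (void)
{
  unsigned inner_payload = 100;              /* inner IPv6 payload bytes (example) */
  unsigned inner_total = 40 + inner_payload; /* inner header + payload             */
  unsigned srh_bytes = 8 + 3 * 16;           /* SRH of a 3-SID encap rewrite       */
  unsigned outer_payload = srh_bytes + inner_total;

  printf ("outer payload_length = %u\n", outer_payload);  /* 196 for this example */
  return 0;
}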
*/ + { + vlib_buffer_t *p4, *p5, *p6, *p7; + + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + p6 = vlib_get_buffer (vm, from[6]); + p7 = vlib_get_buffer (vm, from[7]); + + /* Prefetch the buffer header and packet for the N+2 loop iteration */ + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + vlib_prefetch_buffer_header (p6, LOAD); + vlib_prefetch_buffer_header (p7, LOAD); + + CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + to_next[2] = bi2 = from[2]; + to_next[3] = bi3 = from[3]; + from += 4; + to_next += 4; + n_left_from -= 4; + n_left_to_next -= 4; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); + + sl0 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + sl1 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b1)->ip.adj_index[VLIB_TX]); + sl2 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b2)->ip.adj_index[VLIB_TX]); + sl3 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b3)->ip.adj_index[VLIB_TX]); + + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl0->rewrite) + b0->current_data)); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl1->rewrite) + b1->current_data)); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl2->rewrite) + b2->current_data)); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl3->rewrite) + b3->current_data)); + + ip0_encap = vlib_buffer_get_current (b0); + ip1_encap = vlib_buffer_get_current (b1); + ip2_encap = vlib_buffer_get_current (b2); + ip3_encap = vlib_buffer_get_current (b3); + + clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite), + sl0->rewrite, vec_len (sl0->rewrite)); + clib_memcpy (((u8 *) ip1_encap) - vec_len (sl1->rewrite), + sl1->rewrite, vec_len (sl1->rewrite)); + clib_memcpy (((u8 *) ip2_encap) - vec_len (sl2->rewrite), + sl2->rewrite, vec_len (sl2->rewrite)); + clib_memcpy (((u8 *) ip3_encap) - vec_len (sl3->rewrite), + sl3->rewrite, vec_len (sl3->rewrite)); + + vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite)); + vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite)); + vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite)); + vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite)); + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + ip2 = vlib_buffer_get_current (b2); + ip3 = vlib_buffer_get_current (b3); + + encaps_processing_v6 (node, b0, ip0, ip0_encap); + encaps_processing_v6 (node, b1, ip1, ip1_encap); + encaps_processing_v6 (node, b2, ip2, ip2_encap); + encaps_processing_v6 (node, b3, ip3, ip3_encap); + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b1, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8, + 
sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b2, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b3, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + } + + encap_pkts += 4; + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, + n_left_to_next, bi0, bi1, bi2, bi3, + next0, next1, next2, next3); + } + + /* Single loop for potentially the last three packets */ + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + ip6_header_t *ip0 = 0, *ip0_encap = 0; + ip6_sr_sl_t *sl0; + u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + b0 = vlib_get_buffer (vm, bi0); + + sl0 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl0->rewrite) + b0->current_data)); + + ip0_encap = vlib_buffer_get_current (b0); + + clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite), + sl0->rewrite, vec_len (sl0->rewrite)); + vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite)); + + ip0 = vlib_buffer_get_current (b0); + + encaps_processing_v6 (node, b0, ip0, ip0_encap); + + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) && + PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + encap_pkts++; + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + /* Update counters */ + vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index, + SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL, + encap_pkts); + vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index, + SR_POLICY_REWRITE_ERROR_COUNTER_BSID, + bsid_pkts); + + return from_frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_node) = { + .function = sr_policy_rewrite_encaps, + .name = "sr-pl-rewrite-encaps", + .vector_size = sizeof (u32), + .format_trace = format_sr_policy_rewrite_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = SR_POLICY_REWRITE_N_ERROR, + .error_strings = sr_policy_rewrite_error_strings, + .n_next_nodes = SR_POLICY_REWRITE_N_NEXT, + .next_nodes = { +#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n, + foreach_sr_policy_rewrite_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +/** + * @brief IPv4 encapsulation processing as per RFC2473 + */ +static_always_inline void +encaps_processing_v4 (vlib_node_runtime_t * node, + vlib_buffer_t * b0, + ip6_header_t * ip0, ip4_header_t * ip0_encap) +{ + u32 new_l0; + ip6_sr_header_t *sr0; + + u32 checksum0; + + /* Inner IPv4: Decrement TTL & update checksum */ + ip0_encap->ttl -= 1; + checksum0 = ip0_encap->checksum + clib_host_to_net_u16 (0x0100); + checksum0 += checksum0 
>= 0xffff; + ip0_encap->checksum = checksum0; + + /* Outer IPv6: Update length, FL, proto */ + new_l0 = ip0->payload_length + clib_net_to_host_u16 (ip0_encap->length); + ip0->payload_length = clib_host_to_net_u16 (new_l0); + ip0->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 (0 | ((6 & 0xF) << 28) | + ((ip0_encap->tos & 0xFF) << 20)); + sr0 = (void *) (ip0 + 1); + sr0->protocol = IP_PROTOCOL_IP_IN_IP; +} + +/** + * @brief Graph node for applying a SR policy into an IPv4 packet. Encapsulation + */ +static uword +sr_policy_rewrite_encaps_v4 (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + ip6_sr_main_t *sm = &sr_main; + u32 n_left_from, next_index, *from, *to_next; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + int encap_pkts = 0, bsid_pkts = 0; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* Quad - Loop */ + while (n_left_from >= 8 && n_left_to_next >= 4) + { + u32 bi0, bi1, bi2, bi3; + vlib_buffer_t *b0, *b1, *b2, *b3; + u32 next0, next1, next2, next3; + next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; + ip6_header_t *ip0, *ip1, *ip2, *ip3; + ip4_header_t *ip0_encap, *ip1_encap, *ip2_encap, *ip3_encap; + ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3; + + /* Prefetch next iteration. */ + { + vlib_buffer_t *p4, *p5, *p6, *p7; + + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + p6 = vlib_get_buffer (vm, from[6]); + p7 = vlib_get_buffer (vm, from[7]); + + /* Prefetch the buffer header and packet for the N+2 loop iteration */ + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + vlib_prefetch_buffer_header (p6, LOAD); + vlib_prefetch_buffer_header (p7, LOAD); + + CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + to_next[2] = bi2 = from[2]; + to_next[3] = bi3 = from[3]; + from += 4; + to_next += 4; + n_left_from -= 4; + n_left_to_next -= 4; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); + + sl0 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + sl1 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b1)->ip.adj_index[VLIB_TX]); + sl2 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b2)->ip.adj_index[VLIB_TX]); + sl3 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b3)->ip.adj_index[VLIB_TX]); + + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl0->rewrite) + b0->current_data)); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl1->rewrite) + b1->current_data)); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl2->rewrite) + b2->current_data)); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl3->rewrite) + b3->current_data)); + + ip0_encap = vlib_buffer_get_current (b0); + ip1_encap = vlib_buffer_get_current (b1); + ip2_encap = vlib_buffer_get_current (b2); + ip3_encap = vlib_buffer_get_current (b3); + + clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite), + sl0->rewrite, vec_len (sl0->rewrite)); + clib_memcpy (((u8 *) ip1_encap) - vec_len (sl1->rewrite), + sl1->rewrite, vec_len (sl1->rewrite)); + clib_memcpy (((u8 *) 
ip2_encap) - vec_len (sl2->rewrite), + sl2->rewrite, vec_len (sl2->rewrite)); + clib_memcpy (((u8 *) ip3_encap) - vec_len (sl3->rewrite), + sl3->rewrite, vec_len (sl3->rewrite)); + + vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite)); + vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite)); + vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite)); + vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite)); + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + ip2 = vlib_buffer_get_current (b2); + ip3 = vlib_buffer_get_current (b3); + + encaps_processing_v4 (node, b0, ip0, ip0_encap); + encaps_processing_v4 (node, b1, ip1, ip1_encap); + encaps_processing_v4 (node, b2, ip2, ip2_encap); + encaps_processing_v4 (node, b3, ip3, ip3_encap); + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b1, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b2, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b3, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + } + + encap_pkts += 4; + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, + n_left_to_next, bi0, bi1, bi2, bi3, + next0, next1, next2, next3); + } + + /* Single loop for potentially the last three packets */ + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + ip6_header_t *ip0 = 0; + ip4_header_t *ip0_encap = 0; + ip6_sr_sl_t *sl0; + u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + b0 = vlib_get_buffer (vm, bi0); + + sl0 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl0->rewrite) + b0->current_data)); + + ip0_encap = vlib_buffer_get_current (b0); + + clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite), + sl0->rewrite, vec_len (sl0->rewrite)); + vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite)); + + ip0 = vlib_buffer_get_current (b0); + + encaps_processing_v4 (node, b0, ip0, ip0_encap); + + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) && + PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + encap_pkts++; + 
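encaps_processing_v4() above decrements the inner IPv4 TTL and patches the header checksum incrementally instead of recomputing it: adding htons(0x0100) compensates for the 0x0100 removed from the big-endian 16-bit word that holds the TTL, and the conditional increment folds the end-around carry, in the style of RFC 1624. A standalone sketch of that trick (example value only):

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

/* Incremental checksum update for a TTL decrement, mirroring the node above. */
static uint16_t
ttl_decrement_checksum (uint16_t old_csum)
{
  uint32_t sum = old_csum + htons (0x0100);
  sum += sum >= 0xffff;         /* fold the end-around carry */
  return (uint16_t) sum;
}

int
main (void)
{
  uint16_t before = 0xb1e6;     /* arbitrary example checksum */
  printf ("checksum 0x%04x -> 0x%04x after TTL-1\n",
          (unsigned) before, (unsigned) ttl_decrement_checksum (before));
  return 0;
}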
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + /* Update counters */ + vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index, + SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL, + encap_pkts); + vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index, + SR_POLICY_REWRITE_ERROR_COUNTER_BSID, + bsid_pkts); + + return from_frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_v4_node) = { + .function = sr_policy_rewrite_encaps_v4, + .name = "sr-pl-rewrite-encaps-v4", + .vector_size = sizeof (u32), + .format_trace = format_sr_policy_rewrite_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = SR_POLICY_REWRITE_N_ERROR, + .error_strings = sr_policy_rewrite_error_strings, + .n_next_nodes = SR_POLICY_REWRITE_N_NEXT, + .next_nodes = { +#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n, + foreach_sr_policy_rewrite_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +always_inline u32 +ip_flow_hash (void *data) +{ + ip4_header_t *iph = (ip4_header_t *) data; + + if ((iph->ip_version_and_header_length & 0xF0) == 0x40) + return ip4_compute_flow_hash (iph, IP_FLOW_HASH_DEFAULT); + else + return ip6_compute_flow_hash ((ip6_header_t *) iph, IP_FLOW_HASH_DEFAULT); +} + +always_inline u64 +mac_to_u64 (u8 * m) +{ + return (*((u64 *) m) & 0xffffffffffff); +} + +always_inline u32 +l2_flow_hash (vlib_buffer_t * b0) +{ + ethernet_header_t *eh; + u64 a, b, c; + uword is_ip, eh_size; + u16 eh_type; + + eh = vlib_buffer_get_current (b0); + eh_type = clib_net_to_host_u16 (eh->type); + eh_size = ethernet_buffer_header_size (b0); + + is_ip = (eh_type == ETHERNET_TYPE_IP4 || eh_type == ETHERNET_TYPE_IP6); + + /* since we have 2 cache lines, use them */ + if (is_ip) + a = ip_flow_hash ((u8 *) vlib_buffer_get_current (b0) + eh_size); + else + a = eh->type; + + b = mac_to_u64 ((u8 *) eh->dst_address); + c = mac_to_u64 ((u8 *) eh->src_address); + hash_mix64 (a, b, c); + + return (u32) c; +} + +/** + * @brief Graph node for applying a SR policy into a L2 frame + */ +static uword +sr_policy_rewrite_encaps_l2 (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + ip6_sr_main_t *sm = &sr_main; + u32 n_left_from, next_index, *from, *to_next; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + int encap_pkts = 0, bsid_pkts = 0; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* Quad - Loop */ + while (n_left_from >= 8 && n_left_to_next >= 4) + { + u32 bi0, bi1, bi2, bi3; + vlib_buffer_t *b0, *b1, *b2, *b3; + u32 next0, next1, next2, next3; + next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; + ethernet_header_t *en0, *en1, *en2, *en3; + ip6_header_t *ip0, *ip1, *ip2, *ip3; + ip6_sr_header_t *sr0, *sr1, *sr2, *sr3; + ip6_sr_policy_t *sp0, *sp1, *sp2, *sp3; + ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3; + + /* Prefetch next iteration. 
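l2_flow_hash() above mixes the Ethernet addresses with the inner IP flow hash (when the frame carries IPv4/IPv6); the resulting hash is then masked with vec_len(segments_lists) - 1 to pick a SID list. The mask always yields a valid position, but it only spreads flows evenly when the number of segment lists is a power of two. A small standalone sketch of the selection:

#include <stdio.h>
#include <stdint.h>

int
main (void)
{
  uint32_t n_sid_lists = 4;     /* example policy with four segment lists */
  uint32_t hashes[] = { 0x1a2b3c4d, 0x00000007, 0xdeadbeef };
  unsigned i;

  for (i = 0; i < 3; i++)
    printf ("flow hash 0x%08x -> segment list position %u\n",
            hashes[i], hashes[i] & (n_sid_lists - 1));
  return 0;
}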
*/ + { + vlib_buffer_t *p4, *p5, *p6, *p7; + + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + p6 = vlib_get_buffer (vm, from[6]); + p7 = vlib_get_buffer (vm, from[7]); + + /* Prefetch the buffer header and packet for the N+2 loop iteration */ + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + vlib_prefetch_buffer_header (p6, LOAD); + vlib_prefetch_buffer_header (p7, LOAD); + + CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + to_next[2] = bi2 = from[2]; + to_next[3] = bi3 = from[3]; + from += 4; + to_next += 4; + n_left_from -= 4; + n_left_to_next -= 4; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); + + sp0 = pool_elt_at_index (sm->sr_policies, + sm->sw_iface_sr_policies[vnet_buffer + (b0)->sw_if_index + [VLIB_RX]]); + + sp1 = pool_elt_at_index (sm->sr_policies, + sm->sw_iface_sr_policies[vnet_buffer + (b1)->sw_if_index + [VLIB_RX]]); + + sp2 = pool_elt_at_index (sm->sr_policies, + sm->sw_iface_sr_policies[vnet_buffer + (b2)->sw_if_index + [VLIB_RX]]); + + sp3 = pool_elt_at_index (sm->sr_policies, + sm->sw_iface_sr_policies[vnet_buffer + (b3)->sw_if_index + [VLIB_RX]]); + + if (vec_len (sp0->segments_lists) == 1) + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = sp0->segments_lists[0]; + else + { + vnet_buffer (b0)->ip.flow_hash = l2_flow_hash (b0); + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = + sp0->segments_lists[(vnet_buffer (b0)->ip.flow_hash & + (vec_len (sp0->segments_lists) - 1))]; + } + + if (vec_len (sp1->segments_lists) == 1) + vnet_buffer (b1)->ip.adj_index[VLIB_TX] = sp1->segments_lists[1]; + else + { + vnet_buffer (b1)->ip.flow_hash = l2_flow_hash (b1); + vnet_buffer (b1)->ip.adj_index[VLIB_TX] = + sp1->segments_lists[(vnet_buffer (b1)->ip.flow_hash & + (vec_len (sp1->segments_lists) - 1))]; + } + + if (vec_len (sp2->segments_lists) == 1) + vnet_buffer (b2)->ip.adj_index[VLIB_TX] = sp2->segments_lists[2]; + else + { + vnet_buffer (b2)->ip.flow_hash = l2_flow_hash (b2); + vnet_buffer (b2)->ip.adj_index[VLIB_TX] = + sp2->segments_lists[(vnet_buffer (b2)->ip.flow_hash & + (vec_len (sp2->segments_lists) - 1))]; + } + + if (vec_len (sp3->segments_lists) == 1) + vnet_buffer (b3)->ip.adj_index[VLIB_TX] = sp3->segments_lists[3]; + else + { + vnet_buffer (b3)->ip.flow_hash = l2_flow_hash (b3); + vnet_buffer (b3)->ip.adj_index[VLIB_TX] = + sp3->segments_lists[(vnet_buffer (b3)->ip.flow_hash & + (vec_len (sp3->segments_lists) - 1))]; + } + + sl0 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + sl1 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b1)->ip.adj_index[VLIB_TX]); + sl2 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b2)->ip.adj_index[VLIB_TX]); + sl3 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b3)->ip.adj_index[VLIB_TX]); + + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl0->rewrite) + b0->current_data)); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl1->rewrite) + b1->current_data)); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl2->rewrite) + b2->current_data)); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl3->rewrite) + b3->current_data)); + + en0 = vlib_buffer_get_current (b0); + en1 = vlib_buffer_get_current 
(b1); + en2 = vlib_buffer_get_current (b2); + en3 = vlib_buffer_get_current (b3); + + clib_memcpy (((u8 *) en0) - vec_len (sl0->rewrite), sl0->rewrite, + vec_len (sl0->rewrite)); + clib_memcpy (((u8 *) en1) - vec_len (sl1->rewrite), sl1->rewrite, + vec_len (sl1->rewrite)); + clib_memcpy (((u8 *) en2) - vec_len (sl2->rewrite), sl2->rewrite, + vec_len (sl2->rewrite)); + clib_memcpy (((u8 *) en3) - vec_len (sl3->rewrite), sl3->rewrite, + vec_len (sl3->rewrite)); + + vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite)); + vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite)); + vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite)); + vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite)); + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + ip2 = vlib_buffer_get_current (b2); + ip3 = vlib_buffer_get_current (b3); + + ip0->payload_length = + clib_host_to_net_u16 (b0->current_length - sizeof (ip6_header_t)); + ip1->payload_length = + clib_host_to_net_u16 (b1->current_length - sizeof (ip6_header_t)); + ip2->payload_length = + clib_host_to_net_u16 (b2->current_length - sizeof (ip6_header_t)); + ip3->payload_length = + clib_host_to_net_u16 (b3->current_length - sizeof (ip6_header_t)); + + sr0 = (void *) (ip0 + 1); + sr1 = (void *) (ip1 + 1); + sr2 = (void *) (ip2 + 1); + sr3 = (void *) (ip3 + 1); + + sr0->protocol = sr1->protocol = sr2->protocol = sr3->protocol = + IP_PROTOCOL_IP6_NONXT; + + /* Which Traffic class and flow label do I set ? */ + //ip0->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32(0|((6&0xF)<<28)|((ip0_encap->tos&0xFF)<<20)); + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b1, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b2, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b3, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + } + + encap_pkts += 4; + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, + n_left_to_next, bi0, bi1, bi2, bi3, + next0, next1, next2, next3); + } + + /* Single loop for potentially the last three packets */ + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + ip6_header_t *ip0 = 0; + ip6_sr_header_t *sr0; + ethernet_header_t *en0; + ip6_sr_policy_t *sp0; + ip6_sr_sl_t *sl0; + u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + b0 = vlib_get_buffer (vm, bi0); + + /* Find 
the SR policy */ + sp0 = pool_elt_at_index (sm->sr_policies, + sm->sw_iface_sr_policies[vnet_buffer + (b0)->sw_if_index + [VLIB_RX]]); + + /* In case there is more than one SL, LB among them */ + if (vec_len (sp0->segments_lists) == 1) + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = sp0->segments_lists[0]; + else + { + vnet_buffer (b0)->ip.flow_hash = l2_flow_hash (b0); + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = + sp0->segments_lists[(vnet_buffer (b0)->ip.flow_hash & + (vec_len (sp0->segments_lists) - 1))]; + } + sl0 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl0->rewrite) + b0->current_data)); + + en0 = vlib_buffer_get_current (b0); + + clib_memcpy (((u8 *) en0) - vec_len (sl0->rewrite), sl0->rewrite, + vec_len (sl0->rewrite)); + + vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite)); + + ip0 = vlib_buffer_get_current (b0); + + ip0->payload_length = + clib_host_to_net_u16 (b0->current_length - sizeof (ip6_header_t)); + + sr0 = (void *) (ip0 + 1); + sr0->protocol = IP_PROTOCOL_IP6_NONXT; + + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) && + PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + encap_pkts++; + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + /* Update counters */ + vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index, + SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL, + encap_pkts); + vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index, + SR_POLICY_REWRITE_ERROR_COUNTER_BSID, + bsid_pkts); + + return from_frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_l2_node) = { + .function = sr_policy_rewrite_encaps_l2, + .name = "sr-pl-rewrite-encaps-l2", + .vector_size = sizeof (u32), + .format_trace = format_sr_policy_rewrite_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = SR_POLICY_REWRITE_N_ERROR, + .error_strings = sr_policy_rewrite_error_strings, + .n_next_nodes = SR_POLICY_REWRITE_N_NEXT, + .next_nodes = { +#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n, + foreach_sr_policy_rewrite_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +/** + * @brief Graph node for applying a SR policy into a packet. SRH insertion. + */ +static uword +sr_policy_rewrite_insert (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + ip6_sr_main_t *sm = &sr_main; + u32 n_left_from, next_index, *from, *to_next; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + int insert_pkts = 0, bsid_pkts = 0; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* Quad - Loop */ + while (n_left_from >= 8 && n_left_to_next >= 4) + { + u32 bi0, bi1, bi2, bi3; + vlib_buffer_t *b0, *b1, *b2, *b3; + u32 next0, next1, next2, next3; + next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; + ip6_header_t *ip0, *ip1, *ip2, *ip3; + ip6_sr_header_t *sr0, *sr1, *sr2, *sr3; + ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3; + u16 new_l0, new_l1, new_l2, new_l3; + + /* Prefetch next iteration. 
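For the L2 node just registered, the whole Ethernet frame becomes the SRH payload: the encap rewrite (outer IPv6 header plus SRH) is prepended in front of the frame, the outer payload_length is simply current_length minus the 40-byte IPv6 header, and the SRH next protocol is set to IP6_NONXT since no IP header follows it. A standalone sizing sketch with example numbers:

#include <stdio.h>

int
main (void)
{
  unsigned frame_bytes = 60;               /* original Ethernet frame (example) */
  unsigned srh_bytes = 8 + 2 * 16;         /* SRH for a 2-SID list              */
  unsigned rewrite_bytes = 40 + srh_bytes; /* outer IPv6 header + SRH           */
  unsigned current_length = frame_bytes + rewrite_bytes;
  unsigned payload_length = current_length - 40;

  printf ("payload_length = %u (= SRH %u + frame %u)\n",
          payload_length, srh_bytes, frame_bytes);
  return 0;
}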
*/ + { + vlib_buffer_t *p4, *p5, *p6, *p7; + + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + p6 = vlib_get_buffer (vm, from[6]); + p7 = vlib_get_buffer (vm, from[7]); + + /* Prefetch the buffer header and packet for the N+2 loop iteration */ + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + vlib_prefetch_buffer_header (p6, LOAD); + vlib_prefetch_buffer_header (p7, LOAD); + + CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + to_next[2] = bi2 = from[2]; + to_next[3] = bi3 = from[3]; + from += 4; + to_next += 4; + n_left_from -= 4; + n_left_to_next -= 4; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); + + sl0 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + sl1 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b1)->ip.adj_index[VLIB_TX]); + sl2 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b2)->ip.adj_index[VLIB_TX]); + sl3 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b3)->ip.adj_index[VLIB_TX]); + + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl0->rewrite) + b0->current_data)); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl1->rewrite) + b1->current_data)); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl2->rewrite) + b2->current_data)); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl3->rewrite) + b3->current_data)); + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + ip2 = vlib_buffer_get_current (b2); + ip3 = vlib_buffer_get_current (b3); + + if (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) + sr0 = + (ip6_sr_header_t *) (((void *) (ip0 + 1)) + + ip6_ext_header_len (ip0 + 1)); + else + sr0 = (ip6_sr_header_t *) (ip0 + 1); + + if (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) + sr1 = + (ip6_sr_header_t *) (((void *) (ip1 + 1)) + + ip6_ext_header_len (ip1 + 1)); + else + sr1 = (ip6_sr_header_t *) (ip1 + 1); + + if (ip2->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) + sr2 = + (ip6_sr_header_t *) (((void *) (ip2 + 1)) + + ip6_ext_header_len (ip2 + 1)); + else + sr2 = (ip6_sr_header_t *) (ip2 + 1); + + if (ip3->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) + sr3 = + (ip6_sr_header_t *) (((void *) (ip3 + 1)) + + ip6_ext_header_len (ip3 + 1)); + else + sr3 = (ip6_sr_header_t *) (ip3 + 1); + + clib_memcpy ((u8 *) ip0 - vec_len (sl0->rewrite), (u8 *) ip0, + (void *) sr0 - (void *) ip0); + clib_memcpy ((u8 *) ip1 - vec_len (sl1->rewrite), (u8 *) ip1, + (void *) sr1 - (void *) ip1); + clib_memcpy ((u8 *) ip2 - vec_len (sl2->rewrite), (u8 *) ip2, + (void *) sr2 - (void *) ip2); + clib_memcpy ((u8 *) ip3 - vec_len (sl3->rewrite), (u8 *) ip3, + (void *) sr3 - (void *) ip3); + + clib_memcpy (((u8 *) sr0 - vec_len (sl0->rewrite)), sl0->rewrite, + vec_len (sl0->rewrite)); + clib_memcpy (((u8 *) sr1 - vec_len (sl1->rewrite)), sl1->rewrite, + vec_len (sl1->rewrite)); + clib_memcpy (((u8 *) sr2 - vec_len (sl2->rewrite)), sl2->rewrite, + vec_len (sl2->rewrite)); + clib_memcpy (((u8 *) sr3 - vec_len (sl3->rewrite)), sl3->rewrite, + vec_len (sl3->rewrite)); + + vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite)); + vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite)); + 
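The clib_memcpy/vlib_buffer_advance pairs above implement the usual prepend pattern: the rewrite (and, for insertion, the headers that precede the SRH) is copied into the buffer's pre-data area just in front of the current packet start, and a negative advance then moves the packet start back over it; the VLIB_BUFFER_PRE_DATA_SIZE asserts guarantee that this headroom exists. A standalone sketch of the idea on a flat array:

#include <stdio.h>
#include <string.h>

int
main (void)
{
  unsigned char storage[128];
  unsigned char *current = storage + 64;  /* stands for b->data + current_data */
  const unsigned char rewrite[4] = { 0x60, 0, 0, 0 };  /* pretend rewrite      */

  memcpy (current, "PAYLOAD", 7);         /* pretend existing packet contents  */

  /* copy the precomputed rewrite just in front of the current data ...        */
  memcpy (current - sizeof (rewrite), rewrite, sizeof (rewrite));
  /* ... then move the packet start back over it (the negative buffer advance) */
  current -= sizeof (rewrite);

  printf ("first byte now 0x%02x, old data at offset %u\n",
          (unsigned) current[0], (unsigned) sizeof (rewrite));
  return 0;
}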
vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite)); + vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite)); + + ip0 = ((void *) ip0) - vec_len (sl0->rewrite); + ip1 = ((void *) ip1) - vec_len (sl1->rewrite); + ip2 = ((void *) ip2) - vec_len (sl2->rewrite); + ip3 = ((void *) ip3) - vec_len (sl3->rewrite); + + ip0->hop_limit -= 1; + ip1->hop_limit -= 1; + ip2->hop_limit -= 1; + ip3->hop_limit -= 1; + + new_l0 = + clib_net_to_host_u16 (ip0->payload_length) + + vec_len (sl0->rewrite); + new_l1 = + clib_net_to_host_u16 (ip1->payload_length) + + vec_len (sl1->rewrite); + new_l2 = + clib_net_to_host_u16 (ip2->payload_length) + + vec_len (sl2->rewrite); + new_l3 = + clib_net_to_host_u16 (ip3->payload_length) + + vec_len (sl3->rewrite); + + ip0->payload_length = clib_host_to_net_u16 (new_l0); + ip1->payload_length = clib_host_to_net_u16 (new_l1); + ip2->payload_length = clib_host_to_net_u16 (new_l2); + ip3->payload_length = clib_host_to_net_u16 (new_l3); + + sr0 = ((void *) sr0) - vec_len (sl0->rewrite); + sr1 = ((void *) sr1) - vec_len (sl1->rewrite); + sr2 = ((void *) sr2) - vec_len (sl2->rewrite); + sr3 = ((void *) sr3) - vec_len (sl3->rewrite); + + sr0->segments->as_u64[0] = ip0->dst_address.as_u64[0]; + sr0->segments->as_u64[1] = ip0->dst_address.as_u64[1]; + sr1->segments->as_u64[0] = ip1->dst_address.as_u64[0]; + sr1->segments->as_u64[1] = ip1->dst_address.as_u64[1]; + sr2->segments->as_u64[0] = ip2->dst_address.as_u64[0]; + sr2->segments->as_u64[1] = ip2->dst_address.as_u64[1]; + sr3->segments->as_u64[0] = ip3->dst_address.as_u64[0]; + sr3->segments->as_u64[1] = ip3->dst_address.as_u64[1]; + + ip0->dst_address.as_u64[0] = + (sr0->segments + sr0->segments_left)->as_u64[0]; + ip0->dst_address.as_u64[1] = + (sr0->segments + sr0->segments_left)->as_u64[1]; + ip1->dst_address.as_u64[0] = + (sr1->segments + sr1->segments_left)->as_u64[0]; + ip1->dst_address.as_u64[1] = + (sr1->segments + sr1->segments_left)->as_u64[1]; + ip2->dst_address.as_u64[0] = + (sr2->segments + sr2->segments_left)->as_u64[0]; + ip2->dst_address.as_u64[1] = + (sr2->segments + sr2->segments_left)->as_u64[1]; + ip3->dst_address.as_u64[0] = + (sr3->segments + sr3->segments_left)->as_u64[0]; + ip3->dst_address.as_u64[1] = + (sr3->segments + sr3->segments_left)->as_u64[1]; + + ip6_ext_header_t *ip_ext; + if (ip0 + 1 == (void *) sr0) + { + sr0->protocol = ip0->protocol; + ip0->protocol = IP_PROTOCOL_IPV6_ROUTE; + } + else + { + ip_ext = (void *) (ip0 + 1); + sr0->protocol = ip_ext->next_hdr; + ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE; + } + + if (ip1 + 1 == (void *) sr1) + { + sr1->protocol = ip1->protocol; + ip1->protocol = IP_PROTOCOL_IPV6_ROUTE; + } + else + { + ip_ext = (void *) (ip2 + 1); + sr2->protocol = ip_ext->next_hdr; + ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE; + } + + if (ip2 + 1 == (void *) sr2) + { + sr2->protocol = ip2->protocol; + ip2->protocol = IP_PROTOCOL_IPV6_ROUTE; + } + else + { + ip_ext = (void *) (ip2 + 1); + sr2->protocol = ip_ext->next_hdr; + ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE; + } + + if (ip3 + 1 == (void *) sr3) + { + sr3->protocol = ip3->protocol; + ip3->protocol = IP_PROTOCOL_IPV6_ROUTE; + } + else + { + ip_ext = (void *) (ip3 + 1); + sr3->protocol = ip_ext->next_hdr; + ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE; + } + + insert_pkts += 4; + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, 
ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b1, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b2, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b3, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + } + + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, + n_left_to_next, bi0, bi1, bi2, bi3, + next0, next1, next2, next3); + } + + /* Single loop for potentially the last three packets */ + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + ip6_header_t *ip0 = 0; + ip6_sr_header_t *sr0 = 0; + ip6_sr_sl_t *sl0; + u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; + u16 new_l0 = 0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + sl0 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl0->rewrite) + b0->current_data)); + + ip0 = vlib_buffer_get_current (b0); + + if (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) + sr0 = + (ip6_sr_header_t *) (((void *) (ip0 + 1)) + + ip6_ext_header_len (ip0 + 1)); + else + sr0 = (ip6_sr_header_t *) (ip0 + 1); + + clib_memcpy ((u8 *) ip0 - vec_len (sl0->rewrite), (u8 *) ip0, + (void *) sr0 - (void *) ip0); + clib_memcpy (((u8 *) sr0 - vec_len (sl0->rewrite)), sl0->rewrite, + vec_len (sl0->rewrite)); + + vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite)); + + ip0 = ((void *) ip0) - vec_len (sl0->rewrite); + ip0->hop_limit -= 1; + new_l0 = + clib_net_to_host_u16 (ip0->payload_length) + + vec_len (sl0->rewrite); + ip0->payload_length = clib_host_to_net_u16 (new_l0); + + sr0 = ((void *) sr0) - vec_len (sl0->rewrite); + sr0->segments->as_u64[0] = ip0->dst_address.as_u64[0]; + sr0->segments->as_u64[1] = ip0->dst_address.as_u64[1]; + + ip0->dst_address.as_u64[0] = + (sr0->segments + sr0->segments_left)->as_u64[0]; + ip0->dst_address.as_u64[1] = + (sr0->segments + sr0->segments_left)->as_u64[1]; + + if (ip0 + 1 == (void *) sr0) + { + sr0->protocol = ip0->protocol; + ip0->protocol = IP_PROTOCOL_IPV6_ROUTE; + } + else + { + ip6_ext_header_t *ip_ext = (void *) (ip0 + 1); + sr0->protocol = ip_ext->next_hdr; + ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE; + } + + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) && + PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + insert_pkts++; + + vlib_validate_buffer_enqueue_x1 (vm, node, 
next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + /* Update counters */ + vlib_node_increment_counter (vm, sr_policy_rewrite_insert_node.index, + SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL, + insert_pkts); + vlib_node_increment_counter (vm, sr_policy_rewrite_insert_node.index, + SR_POLICY_REWRITE_ERROR_COUNTER_BSID, + bsid_pkts); + return from_frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sr_policy_rewrite_insert_node) = { + .function = sr_policy_rewrite_insert, + .name = "sr-pl-rewrite-insert", + .vector_size = sizeof (u32), + .format_trace = format_sr_policy_rewrite_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = SR_POLICY_REWRITE_N_ERROR, + .error_strings = sr_policy_rewrite_error_strings, + .n_next_nodes = SR_POLICY_REWRITE_N_NEXT, + .next_nodes = { +#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n, + foreach_sr_policy_rewrite_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +/** + * @brief Graph node for applying a SR policy into a packet. BSID - SRH insertion. + */ +static uword +sr_policy_rewrite_b_insert (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + ip6_sr_main_t *sm = &sr_main; + u32 n_left_from, next_index, *from, *to_next; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + int insert_pkts = 0, bsid_pkts = 0; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* Quad - Loop */ + while (n_left_from >= 8 && n_left_to_next >= 4) + { + u32 bi0, bi1, bi2, bi3; + vlib_buffer_t *b0, *b1, *b2, *b3; + u32 next0, next1, next2, next3; + next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; + ip6_header_t *ip0, *ip1, *ip2, *ip3; + ip6_sr_header_t *sr0, *sr1, *sr2, *sr3; + ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3; + u16 new_l0, new_l1, new_l2, new_l3; + + /* Prefetch next iteration. 
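 * While the current four buffers (from[0..3]) are rewritten, the buffer
 * headers of the next four (from[4..7]) are prefetched with a LOAD hint and
 * the first cache line of their packet data with a STORE hint, since that
 * data is about to be rewritten in place. This quad loop only runs while
 * n_left_from >= 8, so from[4..7] are always valid here.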
*/ + { + vlib_buffer_t *p4, *p5, *p6, *p7; + + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + p6 = vlib_get_buffer (vm, from[6]); + p7 = vlib_get_buffer (vm, from[7]); + + /* Prefetch the buffer header and packet for the N+2 loop iteration */ + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + vlib_prefetch_buffer_header (p6, LOAD); + vlib_prefetch_buffer_header (p7, LOAD); + + CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + to_next[2] = bi2 = from[2]; + to_next[3] = bi3 = from[3]; + from += 4; + to_next += 4; + n_left_from -= 4; + n_left_to_next -= 4; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); + + sl0 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + sl1 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b1)->ip.adj_index[VLIB_TX]); + sl2 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b2)->ip.adj_index[VLIB_TX]); + sl3 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b3)->ip.adj_index[VLIB_TX]); + + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl0->rewrite_bsid) + b0->current_data)); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl1->rewrite_bsid) + b1->current_data)); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl2->rewrite_bsid) + b2->current_data)); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl3->rewrite_bsid) + b3->current_data)); + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + ip2 = vlib_buffer_get_current (b2); + ip3 = vlib_buffer_get_current (b3); + + if (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) + sr0 = + (ip6_sr_header_t *) (((void *) (ip0 + 1)) + + ip6_ext_header_len (ip0 + 1)); + else + sr0 = (ip6_sr_header_t *) (ip0 + 1); + + if (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) + sr1 = + (ip6_sr_header_t *) (((void *) (ip1 + 1)) + + ip6_ext_header_len (ip1 + 1)); + else + sr1 = (ip6_sr_header_t *) (ip1 + 1); + + if (ip2->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) + sr2 = + (ip6_sr_header_t *) (((void *) (ip2 + 1)) + + ip6_ext_header_len (ip2 + 1)); + else + sr2 = (ip6_sr_header_t *) (ip2 + 1); + + if (ip3->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) + sr3 = + (ip6_sr_header_t *) (((void *) (ip3 + 1)) + + ip6_ext_header_len (ip3 + 1)); + else + sr3 = (ip6_sr_header_t *) (ip3 + 1); + + clib_memcpy ((u8 *) ip0 - vec_len (sl0->rewrite_bsid), (u8 *) ip0, + (void *) sr0 - (void *) ip0); + clib_memcpy ((u8 *) ip1 - vec_len (sl1->rewrite_bsid), (u8 *) ip1, + (void *) sr1 - (void *) ip1); + clib_memcpy ((u8 *) ip2 - vec_len (sl2->rewrite_bsid), (u8 *) ip2, + (void *) sr2 - (void *) ip2); + clib_memcpy ((u8 *) ip3 - vec_len (sl3->rewrite_bsid), (u8 *) ip3, + (void *) sr3 - (void *) ip3); + + clib_memcpy (((u8 *) sr0 - vec_len (sl0->rewrite_bsid)), + sl0->rewrite_bsid, vec_len (sl0->rewrite_bsid)); + clib_memcpy (((u8 *) sr1 - vec_len (sl1->rewrite_bsid)), + sl1->rewrite_bsid, vec_len (sl1->rewrite_bsid)); + clib_memcpy (((u8 *) sr2 - vec_len (sl2->rewrite_bsid)), + sl2->rewrite_bsid, vec_len (sl2->rewrite_bsid)); + clib_memcpy (((u8 *) sr3 - vec_len (sl3->rewrite_bsid)), + sl3->rewrite_bsid, vec_len (sl3->rewrite_bsid)); + + vlib_buffer_advance (b0, -(word) 
vec_len (sl0->rewrite_bsid)); + vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite_bsid)); + vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite_bsid)); + vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite_bsid)); + + ip0 = ((void *) ip0) - vec_len (sl0->rewrite_bsid); + ip1 = ((void *) ip1) - vec_len (sl1->rewrite_bsid); + ip2 = ((void *) ip2) - vec_len (sl2->rewrite_bsid); + ip3 = ((void *) ip3) - vec_len (sl3->rewrite_bsid); + + ip0->hop_limit -= 1; + ip1->hop_limit -= 1; + ip2->hop_limit -= 1; + ip3->hop_limit -= 1; + + new_l0 = + clib_net_to_host_u16 (ip0->payload_length) + + vec_len (sl0->rewrite_bsid); + new_l1 = + clib_net_to_host_u16 (ip1->payload_length) + + vec_len (sl1->rewrite_bsid); + new_l2 = + clib_net_to_host_u16 (ip2->payload_length) + + vec_len (sl2->rewrite_bsid); + new_l3 = + clib_net_to_host_u16 (ip3->payload_length) + + vec_len (sl3->rewrite_bsid); + + ip0->payload_length = clib_host_to_net_u16 (new_l0); + ip1->payload_length = clib_host_to_net_u16 (new_l1); + ip2->payload_length = clib_host_to_net_u16 (new_l2); + ip3->payload_length = clib_host_to_net_u16 (new_l3); + + sr0 = ((void *) sr0) - vec_len (sl0->rewrite_bsid); + sr1 = ((void *) sr1) - vec_len (sl1->rewrite_bsid); + sr2 = ((void *) sr2) - vec_len (sl2->rewrite_bsid); + sr3 = ((void *) sr3) - vec_len (sl3->rewrite_bsid); + + ip0->dst_address.as_u64[0] = + (sr0->segments + sr0->segments_left)->as_u64[0]; + ip0->dst_address.as_u64[1] = + (sr0->segments + sr0->segments_left)->as_u64[1]; + ip1->dst_address.as_u64[0] = + (sr1->segments + sr1->segments_left)->as_u64[0]; + ip1->dst_address.as_u64[1] = + (sr1->segments + sr1->segments_left)->as_u64[1]; + ip2->dst_address.as_u64[0] = + (sr2->segments + sr2->segments_left)->as_u64[0]; + ip2->dst_address.as_u64[1] = + (sr2->segments + sr2->segments_left)->as_u64[1]; + ip3->dst_address.as_u64[0] = + (sr3->segments + sr3->segments_left)->as_u64[0]; + ip3->dst_address.as_u64[1] = + (sr3->segments + sr3->segments_left)->as_u64[1]; + + ip6_ext_header_t *ip_ext; + if (ip0 + 1 == (void *) sr0) + { + sr0->protocol = ip0->protocol; + ip0->protocol = IP_PROTOCOL_IPV6_ROUTE; + } + else + { + ip_ext = (void *) (ip0 + 1); + sr0->protocol = ip_ext->next_hdr; + ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE; + } + + if (ip1 + 1 == (void *) sr1) + { + sr1->protocol = ip1->protocol; + ip1->protocol = IP_PROTOCOL_IPV6_ROUTE; + } + else + { + ip_ext = (void *) (ip2 + 1); + sr2->protocol = ip_ext->next_hdr; + ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE; + } + + if (ip2 + 1 == (void *) sr2) + { + sr2->protocol = ip2->protocol; + ip2->protocol = IP_PROTOCOL_IPV6_ROUTE; + } + else + { + ip_ext = (void *) (ip2 + 1); + sr2->protocol = ip_ext->next_hdr; + ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE; + } + + if (ip3 + 1 == (void *) sr3) + { + sr3->protocol = ip3->protocol; + ip3->protocol = IP_PROTOCOL_IPV6_ROUTE; + } + else + { + ip_ext = (void *) (ip3 + 1); + sr3->protocol = ip_ext->next_hdr; + ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE; + } + + insert_pkts += 4; + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b1, sizeof (*tr)); + clib_memcpy 
(tr->src.as_u8, ip1->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b2, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b3, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + } + + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, + n_left_to_next, bi0, bi1, bi2, bi3, + next0, next1, next2, next3); + } + + /* Single loop for potentially the last three packets */ + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + ip6_header_t *ip0 = 0; + ip6_sr_header_t *sr0 = 0; + ip6_sr_sl_t *sl0; + u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; + u16 new_l0 = 0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + sl0 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl0->rewrite_bsid) + b0->current_data)); + + ip0 = vlib_buffer_get_current (b0); + + if (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) + sr0 = + (ip6_sr_header_t *) (((void *) (ip0 + 1)) + + ip6_ext_header_len (ip0 + 1)); + else + sr0 = (ip6_sr_header_t *) (ip0 + 1); + + clib_memcpy ((u8 *) ip0 - vec_len (sl0->rewrite_bsid), (u8 *) ip0, + (void *) sr0 - (void *) ip0); + clib_memcpy (((u8 *) sr0 - vec_len (sl0->rewrite_bsid)), + sl0->rewrite_bsid, vec_len (sl0->rewrite_bsid)); + + vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite_bsid)); + + ip0 = ((void *) ip0) - vec_len (sl0->rewrite_bsid); + ip0->hop_limit -= 1; + new_l0 = + clib_net_to_host_u16 (ip0->payload_length) + + vec_len (sl0->rewrite_bsid); + ip0->payload_length = clib_host_to_net_u16 (new_l0); + + sr0 = ((void *) sr0) - vec_len (sl0->rewrite_bsid); + + ip0->dst_address.as_u64[0] = + (sr0->segments + sr0->segments_left)->as_u64[0]; + ip0->dst_address.as_u64[1] = + (sr0->segments + sr0->segments_left)->as_u64[1]; + + if (ip0 + 1 == (void *) sr0) + { + sr0->protocol = ip0->protocol; + ip0->protocol = IP_PROTOCOL_IPV6_ROUTE; + } + else + { + ip6_ext_header_t *ip_ext = (void *) (ip0 + 1); + sr0->protocol = ip_ext->next_hdr; + ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE; + } + + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) && + PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + insert_pkts++; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + /* Update counters */ + vlib_node_increment_counter (vm, sr_policy_rewrite_insert_node.index, + SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL, + insert_pkts); + vlib_node_increment_counter (vm, sr_policy_rewrite_insert_node.index, + 
SR_POLICY_REWRITE_ERROR_COUNTER_BSID, + bsid_pkts); + return from_frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sr_policy_rewrite_b_insert_node) = { + .function = sr_policy_rewrite_b_insert, + .name = "sr-pl-rewrite-b-insert", + .vector_size = sizeof (u32), + .format_trace = format_sr_policy_rewrite_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = SR_POLICY_REWRITE_N_ERROR, + .error_strings = sr_policy_rewrite_error_strings, + .n_next_nodes = SR_POLICY_REWRITE_N_NEXT, + .next_nodes = { +#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n, + foreach_sr_policy_rewrite_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +/** + * @brief Function BSID encapsulation + */ +static_always_inline void +end_bsid_encaps_srh_processing (vlib_node_runtime_t * node, + vlib_buffer_t * b0, + ip6_header_t * ip0, + ip6_sr_header_t * sr0, u32 * next0) +{ + ip6_address_t *new_dst0; + + if (PREDICT_FALSE (!sr0)) + goto error_bsid_encaps; + + if (PREDICT_TRUE (sr0->type == ROUTING_HEADER_TYPE_SR)) + { + if (PREDICT_TRUE (sr0->segments_left != 0)) + { + sr0->segments_left -= 1; + new_dst0 = (ip6_address_t *) (sr0->segments); + new_dst0 += sr0->segments_left; + ip0->dst_address.as_u64[0] = new_dst0->as_u64[0]; + ip0->dst_address.as_u64[1] = new_dst0->as_u64[1]; + return; + } + } + +error_bsid_encaps: + *next0 = SR_POLICY_REWRITE_NEXT_ERROR; + b0->error = node->errors[SR_POLICY_REWRITE_ERROR_BSID_ZERO]; +} + +/** + * @brief Graph node for applying a SR policy BSID - Encapsulation + */ +static uword +sr_policy_rewrite_b_encaps (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + ip6_sr_main_t *sm = &sr_main; + u32 n_left_from, next_index, *from, *to_next; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + int encap_pkts = 0, bsid_pkts = 0; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* Quad - Loop */ + while (n_left_from >= 8 && n_left_to_next >= 4) + { + u32 bi0, bi1, bi2, bi3; + vlib_buffer_t *b0, *b1, *b2, *b3; + u32 next0, next1, next2, next3; + next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; + ip6_header_t *ip0, *ip1, *ip2, *ip3; + ip6_header_t *ip0_encap, *ip1_encap, *ip2_encap, *ip3_encap; + ip6_sr_header_t *sr0, *sr1, *sr2, *sr3; + ip6_ext_header_t *prev0, *prev1, *prev2, *prev3; + ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3; + + /* Prefetch next iteration. 
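 * Same prefetch pattern as in the nodes above. In this node the packet is
 * expected to already carry an SRH whose active SID is the policy BSID:
 * end_bsid_encaps_srh_processing() first consumes that SID (decrementing
 * segments_left and updating the inner destination address), and only then
 * is the encapsulation held in sl->rewrite prepended and completed by
 * encaps_processing_v6().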
*/ + { + vlib_buffer_t *p4, *p5, *p6, *p7; + + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + p6 = vlib_get_buffer (vm, from[6]); + p7 = vlib_get_buffer (vm, from[7]); + + /* Prefetch the buffer header and packet for the N+2 loop iteration */ + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + vlib_prefetch_buffer_header (p6, LOAD); + vlib_prefetch_buffer_header (p7, LOAD); + + CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + to_next[2] = bi2 = from[2]; + to_next[3] = bi3 = from[3]; + from += 4; + to_next += 4; + n_left_from -= 4; + n_left_to_next -= 4; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); + + sl0 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + sl1 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b1)->ip.adj_index[VLIB_TX]); + sl2 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b2)->ip.adj_index[VLIB_TX]); + sl3 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b3)->ip.adj_index[VLIB_TX]); + + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl0->rewrite) + b0->current_data)); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl1->rewrite) + b1->current_data)); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl2->rewrite) + b2->current_data)); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl3->rewrite) + b3->current_data)); + + ip0_encap = vlib_buffer_get_current (b0); + ip1_encap = vlib_buffer_get_current (b1); + ip2_encap = vlib_buffer_get_current (b2); + ip3_encap = vlib_buffer_get_current (b3); + + ip6_ext_header_find_t (ip0_encap, prev0, sr0, + IP_PROTOCOL_IPV6_ROUTE); + ip6_ext_header_find_t (ip1_encap, prev1, sr1, + IP_PROTOCOL_IPV6_ROUTE); + ip6_ext_header_find_t (ip2_encap, prev2, sr2, + IP_PROTOCOL_IPV6_ROUTE); + ip6_ext_header_find_t (ip3_encap, prev3, sr3, + IP_PROTOCOL_IPV6_ROUTE); + + end_bsid_encaps_srh_processing (node, b0, ip0_encap, sr0, &next0); + end_bsid_encaps_srh_processing (node, b1, ip1_encap, sr1, &next1); + end_bsid_encaps_srh_processing (node, b2, ip2_encap, sr2, &next2); + end_bsid_encaps_srh_processing (node, b3, ip3_encap, sr3, &next3); + + clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite), + sl0->rewrite, vec_len (sl0->rewrite)); + clib_memcpy (((u8 *) ip1_encap) - vec_len (sl1->rewrite), + sl1->rewrite, vec_len (sl1->rewrite)); + clib_memcpy (((u8 *) ip2_encap) - vec_len (sl2->rewrite), + sl2->rewrite, vec_len (sl2->rewrite)); + clib_memcpy (((u8 *) ip3_encap) - vec_len (sl3->rewrite), + sl3->rewrite, vec_len (sl3->rewrite)); + + vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite)); + vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite)); + vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite)); + vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite)); + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + ip2 = vlib_buffer_get_current (b2); + ip3 = vlib_buffer_get_current (b3); + + encaps_processing_v6 (node, b0, ip0, ip0_encap); + encaps_processing_v6 (node, b1, ip1, ip1_encap); + encaps_processing_v6 (node, b2, ip2, ip2_encap); + encaps_processing_v6 (node, b3, ip3, ip3_encap); + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if 
(PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b1, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b2, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b3, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + } + + encap_pkts += 4; + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, + n_left_to_next, bi0, bi1, bi2, bi3, + next0, next1, next2, next3); + } + + /* Single loop for potentially the last three packets */ + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + ip6_header_t *ip0 = 0, *ip0_encap = 0; + ip6_ext_header_t *prev0; + ip6_sr_header_t *sr0; + ip6_sr_sl_t *sl0; + u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + b0 = vlib_get_buffer (vm, bi0); + + sl0 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl0->rewrite) + b0->current_data)); + + ip0_encap = vlib_buffer_get_current (b0); + ip6_ext_header_find_t (ip0_encap, prev0, sr0, + IP_PROTOCOL_IPV6_ROUTE); + end_bsid_encaps_srh_processing (node, b0, ip0_encap, sr0, &next0); + + clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite), + sl0->rewrite, vec_len (sl0->rewrite)); + vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite)); + + ip0 = vlib_buffer_get_current (b0); + + encaps_processing_v6 (node, b0, ip0, ip0_encap); + + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) && + PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + encap_pkts++; + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + /* Update counters */ + vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index, + SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL, + encap_pkts); + vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index, + SR_POLICY_REWRITE_ERROR_COUNTER_BSID, + bsid_pkts); + + return from_frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sr_policy_rewrite_b_encaps_node) = { + .function = sr_policy_rewrite_b_encaps, + .name = "sr-pl-rewrite-b-encaps", + .vector_size = sizeof (u32), + .format_trace = 
format_sr_policy_rewrite_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = SR_POLICY_REWRITE_N_ERROR, + .error_strings = sr_policy_rewrite_error_strings, + .n_next_nodes = SR_POLICY_REWRITE_N_NEXT, + .next_nodes = { +#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n, + foreach_sr_policy_rewrite_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +/*************************** SR Segment Lists DPOs ****************************/ +static u8 * +format_sr_segment_list_dpo (u8 * s, va_list * args) +{ + ip6_sr_main_t *sm = &sr_main; + ip6_address_t *addr; + ip6_sr_sl_t *sl; + + index_t index = va_arg (*args, index_t); + CLIB_UNUSED (u32 indent) = va_arg (*args, u32); + s = format (s, "SR: Segment List index:[%d]", index); + s = format (s, "\n\tSegments:"); + + sl = pool_elt_at_index (sm->sid_lists, index); + + s = format (s, "< "); + vec_foreach (addr, sl->segments) + { + s = format (s, "%U, ", format_ip6_address, addr); + } + s = format (s, "\b\b > - "); + s = format (s, "Weight: %u", sl->weight); + + return s; +} + +const static dpo_vft_t sr_policy_rewrite_vft = { + .dv_lock = sr_dpo_lock, + .dv_unlock = sr_dpo_unlock, + .dv_format = format_sr_segment_list_dpo, +}; + +const static char *const sr_pr_encaps_ip6_nodes[] = { + "sr-pl-rewrite-encaps", + NULL, +}; + +const static char *const sr_pr_encaps_ip4_nodes[] = { + "sr-pl-rewrite-encaps-v4", + NULL, +}; + +const static char *const *const sr_pr_encaps_nodes[DPO_PROTO_NUM] = { + [DPO_PROTO_IP6] = sr_pr_encaps_ip6_nodes, + [DPO_PROTO_IP4] = sr_pr_encaps_ip4_nodes, +}; + +const static char *const sr_pr_insert_ip6_nodes[] = { + "sr-pl-rewrite-insert", + NULL, +}; + +const static char *const *const sr_pr_insert_nodes[DPO_PROTO_NUM] = { + [DPO_PROTO_IP6] = sr_pr_insert_ip6_nodes, +}; + +const static char *const sr_pr_bsid_insert_ip6_nodes[] = { + "sr-pl-rewrite-b-insert", + NULL, +}; + +const static char *const *const sr_pr_bsid_insert_nodes[DPO_PROTO_NUM] = { + [DPO_PROTO_IP6] = sr_pr_bsid_insert_ip6_nodes, +}; + +const static char *const sr_pr_bsid_encaps_ip6_nodes[] = { + "sr-pl-rewrite-b-encaps", + NULL, +}; + +const static char *const *const sr_pr_bsid_encaps_nodes[DPO_PROTO_NUM] = { + [DPO_PROTO_IP6] = sr_pr_bsid_encaps_ip6_nodes, +}; + +/********************* SR Policy Rewrite initialization ***********************/ +/** + * @brief SR Policy Rewrite initialization + */ +clib_error_t * +sr_policy_rewrite_init (vlib_main_t * vm) +{ + ip6_sr_main_t *sm = &sr_main; + + /* Init memory for sr policy keys (bsid <-> ip6_address_t) */ + sm->sr_policy_index_by_key = hash_create_mem (0, sizeof (ip6_address_t), + sizeof (uword)); + + /* Init SR VPO DPOs type */ + sr_pr_encaps_dpo_type = + dpo_register_new_type (&sr_policy_rewrite_vft, sr_pr_encaps_nodes); + + sr_pr_insert_dpo_type = + dpo_register_new_type (&sr_policy_rewrite_vft, sr_pr_insert_nodes); + + sr_pr_bsid_encaps_dpo_type = + dpo_register_new_type (&sr_policy_rewrite_vft, sr_pr_bsid_encaps_nodes); + + sr_pr_bsid_insert_dpo_type = + dpo_register_new_type (&sr_policy_rewrite_vft, sr_pr_bsid_insert_nodes); + + /* Register the L2 encaps node used in HW redirect */ + sm->l2_sr_policy_rewrite_index = sr_policy_rewrite_encaps_node.index; + + sm->fib_table_ip6 = (u32) ~ 0; + sm->fib_table_ip4 = (u32) ~ 0; + + return 0; +} + +VLIB_INIT_FUNCTION (sr_policy_rewrite_init); + + +/* +* fd.io coding-style-patch-verification: ON +* +* Local Variables: +* eval: (c-set-style "gnu") +* End: +*/ diff --git a/src/vnet/sr/sr_steering.c b/src/vnet/sr/sr_steering.c new file mode 100755 index 00000000..86d6f27c --- 
/dev/null +++ b/src/vnet/sr/sr_steering.c @@ -0,0 +1,568 @@ +/* + * sr_steering.c: ipv6 segment routing steering into SR policy + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file + * @brief Packet steering into SR Policies + * + * This file is in charge of handling the FIB appropriately to steer packets + * through SR Policies as defined in 'sr_policy_rewrite.c'. Notice that here + * we are only doing steering. SR policy application is done in + * sr_policy_rewrite.c + * + * Supports: + * - Steering of IPv6 traffic based on destination address + * - Steering of IPv4 traffic based on destination address + * - Steering of L2 frames based on incoming interface (sw interface) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/** + * @brief Steer L2 and L3 traffic through a given SR policy + * + * @param is_del whether to delete (1) or create/update (0) the steering policy + * @param bsid is the bindingSID of the SR Policy (alt to sr_policy_index) + * @param sr_policy_index is the index of the SR Policy (alt to bsid) + * @param table_id is the VRF where to install the FIB entry for the BSID + * @param prefix is the IPv4/v6 address for L3 traffic type + * @param mask_width is the mask for L3 traffic type + * @param sw_if_index is the incoming interface for L2 traffic + * @param traffic_type describes the type of traffic + * + * @return 0 if correct, else error + */ +int +sr_steering_policy (int is_del, ip6_address_t * bsid, u32 sr_policy_index, + u32 table_id, ip46_address_t * prefix, u32 mask_width, + u32 sw_if_index, u8 traffic_type) +{ + ip6_sr_main_t *sm = &sr_main; + sr_steering_key_t key, *key_copy; + ip6_sr_steering_policy_t *steer_pl; + fib_prefix_t pfx = { 0 }; + + ip6_sr_policy_t *sr_policy = 0; + uword *p = 0; + + hash_pair_t *hp; + + /* Compute the steer policy key */ + if (prefix) + { + key.l3.prefix.as_u64[0] = prefix->as_u64[0]; + key.l3.prefix.as_u64[1] = prefix->as_u64[1]; + key.l3.mask_width = mask_width; + key.l3.fib_table = (table_id != (u32) ~ 0 ? 
table_id : 0); + } + else + { + key.l2.sw_if_index = sw_if_index; + + /* Sanitise the SW_IF_INDEX */ + if (pool_is_free_index (sm->vnet_main->interface_main.sw_interfaces, + sw_if_index)) + return -3; + + vnet_sw_interface_t *sw = + vnet_get_sw_interface (sm->vnet_main, sw_if_index); + if (sw->type != VNET_SW_INTERFACE_TYPE_HARDWARE) + return -3; + } + + key.traffic_type = traffic_type; + + /* Search for the item */ + p = hash_get_mem (sm->steer_policies_index_by_key, &key); + + if (p) + { + /* Retrieve Steer Policy function */ + steer_pl = pool_elt_at_index (sm->steer_policies, p[0]); + + if (is_del) + { + if (steer_pl->classify.traffic_type == SR_STEER_IPV6) + { + /* Remove FIB entry */ + pfx.fp_proto = FIB_PROTOCOL_IP6; + pfx.fp_len = steer_pl->classify.l3.mask_width; + pfx.fp_addr.ip6 = steer_pl->classify.l3.prefix.ip6; + + fib_table_entry_delete (fib_table_id_find_fib_index + (FIB_PROTOCOL_IP6, + steer_pl->classify.l3.fib_table), &pfx, + FIB_SOURCE_SR); + } + else if (steer_pl->classify.traffic_type == SR_STEER_IPV4) + { + /* Remove FIB entry */ + pfx.fp_proto = FIB_PROTOCOL_IP4; + pfx.fp_len = steer_pl->classify.l3.mask_width; + pfx.fp_addr.ip4 = steer_pl->classify.l3.prefix.ip4; + + fib_table_entry_delete (fib_table_id_find_fib_index + (FIB_PROTOCOL_IP4, + steer_pl->classify.l3.fib_table), &pfx, + FIB_SOURCE_SR); + } + else if (steer_pl->classify.traffic_type == SR_STEER_L2) + { + /* Remove HW redirection */ + vnet_feature_enable_disable ("device-input", + "sr-policy-rewrite-encaps-l2", + sw_if_index, 0, 0, 0); + sm->sw_iface_sr_policies[sw_if_index] = ~(u32) 0; + + /* Remove promiscous mode from interface */ + vnet_main_t *vnm = vnet_get_main (); + ethernet_main_t *em = ðernet_main; + ethernet_interface_t *eif = + ethernet_get_interface (em, sw_if_index); + + if (!eif) + goto cleanup_error_redirection; + + ethernet_set_flags (vnm, sw_if_index, 0); + } + + /* Delete SR steering policy entry */ + pool_put (sm->steer_policies, steer_pl); + hp = hash_get_pair (sm->steer_policies_index_by_key, &key); + key_copy = (void *) (hp->key); + hash_unset_mem (sm->steer_policies_index_by_key, &key); + vec_free (key_copy); + return 1; + } + else /* It means user requested to update an existing SR steering policy */ + { + /* Retrieve SR steering policy */ + if (bsid) + { + p = hash_get_mem (sm->sr_policy_index_by_key, bsid); + if (p) + sr_policy = pool_elt_at_index (sm->sr_policies, p[0]); + else + return -2; + } + else + sr_policy = pool_elt_at_index (sm->sr_policies, sr_policy_index); + + if (!sr_policy) + return -2; + + steer_pl->sr_policy = sr_policy - sm->sr_policies; + + /* Remove old FIB/hw redirection and create a new one */ + if (steer_pl->classify.traffic_type == SR_STEER_IPV6) + { + /* Remove FIB entry */ + pfx.fp_proto = FIB_PROTOCOL_IP6; + pfx.fp_len = steer_pl->classify.l3.mask_width; + pfx.fp_addr.ip6 = steer_pl->classify.l3.prefix.ip6; + + fib_table_entry_delete (fib_table_id_find_fib_index + (FIB_PROTOCOL_IP6, + steer_pl->classify.l3.fib_table), &pfx, + FIB_SOURCE_SR); + + /* Create a new one */ + goto update_fib; + + } + else if (steer_pl->classify.traffic_type == SR_STEER_IPV4) + { + /* Remove FIB entry */ + pfx.fp_proto = FIB_PROTOCOL_IP4; + pfx.fp_len = steer_pl->classify.l3.mask_width; + pfx.fp_addr.ip4 = steer_pl->classify.l3.prefix.ip4; + + fib_table_entry_delete (fib_table_id_find_fib_index + (FIB_PROTOCOL_IP4, + steer_pl->classify.l3.fib_table), &pfx, + FIB_SOURCE_SR); + + /* Create a new one */ + goto update_fib; + } + else if (steer_pl->classify.traffic_type == 
SR_STEER_L2) + { + /* Update L2-HW redirection */ + goto update_fib; + } + } + } + else + /* delete; steering policy does not exist; complain */ + if (is_del) + return -4; + + /* Retrieve SR policy */ + if (bsid) + { + p = hash_get_mem (sm->sr_policy_index_by_key, bsid); + if (p) + sr_policy = pool_elt_at_index (sm->sr_policies, p[0]); + else + return -2; + } + else + sr_policy = pool_elt_at_index (sm->sr_policies, sr_policy_index); + + /* Create a new steering policy */ + pool_get (sm->steer_policies, steer_pl); + memset (steer_pl, 0, sizeof (*steer_pl)); + + if (traffic_type == SR_STEER_IPV4 || traffic_type == SR_STEER_IPV6) + { + clib_memcpy (&steer_pl->classify.l3.prefix, prefix, + sizeof (ip46_address_t)); + steer_pl->classify.l3.mask_width = mask_width; + steer_pl->classify.l3.fib_table = + (table_id != (u32) ~ 0 ? table_id : 0); + steer_pl->classify.traffic_type = traffic_type; + } + else if (traffic_type == SR_STEER_L2) + { + steer_pl->classify.l2.sw_if_index = sw_if_index; + steer_pl->classify.traffic_type = traffic_type; + } + else + { + /* Incorrect API usage. Should never get here */ + pool_put (sm->steer_policies, steer_pl); + hp = hash_get_pair (sm->steer_policies_index_by_key, &key); + key_copy = (void *) (hp->key); + hash_unset_mem (sm->steer_policies_index_by_key, &key); + vec_free (key_copy); + return -1; + } + steer_pl->sr_policy = sr_policy - sm->sr_policies; + + /* Create and store key */ + key_copy = vec_new (sr_steering_key_t, 1); + clib_memcpy (key_copy, &key, sizeof (sr_steering_key_t)); + hash_set_mem (sm->steer_policies_index_by_key, + key_copy, steer_pl - sm->steer_policies); + + if (traffic_type == SR_STEER_L2) + { + if (!sr_policy->is_encap) + goto cleanup_error_encap; + + if (vnet_feature_enable_disable + ("device-input", "sr-policy-rewrite-encaps-l2", sw_if_index, 1, 0, + 0)) + goto cleanup_error_redirection; + + /* Set promiscous mode on interface */ + vnet_main_t *vnm = vnet_get_main (); + ethernet_main_t *em = ðernet_main; + ethernet_interface_t *eif = ethernet_get_interface (em, sw_if_index); + + if (!eif) + goto cleanup_error_redirection; + + ethernet_set_flags (vnm, sw_if_index, + ETHERNET_INTERFACE_FLAG_ACCEPT_ALL); + } + else if (traffic_type == SR_STEER_IPV4) + if (!sr_policy->is_encap) + goto cleanup_error_encap; + +update_fib: + /* FIB API calls - Recursive route through the BindingSID */ + if (traffic_type == SR_STEER_IPV6) + { + pfx.fp_proto = FIB_PROTOCOL_IP6; + pfx.fp_len = steer_pl->classify.l3.mask_width; + pfx.fp_addr.ip6 = steer_pl->classify.l3.prefix.ip6; + + fib_table_entry_path_add (fib_table_id_find_fib_index (FIB_PROTOCOL_IP6, + (table_id != + (u32) ~ 0 ? + table_id : 0)), + &pfx, FIB_SOURCE_CLI, FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP6, + (ip46_address_t *) & sr_policy->bsid, ~0, + sm->fib_table_ip6, 1, NULL, + FIB_ROUTE_PATH_FLAG_NONE); + } + else if (traffic_type == SR_STEER_IPV4) + { + pfx.fp_proto = FIB_PROTOCOL_IP4; + pfx.fp_len = steer_pl->classify.l3.mask_width; + pfx.fp_addr.ip4 = steer_pl->classify.l3.prefix.ip4; + + fib_table_entry_path_add (fib_table_id_find_fib_index (FIB_PROTOCOL_IP4, + (table_id != + (u32) ~ 0 ? 
+ table_id : 0)), + &pfx, FIB_SOURCE_CLI, FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP6, + (ip46_address_t *) & sr_policy->bsid, ~0, + sm->fib_table_ip4, 1, NULL, + FIB_ROUTE_PATH_FLAG_NONE); + } + else if (traffic_type == SR_STEER_L2) + { + if (sw_if_index < vec_len (sm->sw_iface_sr_policies)) + sm->sw_iface_sr_policies[sw_if_index] = steer_pl->sr_policy; + else + { + vec_resize (sm->sw_iface_sr_policies, + (pool_len (sm->vnet_main->interface_main.sw_interfaces) + - vec_len (sm->sw_iface_sr_policies))); + sm->sw_iface_sr_policies[sw_if_index] = steer_pl->sr_policy; + } + } + + return 0; + +cleanup_error_encap: + pool_put (sm->steer_policies, steer_pl); + hp = hash_get_pair (sm->steer_policies_index_by_key, &key); + key_copy = (void *) (hp->key); + hash_unset_mem (sm->steer_policies_index_by_key, &key); + vec_free (key_copy); + return -5; + +cleanup_error_redirection: + pool_put (sm->steer_policies, steer_pl); + hp = hash_get_pair (sm->steer_policies_index_by_key, &key); + key_copy = (void *) (hp->key); + hash_unset_mem (sm->steer_policies_index_by_key, &key); + vec_free (key_copy); + return -3; +} + +static clib_error_t * +sr_steer_policy_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t *vnm = vnet_get_main (); + + int is_del = 0; + + ip46_address_t prefix; + u32 dst_mask_width = 0; + u32 sw_if_index = (u32) ~ 0; + u8 traffic_type = 0; + u32 fib_table = (u32) ~ 0; + + ip6_address_t bsid; + u32 sr_policy_index = (u32) ~ 0; + + u8 sr_policy_set = 0; + + int rv; + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "del")) + is_del = 1; + else if (!traffic_type + && unformat (input, "l3 %U/%d", unformat_ip6_address, + &prefix.ip6, &dst_mask_width)) + traffic_type = SR_STEER_IPV6; + else if (!traffic_type + && unformat (input, "l3 %U/%d", unformat_ip4_address, + &prefix.ip4, &dst_mask_width)) + traffic_type = SR_STEER_IPV4; + else if (!traffic_type + && unformat (input, "l2 %U", unformat_vnet_sw_interface, vnm, + &sw_if_index)) + traffic_type = SR_STEER_L2; + else if (!sr_policy_set + && unformat (input, "via sr policy index %d", + &sr_policy_index)) + sr_policy_set = 1; + else if (!sr_policy_set + && unformat (input, "via sr policy bsid %U", + unformat_ip6_address, &bsid)) + sr_policy_set = 1; + else if (fib_table == (u32) ~ 0 + && unformat (input, "fib-table %d", &fib_table)); + else + break; + } + + if (!traffic_type) + return clib_error_return (0, "No L2/L3 traffic specified"); + if (!sr_policy_set) + return clib_error_return (0, "No SR policy specified"); + + /* Make sure that the prefixes are clean */ + if (traffic_type == SR_STEER_IPV4) + { + u32 mask = + (dst_mask_width ? (0xFFFFFFFFu >> (32 - dst_mask_width)) : 0); + prefix.ip4.as_u32 &= mask; + } + else if (traffic_type == SR_STEER_IPV6) + { + ip6_address_t mask; + ip6_address_mask_from_width (&mask, dst_mask_width); + ip6_address_mask (&prefix.ip6, &mask); + } + + rv = + sr_steering_policy (is_del, (sr_policy_index == ~(u32) 0 ? &bsid : NULL), + sr_policy_index, fib_table, &prefix, dst_mask_width, + sw_if_index, traffic_type); + + switch (rv) + { + case 0: + break; + case 1: + return 0; + case -1: + return clib_error_return (0, "Incorrect API usage."); + case -2: + return clib_error_return (0, + "The requested SR policy could not be located. Review the BSID/index."); + case -3: + return clib_error_return (0, + "Unable to do SW redirect. Incorrect interface."); + case -4: + return clib_error_return (0, + "The requested SR policy could not be deleted. 
Review the BSID/index."); + case -5: + return clib_error_return (0, + "The SR policy is not an encapsulation one."); + default: + return clib_error_return (0, "BUG: sr steer policy returns %d", rv); + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (sr_steer_policy_command, static) = { + .path = "sr steer", + .short_help = "sr steer (del) [l3 |l2 ]" + "via sr policy [index |bsid ]" + "(fib-table )", + .long_help = + "\tSteer a L2 or L3 traffic through an existing SR policy.\n" + "\tExamples:\n" + "\t\tsr steer l3 2001::/64 via sr_policy index 5\n" + "\t\tsr steer l3 2001::/64 via sr_policy bsid 2010::9999:1\n" + "\t\tsr steer l2 GigabitEthernet0/5/0 via sr_policy index 5\n" + "\t\tsr steer del l3 2001::/64 via sr_policy index 5\n", + .function = sr_steer_policy_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +show_sr_steering_policies_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ip6_sr_main_t *sm = &sr_main; + ip6_sr_steering_policy_t **steer_policies = 0; + ip6_sr_steering_policy_t *steer_pl; + + vnet_main_t *vnm = vnet_get_main (); + + ip6_sr_policy_t *pl = 0; + int i; + + vlib_cli_output (vm, "SR steering policies:"); + /* *INDENT-OFF* */ + pool_foreach (steer_pl, sm->steer_policies, ({vec_add1(steer_policies, steer_pl);})); + /* *INDENT-ON* */ + vlib_cli_output (vm, "Traffic\t\tSR policy BSID"); + for (i = 0; i < vec_len (steer_policies); i++) + { + steer_pl = steer_policies[i]; + pl = pool_elt_at_index (sm->sr_policies, steer_pl->sr_policy); + if (steer_pl->classify.traffic_type == SR_STEER_L2) + { + vlib_cli_output (vm, "L2 %U\t%U", + format_vnet_sw_if_index_name, vnm, + steer_pl->classify.l2.sw_if_index, + format_ip6_address, &pl->bsid); + } + else if (steer_pl->classify.traffic_type == SR_STEER_IPV4) + { + vlib_cli_output (vm, "L3 %U/%d\t%U", + format_ip4_address, + &steer_pl->classify.l3.prefix.ip4, + steer_pl->classify.l3.mask_width, + format_ip6_address, &pl->bsid); + } + else if (steer_pl->classify.traffic_type == SR_STEER_IPV6) + { + vlib_cli_output (vm, "L3 %U/%d\t%U", + format_ip6_address, + &steer_pl->classify.l3.prefix.ip6, + steer_pl->classify.l3.mask_width, + format_ip6_address, &pl->bsid); + } + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_sr_steering_policies_command, static) = { + .path = "show sr steering policies", + .short_help = "show sr steering policies", + .function = show_sr_steering_policies_command_fn, +}; +/* *INDENT-ON* */ + +clib_error_t * +sr_steering_init (vlib_main_t * vm) +{ + ip6_sr_main_t *sm = &sr_main; + + /* Init memory for function keys */ + sm->steer_policies_index_by_key = + hash_create_mem (0, sizeof (sr_steering_key_t), sizeof (uword)); + + sm->sw_iface_sr_policies = 0; + + sm->vnet_main = vnet_get_main (); + + return 0; +} + +VLIB_INIT_FUNCTION (sr_steering_init); + +VNET_FEATURE_INIT (sr_policy_rewrite_encaps_l2, static) = +{ +.arc_name = "device-input",.node_name = + "sr-pl-rewrite-encaps-l2",.runs_before = + VNET_FEATURES ("ethernet-input"),}; + +/* +* fd.io coding-style-patch-verification: ON +* +* Local Variables: +* eval: (c-set-style "gnu") +* End: +*/ diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c index ee0c4629..8250279e 100644 --- a/src/vpp/api/custom_dump.c +++ b/src/vpp/api/custom_dump.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include @@ -960,143 +960,215 @@ static void *vl_api_l2_patch_add_del_t_print FINISH; } -static void *vl_api_sr_tunnel_add_del_t_print - 
(vl_api_sr_tunnel_add_del_t * mp, void *handle) +static void *vl_api_sr_localsid_add_del_t_print + (vl_api_sr_localsid_add_del_t * mp, void *handle) { + vnet_main_t *vnm = vnet_get_main (); u8 *s; - ip6_address_t *this_address; - int i; - u16 flags_host_byte_order; - u8 pl_flag; - - s = format (0, "SCRIPT: sr_tunnel_add_del "); - - if (mp->name[0]) - s = format (s, "name %s ", mp->name); - s = format (s, "src %U dst %U/%d ", format_ip6_address, - (ip6_address_t *) mp->src_address, - format_ip6_address, - (ip6_address_t *) mp->dst_address, mp->dst_mask_width); + s = format (0, "SCRIPT: sr_localsid_add_del "); - this_address = (ip6_address_t *) mp->segs_and_tags; - for (i = 0; i < mp->n_segments; i++) + switch (mp->behavior) { - s = format (s, "next %U ", format_ip6_address, this_address); - this_address++; - } - for (i = 0; i < mp->n_tags; i++) - { - s = format (s, "tag %U ", format_ip6_address, this_address); - this_address++; + case SR_BEHAVIOR_END: + s = format (s, "Address: %U\nBehavior: End", + format_ip6_address, (ip6_address_t *) mp->localsid_addr); + s = format (s, (mp->end_psp ? "End.PSP: True" : "End.PSP: False")); + break; + case SR_BEHAVIOR_X: + s = + format (s, + "Address: %U\nBehavior: X (Endpoint with Layer-3 cross-connect)" + "\nIface: %U\nNext hop: %U", format_ip6_address, + (ip6_address_t *) mp->localsid_addr, + format_vnet_sw_if_index_name, vnm, ntohl (mp->sw_if_index), + format_ip6_address, (ip6_address_t *) mp->nh_addr); + s = format (s, (mp->end_psp ? "End.PSP: True" : "End.PSP: False")); + break; + case SR_BEHAVIOR_DX4: + s = + format (s, + "Address: %U\nBehavior: DX4 (Endpoint with decapsulation with IPv4 cross-connect)" + "\nIface: %U\nNext hop: %U", format_ip6_address, + (ip6_address_t *) mp->localsid_addr, + format_vnet_sw_if_index_name, vnm, ntohl (mp->sw_if_index), + format_ip4_address, (ip4_address_t *) mp->nh_addr); + break; + case SR_BEHAVIOR_DX6: + s = + format (s, + "Address: %U\nBehavior: DX6 (Endpoint with decapsulation with IPv6 cross-connect)" + "\nIface: %UNext hop: %U", format_ip6_address, + (ip6_address_t *) mp->localsid_addr, + format_vnet_sw_if_index_name, vnm, ntohl (mp->sw_if_index), + format_ip6_address, (ip6_address_t *) mp->nh_addr); + break; + case SR_BEHAVIOR_DX2: + s = + format (s, + "Address: %U\nBehavior: DX2 (Endpoint with decapulation and Layer-2 cross-connect)" + "\nIface: %U", format_ip6_address, + (ip6_address_t *) mp->localsid_addr, + format_vnet_sw_if_index_name, vnm, ntohl (mp->sw_if_index)); + break; + case SR_BEHAVIOR_DT6: + s = + format (s, + "Address: %U\nBehavior: DT6 (Endpoint with decapsulation and specific IPv6 table lookup)" + "\nTable: %u", format_ip6_address, + (ip6_address_t *) mp->localsid_addr, ntohl (mp->fib_table)); + break; + case SR_BEHAVIOR_DT4: + s = + format (s, + "Address: %U\nBehavior: DT4 (Endpoint with decapsulation and specific IPv4 table lookup)" + "\nTable: %u", format_ip6_address, + (ip6_address_t *) mp->localsid_addr, ntohl (mp->fib_table)); + break; + default: + if (mp->behavior >= SR_BEHAVIOR_LAST) + { + s = format (s, "Address: %U\n Behavior: %u", + format_ip6_address, (ip6_address_t *) mp->localsid_addr, + mp->behavior); + } + else + //Should never get here... 
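/* Behaviors at or above SR_BEHAVIOR_LAST are not known to this printer and
   are dumped by their numeric value above; a value below SR_BEHAVIOR_LAST
   that matched none of the explicit cases would be an internal
   inconsistency, hence the "Internal error" text. */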
+ s = format (s, "Internal error"); + break; } + FINISH; +} - flags_host_byte_order = clib_net_to_host_u16 (mp->flags_net_byte_order); +static void *vl_api_sr_steering_add_del_t_print + (vl_api_sr_steering_add_del_t * mp, void *handle) +{ + u8 *s; - if (flags_host_byte_order & IP6_SR_HEADER_FLAG_CLEANUP) - s = format (s, " clean "); + s = format (0, "SCRIPT: sr_steering_add_del "); - if (flags_host_byte_order & IP6_SR_HEADER_FLAG_PROTECTED) - s = format (s, "protected "); + s = format (s, (mp->is_del ? "Del: True" : "Del: False")); - for (i = 1; i <= 4; i++) + switch (mp->traffic_type) { - pl_flag = ip6_sr_policy_list_flags (flags_host_byte_order, i); - - switch (pl_flag) - { - case IP6_SR_HEADER_FLAG_PL_ELT_NOT_PRESENT: - continue; - - case IP6_SR_HEADER_FLAG_PL_ELT_INGRESS_PE: - s = format (s, "InPE %d ", i); - break; - - case IP6_SR_HEADER_FLAG_PL_ELT_EGRESS_PE: - s = format (s, "EgPE %d ", i); - break; - - case IP6_SR_HEADER_FLAG_PL_ELT_ORIG_SRC_ADDR: - s = format (s, "OrgSrc %d ", i); - break; - - default: - clib_warning ("BUG: pl elt %d value %d", i, pl_flag); - break; - } + case SR_STEER_L2: + s = format (s, "Traffic type: L2 iface: %u", ntohl (mp->sw_if_index)); + break; + case SR_STEER_IPV4: + s = format (s, "Traffic type: IPv4 %U/%u", format_ip4_address, + (ip4_address_t *) mp->prefix_addr, ntohl (mp->mask_width)); + break; + case SR_STEER_IPV6: + s = format (s, "Traffic type: IPv6 %U/%u", format_ip6_address, + (ip6_address_t *) mp->prefix_addr, ntohl (mp->mask_width)); + break; + default: + s = format (s, "Traffic type: Unknown(%u)", mp->traffic_type); + break; } + s = format (s, "BindingSID: %U", format_ip6_address, + (ip6_address_t *) mp->bsid_addr); - if (mp->policy_name[0]) - s = format (s, "policy_name %s ", mp->policy_name); + s = format (s, "SR Policy Index: %u", ntohl (mp->sr_policy_index)); - if (mp->is_add == 0) - s = format (s, "del "); + s = format (s, "FIB_table: %u", ntohl (mp->table_id)); FINISH; } -static void *vl_api_sr_policy_add_del_t_print - (vl_api_sr_policy_add_del_t * mp, void *handle) +static void *vl_api_sr_policy_add_t_print + (vl_api_sr_policy_add_t * mp, void *handle) { u8 *s; + + ip6_address_t *segments = 0, *seg; + ip6_address_t *this_address = (ip6_address_t *) mp->segments; + int i; + for (i = 0; i < mp->n_segments; i++) + { + vec_add2 (segments, seg, 1); + clib_memcpy (seg->as_u8, this_address->as_u8, sizeof (*this_address)); + this_address++; + } - s = format (0, "SCRIPT: sr_policy_add_del "); + s = format (0, "SCRIPT: sr_policy_add "); - if (mp->name[0]) - s = format (s, "name %s ", mp->name); + s = format (s, "BSID: %U", format_ip6_address, + (ip6_address_t *) mp->bsid_addr); + s = + format (s, + (mp->is_encap ? "Behavior: Encapsulation" : + "Behavior: SRH insertion")); - if (mp->tunnel_names[0]) - { - // start deserializing tunnel_names - int num_tunnels = mp->tunnel_names[0]; //number of tunnels - u8 *deser_tun_names = mp->tunnel_names; - deser_tun_names += 1; //moving along + s = format (s, "FIB_table: %u", ntohl (mp->fib_table)); - u8 *tun_name = 0; - int tun_name_len = 0; + s = format (s, (mp->type ? 
"Type: Default" : "Type: Spray")); - for (i = 0; i < num_tunnels; i++) - { - tun_name_len = *deser_tun_names; - deser_tun_names += 1; - vec_resize (tun_name, tun_name_len); - memcpy (tun_name, deser_tun_names, tun_name_len); - s = format (s, "tunnel %s ", tun_name); - deser_tun_names += tun_name_len; - tun_name = 0; - } - } + s = format (s, "SID list weight: %u", ntohl (mp->weight)); - if (mp->is_add == 0) - s = format (s, "del "); + s = format (s, "{"); + vec_foreach (seg, segments) + { + s = format (s, "%U, ", format_ip6_address, seg); + } + s = format (s, "\b\b } "); FINISH; } -static void *vl_api_sr_multicast_map_add_del_t_print - (vl_api_sr_multicast_map_add_del_t * mp, void *handle) +static void *vl_api_sr_policy_mod_t_print + (vl_api_sr_policy_mod_t * mp, void *handle) { + u8 *s; + u32 weight; - u8 *s = 0; - /* int i; */ + ip6_address_t *segments = 0, *seg; + ip6_address_t *this_address = (ip6_address_t *) mp->segments; - s = format (0, "SCRIPT: sr_multicast_map_add_del "); + int i; + for (i = 0; i < mp->n_segments; i++) + { + vec_add2 (segments, seg, 1); + clib_memcpy (seg->as_u8, this_address->as_u8, sizeof (*this_address)); + this_address++; + } - if (mp->multicast_address[0]) - s = format (s, "address %U ", format_ip6_address, &mp->multicast_address); + s = format (0, "SCRIPT: sr_policy_mod "); - if (mp->policy_name[0]) - s = format (s, "sr-policy %s ", &mp->policy_name); + s = format (s, "BSID: %U", format_ip6_address, + (ip6_address_t *) mp->bsid_addr); + s = format (s, "SR Policy index: %u", ntohl (mp->sr_policy_index)); - if (mp->is_add == 0) - s = format (s, "del "); + s = format (s, "Operation: %u", mp->operation); + + s = format (s, "SID list index: %u", ntohl (mp->sl_index)); + + s = format (s, "SID list weight: %u", ntohl (mp->weight)); + + s = format (s, "{"); + vec_foreach (seg, segments) + { + s = format (s, "%U, ", format_ip6_address, seg); + } + s = format (s, "\b\b } "); FINISH; } +static void *vl_api_sr_policy_del_t_print + (vl_api_sr_policy_del_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: sr_policy_del "); + u8 bsid_addr[16]; + u32 sr_policy_index; + s = format (s, "To be delivered. 
Good luck."); + FINISH; +} static void *vl_api_classify_add_del_table_t_print (vl_api_classify_add_del_table_t * mp, void *handle) @@ -2864,9 +2936,11 @@ _(SW_INTERFACE_IP6ND_RA_PREFIX, sw_interface_ip6nd_ra_prefix) \ _(SW_INTERFACE_IP6ND_RA_CONFIG, sw_interface_ip6nd_ra_config) \ _(SET_ARP_NEIGHBOR_LIMIT, set_arp_neighbor_limit) \ _(L2_PATCH_ADD_DEL, l2_patch_add_del) \ -_(SR_TUNNEL_ADD_DEL, sr_tunnel_add_del) \ -_(SR_POLICY_ADD_DEL, sr_policy_add_del) \ -_(SR_MULTICAST_MAP_ADD_DEL, sr_multicast_map_add_del) \ +_(SR_LOCALSID_ADD_DEL, sr_localsid_add_del) \ +_(SR_STEERING_ADD_DEL, sr_steering_add_del) \ +_(SR_POLICY_ADD, sr_policy_add) \ +_(SR_POLICY_MOD, sr_policy_mod) \ +_(SR_POLICY_DEL, sr_policy_del) \ _(SW_INTERFACE_SET_L2_XCONNECT, sw_interface_set_l2_xconnect) \ _(L2FIB_ADD_DEL, l2fib_add_del) \ _(L2_FLAGS, l2_flags) \ diff --git a/src/vpp/api/test_client.c b/src/vpp/api/test_client.c index 8b61f4f3..551bdab9 100644 --- a/src/vpp/api/test_client.c +++ b/src/vpp/api/test_client.c @@ -607,7 +607,6 @@ _(SW_INTERFACE_IP6_SET_LINK_LOCAL_ADDRESS_REPLY, sw_interface_ip6_set_link_local _(CREATE_LOOPBACK_REPLY, create_loopback_reply) \ _(CREATE_LOOPBACK_INSTANCE_REPLY, create_loopback_instance_reply) \ _(L2_PATCH_ADD_DEL_REPLY, l2_patch_add_del_reply) \ -_(SR_TUNNEL_ADD_DEL_REPLY,sr_tunnel_add_del_reply) \ _(SW_INTERFACE_SET_L2_XCONNECT_REPLY, sw_interface_set_l2_xconnect_reply) \ _(SW_INTERFACE_SET_L2_BRIDGE_REPLY, sw_interface_set_l2_bridge_reply) -- cgit 1.2.3-korg From 09a38a6db4235dcacbfb6d5e3686faaeb1c25a37 Mon Sep 17 00:00:00 2001 From: Wojciech Dec Date: Tue, 7 Mar 2017 19:30:39 +0100 Subject: Fix endian issue in ARP Event Reply ARP Event reply sw_if_index was getting passed in host byte order. Change-Id: Ifae8673906ac2c4233f146786a3d02c38280809b Signed-off-by: Wojciech Dec --- src/vat/api_format.c | 6 ++++-- src/vpp/api/api.c | 8 ++++---- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'src/vpp/api') diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 53e9ca1f..0b60b910 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -1254,10 +1254,11 @@ static void vl_api_show_version_reply_t_handler_json static void vl_api_ip4_arp_event_t_handler (vl_api_ip4_arp_event_t * mp) { + u32 sw_if_index = ntohl (mp->sw_if_index); errmsg ("arp %s event: address %U new mac %U sw_if_index %d", mp->mac_ip ? "mac/ip binding" : "address resolution", format_ip4_address, &mp->address, - format_ethernet_address, mp->new_mac, mp->sw_if_index); + format_ethernet_address, mp->new_mac, sw_if_index); } static void @@ -1269,10 +1270,11 @@ vl_api_ip4_arp_event_t_handler_json (vl_api_ip4_arp_event_t * mp) static void vl_api_ip6_nd_event_t_handler (vl_api_ip6_nd_event_t * mp) { + u32 sw_if_index = ntohl (mp->sw_if_index); errmsg ("ip6 nd %s event: address %U new mac %U sw_if_index %d", mp->mac_ip ? 
"mac/ip binding" : "address resolution", format_ip6_address, mp->address, - format_ethernet_address, mp->new_mac, mp->sw_if_index); + format_ethernet_address, mp->new_mac, sw_if_index); } static void diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index f06894e8..828394ed 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -1493,7 +1493,7 @@ arp_change_data_callback (u32 pool_index, u8 * new_mac, } else { /* same mac */ - if (sw_if_index == event->sw_if_index && + if (sw_if_index == ntohl(event->sw_if_index) && (!event->mac_ip || /* for BD case, also check IP address with 10 sec timeout */ (address == event->address && @@ -1503,7 +1503,7 @@ arp_change_data_callback (u32 pool_index, u8 * new_mac, /* *INDENT-ON* */ arp_event_last_time = now; - event->sw_if_index = sw_if_index; + event->sw_if_index = htonl (sw_if_index); if (event->mac_ip) event->address = address; return 0; @@ -1531,7 +1531,7 @@ nd_change_data_callback (u32 pool_index, u8 * new_mac, } else { /* same mac */ - if (sw_if_index == event->sw_if_index && + if (sw_if_index == ntohl(event->sw_if_index) && (!event->mac_ip || /* for BD case, also check IP address with 10 sec timeout */ (ip6_address_is_equal (address, @@ -1542,7 +1542,7 @@ nd_change_data_callback (u32 pool_index, u8 * new_mac, /* *INDENT-ON* */ nd_event_last_time = now; - event->sw_if_index = sw_if_index; + event->sw_if_index = htonl (sw_if_index); if (event->mac_ip) clib_memcpy (event->address, address, sizeof (event->address)); return 0; -- cgit 1.2.3-korg From 45e4f365086267ef2551b1dedf4e309bdd00a34a Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Tue, 7 Mar 2017 12:52:31 -0500 Subject: Missing plugin binary API command fns found after brief search Create hash tables before loading plugins. Previous init sequence wiped out most if not all of them. 
Change-Id: I5dd628895f68f740d525951511a9fef1822d39da Signed-off-by: Dave Barach --- src/vat/api_format.c | 2 +- src/vat/plugin.c | 1 - src/vpp/api/api_main.c | 10 ++++++++++ src/vpp/vnet/main.c | 6 ++++++ 4 files changed, 17 insertions(+), 2 deletions(-) (limited to 'src/vpp/api') diff --git a/src/vat/api_format.c b/src/vat/api_format.c index b5943f03..fb596fe6 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -18314,13 +18314,13 @@ vat_api_hookup (vat_main_t * vam) #if (VPP_API_TEST_BUILTIN==0) vl_msg_api_set_first_available_msg_id (VL_MSG_FIRST_AVAILABLE); -#endif vam->sw_if_index_by_interface_name = hash_create_string (0, sizeof (uword)); vam->function_by_name = hash_create_string (0, sizeof (uword)); vam->help_by_name = hash_create_string (0, sizeof (uword)); +#endif /* API messages we can send */ #define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n); diff --git a/src/vat/plugin.c b/src/vat/plugin.c index c1cc928c..20de8c50 100644 --- a/src/vat/plugin.c +++ b/src/vat/plugin.c @@ -181,7 +181,6 @@ vat_plugin_init (vat_main_t * vam) { plugin_main_t *pm = &vat_plugin_main; - pm->plugin_path = format (0, "%s%c", vat_plugin_path, 0); if (vat_plugin_name_filter) pm->plugin_name_filter = format (0, "%s%c", vat_plugin_name_filter, 0); diff --git a/src/vpp/api/api_main.c b/src/vpp/api/api_main.c index 6ae510b1..7913bc01 100644 --- a/src/vpp/api/api_main.c +++ b/src/vpp/api/api_main.c @@ -56,6 +56,16 @@ api_main_init (vlib_main_t * vm) VLIB_INIT_FUNCTION (api_main_init); +void +vat_plugin_hash_create (void) +{ + vat_main_t *vam = &vat_main; + + vam->sw_if_index_by_interface_name = hash_create_string (0, sizeof (uword)); + vam->function_by_name = hash_create_string (0, sizeof (uword)); + vam->help_by_name = hash_create_string (0, sizeof (uword)); +} + static clib_error_t * api_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) diff --git a/src/vpp/vnet/main.c b/src/vpp/vnet/main.c index 4a96ca94..a566d956 100644 --- a/src/vpp/vnet/main.c +++ b/src/vpp/vnet/main.c @@ -25,6 +25,8 @@ static void vpe_main_init (vlib_main_t * vm) { + void vat_plugin_hash_create (void); + if (CLIB_DEBUG > 0) vlib_unix_cli_set_prompt ("DBGvpp# "); else @@ -33,6 +35,10 @@ vpe_main_init (vlib_main_t * vm) /* Turn off network stack components which we don't want */ vlib_mark_init_function_complete (vm, srp_init); + /* + * Create the binary api plugin hashes before loading plugins + */ + vat_plugin_hash_create (); } /* -- cgit 1.2.3-korg From b3b2de71ceea0cc7ce18f89cc8180ed4a42e355d Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Wed, 8 Mar 2017 05:17:22 -0800 Subject: IMplementation for option to not create a FIB table entry when adding a neighbor entry Change-Id: I952039e101031ee6a06e63f4c73d8eb359423e1a Signed-off-by: Neale Ranns --- src/vat/api_format.c | 4 +++ src/vnet/ethernet/arp.c | 61 +++++++++++++++++++-------------- src/vnet/ethernet/arp_packet.h | 11 ++++-- src/vnet/ethernet/ethernet.h | 3 +- src/vnet/ip/ip6_neighbor.c | 59 ++++++++++++++++++++------------ src/vnet/ip/ip6_neighbor.h | 14 +++++--- src/vnet/ip/ip_api.c | 6 ++-- src/vpp/api/custom_dump.c | 3 ++ test/test_ip6.py | 28 +++++++++++++-- test/test_neighbor.py | 23 +++++++++++-- test/vpp_ip_route.py | 78 ++++++++++++++++++++++++------------------ test/vpp_neighbor.py | 6 ++-- test/vpp_papi_provider.py | 3 ++ 13 files changed, 203 insertions(+), 96 deletions(-) (limited to 'src/vpp/api') diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 08a2a774..0da31208 100644 --- a/src/vat/api_format.c 
+++ b/src/vat/api_format.c @@ -7132,6 +7132,7 @@ api_ip_neighbor_add_del (vat_main_t * vam) u8 sw_if_index_set = 0; u8 is_add = 1; u8 is_static = 0; + u8 is_no_fib_entry = 0; u8 mac_address[6]; u8 mac_set = 0; u8 v4_address_set = 0; @@ -7158,6 +7159,8 @@ api_ip_neighbor_add_del (vat_main_t * vam) sw_if_index_set = 1; else if (unformat (i, "is_static")) is_static = 1; + else if (unformat (i, "no-fib-entry")) + is_no_fib_entry = 1; else if (unformat (i, "dst %U", unformat_ip4_address, &v4address)) v4_address_set = 1; else if (unformat (i, "dst %U", unformat_ip6_address, &v6address)) @@ -7191,6 +7194,7 @@ api_ip_neighbor_add_del (vat_main_t * vam) mp->sw_if_index = ntohl (sw_if_index); mp->is_add = is_add; mp->is_static = is_static; + mp->is_no_adj_fib = is_no_fib_entry; if (mac_set) clib_memcpy (mp->mac_address, mac_address, 6); if (v6_address_set) diff --git a/src/vnet/ethernet/arp.c b/src/vnet/ethernet/arp.c index 222415b0..d8ae8443 100644 --- a/src/vnet/ethernet/arp.c +++ b/src/vnet/ethernet/arp.c @@ -100,6 +100,7 @@ typedef struct u32 sw_if_index; ethernet_arp_ip4_over_ethernet_address_t a; int is_static; + int is_no_fib_entry; int flags; #define ETHERNET_ARP_ARGS_REMOVE (1<<0) #define ETHERNET_ARP_ARGS_FLUSH (1<<1) @@ -254,6 +255,9 @@ format_ethernet_arp_ip4_entry (u8 * s, va_list * va) if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC) flags = format (flags, "D"); + if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_NO_FIB_ENTRY) + flags = format (flags, "N"); + s = format (s, "%=12U%=16U%=6s%=20U%=24U", format_vlib_cpu_time, vnm->vlib_main, e->cpu_time_last_updated, format_ip4_address, &e->ip4_address, @@ -525,6 +529,7 @@ vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm, ethernet_arp_interface_t *arp_int; int is_static = args->is_static; u32 sw_if_index = args->sw_if_index; + int is_no_fib_entry = args->is_no_fib_entry; vec_validate (am->ethernet_arp_by_sw_if_index, sw_if_index); @@ -546,16 +551,6 @@ vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm, if (make_new_arp_cache_entry) { - fib_prefix_t pfx = { - .fp_len = 32, - .fp_proto = FIB_PROTOCOL_IP4, - .fp_addr = { - .ip4 = a->ip4, - } - , - }; - u32 fib_index; - pool_get (am->ip4_entry_pool, e); if (NULL == arp_int->arp_entries) @@ -570,17 +565,25 @@ vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm, clib_memcpy (e->ethernet_address, a->ethernet, sizeof (e->ethernet_address)); - fib_index = ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index); - e->fib_entry_index = - fib_table_entry_update_one_path (fib_index, - &pfx, - FIB_SOURCE_ADJ, - FIB_ENTRY_FLAG_ATTACHED, - FIB_PROTOCOL_IP4, - &pfx.fp_addr, - e->sw_if_index, - ~0, - 1, NULL, FIB_ROUTE_PATH_FLAG_NONE); + if (!is_no_fib_entry) + { + fib_prefix_t pfx = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr.ip4 = a->ip4, + }; + u32 fib_index; + + fib_index = + ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index); + e->fib_entry_index = + fib_table_entry_update_one_path (fib_index, &pfx, FIB_SOURCE_ADJ, + FIB_ENTRY_FLAG_ATTACHED, + FIB_PROTOCOL_IP4, &pfx.fp_addr, + e->sw_if_index, ~0, 1, NULL, + FIB_ROUTE_PATH_FLAG_NONE); + e->flags |= ETHERNET_ARP_IP4_ENTRY_FLAG_NO_FIB_ENTRY; + } } else { @@ -1142,7 +1145,7 @@ arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) vnet_arp_set_ip4_over_ethernet (vnm, sw_if_index0, &arp0->ip4_over_ethernet[0], - 0 /* is_static */ ); + 0 /* is_static */ , 0); error0 = ETHERNET_ARP_ERROR_l3_src_address_learned; } @@ -1658,6 +1661,8 @@ arp_entry_free (ethernet_arp_interface_t * eai, 
ethernet_arp_ip4_entry_t * e) { ethernet_arp_main_t *am = ðernet_arp_main; + /* it's safe to delete the ADJ source on the FIB entry, even if it + * was added */ fib_table_entry_delete_index (e->fib_entry_index, FIB_SOURCE_ADJ); hash_unset (eai->arp_entries, e->ip4_address.as_u32); pool_put (am->ip4_entry_pool, e); @@ -1828,13 +1833,15 @@ increment_ip4_and_mac_address (ethernet_arp_ip4_over_ethernet_address_t * a) int vnet_arp_set_ip4_over_ethernet (vnet_main_t * vnm, - u32 sw_if_index, void *a_arg, int is_static) + u32 sw_if_index, void *a_arg, + int is_static, int is_no_fib_entry) { ethernet_arp_ip4_over_ethernet_address_t *a = a_arg; vnet_arp_set_ip4_over_ethernet_rpc_args_t args; args.sw_if_index = sw_if_index; args.is_static = is_static; + args.is_no_fib_entry = is_no_fib_entry; args.flags = 0; clib_memcpy (&args.a, a, sizeof (*a)); @@ -1931,6 +1938,7 @@ ip_arp_add_del_command_fn (vlib_main_t * vm, u32 fib_index = 0; u32 fib_id; int is_static = 0; + int is_no_fib_entry = 0; int is_proxy = 0; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) @@ -1948,6 +1956,9 @@ ip_arp_add_del_command_fn (vlib_main_t * vm, else if (unformat (input, "static")) is_static = 1; + else if (unformat (input, "no-fib-entry")) + is_no_fib_entry = 1; + else if (unformat (input, "count %d", &count)) ; @@ -1991,7 +2002,7 @@ ip_arp_add_del_command_fn (vlib_main_t * vm, 1 /* type */ , 0 /* data */ ); vnet_arp_set_ip4_over_ethernet - (vnm, sw_if_index, &addr, is_static); + (vnm, sw_if_index, &addr, is_static, is_no_fib_entry); vlib_process_wait_for_event (vm); event_type = vlib_process_get_events (vm, &event_data); @@ -2046,7 +2057,7 @@ ip_arp_add_del_command_fn (vlib_main_t * vm, VLIB_CLI_COMMAND (ip_arp_add_del_command, static) = { .path = "set ip arp", .short_help = - "set ip arp [del] [static] [count ] [fib-id ] [proxy - ]", + "set ip arp [del] [static] [no-fib-entry] [count ] [fib-id ] [proxy - ]", .function = ip_arp_add_del_command_fn, }; /* *INDENT-ON* */ diff --git a/src/vnet/ethernet/arp_packet.h b/src/vnet/ethernet/arp_packet.h index e762ffa4..17e64f43 100644 --- a/src/vnet/ethernet/arp_packet.h +++ b/src/vnet/ethernet/arp_packet.h @@ -140,6 +140,13 @@ typedef struct }; } ethernet_arp_header_t; +typedef enum ethernet_arp_entry_flags_t_ +{ + ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC = (1 << 0), + ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC = (1 << 1), + ETHERNET_ARP_IP4_ENTRY_FLAG_NO_FIB_ENTRY = (1 << 2), +} __attribute__ ((packed)) ethernet_arp_entry_flags_t; + typedef struct { u32 sw_if_index; @@ -147,9 +154,7 @@ typedef struct u8 ethernet_address[6]; - u16 flags; -#define ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC (1 << 0) -#define ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC (1 << 1) + ethernet_arp_entry_flags_t flags; u64 cpu_time_last_updated; diff --git a/src/vnet/ethernet/ethernet.h b/src/vnet/ethernet/ethernet.h index ba84c69c..dcc656a7 100644 --- a/src/vnet/ethernet/ethernet.h +++ b/src/vnet/ethernet/ethernet.h @@ -403,7 +403,8 @@ void ethernet_set_rx_redirect (vnet_main_t * vnm, vnet_hw_interface_t * hi, int vnet_arp_set_ip4_over_ethernet (vnet_main_t * vnm, - u32 sw_if_index, void *a_arg, int is_static); + u32 sw_if_index, void *a_arg, + int is_static, int is_no_fib_entry); int vnet_arp_unset_ip4_over_ethernet (vnet_main_t * vnm, diff --git a/src/vnet/ip/ip6_neighbor.c b/src/vnet/ip/ip6_neighbor.c index 715891b8..8d355ab2 100644 --- a/src/vnet/ip/ip6_neighbor.c +++ b/src/vnet/ip/ip6_neighbor.c @@ -235,6 +235,9 @@ format_ip6_neighbor_ip6_entry (u8 * s, va_list * va) if (n->flags & IP6_NEIGHBOR_FLAG_STATIC) flags = 
format (flags, "S"); + if (n->flags & IP6_NEIGHBOR_FLAG_NO_FIB_ENTRY) + flags = format (flags, "N"); + si = vnet_get_sw_interface (vnm, n->key.sw_if_index); s = format (s, "%=12U%=20U%=6s%=20U%=40U", format_vlib_cpu_time, vm, n->cpu_time_last_updated, @@ -317,6 +320,7 @@ typedef struct { u8 is_add; u8 is_static; + u8 is_no_fib_entry; u8 link_layer_address[6]; u32 sw_if_index; ip6_address_t addr; @@ -328,7 +332,8 @@ static void ip6_neighbor_set_unset_rpc_callback static void set_unset_ip6_neighbor_rpc (vlib_main_t * vm, u32 sw_if_index, - ip6_address_t * a, u8 * link_layer_address, int is_add, int is_static) + ip6_address_t * a, u8 * link_layer_address, int is_add, int is_static, + int is_no_fib_entry) { ip6_neighbor_set_unset_rpc_args_t args; void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length); @@ -336,6 +341,7 @@ static void set_unset_ip6_neighbor_rpc args.sw_if_index = sw_if_index; args.is_add = is_add; args.is_static = is_static; + args.is_no_fib_entry = is_no_fib_entry; clib_memcpy (&args.addr, a, sizeof (*a)); if (NULL != link_layer_address) clib_memcpy (args.link_layer_address, link_layer_address, 6); @@ -565,7 +571,7 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, ip6_address_t * a, u8 * link_layer_address, uword n_bytes_link_layer_address, - int is_static) + int is_static, int is_no_fib_entry) { ip6_neighbor_main_t *nm = &ip6_neighbor_main; ip6_neighbor_key_t k; @@ -578,7 +584,8 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, if (os_get_cpu_number ()) { set_unset_ip6_neighbor_rpc (vm, sw_if_index, a, link_layer_address, - 1 /* set new neighbor */ , is_static); + 1 /* set new neighbor */ , is_static, + is_no_fib_entry); return 0; } @@ -598,16 +605,6 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, if (make_new_nd_cache_entry) { - fib_prefix_t pfx = { - .fp_len = 128, - .fp_proto = FIB_PROTOCOL_IP6, - .fp_addr = { - .ip6 = k.ip6_address, - } - , - }; - u32 fib_index; - pool_get (nm->neighbor_pool, n); mhash_set (&nm->neighbor_index_by_key, &k, n - nm->neighbor_pool, /* old value */ 0); @@ -619,9 +616,25 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, /* * create the adj-fib. the entry in the FIB table for and to the peer. 
*/ - fib_index = ip6_main.fib_index_by_sw_if_index[n->key.sw_if_index]; - n->fib_entry_index = fib_table_entry_update_one_path (fib_index, &pfx, FIB_SOURCE_ADJ, FIB_ENTRY_FLAG_NONE, FIB_PROTOCOL_IP6, &pfx.fp_addr, n->key.sw_if_index, ~0, 1, NULL, // no label stack - FIB_ROUTE_PATH_FLAG_NONE); + if (!is_no_fib_entry) + { + fib_prefix_t pfx = { + .fp_len = 128, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr.ip6 = k.ip6_address, + }; + u32 fib_index; + + fib_index = ip6_main.fib_index_by_sw_if_index[n->key.sw_if_index]; + n->fib_entry_index = + fib_table_entry_update_one_path (fib_index, &pfx, + FIB_SOURCE_ADJ, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP6, &pfx.fp_addr, + n->key.sw_if_index, ~0, 1, NULL, + FIB_ROUTE_PATH_FLAG_NONE); + n->flags |= IP6_NEIGHBOR_FLAG_NO_FIB_ENTRY; + } } else { @@ -712,7 +725,7 @@ vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm, if (os_get_cpu_number ()) { set_unset_ip6_neighbor_rpc (vm, sw_if_index, a, link_layer_address, - 0 /* unset */ , 0); + 0 /* unset */ , 0, 0); return 0; } @@ -746,7 +759,8 @@ static void ip6_neighbor_set_unset_rpc_callback vlib_main_t *vm = vlib_get_main (); if (a->is_add) vnet_set_ip6_ethernet_neighbor (vm, a->sw_if_index, &a->addr, - a->link_layer_address, 6, a->is_static); + a->link_layer_address, 6, a->is_static, + a->is_no_fib_entry); else vnet_unset_ip6_ethernet_neighbor (vm, a->sw_if_index, &a->addr, a->link_layer_address, 6); @@ -850,6 +864,7 @@ set_ip6_neighbor (vlib_main_t * vm, int addr_valid = 0; int is_del = 0; int is_static = 0; + int is_no_fib_entry = 0; u32 sw_if_index; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) @@ -865,6 +880,8 @@ set_ip6_neighbor (vlib_main_t * vm, is_del = 1; else if (unformat (input, "static")) is_static = 1; + else if (unformat (input, "no-fib-entry")) + is_no_fib_entry = 1; else break; } @@ -875,7 +892,7 @@ set_ip6_neighbor (vlib_main_t * vm, if (!is_del) vnet_set_ip6_ethernet_neighbor (vm, sw_if_index, &addr, mac_address, sizeof (mac_address), - is_static); + is_static, is_no_fib_entry); else vnet_unset_ip6_ethernet_neighbor (vm, sw_if_index, &addr, mac_address, sizeof (mac_address)); @@ -1023,7 +1040,7 @@ icmp6_neighbor_solicitation_or_advertisement (vlib_main_t * vm, &h0->target_address, o0->ethernet_address, sizeof (o0->ethernet_address), - 0); + 0, 0); } if (is_solicitation && error0 == ICMP6_ERROR_NONE) @@ -1338,7 +1355,7 @@ icmp6_router_solicitation (vlib_main_t * vm, &ip0->src_address, o0->ethernet_address, sizeof (o0->ethernet_address), - 0); + 0, 0); } /* default is to drop */ diff --git a/src/vnet/ip/ip6_neighbor.h b/src/vnet/ip/ip6_neighbor.h index 3d256316..ef1e84c2 100644 --- a/src/vnet/ip/ip6_neighbor.h +++ b/src/vnet/ip/ip6_neighbor.h @@ -28,13 +28,18 @@ typedef struct u32 pad; } ip6_neighbor_key_t; +typedef enum ip6_neighbor_flags_t_ +{ + IP6_NEIGHBOR_FLAG_STATIC = (1 << 0), + IP6_NEIGHBOR_FLAG_DYNAMIC = (1 << 1), + IP6_NEIGHBOR_FLAG_NO_FIB_ENTRY = (1 << 2), +} __attribute__ ((packed)) ip6_neighbor_flags_t; + typedef struct { ip6_neighbor_key_t key; u8 link_layer_address[8]; - u16 flags; -#define IP6_NEIGHBOR_FLAG_STATIC (1 << 0) -#define IP6_NEIGHBOR_FLAG_DYNAMIC (2 << 0) + ip6_neighbor_flags_t flags; u64 cpu_time_last_updated; fib_node_index_t fib_entry_index; } ip6_neighbor_t; @@ -69,7 +74,8 @@ extern int vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, ip6_address_t * a, u8 * link_layer_address, uword n_bytes_link_layer_address, - int is_static); + int is_static, + int is_no_fib_entry); extern int vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm, u32 
sw_if_index, diff --git a/src/vnet/ip/ip_api.c b/src/vnet/ip/ip_api.c index 0ca058ab..e3a1fee8 100644 --- a/src/vnet/ip/ip_api.c +++ b/src/vnet/ip/ip_api.c @@ -655,7 +655,8 @@ vl_api_ip_neighbor_add_del_t_handler (vl_api_ip_neighbor_add_del_t * mp, rv = vnet_set_ip6_ethernet_neighbor (vm, ntohl (mp->sw_if_index), (ip6_address_t *) (mp->dst_address), - mp->mac_address, sizeof (mp->mac_address), mp->is_static); + mp->mac_address, sizeof (mp->mac_address), mp->is_static, + mp->is_no_adj_fib); else rv = vnet_unset_ip6_ethernet_neighbor (vm, ntohl (mp->sw_if_index), @@ -671,7 +672,8 @@ vl_api_ip_neighbor_add_del_t_handler (vl_api_ip_neighbor_add_del_t * mp, if (mp->is_add) rv = vnet_arp_set_ip4_over_ethernet (vnm, ntohl (mp->sw_if_index), - &a, mp->is_static); + &a, mp->is_static, + mp->is_no_adj_fib); else rv = vnet_arp_unset_ip4_over_ethernet (vnm, ntohl (mp->sw_if_index), &a); diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c index 8250279e..648ac5f3 100644 --- a/src/vpp/api/custom_dump.c +++ b/src/vpp/api/custom_dump.c @@ -593,6 +593,9 @@ static void *vl_api_ip_neighbor_add_del_t_print if (mp->is_static) s = format (s, "is_static "); + if (mp->is_no_adj_fib) + s = format (s, "is_no_fib_entry "); + if (memcmp (mp->mac_address, null_mac, 6)) s = format (s, "mac %U ", format_ethernet_address, mp->mac_address); diff --git a/test/test_ip6.py b/test/test_ip6.py index b95809b1..e57e034d 100644 --- a/test/test_ip6.py +++ b/test/test_ip6.py @@ -6,8 +6,8 @@ from socket import AF_INET6 from framework import VppTestCase, VppTestRunner from vpp_sub_interface import VppSubInterface, VppDot1QSubint from vpp_pg_interface import is_ipv6_misc -from vpp_neighbor import find_nbr -from vpp_ip_route import VppIpRoute, VppRoutePath +from vpp_ip_route import VppIpRoute, VppRoutePath, find_route +from vpp_neighbor import find_nbr, VppNeighbor from scapy.packet import Raw from scapy.layers.l2 import Ether, Dot1Q @@ -351,6 +351,30 @@ class TestIPv6(TestIPv6ND): self.send_and_assert_no_replies(self.pg0, pkts, "No response to NS for unknown target") + # + # A neighbor entry that has no associated FIB-entry + # + self.pg0.generate_remote_hosts(4) + nd_entry = VppNeighbor(self, + self.pg0.sw_if_index, + self.pg0.remote_hosts[2].mac, + self.pg0.remote_hosts[2].ip6, + af=AF_INET6, + is_no_fib_entry=1) + nd_entry.add_vpp_config() + + # + # check we have the neighbor, but no route + # + self.assertTrue(find_nbr(self, + self.pg0.sw_if_index, + self.pg0._remote_hosts[2].ip6, + inet=AF_INET6)) + self.assertFalse(find_route(self, + self.pg0._remote_hosts[2].ip6, + 128, + inet=AF_INET6)) + def validate_ra(self, intf, rx, dst_ip=None, mtu=9000, pi_opt=None): if not dst_ip: dst_ip = intf.remote_ip6 diff --git a/test/test_neighbor.py b/test/test_neighbor.py index 2ce0a635..1dfae241 100644 --- a/test/test_neighbor.py +++ b/test/test_neighbor.py @@ -5,7 +5,7 @@ from socket import AF_INET, AF_INET6, inet_pton from framework import VppTestCase, VppTestRunner from vpp_neighbor import VppNeighbor, find_nbr -from vpp_ip_route import VppIpRoute, VppRoutePath +from vpp_ip_route import VppIpRoute, VppRoutePath, find_route from scapy.packet import Raw from scapy.layers.l2 import Ether, ARP @@ -115,7 +115,7 @@ class ARPTestCase(VppTestCase): # # Generate some hosts on the LAN # - self.pg1.generate_remote_hosts(4) + self.pg1.generate_remote_hosts(5) # # Send IP traffic to one of these unresolved hosts. 
@@ -301,6 +301,25 @@ class ARPTestCase(VppTestCase): "ARP req for non-local source") # + # A neighbor entry that has no associated FIB-entry + # + arp_no_fib = VppNeighbor(self, + self.pg1.sw_if_index, + self.pg1.remote_hosts[4].mac, + self.pg1.remote_hosts[4].ip4, + is_no_fib_entry=1) + arp_no_fib.add_vpp_config() + + # + # check we have the neighbor, but no route + # + self.assertTrue(find_nbr(self, + self.pg1.sw_if_index, + self.pg1._remote_hosts[4].ip4)) + self.assertFalse(find_route(self, + self.pg1._remote_hosts[4].ip4, + 32)) + # # cleanup # dyn_arp.remove_vpp_config() diff --git a/test/vpp_ip_route.py b/test/vpp_ip_route.py index 247263a3..7a62b230 100644 --- a/test/vpp_ip_route.py +++ b/test/vpp_ip_route.py @@ -4,14 +4,31 @@ object abstractions for representing IP routes in VPP """ -import socket from vpp_object import * +from socket import inet_pton, inet_ntop, AF_INET, AF_INET6 # from vnet/vnet/mpls/mpls_types.h MPLS_IETF_MAX_LABEL = 0xfffff MPLS_LABEL_INVALID = MPLS_IETF_MAX_LABEL + 1 +def find_route(test, ip_addr, len, table_id=0, inet=AF_INET): + if inet == AF_INET: + s = 4 + routes = test.vapi.ip_fib_dump() + else: + s = 16 + routes = test.vapi.ip6_fib_dump() + + route_addr = inet_pton(inet, ip_addr) + for e in routes: + if route_addr == e.address[:s] \ + and len == e.address_length \ + and table_id == e.table_id: + return True + return False + + class VppRoutePath(object): def __init__( @@ -27,9 +44,9 @@ class VppRoutePath(object): self.nh_via_label = nh_via_label self.nh_labels = labels if is_ip6: - self.nh_addr = socket.inet_pton(socket.AF_INET6, nh_addr) + self.nh_addr = inet_pton(AF_INET6, nh_addr) else: - self.nh_addr = socket.inet_pton(socket.AF_INET, nh_addr) + self.nh_addr = inet_pton(AF_INET, nh_addr) class VppMRoutePath(VppRoutePath): @@ -56,17 +73,18 @@ class VppIpRoute(VppObject): self.is_local = is_local self.is_unreach = is_unreach self.is_prohibit = is_prohibit + self.dest_addr_p = dest_addr if is_ip6: - self.dest_addr = socket.inet_pton(socket.AF_INET6, dest_addr) + self.dest_addr = inet_pton(AF_INET6, dest_addr) else: - self.dest_addr = socket.inet_pton(socket.AF_INET, dest_addr) + self.dest_addr = inet_pton(AF_INET, dest_addr) def add_vpp_config(self): if self.is_local or self.is_unreach or self.is_prohibit: self._test.vapi.ip_add_del_route( self.dest_addr, self.dest_addr_len, - socket.inet_pton(socket.AF_INET6, "::"), + inet_pton(AF_INET6, "::"), 0xffffffff, is_local=self.is_local, is_unreach=self.is_unreach, @@ -93,7 +111,7 @@ class VppIpRoute(VppObject): self._test.vapi.ip_add_del_route( self.dest_addr, self.dest_addr_len, - socket.inet_pton(socket.AF_INET6, "::"), + inet_pton(AF_INET6, "::"), 0xffffffff, is_local=self.is_local, is_unreach=self.is_unreach, @@ -111,28 +129,20 @@ class VppIpRoute(VppObject): is_add=0) def query_vpp_config(self): - dump = self._test.vapi.ip_fib_dump() - for e in dump: - if self.dest_addr == e.address \ - and self.dest_addr_len == e.address_length \ - and self.table_id == e.table_id: - return True - return False + return find_route(self._test, + self.dest_addr_p, + self.dest_addr_len, + self.table_id, + inet=AF_INET6 if self.is_ip6 == 1 else AF_INET) def __str__(self): return self.object_id() def object_id(self): - if self.is_ip6: - return ("%d:%s/%d" - % (self.table_id, - socket.inet_ntop(socket.AF_INET6, self.dest_addr), - self.dest_addr_len)) - else: - return ("%d:%s/%d" - % (self.table_id, - socket.inet_ntop(socket.AF_INET, self.dest_addr), - self.dest_addr_len)) + return ("%d:%s/%d" + % (self.table_id, + 
self.dest_addr_p, + self.dest_addr_len)) class VppIpMRoute(VppObject): @@ -150,11 +160,11 @@ class VppIpMRoute(VppObject): self.is_ip6 = is_ip6 if is_ip6: - self.grp_addr = socket.inet_pton(socket.AF_INET6, grp_addr) - self.src_addr = socket.inet_pton(socket.AF_INET6, src_addr) + self.grp_addr = inet_pton(AF_INET6, grp_addr) + self.src_addr = inet_pton(AF_INET6, src_addr) else: - self.grp_addr = socket.inet_pton(socket.AF_INET, grp_addr) - self.src_addr = socket.inet_pton(socket.AF_INET, src_addr) + self.grp_addr = inet_pton(AF_INET, grp_addr) + self.src_addr = inet_pton(AF_INET, src_addr) def add_vpp_config(self): for path in self.paths: @@ -221,14 +231,14 @@ class VppIpMRoute(VppObject): if self.is_ip6: return ("%d:(%s,%s/%d)" % (self.table_id, - socket.inet_ntop(socket.AF_INET6, self.src_addr), - socket.inet_ntop(socket.AF_INET6, self.grp_addr), + inet_ntop(AF_INET6, self.src_addr), + inet_ntop(AF_INET6, self.grp_addr), self.grp_addr_len)) else: return ("%d:(%s,%s/%d)" % (self.table_id, - socket.inet_ntop(socket.AF_INET, self.src_addr), - socket.inet_ntop(socket.AF_INET, self.grp_addr), + inet_ntop(AF_INET, self.src_addr), + inet_ntop(AF_INET, self.grp_addr), self.grp_addr_len)) @@ -261,7 +271,7 @@ class VppMplsIpBind(VppObject): def __init__(self, test, local_label, dest_addr, dest_addr_len, table_id=0, ip_table_id=0): self._test = test - self.dest_addr = socket.inet_pton(socket.AF_INET, dest_addr) + self.dest_addr = inet_pton(AF_INET, dest_addr) self.dest_addr_len = dest_addr_len self.local_label = local_label self.table_id = table_id @@ -298,7 +308,7 @@ class VppMplsIpBind(VppObject): % (self.table_id, self.local_label, self.ip_table_id, - socket.inet_ntop(socket.AF_INET, self.dest_addr), + inet_ntop(AF_INET, self.dest_addr), self.dest_addr_len)) diff --git a/test/vpp_neighbor.py b/test/vpp_neighbor.py index 5c2e3479..6968b5f6 100644 --- a/test/vpp_neighbor.py +++ b/test/vpp_neighbor.py @@ -31,12 +31,13 @@ class VppNeighbor(VppObject): """ def __init__(self, test, sw_if_index, mac_addr, nbr_addr, - af=AF_INET, is_static=False): + af=AF_INET, is_static=False, is_no_fib_entry=False): self._test = test self.sw_if_index = sw_if_index self.mac_addr = mactobinary(mac_addr) self.af = af self.is_static = is_static + self.is_no_fib_entry = is_no_fib_entry self.nbr_addr = inet_pton(af, nbr_addr) def add_vpp_config(self): @@ -46,7 +47,8 @@ class VppNeighbor(VppObject): self.nbr_addr, is_add=1, is_ipv6=1 if AF_INET6 == self.af else 0, - is_static=self.is_static) + is_static=self.is_static, + is_no_adj_fib=self.is_no_fib_entry) self._test.registry.register(self, self._test.logger) def remove_vpp_config(self): diff --git a/test/vpp_papi_provider.py b/test/vpp_papi_provider.py index 92070424..2d683dc2 100644 --- a/test/vpp_papi_provider.py +++ b/test/vpp_papi_provider.py @@ -663,6 +663,7 @@ class VppPapiProvider(object): is_add=1, is_ipv6=0, is_static=0, + is_no_adj_fib=0, ): """ Add neighbor MAC to IPv4 or IPv6 address. 
@@ -672,6 +673,7 @@ class VppPapiProvider(object): :param is_add: (Default value = 1) :param is_ipv6: (Default value = 0) :param is_static: (Default value = 0) + :param is_no_adj_fib: (Default value = 0) """ return self.api( @@ -680,6 +682,7 @@ class VppPapiProvider(object): 'is_add': is_add, 'is_ipv6': is_ipv6, 'is_static': is_static, + 'is_no_adj_fib': is_no_adj_fib, 'mac_address': mac_address, 'dst_address': dst_address } -- cgit 1.2.3-korg From 80f54e20270ed0628ee725e3e3c515731a0188f2 Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Wed, 8 Mar 2017 19:08:56 -0500 Subject: vlib_mains == 0 special cases be gone Clean up spurious binary API client link dependency on libvlib.so, which managed to hide behind vlib_mains == 0 checks reached by VLIB_xxx_FUNCTION macros. Change-Id: I5df1f8ab07dca1944250e643ccf06e60a8462325 Signed-off-by: Dave Barach --- src/plugins/dpdk/ipsec/ipsec.c | 8 +- src/vlib-api.am | 4 +- src/vlib/buffer.c | 27 +- src/vlib/global_funcs.h | 2 +- src/vlib/node_cli.c | 28 +- src/vlib/node_funcs.h | 4 +- src/vlib/threads.c | 16 +- src/vlib/threads.h | 43 ++- src/vlibapi/api.h | 4 +- src/vlibapi/api_shared.c | 530 ++--------------------------------- src/vlibapi/node_serialize.c | 15 +- src/vlibmemory/memory_vlib.c | 471 +++++++++++++++++++++++++++++++ src/vnet/devices/virtio/vhost-user.c | 9 +- src/vpp-api-test.am | 2 - src/vpp/api/api.c | 1 - src/vpp/api/gmon.c | 9 +- 16 files changed, 575 insertions(+), 598 deletions(-) (limited to 'src/vpp/api') diff --git a/src/plugins/dpdk/ipsec/ipsec.c b/src/plugins/dpdk/ipsec/ipsec.c index 16bec20a..b0aaaaec 100644 --- a/src/plugins/dpdk/ipsec/ipsec.c +++ b/src/plugins/dpdk/ipsec/ipsec.c @@ -380,13 +380,9 @@ dpdk_ipsec_process (vlib_main_t * vm, vlib_node_runtime_t * rt, im->cb.check_support_cb = dpdk_ipsec_check_support; im->cb.add_del_sa_sess_cb = add_del_sa_sess; - if (vec_len (vlib_mains) == 0) - vlib_node_set_state (&vlib_global_main, dpdk_crypto_input_node.index, + for (i = 1; i < tm->n_vlib_mains; i++) + vlib_node_set_state (vlib_mains[i], dpdk_crypto_input_node.index, VLIB_NODE_STATE_POLLING); - else - for (i = 1; i < tm->n_vlib_mains; i++) - vlib_node_set_state (vlib_mains[i], dpdk_crypto_input_node.index, - VLIB_NODE_STATE_POLLING); /* TODO cryptodev counters */ diff --git a/src/vlib-api.am b/src/vlib-api.am index c05929b1..4e1dae99 100644 --- a/src/vlib-api.am +++ b/src/vlib-api.am @@ -14,7 +14,7 @@ lib_LTLIBRARIES += libvlibmemory.la libvlibapi.la libvlibmemoryclient.la \ libvlibsocket.la -libvlibmemory_la_DEPENDENCIES = libvppinfra.la libsvm.la libvlib.la +libvlibmemory_la_DEPENDENCIES = libvppinfra.la libsvm.la libvlibmemory_la_LIBADD = $(libvlibmemory_la_DEPENDENCIES) -lpthread libvlibmemory_la_SOURCES = \ vlibmemory/api.h \ @@ -26,7 +26,7 @@ libvlibmemory_la_SOURCES = \ vlibmemory/unix_shared_memory_queue.c \ vlibmemory/unix_shared_memory_queue.h -libvlibapi_la_DEPENDENCIES = libvppinfra.la libvlib.la libvlibmemory.la +libvlibapi_la_DEPENDENCIES = libvppinfra.la libvlibapi_la_LIBADD = $(libvlibapi_la_DEPENDENCIES) libvlibapi_la_SOURCES = \ vlibapi/api.h \ diff --git a/src/vlib/buffer.c b/src/vlib/buffer.c index 9f26bec7..6ba82584 100644 --- a/src/vlib/buffer.c +++ b/src/vlib/buffer.c @@ -261,7 +261,28 @@ done: return result; } -vlib_main_t **vlib_mains; +/* + * Hand-craft a static vector w/ length 1, so vec_len(vlib_mains) =1 + * and vlib_mains[0] = &vlib_global_main from the beginning of time. + * + * The only place which should ever expand vlib_mains is start_workers() + * in threads.c. 
It knows about the bootstrap vector. + */ +/* *INDENT-OFF* */ +static struct +{ + vec_header_t h; + vlib_main_t *vm; +} __attribute__ ((packed)) __bootstrap_vlib_main_vector + __attribute__ ((aligned (CLIB_CACHE_LINE_BYTES))) = +{ + .h.len = 1, + .vm = &vlib_global_main, +}; +/* *INDENT-ON* */ + +vlib_main_t **vlib_mains = &__bootstrap_vlib_main_vector.vm; + /* When dubugging validate that given buffers are either known allocated or known free. */ @@ -280,7 +301,7 @@ vlib_buffer_validate_alloc_free (vlib_main_t * vm, ASSERT (os_get_cpu_number () == 0); /* smp disaster check */ - if (vlib_mains) + if (vec_len (vlib_mains) > 1) ASSERT (vm == vlib_mains[0]); is_free = expected_state == VLIB_BUFFER_KNOWN_ALLOCATED; @@ -956,7 +977,7 @@ show_buffers (vlib_main_t * vm, do { - curr_vm = vec_len (vlib_mains) ? vlib_mains[vm_index] : vm; + curr_vm = vlib_mains[vm_index]; bm = curr_vm->buffer_main; /* *INDENT-OFF* */ diff --git a/src/vlib/global_funcs.h b/src/vlib/global_funcs.h index bbdbdef5..f51ec381 100644 --- a/src/vlib/global_funcs.h +++ b/src/vlib/global_funcs.h @@ -23,7 +23,7 @@ always_inline vlib_main_t * vlib_get_main (void) { vlib_main_t *vm; - vm = vlib_mains ? vlib_mains[os_get_cpu_number ()] : &vlib_global_main; + vm = vlib_mains[os_get_cpu_number ()]; ASSERT (vm); return vm; } diff --git a/src/vlib/node_cli.c b/src/vlib/node_cli.c index 05d0f0b5..62ab2e64 100644 --- a/src/vlib/node_cli.c +++ b/src/vlib/node_cli.c @@ -248,16 +248,11 @@ show_node_runtime (vlib_main_t * vm, if (unformat (input, "max") || unformat (input, "m")) max = 1; - if (vec_len (vlib_mains) == 0) - vec_add1 (stat_vms, vm); - else + for (i = 0; i < vec_len (vlib_mains); i++) { - for (i = 0; i < vec_len (vlib_mains); i++) - { - stat_vm = vlib_mains[i]; - if (stat_vm) - vec_add1 (stat_vms, stat_vm); - } + stat_vm = vlib_mains[i]; + if (stat_vm) + vec_add1 (stat_vms, stat_vm); } /* @@ -331,7 +326,7 @@ show_node_runtime (vlib_main_t * vm, } } - if (vec_len (vlib_mains)) + if (vec_len (vlib_mains) > 1) { vlib_worker_thread_t *w = vlib_worker_threads + j; if (j > 0) @@ -404,16 +399,11 @@ clear_node_runtime (vlib_main_t * vm, vlib_main_t **stat_vms = 0, *stat_vm; vlib_node_runtime_t *r; - if (vec_len (vlib_mains) == 0) - vec_add1 (stat_vms, vm); - else + for (i = 0; i < vec_len (vlib_mains); i++) { - for (i = 0; i < vec_len (vlib_mains); i++) - { - stat_vm = vlib_mains[i]; - if (stat_vm) - vec_add1 (stat_vms, stat_vm); - } + stat_vm = vlib_mains[i]; + if (stat_vm) + vec_add1 (stat_vms, stat_vm); } vlib_worker_thread_barrier_sync (vm); diff --git a/src/vlib/node_funcs.h b/src/vlib/node_funcs.h index f49a8d6f..8ccfc438 100644 --- a/src/vlib/node_funcs.h +++ b/src/vlib/node_funcs.h @@ -201,7 +201,7 @@ vlib_get_frame_no_check (vlib_main_t * vm, uword frame_index) vlib_frame_t *f; u32 cpu_index = frame_index & VLIB_CPU_MASK; u32 offset = frame_index & VLIB_OFFSET_MASK; - vm = vlib_mains ? vlib_mains[cpu_index] : vm; + vm = vlib_mains[cpu_index]; f = vm->heap_base + offset; return f; } @@ -213,7 +213,7 @@ vlib_frame_index_no_check (vlib_main_t * vm, vlib_frame_t * f) ASSERT (((uword) f & VLIB_CPU_MASK) == 0); - vm = vlib_mains ? 
vlib_mains[f->cpu_index] : vm; + vm = vlib_mains[f->cpu_index]; i = ((u8 *) f - (u8 *) vm->heap_base); return i | f->cpu_index; diff --git a/src/vlib/threads.c b/src/vlib/threads.c index e3ea3c9c..4676be97 100644 --- a/src/vlib/threads.c +++ b/src/vlib/threads.c @@ -570,9 +570,13 @@ start_workers (vlib_main_t * vm) if (n_vlib_mains > 1) { - vec_validate (vlib_mains, tm->n_vlib_mains - 1); + /* Replace hand-crafted length-1 vector with a real vector */ + vlib_mains = 0; + + vec_validate_aligned (vlib_mains, tm->n_vlib_mains - 1, + CLIB_CACHE_LINE_BYTES); _vec_len (vlib_mains) = 0; - vec_add1 (vlib_mains, vm); + vec_add1_aligned (vlib_mains, vm, CLIB_CACHE_LINE_BYTES); vlib_worker_threads->wait_at_barrier = clib_mem_alloc_aligned (sizeof (u32), CLIB_CACHE_LINE_BYTES); @@ -685,7 +689,7 @@ start_workers (vlib_main_t * vm) /* Packet trace buffers are guaranteed to be empty, nothing to do here */ clib_mem_set_heap (oldheap); - vec_add1 (vlib_mains, vm_clone); + vec_add1_aligned (vlib_mains, vm_clone, CLIB_CACHE_LINE_BYTES); vm_clone->error_main.counters = vec_dup (vlib_mains[0]->error_main.counters); @@ -805,7 +809,7 @@ vlib_worker_thread_node_runtime_update (void) ASSERT (os_get_cpu_number () == 0); - if (vec_len (vlib_mains) == 0) + if (vec_len (vlib_mains) == 1) return; vm = vlib_mains[0]; @@ -1148,7 +1152,7 @@ vlib_worker_thread_barrier_sync (vlib_main_t * vm) f64 deadline; u32 count; - if (!vlib_mains) + if (vec_len (vlib_mains) < 2) return; count = vec_len (vlib_mains) - 1; @@ -1179,7 +1183,7 @@ vlib_worker_thread_barrier_release (vlib_main_t * vm) { f64 deadline; - if (!vlib_mains) + if (vec_len (vlib_mains) < 2) return; if (--vlib_worker_threads[0].recursion_level > 0) diff --git a/src/vlib/threads.h b/src/vlib/threads.h index 75a5a281..a032311c 100644 --- a/src/vlib/threads.h +++ b/src/vlib/threads.h @@ -222,30 +222,25 @@ vlib_worker_thread_barrier_check (void) } } -#define foreach_vlib_main(body) \ -do { \ - vlib_main_t ** __vlib_mains = 0, *this_vlib_main; \ - int ii; \ - \ - if (vec_len (vlib_mains) == 0) \ - vec_add1 (__vlib_mains, &vlib_global_main); \ - else \ - { \ - for (ii = 0; ii < vec_len (vlib_mains); ii++) \ - { \ - this_vlib_main = vlib_mains[ii]; \ - if (this_vlib_main) \ - vec_add1 (__vlib_mains, this_vlib_main); \ - } \ - } \ - \ - for (ii = 0; ii < vec_len (__vlib_mains); ii++) \ - { \ - this_vlib_main = __vlib_mains[ii]; \ - /* body uses this_vlib_main... */ \ - (body); \ - } \ - vec_free (__vlib_mains); \ +#define foreach_vlib_main(body) \ +do { \ + vlib_main_t ** __vlib_mains = 0, *this_vlib_main; \ + int ii; \ + \ + for (ii = 0; ii < vec_len (vlib_mains); ii++) \ + { \ + this_vlib_main = vlib_mains[ii]; \ + if (this_vlib_main) \ + vec_add1 (__vlib_mains, this_vlib_main); \ + } \ + \ + for (ii = 0; ii < vec_len (__vlib_mains); ii++) \ + { \ + this_vlib_main = __vlib_mains[ii]; \ + /* body uses this_vlib_main... 
*/ \ + (body); \ + } \ + vec_free (__vlib_mains); \ } while (0); #define foreach_sched_policy \ diff --git a/src/vlibapi/api.h b/src/vlibapi/api.h index 2cbeb63c..87a56121 100644 --- a/src/vlibapi/api.h +++ b/src/vlibapi/api.h @@ -252,11 +252,13 @@ void vl_msg_api_queue_handler (unix_shared_memory_queue_t * q); vl_api_trace_t *vl_msg_api_trace_get (api_main_t * am, vl_api_trace_which_t which); +void vl_msg_api_barrier_sync (void) __attribute__ ((weak)); +void vl_msg_api_barrier_release (void) __attribute__ ((weak)); void vl_msg_api_free (void *); void vl_noop_handler (void *mp); -clib_error_t *vl_api_init (vlib_main_t * vm); void vl_msg_api_increment_missing_client_counter (void); void vl_msg_api_post_mortem_dump (void); +void vl_msg_api_post_mortem_dump_enable_disable (int enable); void vl_msg_api_register_pd_handler (void *handler, u16 msg_id_host_byte_order); int vl_msg_api_pd_handler (void *mp, int rv); diff --git a/src/vlibapi/api_shared.c b/src/vlibapi/api_shared.c index 69ba10c1..6774e3dd 100644 --- a/src/vlibapi/api_shared.c +++ b/src/vlibapi/api_shared.c @@ -23,11 +23,6 @@ #include #include #include -#include -#include -#include -#include -#include #include #include #include @@ -36,19 +31,14 @@ #include #include -api_main_t api_main; - -void vl_msg_api_barrier_sync (void) __attribute__ ((weak)); -void -vl_msg_api_barrier_sync (void) -{ -} - -void vl_msg_api_barrier_release (void) __attribute__ ((weak)); -void -vl_msg_api_barrier_release (void) -{ -} +/* *INDENT-OFF* */ +api_main_t api_main = + { + .region_name = "/unset", + .api_uid = -1, + .api_gid = -1, + }; +/* *INDENT-ON* */ void vl_msg_api_increment_missing_client_counter (void) @@ -57,14 +47,6 @@ vl_msg_api_increment_missing_client_counter (void) am->missing_clients++; } -typedef enum -{ - DUMP, - CUSTOM_DUMP, - REPLAY, - INITIALIZERS, -} vl_api_replay_t; - int vl_msg_api_rx_trace_enabled (api_main_t * am) { @@ -397,6 +379,16 @@ vl_msg_api_trace_configure (api_main_t * am, vl_api_trace_which_t which, return 0; } +void +vl_msg_api_barrier_sync (void) +{ +} + +void +vl_msg_api_barrier_release (void) +{ +} + always_inline void msg_handler_internal (api_main_t * am, void *the_msg, int trace_it, int do_it, int free_it) @@ -748,495 +740,15 @@ vl_noop_handler (void *mp) { } -clib_error_t * -vl_api_init (vlib_main_t * vm) -{ - static u8 once; - api_main_t *am = &api_main; - - if (once) - return 0; - - once = 1; - - am->region_name = "/unset"; - /* - * Eventually passed to fchown, -1 => "current user" - * instead of 0 => "root". A very fine disctinction at best. 
- */ - if (am->api_uid == 0) - am->api_uid = -1; - if (am->api_gid == 0) - am->api_gid = -1; - - return (0); -} - -void vl_msg_api_custom_dump_configure (api_main_t * am) - __attribute__ ((weak)); -void -vl_msg_api_custom_dump_configure (api_main_t * am) -{ -} - -VLIB_INIT_FUNCTION (vl_api_init); - -static void -vl_msg_api_process_file (vlib_main_t * vm, u8 * filename, - u32 first_index, u32 last_index, - vl_api_replay_t which) -{ - vl_api_trace_file_header_t *hp; - int i, fd; - struct stat statb; - size_t file_size; - u8 *msg; - u8 endian_swap_needed = 0; - api_main_t *am = &api_main; - u8 *tmpbuf = 0; - u32 nitems; - void **saved_print_handlers = 0; - - fd = open ((char *) filename, O_RDONLY); - - if (fd < 0) - { - vlib_cli_output (vm, "Couldn't open %s\n", filename); - return; - } - - if (fstat (fd, &statb) < 0) - { - vlib_cli_output (vm, "Couldn't stat %s\n", filename); - close (fd); - return; - } - - if (!(statb.st_mode & S_IFREG) || (statb.st_size < sizeof (*hp))) - { - vlib_cli_output (vm, "File not plausible: %s\n", filename); - close (fd); - return; - } - - file_size = statb.st_size; - file_size = (file_size + 4095) & ~(4096); - - hp = mmap (0, file_size, PROT_READ, MAP_PRIVATE, fd, 0); - - if (hp == (vl_api_trace_file_header_t *) MAP_FAILED) - { - vlib_cli_output (vm, "mmap failed: %s\n", filename); - close (fd); - return; - } - close (fd); - - if ((clib_arch_is_little_endian && hp->endian == VL_API_BIG_ENDIAN) - || (clib_arch_is_big_endian && hp->endian == VL_API_LITTLE_ENDIAN)) - endian_swap_needed = 1; - - if (endian_swap_needed) - nitems = ntohl (hp->nitems); - else - nitems = hp->nitems; - - if (last_index == (u32) ~ 0) - { - last_index = nitems - 1; - } - - if (first_index >= nitems || last_index >= nitems) - { - vlib_cli_output (vm, "Range (%d, %d) outside file range (0, %d)\n", - first_index, last_index, nitems - 1); - munmap (hp, file_size); - return; - } - if (hp->wrapped) - vlib_cli_output (vm, - "Note: wrapped/incomplete trace, results may vary\n"); - - if (which == CUSTOM_DUMP) - { - saved_print_handlers = (void **) vec_dup (am->msg_print_handlers); - vl_msg_api_custom_dump_configure (am); - } - - - msg = (u8 *) (hp + 1); - - for (i = 0; i < first_index; i++) - { - trace_cfg_t *cfgp; - int size; - u16 msg_id; - - size = clib_host_to_net_u32 (*(u32 *) msg); - msg += sizeof (u32); - - if (clib_arch_is_little_endian) - msg_id = ntohs (*((u16 *) msg)); - else - msg_id = *((u16 *) msg); - - cfgp = am->api_trace_cfg + msg_id; - if (!cfgp) - { - vlib_cli_output (vm, "Ugh: msg id %d no trace config\n", msg_id); - munmap (hp, file_size); - return; - } - msg += size; - } - - if (which == REPLAY) - am->replay_in_progress = 1; - - for (; i <= last_index; i++) - { - trace_cfg_t *cfgp; - u16 *msg_idp; - u16 msg_id; - int size; - - if (which == DUMP) - vlib_cli_output (vm, "---------- trace %d -----------\n", i); - - size = clib_host_to_net_u32 (*(u32 *) msg); - msg += sizeof (u32); - - if (clib_arch_is_little_endian) - msg_id = ntohs (*((u16 *) msg)); - else - msg_id = *((u16 *) msg); - - cfgp = am->api_trace_cfg + msg_id; - if (!cfgp) - { - vlib_cli_output (vm, "Ugh: msg id %d no trace config\n", msg_id); - munmap (hp, file_size); - vec_free (tmpbuf); - am->replay_in_progress = 0; - return; - } - - /* Copy the buffer (from the read-only mmap'ed file) */ - vec_validate (tmpbuf, size - 1 + sizeof (uword)); - clib_memcpy (tmpbuf + sizeof (uword), msg, size); - memset (tmpbuf, 0xf, sizeof (uword)); - - /* - * Endian swap if needed. 
All msg data is supposed to be - * in network byte order. All msg handlers are supposed to - * know that. The generic message dumpers don't know that. - * One could fix apigen, I suppose. - */ - if ((which == DUMP && clib_arch_is_little_endian) || endian_swap_needed) - { - void (*endian_fp) (void *); - if (msg_id >= vec_len (am->msg_endian_handlers) - || (am->msg_endian_handlers[msg_id] == 0)) - { - vlib_cli_output (vm, "Ugh: msg id %d no endian swap\n", msg_id); - munmap (hp, file_size); - vec_free (tmpbuf); - am->replay_in_progress = 0; - return; - } - endian_fp = am->msg_endian_handlers[msg_id]; - (*endian_fp) (tmpbuf + sizeof (uword)); - } - - /* msg_id always in network byte order */ - if (clib_arch_is_little_endian) - { - msg_idp = (u16 *) (tmpbuf + sizeof (uword)); - *msg_idp = msg_id; - } - - switch (which) - { - case CUSTOM_DUMP: - case DUMP: - if (msg_id < vec_len (am->msg_print_handlers) && - am->msg_print_handlers[msg_id]) - { - u8 *(*print_fp) (void *, void *); - - print_fp = (void *) am->msg_print_handlers[msg_id]; - (*print_fp) (tmpbuf + sizeof (uword), vm); - } - else - { - vlib_cli_output (vm, "Skipping msg id %d: no print fcn\n", - msg_id); - break; - } - break; - - case INITIALIZERS: - if (msg_id < vec_len (am->msg_print_handlers) && - am->msg_print_handlers[msg_id]) - { - u8 *s; - int j; - u8 *(*print_fp) (void *, void *); - - print_fp = (void *) am->msg_print_handlers[msg_id]; - - vlib_cli_output (vm, "/*"); - - (*print_fp) (tmpbuf + sizeof (uword), vm); - vlib_cli_output (vm, "*/\n"); - - s = format (0, "static u8 * vl_api_%s_%d[%d] = {", - am->msg_names[msg_id], i, - am->api_trace_cfg[msg_id].size); - - for (j = 0; j < am->api_trace_cfg[msg_id].size; j++) - { - if ((j & 7) == 0) - s = format (s, "\n "); - s = format (s, "0x%02x,", tmpbuf[sizeof (uword) + j]); - } - s = format (s, "\n};\n%c", 0); - vlib_cli_output (vm, (char *) s); - vec_free (s); - } - break; - - case REPLAY: - if (msg_id < vec_len (am->msg_print_handlers) && - am->msg_print_handlers[msg_id] && cfgp->replay_enable) - { - void (*handler) (void *); - - handler = (void *) am->msg_handlers[msg_id]; - - if (!am->is_mp_safe[msg_id]) - vl_msg_api_barrier_sync (); - (*handler) (tmpbuf + sizeof (uword)); - if (!am->is_mp_safe[msg_id]) - vl_msg_api_barrier_release (); - } - else - { - if (cfgp->replay_enable) - vlib_cli_output (vm, "Skipping msg id %d: no handler\n", - msg_id); - break; - } - break; - } - - _vec_len (tmpbuf) = 0; - msg += size; - } - - if (saved_print_handlers) - { - clib_memcpy (am->msg_print_handlers, saved_print_handlers, - vec_len (am->msg_print_handlers) * sizeof (void *)); - vec_free (saved_print_handlers); - } - - munmap (hp, file_size); - vec_free (tmpbuf); - am->replay_in_progress = 0; -} - -u8 * -format_vl_msg_api_trace_status (u8 * s, va_list * args) -{ - api_main_t *am = va_arg (*args, api_main_t *); - vl_api_trace_which_t which = va_arg (*args, vl_api_trace_which_t); - vl_api_trace_t *tp; - char *trace_name; - - switch (which) - { - case VL_API_TRACE_TX: - tp = am->tx_trace; - trace_name = "TX trace"; - break; - - case VL_API_TRACE_RX: - tp = am->rx_trace; - trace_name = "RX trace"; - break; - - default: - abort (); - } - - if (tp == 0) - { - s = format (s, "%s: not yet configured.\n", trace_name); - return s; - } - - s = format (s, "%s: used %d of %d items, %s enabled, %s wrapped\n", - trace_name, vec_len (tp->traces), tp->nitems, - tp->enabled ? "is" : "is not", tp->wrapped ? 
"has" : "has not"); - return s; -} static u8 post_mortem_dump_enabled; -static clib_error_t * -api_trace_command_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - u32 nitems = 256 << 10; - api_main_t *am = &api_main; - vl_api_trace_which_t which = VL_API_TRACE_RX; - u8 *filename; - u32 first = 0; - u32 last = (u32) ~ 0; - FILE *fp; - int rv; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "on") || unformat (input, "enable")) - { - if (unformat (input, "nitems %d", &nitems)) - ; - vl_msg_api_trace_configure (am, which, nitems); - vl_msg_api_trace_onoff (am, which, 1 /* on */ ); - } - else if (unformat (input, "off")) - { - vl_msg_api_trace_onoff (am, which, 0); - } - else if (unformat (input, "save %s", &filename)) - { - u8 *chroot_filename; - if (strstr ((char *) filename, "..") - || index ((char *) filename, '/')) - { - vlib_cli_output (vm, "illegal characters in filename '%s'", - filename); - return 0; - } - - chroot_filename = format (0, "/tmp/%s%c", filename, 0); - - vec_free (filename); - - fp = fopen ((char *) chroot_filename, "w"); - if (fp == NULL) - { - vlib_cli_output (vm, "Couldn't create %s\n", chroot_filename); - return 0; - } - rv = vl_msg_api_trace_save (am, which, fp); - fclose (fp); - if (rv == -1) - vlib_cli_output (vm, "API Trace data not present\n"); - else if (rv == -2) - vlib_cli_output (vm, "File for writing is closed\n"); - else if (rv == -10) - vlib_cli_output (vm, "Error while writing header to file\n"); - else if (rv == -11) - vlib_cli_output (vm, "Error while writing trace to file\n"); - else if (rv == -12) - vlib_cli_output (vm, - "Error while writing end of buffer trace to file\n"); - else if (rv == -13) - vlib_cli_output (vm, - "Error while writing start of buffer trace to file\n"); - else if (rv < 0) - vlib_cli_output (vm, "Unkown error while saving: %d", rv); - else - vlib_cli_output (vm, "API trace saved to %s\n", chroot_filename); - vec_free (chroot_filename); - } - else if (unformat (input, "dump %s", &filename)) - { - vl_msg_api_process_file (vm, filename, first, last, DUMP); - } - else if (unformat (input, "custom-dump %s", &filename)) - { - vl_msg_api_process_file (vm, filename, first, last, CUSTOM_DUMP); - } - else if (unformat (input, "replay %s", &filename)) - { - vl_msg_api_process_file (vm, filename, first, last, REPLAY); - } - else if (unformat (input, "initializers %s", &filename)) - { - vl_msg_api_process_file (vm, filename, first, last, INITIALIZERS); - } - else if (unformat (input, "tx")) - { - which = VL_API_TRACE_TX; - } - else if (unformat (input, "first %d", &first)) - { - ; - } - else if (unformat (input, "last %d", &last)) - { - ; - } - else if (unformat (input, "status")) - { - vlib_cli_output (vm, "%U", format_vl_msg_api_trace_status, - am, which); - } - else if (unformat (input, "free")) - { - vl_msg_api_trace_onoff (am, which, 0); - vl_msg_api_trace_free (am, which); - } - else if (unformat (input, "post-mortem-on")) - post_mortem_dump_enabled = 1; - else if (unformat (input, "post-mortem-off")) - post_mortem_dump_enabled = 0; - else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - } - return 0; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (api_trace_command, static) = { - .path = "api trace", - .short_help = - "api trace [on|off][dump|save|replay ][status][free][post-mortem-on]", - .function = api_trace_command_fn, -}; -/* *INDENT-ON* */ - -static clib_error_t * -api_config_fn (vlib_main_t * vm, unformat_input_t * 
input) +void +vl_msg_api_post_mortem_dump_enable_disable (int enable) { - u32 nitems = 256 << 10; - vl_api_trace_which_t which = VL_API_TRACE_RX; - api_main_t *am = &api_main; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "on") || unformat (input, "enable")) - { - if (unformat (input, "nitems %d", &nitems)) - ; - vl_msg_api_trace_configure (am, which, nitems); - vl_msg_api_trace_onoff (am, which, 1 /* on */ ); - post_mortem_dump_enabled = 1; - } - else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - } - return 0; + post_mortem_dump_enabled = enable; } -VLIB_CONFIG_FUNCTION (api_config_fn, "api-trace"); - void vl_msg_api_post_mortem_dump (void) { diff --git a/src/vlibapi/node_serialize.c b/src/vlibapi/node_serialize.c index 4dc1a7d2..50e5c41c 100644 --- a/src/vlibapi/node_serialize.c +++ b/src/vlibapi/node_serialize.c @@ -73,16 +73,11 @@ vlib_node_serialize (vlib_node_main_t * nm, u8 * vector, if (vec_len (stat_vms) == 0) { - if (vec_len (vlib_mains) == 0) - vec_add1 (stat_vms, vm); - else + for (i = 0; i < vec_len (vlib_mains); i++) { - for (i = 0; i < vec_len (vlib_mains); i++) - { - stat_vm = vlib_mains[i]; - if (stat_vm) - vec_add1 (stat_vms, stat_vm); - } + stat_vm = vlib_mains[i]; + if (stat_vm) + vec_add1 (stat_vms, stat_vm); } } @@ -286,7 +281,7 @@ vlib_node_unserialize (u8 * vector) return nodes_by_thread; } -#if CLIB_DEBUG > 0 +#if TEST_CODE static clib_error_t * test_node_serialize_command_fn (vlib_main_t * vm, diff --git a/src/vlibmemory/memory_vlib.c b/src/vlibmemory/memory_vlib.c index 3a7415c0..d2e05968 100644 --- a/src/vlibmemory/memory_vlib.c +++ b/src/vlibmemory/memory_vlib.c @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include #include #include @@ -1437,6 +1439,475 @@ rpc_api_hookup (vlib_main_t * vm) VLIB_API_INIT_FUNCTION (rpc_api_hookup); +typedef enum +{ + DUMP, + CUSTOM_DUMP, + REPLAY, + INITIALIZERS, +} vl_api_replay_t; + +u8 * +format_vl_msg_api_trace_status (u8 * s, va_list * args) +{ + api_main_t *am = va_arg (*args, api_main_t *); + vl_api_trace_which_t which = va_arg (*args, vl_api_trace_which_t); + vl_api_trace_t *tp; + char *trace_name; + + switch (which) + { + case VL_API_TRACE_TX: + tp = am->tx_trace; + trace_name = "TX trace"; + break; + + case VL_API_TRACE_RX: + tp = am->rx_trace; + trace_name = "RX trace"; + break; + + default: + abort (); + } + + if (tp == 0) + { + s = format (s, "%s: not yet configured.\n", trace_name); + return s; + } + + s = format (s, "%s: used %d of %d items, %s enabled, %s wrapped\n", + trace_name, vec_len (tp->traces), tp->nitems, + tp->enabled ? "is" : "is not", tp->wrapped ? 
"has" : "has not"); + return s; +} + +void vl_msg_api_custom_dump_configure (api_main_t * am) + __attribute__ ((weak)); +void +vl_msg_api_custom_dump_configure (api_main_t * am) +{ +} + +static void +vl_msg_api_process_file (vlib_main_t * vm, u8 * filename, + u32 first_index, u32 last_index, + vl_api_replay_t which) +{ + vl_api_trace_file_header_t *hp; + int i, fd; + struct stat statb; + size_t file_size; + u8 *msg; + u8 endian_swap_needed = 0; + api_main_t *am = &api_main; + u8 *tmpbuf = 0; + u32 nitems; + void **saved_print_handlers = 0; + + fd = open ((char *) filename, O_RDONLY); + + if (fd < 0) + { + vlib_cli_output (vm, "Couldn't open %s\n", filename); + return; + } + + if (fstat (fd, &statb) < 0) + { + vlib_cli_output (vm, "Couldn't stat %s\n", filename); + close (fd); + return; + } + + if (!(statb.st_mode & S_IFREG) || (statb.st_size < sizeof (*hp))) + { + vlib_cli_output (vm, "File not plausible: %s\n", filename); + close (fd); + return; + } + + file_size = statb.st_size; + file_size = (file_size + 4095) & ~(4096); + + hp = mmap (0, file_size, PROT_READ, MAP_PRIVATE, fd, 0); + + if (hp == (vl_api_trace_file_header_t *) MAP_FAILED) + { + vlib_cli_output (vm, "mmap failed: %s\n", filename); + close (fd); + return; + } + close (fd); + + if ((clib_arch_is_little_endian && hp->endian == VL_API_BIG_ENDIAN) + || (clib_arch_is_big_endian && hp->endian == VL_API_LITTLE_ENDIAN)) + endian_swap_needed = 1; + + if (endian_swap_needed) + nitems = ntohl (hp->nitems); + else + nitems = hp->nitems; + + if (last_index == (u32) ~ 0) + { + last_index = nitems - 1; + } + + if (first_index >= nitems || last_index >= nitems) + { + vlib_cli_output (vm, "Range (%d, %d) outside file range (0, %d)\n", + first_index, last_index, nitems - 1); + munmap (hp, file_size); + return; + } + if (hp->wrapped) + vlib_cli_output (vm, + "Note: wrapped/incomplete trace, results may vary\n"); + + if (which == CUSTOM_DUMP) + { + saved_print_handlers = (void **) vec_dup (am->msg_print_handlers); + vl_msg_api_custom_dump_configure (am); + } + + + msg = (u8 *) (hp + 1); + + for (i = 0; i < first_index; i++) + { + trace_cfg_t *cfgp; + int size; + u16 msg_id; + + size = clib_host_to_net_u32 (*(u32 *) msg); + msg += sizeof (u32); + + if (clib_arch_is_little_endian) + msg_id = ntohs (*((u16 *) msg)); + else + msg_id = *((u16 *) msg); + + cfgp = am->api_trace_cfg + msg_id; + if (!cfgp) + { + vlib_cli_output (vm, "Ugh: msg id %d no trace config\n", msg_id); + munmap (hp, file_size); + return; + } + msg += size; + } + + if (which == REPLAY) + am->replay_in_progress = 1; + + for (; i <= last_index; i++) + { + trace_cfg_t *cfgp; + u16 *msg_idp; + u16 msg_id; + int size; + + if (which == DUMP) + vlib_cli_output (vm, "---------- trace %d -----------\n", i); + + size = clib_host_to_net_u32 (*(u32 *) msg); + msg += sizeof (u32); + + if (clib_arch_is_little_endian) + msg_id = ntohs (*((u16 *) msg)); + else + msg_id = *((u16 *) msg); + + cfgp = am->api_trace_cfg + msg_id; + if (!cfgp) + { + vlib_cli_output (vm, "Ugh: msg id %d no trace config\n", msg_id); + munmap (hp, file_size); + vec_free (tmpbuf); + am->replay_in_progress = 0; + return; + } + + /* Copy the buffer (from the read-only mmap'ed file) */ + vec_validate (tmpbuf, size - 1 + sizeof (uword)); + clib_memcpy (tmpbuf + sizeof (uword), msg, size); + memset (tmpbuf, 0xf, sizeof (uword)); + + /* + * Endian swap if needed. All msg data is supposed to be + * in network byte order. All msg handlers are supposed to + * know that. The generic message dumpers don't know that. 
+ * One could fix apigen, I suppose. + */ + if ((which == DUMP && clib_arch_is_little_endian) || endian_swap_needed) + { + void (*endian_fp) (void *); + if (msg_id >= vec_len (am->msg_endian_handlers) + || (am->msg_endian_handlers[msg_id] == 0)) + { + vlib_cli_output (vm, "Ugh: msg id %d no endian swap\n", msg_id); + munmap (hp, file_size); + vec_free (tmpbuf); + am->replay_in_progress = 0; + return; + } + endian_fp = am->msg_endian_handlers[msg_id]; + (*endian_fp) (tmpbuf + sizeof (uword)); + } + + /* msg_id always in network byte order */ + if (clib_arch_is_little_endian) + { + msg_idp = (u16 *) (tmpbuf + sizeof (uword)); + *msg_idp = msg_id; + } + + switch (which) + { + case CUSTOM_DUMP: + case DUMP: + if (msg_id < vec_len (am->msg_print_handlers) && + am->msg_print_handlers[msg_id]) + { + u8 *(*print_fp) (void *, void *); + + print_fp = (void *) am->msg_print_handlers[msg_id]; + (*print_fp) (tmpbuf + sizeof (uword), vm); + } + else + { + vlib_cli_output (vm, "Skipping msg id %d: no print fcn\n", + msg_id); + break; + } + break; + + case INITIALIZERS: + if (msg_id < vec_len (am->msg_print_handlers) && + am->msg_print_handlers[msg_id]) + { + u8 *s; + int j; + u8 *(*print_fp) (void *, void *); + + print_fp = (void *) am->msg_print_handlers[msg_id]; + + vlib_cli_output (vm, "/*"); + + (*print_fp) (tmpbuf + sizeof (uword), vm); + vlib_cli_output (vm, "*/\n"); + + s = format (0, "static u8 * vl_api_%s_%d[%d] = {", + am->msg_names[msg_id], i, + am->api_trace_cfg[msg_id].size); + + for (j = 0; j < am->api_trace_cfg[msg_id].size; j++) + { + if ((j & 7) == 0) + s = format (s, "\n "); + s = format (s, "0x%02x,", tmpbuf[sizeof (uword) + j]); + } + s = format (s, "\n};\n%c", 0); + vlib_cli_output (vm, (char *) s); + vec_free (s); + } + break; + + case REPLAY: + if (msg_id < vec_len (am->msg_print_handlers) && + am->msg_print_handlers[msg_id] && cfgp->replay_enable) + { + void (*handler) (void *); + + handler = (void *) am->msg_handlers[msg_id]; + + if (!am->is_mp_safe[msg_id]) + vl_msg_api_barrier_sync (); + (*handler) (tmpbuf + sizeof (uword)); + if (!am->is_mp_safe[msg_id]) + vl_msg_api_barrier_release (); + } + else + { + if (cfgp->replay_enable) + vlib_cli_output (vm, "Skipping msg id %d: no handler\n", + msg_id); + break; + } + break; + } + + _vec_len (tmpbuf) = 0; + msg += size; + } + + if (saved_print_handlers) + { + clib_memcpy (am->msg_print_handlers, saved_print_handlers, + vec_len (am->msg_print_handlers) * sizeof (void *)); + vec_free (saved_print_handlers); + } + + munmap (hp, file_size); + vec_free (tmpbuf); + am->replay_in_progress = 0; +} + +static clib_error_t * +api_trace_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + u32 nitems = 256 << 10; + api_main_t *am = &api_main; + vl_api_trace_which_t which = VL_API_TRACE_RX; + u8 *filename; + u32 first = 0; + u32 last = (u32) ~ 0; + FILE *fp; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "on") || unformat (input, "enable")) + { + if (unformat (input, "nitems %d", &nitems)) + ; + vl_msg_api_trace_configure (am, which, nitems); + vl_msg_api_trace_onoff (am, which, 1 /* on */ ); + } + else if (unformat (input, "off")) + { + vl_msg_api_trace_onoff (am, which, 0); + } + else if (unformat (input, "save %s", &filename)) + { + u8 *chroot_filename; + if (strstr ((char *) filename, "..") + || index ((char *) filename, '/')) + { + vlib_cli_output (vm, "illegal characters in filename '%s'", + filename); + return 0; + } + + chroot_filename = format (0, 
"/tmp/%s%c", filename, 0); + + vec_free (filename); + + fp = fopen ((char *) chroot_filename, "w"); + if (fp == NULL) + { + vlib_cli_output (vm, "Couldn't create %s\n", chroot_filename); + return 0; + } + rv = vl_msg_api_trace_save (am, which, fp); + fclose (fp); + if (rv == -1) + vlib_cli_output (vm, "API Trace data not present\n"); + else if (rv == -2) + vlib_cli_output (vm, "File for writing is closed\n"); + else if (rv == -10) + vlib_cli_output (vm, "Error while writing header to file\n"); + else if (rv == -11) + vlib_cli_output (vm, "Error while writing trace to file\n"); + else if (rv == -12) + vlib_cli_output (vm, + "Error while writing end of buffer trace to file\n"); + else if (rv == -13) + vlib_cli_output (vm, + "Error while writing start of buffer trace to file\n"); + else if (rv < 0) + vlib_cli_output (vm, "Unkown error while saving: %d", rv); + else + vlib_cli_output (vm, "API trace saved to %s\n", chroot_filename); + vec_free (chroot_filename); + } + else if (unformat (input, "dump %s", &filename)) + { + vl_msg_api_process_file (vm, filename, first, last, DUMP); + } + else if (unformat (input, "custom-dump %s", &filename)) + { + vl_msg_api_process_file (vm, filename, first, last, CUSTOM_DUMP); + } + else if (unformat (input, "replay %s", &filename)) + { + vl_msg_api_process_file (vm, filename, first, last, REPLAY); + } + else if (unformat (input, "initializers %s", &filename)) + { + vl_msg_api_process_file (vm, filename, first, last, INITIALIZERS); + } + else if (unformat (input, "tx")) + { + which = VL_API_TRACE_TX; + } + else if (unformat (input, "first %d", &first)) + { + ; + } + else if (unformat (input, "last %d", &last)) + { + ; + } + else if (unformat (input, "status")) + { + vlib_cli_output (vm, "%U", format_vl_msg_api_trace_status, + am, which); + } + else if (unformat (input, "free")) + { + vl_msg_api_trace_onoff (am, which, 0); + vl_msg_api_trace_free (am, which); + } + else if (unformat (input, "post-mortem-on")) + vl_msg_api_post_mortem_dump_enable_disable (1 /* enable */ ); + else if (unformat (input, "post-mortem-off")) + vl_msg_api_post_mortem_dump_enable_disable (0 /* enable */ ); + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (api_trace_command, static) = { + .path = "api trace", + .short_help = + "api trace [on|off][dump|save|replay ][status][free][post-mortem-on]", + .function = api_trace_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +api_config_fn (vlib_main_t * vm, unformat_input_t * input) +{ + u32 nitems = 256 << 10; + vl_api_trace_which_t which = VL_API_TRACE_RX; + api_main_t *am = &api_main; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "on") || unformat (input, "enable")) + { + if (unformat (input, "nitems %d", &nitems)) + ; + vl_msg_api_trace_configure (am, which, nitems); + vl_msg_api_trace_onoff (am, which, 1 /* on */ ); + vl_msg_api_post_mortem_dump_enable_disable (1 /* enable */ ); + } + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + return 0; +} + +VLIB_CONFIG_FUNCTION (api_config_fn, "api-trace"); + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index b6b4c04a..100ec613 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -374,8 +374,7 @@ vhost_user_rx_thread_placement () for (i = 
vum->input_cpu_first_index; i < vum->input_cpu_first_index + vum->input_cpu_count; i++) { - vlib_node_set_state (vlib_mains ? vlib_mains[i] : &vlib_global_main, - vhost_user_input_node.index, + vlib_node_set_state (vlib_mains[i], vhost_user_input_node.index, VLIB_NODE_STATE_DISABLED); vec_add1 (workers, i); } @@ -406,9 +405,9 @@ vhost_user_rx_thread_placement () iaq.qid = qid; iaq.vhost_iface_index = vui - vum->vhost_user_interfaces; vec_add1 (vhc->rx_queues, iaq); - vlib_node_set_state (vlib_mains ? vlib_mains[cpu_index] : - &vlib_global_main, vhost_user_input_node.index, - VLIB_NODE_STATE_POLLING); + vlib_node_set_state (vlib_mains[cpu_index], + vhost_user_input_node.index, + VLIB_NODE_STATE_POLLING); } }); /* *INDENT-ON* */ diff --git a/src/vpp-api-test.am b/src/vpp-api-test.am index f0d5df62..ceab687c 100644 --- a/src/vpp-api-test.am +++ b/src/vpp-api-test.am @@ -34,14 +34,12 @@ vpp_json_test_SOURCES = \ vat/json_test.c vpp_api_test_LDADD = \ - libvlib.la \ libvlibmemoryclient.la \ libsvm.la \ libvatplugin.la \ libvppinfra.la \ libvlibapi.la \ libvlibmemory.la \ - libvnet.la \ -lpthread -lm -lrt -ldl -lcrypto vpp_api_test_LDFLAGS = -Wl,--export-dynamic diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index 828394ed..c85dc680 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -2143,7 +2143,6 @@ vpe_api_init (vlib_main_t * vm) am->oam_events_registration_hash = hash_create (0, sizeof (uword)); am->bfd_events_registration_hash = hash_create (0, sizeof (uword)); - vl_api_init (vm); vl_set_memory_region_name ("/vpe-api"); vl_enable_disable_memory_api (vm, 1 /* enable it */ ); diff --git a/src/vpp/api/gmon.c b/src/vpp/api/gmon.c index 610f40ed..277be8c0 100644 --- a/src/vpp/api/gmon.c +++ b/src/vpp/api/gmon.c @@ -122,13 +122,8 @@ gmon_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) /* Initial wait for the world to settle down */ vlib_process_suspend (vm, 5.0); - if (vec_len (vlib_mains) == 0) - vec_add1 (gm->my_vlib_mains, &vlib_global_main); - else - { - for (i = 0; i < vec_len (vlib_mains); i++) - vec_add1 (gm->my_vlib_mains, vlib_mains[i]); - } + for (i = 0; i < vec_len (vlib_mains); i++) + vec_add1 (gm->my_vlib_mains, vlib_mains[i]); while (1) { -- cgit 1.2.3-korg From a331e6fc45e175aa2a0544150ddae16f82fae833 Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Thu, 9 Mar 2017 17:10:25 -0500 Subject: VPP-608: warnings be gone Change-Id: I6d30c6a8d1a425c531e7206e46143d528980c48e Signed-off-by: Dave Barach --- src/vpp/api/api_main.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'src/vpp/api') diff --git a/src/vpp/api/api_main.c b/src/vpp/api/api_main.c index 7913bc01..d3764337 100644 --- a/src/vpp/api/api_main.c +++ b/src/vpp/api/api_main.c @@ -47,6 +47,8 @@ api_main_init (vlib_main_t * vm) vam->vlib_main = vm; vam->my_client_index = (u32) ~ 0; + /* Ensure that vam->inbuf is never NULL */ + vec_validate (vam->inbuf, 0); init_error_string_table (vam); rv = vat_plugin_init (vam); if (rv) @@ -82,6 +84,14 @@ api_command_fn (vlib_main_t * vm, vam->vl_input_queue = am->shmem_hdr->vl_input_queue; +#ifdef __COVERITY + /* + * Convince Coverity that it's not a NULL pointer... 
+ * Done once for real below, since we never vec_free(vam->inbuf); + */ + vec_validate (vam->inbuf, 0); +#endif + vec_reset_length (vam->inbuf); vam->input = &_input; -- cgit 1.2.3-korg From 8d55247297a335241097cc503a99854bbc79d4cd Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Thu, 9 Mar 2017 20:54:43 +0100 Subject: gmon: fix code commented out by mistake Change-Id: If540787e2443330673ad6721094c4765c32ef59b Signed-off-by: Damjan Marion --- src/vpp/api/gmon.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/vpp/api') diff --git a/src/vpp/api/gmon.c b/src/vpp/api/gmon.c index 277be8c0..6b333730 100644 --- a/src/vpp/api/gmon.c +++ b/src/vpp/api/gmon.c @@ -132,8 +132,7 @@ gmon_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) *gm->vector_rate_ptr = vector_rate; now = vlib_time_now (vm); dt = now - last_runtime; - // TODO - //input_packets = vnet_get_aggregate_rx_packets (); + input_packets = vnet_get_aggregate_rx_packets (); *gm->input_rate_ptr = (f64) (input_packets - last_input_packets) / dt; last_runtime = now; last_input_packets = input_packets; -- cgit 1.2.3-korg From b64e4e2af314e1b2bc074b12ede50ad6d96c37c0 Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Tue, 14 Mar 2017 09:10:56 -0400 Subject: Clean up dead API client reaper callack scheme Change-Id: Iec3df234ca9f717d87787cefc76b73ed9ad42332 Signed-off-by: Dave Barach --- src/vlibapi/api.h | 44 ++++++++++++++++++++++++++++++++++++++++++++ src/vlibmemory/memory_vlib.c | 22 +++++++++++++++------- src/vpp/api/api.c | 6 ++++-- 3 files changed, 63 insertions(+), 9 deletions(-) (limited to 'src/vpp/api') diff --git a/src/vlibapi/api.h b/src/vlibapi/api.h index 87a56121..a62fa644 100644 --- a/src/vlibapi/api.h +++ b/src/vlibapi/api.h @@ -112,6 +112,14 @@ typedef struct u16 last_msg_id; } vl_api_msg_range_t; +typedef clib_error_t *(vl_msg_api_init_function_t) (u32 client_index); + +typedef struct _vl_msg_api_init_function_list_elt +{ + struct _vl_msg_api_init_function_list_elt *next_init_function; + vl_msg_api_init_function_t *f; +} _vl_msg_api_function_list_elt_t; + typedef struct { void (**msg_handlers) (void *); @@ -192,6 +200,10 @@ typedef struct /* Replay in progress? 
*/ int replay_in_progress; + + /* List of API client reaper functions */ + _vl_msg_api_function_list_elt_t *reaper_function_registrations; + } api_main_t; extern api_main_t api_main; @@ -291,6 +303,38 @@ vlib_node_t **vlib_node_unserialize (u8 * vector); }) +#define _VL_MSG_API_FUNCTION_SYMBOL(x, type) \ + _vl_msg_api_##type##_function_##x + +#define VL_MSG_API_FUNCTION_SYMBOL(x) \ + _VL_MSG_API_FUNCTION_SYMBOL(x, reaper) + +#define VLIB_DECLARE_REAPER_FUNCTION(x, tag) \ +vl_msg_api_init_function_t * _VL_MSG_API_FUNCTION_SYMBOL (x, tag) = x; \ +static void __vl_msg_api_add_##tag##_function_##x (void) \ + __attribute__((__constructor__)) ; \ + \ +static void __vl_msg_api_add_##tag##_function_##x (void) \ +{ \ + api_main_t * am = &api_main; \ + static _vl_msg_api_function_list_elt_t _vl_msg_api_function; \ + _vl_msg_api_function.next_init_function \ + = am->tag##_function_registrations; \ + am->tag##_function_registrations = &_vl_msg_api_function; \ + _vl_msg_api_function.f = &x; \ +} + +#define VL_MSG_API_REAPER_FUNCTION(x) VLIB_DECLARE_REAPER_FUNCTION(x,reaper) + +/* Call reaper function with client index */ +#define vl_msg_api_call_reaper_function(ci) \ + ({ \ + extern vlib_init_function_t * VLIB_INIT_FUNCTION_SYMBOL (reaper); \ + vlib_init_function_t * _f = VLIB_INIT_FUNCTION_SYMBOL (reaper); \ + clib_error_t * _error = 0; \ + _error = _f (ci); \ + }) + #endif /* included_api_h */ /* diff --git a/src/vlibmemory/memory_vlib.c b/src/vlibmemory/memory_vlib.c index d2e05968..7a536ee8 100644 --- a/src/vlibmemory/memory_vlib.c +++ b/src/vlibmemory/memory_vlib.c @@ -221,12 +221,20 @@ vl_api_memclnt_create_t_handler (vl_api_memclnt_create_t * mp) vl_msg_api_send_shmem (q, (u8 *) & rp); } -/* Application callback to clean up leftover registrations from this client */ -int vl_api_memclnt_delete_callback (u32 client_index) __attribute__ ((weak)); - -int -vl_api_memclnt_delete_callback (u32 client_index) +static int +call_reaper_functions (u32 client_index) { + clib_error_t *error = 0; + _vl_msg_api_function_list_elt_t *i; + + i = api_main.reaper_function_registrations; + while (i) + { + error = i->f (client_index); + if (error) + clib_error_report (error); + i = i->next_init_function; + } return 0; } @@ -246,7 +254,7 @@ vl_api_memclnt_delete_t_handler (vl_api_memclnt_delete_t * mp) handle = mp->index; - if (vl_api_memclnt_delete_callback (handle)) + if (call_reaper_functions (handle)) return; epoch = vl_msg_api_handle_get_epoch (handle); @@ -621,7 +629,7 @@ memclnt_process (vlib_main_t * vm, handle = vl_msg_api_handle_from_index_and_epoch (dead_indices[i], shm->application_restarts); - (void) vl_api_memclnt_delete_callback (handle); + (void) call_reaper_functions (handle); } } diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index c85dc680..673ffe56 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -164,8 +164,8 @@ static int arp_change_delete_callback (u32 pool_index, u8 * notused); static int nd_change_delete_callback (u32 pool_index, u8 * notused); /* Clean up all registrations belonging to the indicated client */ -int -vl_api_memclnt_delete_callback (u32 client_index) +static clib_error_t * +memclnt_delete_callback (u32 client_index) { vpe_api_main_t *vam = &vpe_api_main; vpe_client_registration_t *rp; @@ -186,6 +186,8 @@ vl_api_memclnt_delete_callback (u32 client_index) return 0; } +VL_MSG_API_REAPER_FUNCTION (memclnt_delete_callback); + pub_sub_handler (oam_events, OAM_EVENTS); #define RESOLUTION_EVENT 1 -- cgit 1.2.3-korg From 3d6b2b5649fc2858c6ebc433a79c45d9bff3175e Mon Sep 17 
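For orientation, a minimal sketch of how a subsystem would hook into the reaper list introduced above. Only the VL_MSG_API_REAPER_FUNCTION macro and the clib_error_t *(u32 client_index) signature come from the patch; the function name and the state it frees are hypothetical.

/* Hypothetical cleanup hook: release any state keyed by client_index
 * when a binary-API client disconnects or is reaped after a crash. */
static clib_error_t *
example_client_reaper (u32 client_index)
{
  /* e.g. drop this client's registrations from a local pool/hash */
  return 0;
}

/* Constructor-time registration onto am->reaper_function_registrations;
 * call_reaper_functions () in memory_vlib.c walks this list. */
VL_MSG_API_REAPER_FUNCTION (example_client_reaper);
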
00:00:00 2001 From: Dave Barach Date: Tue, 14 Mar 2017 09:44:11 -0400 Subject: Try again: __COVERITY__ vs __COVERITY Hate it when that happens... Change-Id: I725c8539d6349755e9d75ae05f2703c83ea704be Signed-off-by: Dave Barach --- src/vpp/api/api_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/vpp/api') diff --git a/src/vpp/api/api_main.c b/src/vpp/api/api_main.c index d3764337..a59524cf 100644 --- a/src/vpp/api/api_main.c +++ b/src/vpp/api/api_main.c @@ -84,7 +84,7 @@ api_command_fn (vlib_main_t * vm, vam->vl_input_queue = am->shmem_hdr->vl_input_queue; -#ifdef __COVERITY +#ifdef __COVERITY__ /* * Convince Coverity that it's not a NULL pointer... * Done once for real below, since we never vec_free(vam->inbuf); -- cgit 1.2.3-korg From 1c82cd4f491ff83127fbacfb6b09b9492eff1b62 Mon Sep 17 00:00:00 2001 From: Eyal Bari Date: Tue, 14 Mar 2017 14:39:51 +0200 Subject: API:support hidden sw interfaces validate interfaces - added check for hidden interfaces interface dump - dont send hidden interfaces set_unnumbered - test for hidden vl_api_create_vlan_subif_t_handler, vl_api_create_subif_t_handler - fixed potential memory leak some other minor refactors to make code clearer and shorter Change-Id: Icce6b724336b7d1536fbd07a74bf7abe4916d2c0 Signed-off-by: Eyal Bari --- src/vlibapi/api_helper_macros.h | 18 ++++++------ src/vnet/interface_api.c | 65 +++++++++++++++++------------------------ src/vpp/api/api.c | 63 +++++++++++++++------------------------ 3 files changed, 60 insertions(+), 86 deletions(-) (limited to 'src/vpp/api') diff --git a/src/vlibapi/api_helper_macros.h b/src/vlibapi/api_helper_macros.h index 7f94e446..4e281342 100644 --- a/src/vlibapi/api_helper_macros.h +++ b/src/vlibapi/api_helper_macros.h @@ -103,11 +103,15 @@ do { \ /* "trust, but verify" */ +static inline uword +vnet_sw_if_index_is_api_valid (u32 sw_if_index) +{ + return vnet_sw_interface_is_api_valid (vnet_get_main (), sw_if_index); +} + #define VALIDATE_SW_IF_INDEX(mp) \ do { u32 __sw_if_index = ntohl(mp->sw_if_index); \ - vnet_main_t *__vnm = vnet_get_main(); \ - if (pool_is_free_index(__vnm->interface_main.sw_interfaces, \ - __sw_if_index)) { \ + if (!vnet_sw_if_index_is_api_valid(__sw_if_index)) { \ rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; \ goto bad_sw_if_index; \ } \ @@ -121,9 +125,7 @@ bad_sw_if_index: \ #define VALIDATE_RX_SW_IF_INDEX(mp) \ do { u32 __rx_sw_if_index = ntohl(mp->rx_sw_if_index); \ - vnet_main_t *__vnm = vnet_get_main(); \ - if (pool_is_free_index(__vnm->interface_main.sw_interfaces, \ - __rx_sw_if_index)) { \ + if (!vnet_sw_if_index_is_api_valid(__rx_sw_if_index)) { \ rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; \ goto bad_rx_sw_if_index; \ } \ @@ -137,9 +139,7 @@ bad_rx_sw_if_index: \ #define VALIDATE_TX_SW_IF_INDEX(mp) \ do { u32 __tx_sw_if_index = ntohl(mp->tx_sw_if_index); \ - vnet_main_t *__vnm = vnet_get_main(); \ - if (pool_is_free_index(__vnm->interface_main.sw_interfaces, \ - __tx_sw_if_index)) { \ + if (!vnet_sw_if_index_is_api_valid(__tx_sw_if_index)) { \ rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; \ goto bad_tx_sw_if_index; \ } \ diff --git a/src/vnet/interface_api.c b/src/vnet/interface_api.c index 2b6ff0c5..28b09b55 100644 --- a/src/vnet/interface_api.c +++ b/src/vnet/interface_api.c @@ -133,14 +133,10 @@ send_sw_interface_details (vpe_api_main_t * am, vnet_sw_interface_t * swif, u8 * interface_name, u32 context) { - vl_api_sw_interface_details_t *mp; - vnet_main_t *vnm = vnet_get_main (); - vnet_hw_interface_t *hi; - u8 *tag; + vnet_hw_interface_t *hi = + 
vnet_get_sup_hw_interface (am->vnet_main, swif->sw_if_index); - hi = vnet_get_sup_hw_interface (am->vnet_main, swif->sw_if_index); - - mp = vl_msg_api_alloc (sizeof (*mp)); + vl_api_sw_interface_details_t *mp = vl_msg_api_alloc (sizeof (*mp)); memset (mp, 0, sizeof (*mp)); mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_DETAILS); mp->sw_if_index = ntohl (swif->sw_if_index); @@ -224,7 +220,7 @@ send_sw_interface_details (vpe_api_main_t * am, mp->i_sid = i_sid; } - tag = vnet_get_sw_interface_tag (vnm, swif->sw_if_index); + u8 *tag = vnet_get_sw_interface_tag (vnet_get_main (), swif->sw_if_index); if (tag) strncpy ((char *) mp->tag, (char *) tag, ARRAY_LEN (mp->tag) - 1); @@ -237,39 +233,38 @@ vl_api_sw_interface_dump_t_handler (vl_api_sw_interface_dump_t * mp) vpe_api_main_t *am = &vpe_api_main; vnet_sw_interface_t *swif; vnet_interface_main_t *im = &am->vnet_main->interface_main; - u8 *filter_string = 0, *name_string = 0; - unix_shared_memory_queue_t *q; - char *strcasestr (char *, char *); /* lnx hdr file botch */ - - q = vl_api_client_index_to_input_queue (mp->client_index); + unix_shared_memory_queue_t *q = + vl_api_client_index_to_input_queue (mp->client_index); if (q == 0) return; + u8 *filter = 0, *name = 0; if (mp->name_filter_valid) { mp->name_filter[ARRAY_LEN (mp->name_filter) - 1] = 0; - filter_string = format (0, "%s%c", mp->name_filter, 0); + filter = format (0, "%s%c", mp->name_filter, 0); } + char *strcasestr (char *, char *); /* lnx hdr file botch */ /* *INDENT-OFF* */ pool_foreach (swif, im->sw_interfaces, ({ - name_string = format (name_string, "%U%c", - format_vnet_sw_interface_name, - am->vnet_main, swif, 0); + if (!vnet_swif_is_api_visible (swif)) + continue; + vec_reset_length(name); + name = format (name, "%U%c", format_vnet_sw_interface_name, am->vnet_main, + swif, 0); - if (mp->name_filter_valid == 0 || - strcasestr((char *) name_string, (char *) filter_string)) { + if (filter && !strcasestr((char *) name, (char *) filter)) + continue; - send_sw_interface_details (am, q, swif, name_string, mp->context); - } - _vec_len (name_string) = 0; + send_sw_interface_details (am, q, swif, name, mp->context); })); /* *INDENT-ON* */ - vec_free (name_string); - vec_free (filter_string); + vec_free (name); + vec_free (filter); } static void @@ -435,49 +430,43 @@ static void vl_api_sw_interface_set_unnumbered_t_handler { vl_api_sw_interface_set_unnumbered_reply_t *rmp; int rv = 0; - vnet_sw_interface_t *si; vnet_main_t *vnm = vnet_get_main (); - u32 sw_if_index, unnumbered_sw_if_index; - - sw_if_index = ntohl (mp->sw_if_index); - unnumbered_sw_if_index = ntohl (mp->unnumbered_sw_if_index); + u32 sw_if_index = ntohl (mp->sw_if_index); + u32 unnumbered_sw_if_index = ntohl (mp->unnumbered_sw_if_index); /* * The API message field names are backwards from * the underlying data structure names. * It's not worth changing them now. 
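Stepping back for a moment: the reworked VALIDATE_SW_IF_INDEX / vnet_sw_if_index_is_api_valid helpers shown earlier in this patch are used in the pattern sketched below. The message name is made up for illustration; only the macros and REPLY_MACRO come from the surrounding code.

static void
vl_api_example_msg_t_handler (vl_api_example_msg_t * mp)
{
  vl_api_example_msg_reply_t *rmp;
  int rv = 0;

  /* Now also rejects hidden interfaces, not just free pool indices */
  VALIDATE_SW_IF_INDEX (mp);

  /* ... message-specific work on ntohl (mp->sw_if_index) ... */

bad_sw_if_index:                /* target of the macro's goto */
  REPLY_MACRO (VL_API_EXAMPLE_MSG_REPLY);
}
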
*/ - if (pool_is_free_index (vnm->interface_main.sw_interfaces, - unnumbered_sw_if_index)) + if (!vnet_sw_interface_is_api_valid (vnm, unnumbered_sw_if_index)) { rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; goto done; } /* Only check the "use loop0" field when setting the binding */ - if (mp->is_add && - pool_is_free_index (vnm->interface_main.sw_interfaces, sw_if_index)) + if (mp->is_add && !vnet_sw_interface_is_api_valid (vnm, sw_if_index)) { rv = VNET_API_ERROR_INVALID_SW_IF_INDEX_2; goto done; } - si = vnet_get_sw_interface (vnm, unnumbered_sw_if_index); + vnet_sw_interface_t *si = + vnet_get_sw_interface (vnm, unnumbered_sw_if_index); if (mp->is_add) { si->flags |= VNET_SW_INTERFACE_FLAG_UNNUMBERED; si->unnumbered_sw_if_index = sw_if_index; - ip4_sw_interface_enable_disable (unnumbered_sw_if_index, 1); - ip6_sw_interface_enable_disable (unnumbered_sw_if_index, 1); } else { si->flags &= ~(VNET_SW_INTERFACE_FLAG_UNNUMBERED); si->unnumbered_sw_if_index = (u32) ~ 0; - ip4_sw_interface_enable_disable (unnumbered_sw_if_index, 0); - ip6_sw_interface_enable_disable (unnumbered_sw_if_index, 0); } + ip4_sw_interface_enable_disable (unnumbered_sw_if_index, mp->is_add); + ip6_sw_interface_enable_disable (unnumbered_sw_if_index, mp->is_add); done: REPLY_MACRO (VL_API_SW_INTERFACE_SET_UNNUMBERED_REPLY); diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index 673ffe56..d8301fa6 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -477,7 +477,6 @@ vl_api_create_vlan_subif_t_handler (vl_api_create_vlan_subif_t * mp) uword *p; vnet_interface_main_t *im = &vnm->interface_main; u64 sup_and_sub_key; - u64 *kp; unix_shared_memory_queue_t *q; clib_error_t *error; @@ -507,9 +506,6 @@ vl_api_create_vlan_subif_t_handler (vl_api_create_vlan_subif_t * mp) goto out; } - kp = clib_mem_alloc (sizeof (*kp)); - *kp = sup_and_sub_key; - memset (&template, 0, sizeof (template)); template.type = VNET_SW_INTERFACE_TYPE_SUB; template.sup_sw_if_index = hi->sw_if_index; @@ -526,6 +522,10 @@ vl_api_create_vlan_subif_t_handler (vl_api_create_vlan_subif_t * mp) rv = VNET_API_ERROR_INVALID_REGISTRATION; goto out; } + + u64 *kp = clib_mem_alloc (sizeof (*kp)); + *kp = sup_and_sub_key; + hash_set (hi->sub_interface_sw_if_index_by_id, id, sw_if_index); hash_set_mem (im->sw_if_index_by_sup_and_sub, kp, sw_if_index); @@ -537,10 +537,10 @@ out: return; rmp = vl_msg_api_alloc (sizeof (*rmp)); - rmp->_vl_msg_id = ntohs (VL_API_CREATE_VLAN_SUBIF_REPLY); + rmp->_vl_msg_id = htons (VL_API_CREATE_VLAN_SUBIF_REPLY); rmp->context = mp->context; - rmp->retval = ntohl (rv); - rmp->sw_if_index = ntohl (sw_if_index); + rmp->retval = htonl (rv); + rmp->sw_if_index = htonl (sw_if_index); vl_msg_api_send_shmem (q, (u8 *) & rmp); } @@ -558,7 +558,6 @@ vl_api_create_subif_t_handler (vl_api_create_subif_t * mp) uword *p; vnet_interface_main_t *im = &vnm->interface_main; u64 sup_and_sub_key; - u64 *kp; clib_error_t *error; VALIDATE_SW_IF_INDEX (mp); @@ -587,9 +586,6 @@ vl_api_create_subif_t_handler (vl_api_create_subif_t * mp) goto out; } - kp = clib_mem_alloc (sizeof (*kp)); - *kp = sup_and_sub_key; - memset (&template, 0, sizeof (template)); template.type = VNET_SW_INTERFACE_TYPE_SUB; template.sup_sw_if_index = sw_if_index; @@ -613,6 +609,9 @@ vl_api_create_subif_t_handler (vl_api_create_subif_t * mp) goto out; } + u64 *kp = clib_mem_alloc (sizeof (*kp)); + *kp = sup_and_sub_key; + hash_set (hi->sub_interface_sw_if_index_by_id, sub_id, sw_if_index); hash_set_mem (im->sw_if_index_by_sup_and_sub, kp, sw_if_index); @@ -669,20 +668,11 @@ static void 
int rv = 0; vnet_main_t *vnm = vnet_get_main (); vl_api_proxy_arp_intfc_enable_disable_reply_t *rmp; - vnet_sw_interface_t *si; - u32 sw_if_index; VALIDATE_SW_IF_INDEX (mp); - sw_if_index = ntohl (mp->sw_if_index); - - if (pool_is_free_index (vnm->interface_main.sw_interfaces, sw_if_index)) - { - rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; - goto out; - } - - si = vnet_get_sw_interface (vnm, sw_if_index); + vnet_sw_interface_t *si = + vnet_get_sw_interface (vnm, ntohl (mp->sw_if_index)); ASSERT (si); @@ -1223,13 +1213,12 @@ static void vl_api_classify_set_interface_ip_table_t_handler vlib_main_t *vm = vlib_get_main (); vl_api_classify_set_interface_ip_table_reply_t *rmp; int rv; - u32 table_index, sw_if_index; - - table_index = ntohl (mp->table_index); - sw_if_index = ntohl (mp->sw_if_index); VALIDATE_SW_IF_INDEX (mp); + u32 table_index = ntohl (mp->table_index); + u32 sw_if_index = ntohl (mp->sw_if_index); + if (mp->is_ipv6) rv = vnet_set_ip6_classify_intfc (vm, sw_if_index, table_index); else @@ -1658,15 +1647,14 @@ static void vl_api_input_acl_set_interface_t_handler vlib_main_t *vm = vlib_get_main (); vl_api_input_acl_set_interface_reply_t *rmp; int rv; - u32 sw_if_index, ip4_table_index, ip6_table_index, l2_table_index; - - ip4_table_index = ntohl (mp->ip4_table_index); - ip6_table_index = ntohl (mp->ip6_table_index); - l2_table_index = ntohl (mp->l2_table_index); - sw_if_index = ntohl (mp->sw_if_index); VALIDATE_SW_IF_INDEX (mp); + u32 ip4_table_index = ntohl (mp->ip4_table_index); + u32 ip6_table_index = ntohl (mp->ip6_table_index); + u32 l2_table_index = ntohl (mp->l2_table_index); + u32 sw_if_index = ntohl (mp->sw_if_index); + rv = vnet_set_input_acl_intfc (vm, sw_if_index, ip4_table_index, ip6_table_index, l2_table_index, mp->is_add); @@ -2013,25 +2001,22 @@ vl_api_feature_enable_disable_t_handler (vl_api_feature_enable_disable_t * mp) { vl_api_feature_enable_disable_reply_t *rmp; int rv = 0; - u8 *arc_name, *feature_name; VALIDATE_SW_IF_INDEX (mp); - arc_name = format (0, "%s%c", mp->arc_name, 0); - feature_name = format (0, "%s%c", mp->feature_name, 0); + u8 *arc_name = format (0, "%s%c", mp->arc_name, 0); + u8 *feature_name = format (0, "%s%c", mp->feature_name, 0); - vnet_feature_registration_t *reg; - reg = + vnet_feature_registration_t *reg = vnet_get_feature_reg ((const char *) arc_name, (const char *) feature_name); if (reg == 0) rv = VNET_API_ERROR_INVALID_VALUE; else { - u32 sw_if_index; + u32 sw_if_index = ntohl (mp->sw_if_index); clib_error_t *error = 0; - sw_if_index = ntohl (mp->sw_if_index); if (reg->enable_disable_cb) error = reg->enable_disable_cb (sw_if_index, mp->enable); if (!error) -- cgit 1.2.3-korg From e101e1f52f1da71575588fac30b984a0e94fb5a7 Mon Sep 17 00:00:00 2001 From: Eyal Bari Date: Wed, 15 Mar 2017 08:23:42 +0200 Subject: VXLAN:add hidden multicast interface check and some refactoring Change-Id: I99e3c5e782ce65cb9779ccc3a9a3151ef1429e07 Signed-off-by: Eyal Bari --- src/vat/api_format.c | 6 ++---- src/vnet/ip/ip6_packet.h | 14 ++++++++------ src/vnet/vxlan/vxlan_api.c | 40 +++++++++++++++++----------------------- src/vpp/api/custom_dump.c | 6 ++---- 4 files changed, 29 insertions(+), 37 deletions(-) (limited to 'src/vpp/api') diff --git a/src/vat/api_format.c b/src/vat/api_format.c index e6e4acd9..391fe9cf 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -10671,10 +10671,8 @@ static void vl_api_vxlan_tunnel_details_t_handler (vl_api_vxlan_tunnel_details_t * mp) { vat_main_t *vam = &vat_main; - ip46_address_t src, dst; - - 
ip46_from_addr_buf (mp->is_ipv6, mp->src_address, &src); - ip46_from_addr_buf (mp->is_ipv6, mp->dst_address, &dst); + ip46_address_t src = to_ip46 (mp->is_ipv6, mp->dst_address); + ip46_address_t dst = to_ip46 (mp->is_ipv6, mp->src_address); print (vam->ofp, "%11d%24U%24U%14d%18d%13d%19d", ntohl (mp->sw_if_index), diff --git a/src/vnet/ip/ip6_packet.h b/src/vnet/ip/ip6_packet.h index bb7d7039..cdd7eed5 100644 --- a/src/vnet/ip/ip6_packet.h +++ b/src/vnet/ip/ip6_packet.h @@ -82,21 +82,23 @@ typedef CLIB_PACKED (union { #define ip46_address_is_equal(a1, a2) (((a1)->as_u64[0] == (a2)->as_u64[0]) \ && ((a1)->as_u64[1] == (a2)->as_u64[1])) -always_inline void -ip46_from_addr_buf (u32 is_ipv6, u8 * buf, ip46_address_t * ip) +always_inline ip46_address_t +to_ip46 (u32 is_ipv6, u8 * buf) { + ip46_address_t ip; if (is_ipv6) - ip->ip6 = *((ip6_address_t *) buf); + ip.ip6 = *((ip6_address_t *) buf); else - ip46_address_set_ip4 (ip, (ip4_address_t *) buf); + ip46_address_set_ip4 (&ip, (ip4_address_t *) buf); + return ip; } + always_inline void ip6_addr_fib_init (ip6_address_fib_t * addr_fib, ip6_address_t * address, u32 fib_index) { - addr_fib->ip6_addr.as_u64[0] = address->as_u64[0]; - addr_fib->ip6_addr.as_u64[1] = address->as_u64[1]; + addr_fib->ip6_addr = *address; addr_fib->fib_index = fib_index; } diff --git a/src/vnet/vxlan/vxlan_api.c b/src/vnet/vxlan/vxlan_api.c index c726c7c5..a2d41232 100644 --- a/src/vnet/vxlan/vxlan_api.c +++ b/src/vnet/vxlan/vxlan_api.c @@ -70,47 +70,41 @@ static void vl_api_vxlan_add_del_tunnel_t_handler { vl_api_vxlan_add_del_tunnel_reply_t *rmp; int rv = 0; - vnet_vxlan_add_del_tunnel_args_t _a, *a = &_a; - u32 encap_fib_index; - uword *p; ip4_main_t *im = &ip4_main; - vnet_main_t *vnm = vnet_get_main (); - u32 sw_if_index = ~0; - p = hash_get (im->fib_index_by_table_id, ntohl (mp->encap_vrf_id)); + uword *p = hash_get (im->fib_index_by_table_id, ntohl (mp->encap_vrf_id)); if (!p) { rv = VNET_API_ERROR_NO_SUCH_FIB; goto out; } - encap_fib_index = p[0]; - memset (a, 0, sizeof (*a)); - - a->is_add = mp->is_add; - a->is_ip6 = mp->is_ipv6; - /* ip addresses sent in network byte order */ - ip46_from_addr_buf (mp->is_ipv6, mp->dst_address, &a->dst); - ip46_from_addr_buf (mp->is_ipv6, mp->src_address, &a->src); + vnet_vxlan_add_del_tunnel_args_t a = { + .is_add = mp->is_add, + .is_ip6 = mp->is_ipv6, + .mcast_sw_if_index = ntohl (mp->mcast_sw_if_index), + .encap_fib_index = p[0], + .decap_next_index = ntohl (mp->decap_next_index), + .vni = ntohl (mp->vni), + .dst = to_ip46 (mp->is_ipv6, mp->dst_address), + .src = to_ip46 (mp->is_ipv6, mp->src_address), + }; /* Check src & dst are different */ - if (ip46_address_cmp (&a->dst, &a->src) == 0) + if (ip46_address_cmp (&a.dst, &a.src) == 0) { rv = VNET_API_ERROR_SAME_SRC_DST; goto out; } - a->mcast_sw_if_index = ntohl (mp->mcast_sw_if_index); - if (ip46_address_is_multicast (&a->dst) && - pool_is_free_index (vnm->interface_main.sw_interfaces, - a->mcast_sw_if_index)) + if (ip46_address_is_multicast (&a.dst) && + !vnet_sw_if_index_is_api_valid (a.mcast_sw_if_index)) { rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; goto out; } - a->encap_fib_index = encap_fib_index; - a->decap_next_index = ntohl (mp->decap_next_index); - a->vni = ntohl (mp->vni); - rv = vnet_vxlan_add_del_tunnel (a, &sw_if_index); + + u32 sw_if_index = ~0; + rv = vnet_vxlan_add_del_tunnel (&a, &sw_if_index); out: /* *INDENT-OFF* */ diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c index 648ac5f3..a76840a2 100644 --- a/src/vpp/api/custom_dump.c +++ 
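In passing, a hedged illustration of the new to_ip46 helper defined above; mp, is_ipv6 and dst_address stand for the usual binary-API message fields and are assumptions here, not part of this patch.

/* Build an ip46_address_t straight from an on-the-wire address buffer
 * (4 or 16 significant bytes depending on the is_ipv6 flag). */
ip46_address_t dst = to_ip46 (mp->is_ipv6, mp->dst_address);

if (ip46_address_is_multicast (&dst))
  {
    /* e.g. insist on a valid mcast_sw_if_index, as vxlan_api.c does */
  }
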
b/src/vpp/api/custom_dump.c @@ -1384,10 +1384,8 @@ static void *vl_api_vxlan_add_del_tunnel_t_print u8 *s; s = format (0, "SCRIPT: vxlan_add_del_tunnel "); - ip46_address_t src, dst; - - ip46_from_addr_buf (mp->is_ipv6, mp->dst_address, &dst); - ip46_from_addr_buf (mp->is_ipv6, mp->src_address, &src); + ip46_address_t src = to_ip46 (mp->is_ipv6, mp->dst_address); + ip46_address_t dst = to_ip46 (mp->is_ipv6, mp->src_address); u8 is_grp = ip46_address_is_multicast (&dst); char *dst_name = is_grp ? "group" : "dst"; -- cgit 1.2.3-korg From 7c0858e2a40a5e496ed39a815566ff47aa2fc2be Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Wed, 15 Mar 2017 09:52:19 -0400 Subject: Fix binary-api cmd/arg split logic Change-Id: If3dbce830684b5eab8944519424074b03cc7d703 Signed-off-by: Dave Barach --- src/vpp/api/api_main.c | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) (limited to 'src/vpp/api') diff --git a/src/vpp/api/api_main.c b/src/vpp/api/api_main.c index a59524cf..d48e4eff 100644 --- a/src/vpp/api/api_main.c +++ b/src/vpp/api/api_main.c @@ -84,25 +84,20 @@ api_command_fn (vlib_main_t * vm, vam->vl_input_queue = am->shmem_hdr->vl_input_queue; -#ifdef __COVERITY__ - /* - * Convince Coverity that it's not a NULL pointer... - * Done once for real below, since we never vec_free(vam->inbuf); - */ - vec_validate (vam->inbuf, 0); -#endif + /* vec_validated in the init routine */ + _vec_len (vam->inbuf) = 0; - vec_reset_length (vam->inbuf); vam->input = &_input; while (((c = unformat_get_input (input)) != '\n') && (c != UNFORMAT_END_OF_INPUT)) vec_add1 (vam->inbuf, c); - /* Add 1 octet's worth of extra space in case there are no args... */ + /* Null-terminate the command */ vec_add1 (vam->inbuf, 0); - /*$$$$ reinstall macro evaluator */ + /* In case no args given */ + vec_add1 (vam->inbuf, 0); /* Split input into cmd + args */ this_cmd = cmdp = vam->inbuf; @@ -123,7 +118,7 @@ api_command_fn (vlib_main_t * vm, /* Advance past the command */ while (argsp < (this_cmd + vec_len (this_cmd))) { - if (*argsp != ' ' && *argsp != '\t' && *argsp != '\n' && argsp != 0) + if (*argsp != ' ' && *argsp != '\t' && *argsp != '\n' && *argsp != 0) { argsp++; } @@ -133,15 +128,19 @@ api_command_fn (vlib_main_t * vm, /* NULL terminate the command */ *argsp++ = 0; - while (argsp < (this_cmd + vec_len (this_cmd))) - { - if (*argsp == ' ' || *argsp == '\t' || *argsp == '\n') - { - argsp++; - } - else - break; - } + /* No arguments? Ensure that argsp points to a proper (empty) string */ + if (argsp == (this_cmd + vec_len (this_cmd) - 1)) + argsp[0] = 0; + else + while (argsp < (this_cmd + vec_len (this_cmd))) + { + if (*argsp == ' ' || *argsp == '\t' || *argsp == '\n') + { + argsp++; + } + else + break; + } /* Blank input line? 
*/ if (*cmdp == 0) -- cgit 1.2.3-korg From 1bd01099a6512b6119bbf337b36222a6f0770d49 Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Wed, 15 Mar 2017 15:41:17 -0400 Subject: 64 bit per-thread counters after: TenGigabitEthernet5/0/1-output active 107522 17375708 0 7.22e0 161.60 TenGigabitEthernet5/0/1-tx active 107522 17375708 0 6.93e1 161.60 ip4-input-no-checksum active 107522 17375708 0 2.52e1 161.60 ip4-lookup active 107522 17375708 0 3.10e1 161.60 ip4-rewrite active 107522 17375708 0 2.52e1 161.60 before TenGigabitEthernet5/0/1-output active 433575 110995200 0 6.95e0 256.00 TenGigabitEthernet5/0/1-tx active 433575 110995200 0 7.14e1 256.00 ip4-input-no-checksum active 433575 110995200 0 2.66e1 256.00 ip4-lookup active 433575 110995200 0 3.29e1 256.00 ip4-rewrite active 433575 110995200 0 2.59e1 256.00 Change-Id: I46405bd22189f48a39f06e3443bb7e13f410b539 Signed-off-by: Neale Ranns --- src/vlib/counter.c | 66 +++++++-------- src/vlib/counter.h | 207 ++++++++++++++++------------------------------ src/vnet/ip/ip4_forward.c | 36 ++++---- src/vnet/map/map.c | 2 +- src/vnet/map/map_api.c | 2 +- src/vnet/rewrite.c | 2 +- src/vpp/api/api.c | 2 +- src/vpp/stats/stats.c | 16 ++-- 8 files changed, 132 insertions(+), 201 deletions(-) (limited to 'src/vpp/api') diff --git a/src/vlib/counter.c b/src/vlib/counter.c index 9f66e04d..62f4bd66 100644 --- a/src/vlib/counter.c +++ b/src/vlib/counter.c @@ -42,56 +42,36 @@ void vlib_clear_simple_counters (vlib_simple_counter_main_t * cm) { + counter_t *my_counters; uword i, j; - u16 *my_minis; - for (i = 0; i < vec_len (cm->minis); i++) + for (i = 0; i < vec_len (cm->counters); i++) { - my_minis = cm->minis[i]; + my_counters = cm->counters[i]; - for (j = 0; j < vec_len (my_minis); j++) + for (j = 0; j < vec_len (my_counters); j++) { - cm->maxi[j] += my_minis[j]; - my_minis[j] = 0; + my_counters[j] = 0; } } - - j = vec_len (cm->maxi); - if (j > 0) - vec_validate (cm->value_at_last_clear, j - 1); - for (i = 0; i < j; i++) - cm->value_at_last_clear[i] = cm->maxi[i]; } void vlib_clear_combined_counters (vlib_combined_counter_main_t * cm) { + vlib_counter_t *my_counters; uword i, j; - vlib_mini_counter_t *my_minis; - for (i = 0; i < vec_len (cm->minis); i++) + for (i = 0; i < vec_len (cm->counters); i++) { - my_minis = cm->minis[i]; + my_counters = cm->counters[i]; - for (j = 0; j < vec_len (my_minis); j++) + for (j = 0; j < vec_len (my_counters); j++) { - cm->maxi[j].packets += my_minis[j].packets; - cm->maxi[j].bytes += my_minis[j].bytes; - my_minis[j].packets = 0; - my_minis[j].bytes = 0; + my_counters[j].packets = 0; + my_counters[j].bytes = 0; } } - - j = vec_len (cm->maxi); - if (j > 0) - vec_validate (cm->value_at_last_clear, j - 1); - - for (i = 0; i < j; i++) - { - vlib_counter_t *c = vec_elt_at_index (cm->value_at_last_clear, i); - - c[0] = cm->maxi[i]; - } } void @@ -100,10 +80,9 @@ vlib_validate_simple_counter (vlib_simple_counter_main_t * cm, u32 index) vlib_thread_main_t *tm = vlib_get_thread_main (); int i; - vec_validate (cm->minis, tm->n_vlib_mains - 1); + vec_validate (cm->counters, tm->n_vlib_mains - 1); for (i = 0; i < tm->n_vlib_mains; i++) - vec_validate_aligned (cm->minis[i], index, CLIB_CACHE_LINE_BYTES); - vec_validate_aligned (cm->maxi, index, CLIB_CACHE_LINE_BYTES); + vec_validate_aligned (cm->counters[i], index, CLIB_CACHE_LINE_BYTES); } void @@ -112,10 +91,23 @@ vlib_validate_combined_counter (vlib_combined_counter_main_t * cm, u32 index) vlib_thread_main_t *tm = vlib_get_thread_main (); int i; - vec_validate (cm->minis, 
tm->n_vlib_mains - 1); + vec_validate (cm->counters, tm->n_vlib_mains - 1); for (i = 0; i < tm->n_vlib_mains; i++) - vec_validate_aligned (cm->minis[i], index, CLIB_CACHE_LINE_BYTES); - vec_validate_aligned (cm->maxi, index, CLIB_CACHE_LINE_BYTES); + vec_validate_aligned (cm->counters[i], index, CLIB_CACHE_LINE_BYTES); +} + +u32 +vlib_combined_counter_n_counters (const vlib_combined_counter_main_t * cm) +{ + ASSERT (cm->counters); + return (vec_len (cm->counters[0])); +} + +u32 +vlib_simple_counter_n_counters (const vlib_simple_counter_main_t * cm) +{ + ASSERT (cm->counters); + return (vec_len (cm->counters[0])); } void diff --git a/src/vlib/counter.h b/src/vlib/counter.h index abfa89ee..17a85217 100644 --- a/src/vlib/counter.h +++ b/src/vlib/counter.h @@ -44,59 +44,48 @@ Optimized thread-safe counters. - Each vlib_[simple|combined]_counter_main_t consists of a single - vector of thread-safe / atomically-updated u64 counters [the - "maxi" vector], and a (u16 **) per-thread vector [the "minis" - vector] of narrow, per-thread counters. - - The idea is to drastically reduce the number of atomic operations. - In the case of packet counts, we divide the number of atomic ops - by 2**16, etc. + Each vlib_[simple|combined]_counter_main_t consists of a per-thread + vector of per-object counters. + + The idea is to drastically eliminate atomic operations. */ +/** 64bit counters */ +typedef u64 counter_t; + /** A collection of simple counters */ typedef struct { - u16 **minis; /**< Per-thread u16 non-atomic counters */ - u64 *maxi; /**< Shared wide counters */ - u64 *value_at_last_clear; /**< Counter values as of last clear. */ - u64 *value_at_last_serialize; /**< Values as of last serialize. */ + counter_t **counters; /**< Per-thread u64 non-atomic counters */ + counter_t *value_at_last_serialize; /**< Values as of last serialize. */ u32 last_incremental_serialize_index; /**< Last counter index serialized incrementally. */ char *name; /**< The counter collection's name. */ } vlib_simple_counter_main_t; +/** The number of counters (not the number of per-thread counters) */ +u32 vlib_simple_counter_n_counters (const vlib_simple_counter_main_t * cm); + /** Increment a simple counter @param cm - (vlib_simple_counter_main_t *) simple counter main pointer @param cpu_index - (u32) the current cpu index @param index - (u32) index of the counter to increment - @param increment - (u32) quantitiy to add to the counter + @param increment - (u64) quantitiy to add to the counter */ always_inline void vlib_increment_simple_counter (vlib_simple_counter_main_t * cm, - u32 cpu_index, u32 index, u32 increment) + u32 cpu_index, u32 index, u64 increment) { - u16 *my_minis; - u16 *mini; - u32 old, new; - - my_minis = cm->minis[cpu_index]; - mini = vec_elt_at_index (my_minis, index); - old = mini[0]; - new = old + increment; - mini[0] = new; + counter_t *my_counters; - if (PREDICT_FALSE (mini[0] != new)) - { - __sync_fetch_and_add (&cm->maxi[index], new); - my_minis[index] = 0; - } + my_counters = cm->counters[cpu_index]; + my_counters[index] += increment; } /** Get the value of a simple counter - Scrapes the entire set of mini counters. Innacurate unless + Scrapes the entire set of per-thread counters. 
Innacurate unless worker threads which might increment the counter are barrier-synchronized @@ -104,30 +93,21 @@ vlib_increment_simple_counter (vlib_simple_counter_main_t * cm, @param index - (u32) index of the counter to fetch @returns - (u64) current counter value */ -always_inline u64 +always_inline counter_t vlib_get_simple_counter (vlib_simple_counter_main_t * cm, u32 index) { - u16 *my_minis, *mini; - u64 v; + counter_t *my_counters; + counter_t v; int i; - ASSERT (index < vec_len (cm->maxi)); + ASSERT (index < vlib_simple_counter_n_counters (cm)); v = 0; - for (i = 0; i < vec_len (cm->minis); i++) + for (i = 0; i < vec_len (cm->counters); i++) { - my_minis = cm->minis[i]; - mini = vec_elt_at_index (my_minis, index); - v += mini[0]; - } - - v += cm->maxi[index]; - - if (index < vec_len (cm->value_at_last_clear)) - { - ASSERT (v >= cm->value_at_last_clear[index]); - v -= cm->value_at_last_clear[index]; + my_counters = cm->counters[i]; + v += my_counters[index]; } return v; @@ -142,29 +122,24 @@ vlib_get_simple_counter (vlib_simple_counter_main_t * cm, u32 index) always_inline void vlib_zero_simple_counter (vlib_simple_counter_main_t * cm, u32 index) { - u16 *my_minis; + counter_t *my_counters; int i; - ASSERT (index < vec_len (cm->maxi)); + ASSERT (index < vlib_simple_counter_n_counters (cm)); - for (i = 0; i < vec_len (cm->minis); i++) + for (i = 0; i < vec_len (cm->counters); i++) { - my_minis = cm->minis[i]; - my_minis[index] = 0; + my_counters = cm->counters[i]; + my_counters[index] = 0; } - - cm->maxi[index] = 0; - - if (index < vec_len (cm->value_at_last_clear)) - cm->value_at_last_clear[index] = 0; } /** Combined counter to hold both packets and byte differences. */ typedef struct { - u64 packets; /**< packet counter */ - u64 bytes; /**< byte counter */ + counter_t packets; /**< packet counter */ + counter_t bytes; /**< byte counter */ } vlib_counter_t; /** Add two combined counters, results in the first counter @@ -201,24 +176,19 @@ vlib_counter_zero (vlib_counter_t * a) a->packets = a->bytes = 0; } -/** Mini combined counter */ -typedef struct -{ - u16 packets; /**< Packet count */ - i16 bytes; /**< Byte count */ -} vlib_mini_counter_t; - /** A collection of combined counters */ typedef struct { - vlib_mini_counter_t **minis; /**< Per-thread u16 non-atomic counter pairs */ - vlib_counter_t *maxi; /**< Shared wide counter pairs */ - vlib_counter_t *value_at_last_clear; /**< Counter values as of last clear. */ + vlib_counter_t **counters; /**< Per-thread u64 non-atomic counter pairs */ vlib_counter_t *value_at_last_serialize; /**< Counter values as of last serialize. */ u32 last_incremental_serialize_index; /**< Last counter index serialized incrementally. */ char *name; /**< The counter collection's name. 
*/ } vlib_combined_counter_main_t; +/** The number of counters (not the number of per-thread counters) */ +u32 vlib_combined_counter_n_counters (const vlib_combined_counter_main_t * + cm); + /** Clear a collection of simple counters @param cm - (vlib_simple_counter_main_t *) collection to clear */ @@ -233,62 +203,41 @@ void vlib_clear_combined_counters (vlib_combined_counter_main_t * cm); @param cm - (vlib_combined_counter_main_t *) comined counter main pointer @param cpu_index - (u32) the current cpu index @param index - (u32) index of the counter to increment - @param packet_increment - (u32) number of packets to add to the counter - @param byte_increment - (u32) number of bytes to add to the counter + @param packet_increment - (u64) number of packets to add to the counter + @param byte_increment - (u64) number of bytes to add to the counter */ always_inline void vlib_increment_combined_counter (vlib_combined_counter_main_t * cm, u32 cpu_index, - u32 index, - u32 packet_increment, u32 byte_increment) + u32 index, u64 n_packets, u64 n_bytes) { - vlib_mini_counter_t *my_minis, *mini; - u32 old_packets, new_packets; - i32 old_bytes, new_bytes; - - /* Use this CPU's mini counter array */ - my_minis = cm->minis[cpu_index]; - - mini = vec_elt_at_index (my_minis, index); - old_packets = mini->packets; - old_bytes = mini->bytes; - - new_packets = old_packets + packet_increment; - new_bytes = old_bytes + byte_increment; - - mini->packets = new_packets; - mini->bytes = new_bytes; - - /* Bytes always overflow before packets.. */ - if (PREDICT_FALSE (mini->bytes != new_bytes)) - { - vlib_counter_t *maxi = vec_elt_at_index (cm->maxi, index); + vlib_counter_t *my_counters; - __sync_fetch_and_add (&maxi->packets, new_packets); - __sync_fetch_and_add (&maxi->bytes, new_bytes); + /* Use this CPU's counter array */ + my_counters = cm->counters[cpu_index]; - mini->packets = 0; - mini->bytes = 0; - } + my_counters[index].packets += n_packets; + my_counters[index].bytes += n_bytes; } -#define vlib_prefetch_combined_counter(_cm, _cpu_index, _index) \ -{ \ - vlib_mini_counter_t *_cpu_minis; \ - \ - /* \ - * This CPU's mini index is assumed to already be in cache \ - */ \ - _cpu_minis = (_cm)->minis[(_cpu_index)]; \ - CLIB_PREFETCH(_cpu_minis + (_index), \ - sizeof(*_cpu_minis), \ - STORE); \ +/** Pre-fetch a per-thread combined counter for the given object index */ +always_inline void +vlib_prefetch_combined_counter (const vlib_combined_counter_main_t * cm, + u32 cpu_index, u32 index) +{ + vlib_counter_t *cpu_counters; + + /* + * This CPU's index is assumed to already be in cache + */ + cpu_counters = cm->counters[cpu_index]; + CLIB_PREFETCH (cpu_counters + index, CLIB_CACHE_LINE_BYTES, STORE); } /** Get the value of a combined counter, never called in the speed path - Scrapes the entire set of mini counters. Innacurate unless + Scrapes the entire set of per-thread counters. 
Innacurate unless worker threads which might increment the counter are barrier-synchronized @@ -298,35 +247,27 @@ vlib_increment_combined_counter (vlib_combined_counter_main_t * cm, */ static inline void -vlib_get_combined_counter (vlib_combined_counter_main_t * cm, +vlib_get_combined_counter (const vlib_combined_counter_main_t * cm, u32 index, vlib_counter_t * result) { - vlib_mini_counter_t *my_minis, *mini; - vlib_counter_t *maxi; + vlib_counter_t *my_counters, *counter; int i; result->packets = 0; result->bytes = 0; - for (i = 0; i < vec_len (cm->minis); i++) + for (i = 0; i < vec_len (cm->counters); i++) { - my_minis = cm->minis[i]; + my_counters = cm->counters[i]; - mini = vec_elt_at_index (my_minis, index); - result->packets += mini->packets; - result->bytes += mini->bytes; + counter = vec_elt_at_index (my_counters, index); + result->packets += counter->packets; + result->bytes += counter->bytes; } - - maxi = vec_elt_at_index (cm->maxi, index); - result->packets += maxi->packets; - result->bytes += maxi->bytes; - - if (index < vec_len (cm->value_at_last_clear)) - vlib_counter_sub (result, &cm->value_at_last_clear[index]); } /** Clear a combined counter - Clears the set of per-thread u16 counters, and the shared vlib_counter_t + Clears the set of per-thread counters. @param cm - (vlib_combined_counter_main_t *) combined counter main pointer @param index - (u32) index of the counter to clear @@ -334,21 +275,17 @@ vlib_get_combined_counter (vlib_combined_counter_main_t * cm, always_inline void vlib_zero_combined_counter (vlib_combined_counter_main_t * cm, u32 index) { - vlib_mini_counter_t *mini, *my_minis; + vlib_counter_t *my_counters, *counter; int i; - for (i = 0; i < vec_len (cm->minis); i++) + for (i = 0; i < vec_len (cm->counters); i++) { - my_minis = cm->minis[i]; + my_counters = cm->counters[i]; - mini = vec_elt_at_index (my_minis, index); - mini->packets = 0; - mini->bytes = 0; + counter = vec_elt_at_index (my_counters, index); + counter->packets = 0; + counter->bytes = 0; } - - vlib_counter_zero (&cm->maxi[index]); - if (index < vec_len (cm->value_at_last_clear)) - vlib_counter_zero (&cm->value_at_last_clear[index]); } /** validate a simple counter diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index 0dad61d4..7352c2e7 100644 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -2375,6 +2375,17 @@ ip4_rewrite_inline (vlib_main_t * vm, adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX]; + /* + * pre-fetch the per-adjacency counters + */ + if (do_counters) + { + vlib_prefetch_combined_counter (&adjacency_counters, + cpu_index, adj_index0); + vlib_prefetch_combined_counter (&adjacency_counters, + cpu_index, adj_index1); + } + /* We should never rewrite a pkt using the MISS adjacency */ ASSERT (adj_index0 && adj_index1); @@ -2480,17 +2491,6 @@ ip4_rewrite_inline (vlib_main_t * vm, rewrite_header.max_l3_packet_bytes ? 
IP4_ERROR_MTU_EXCEEDED : error1); - /* - * pre-fetch the per-adjacency counters - */ - if (do_counters) - { - vlib_prefetch_combined_counter (&adjacency_counters, - cpu_index, adj_index0); - vlib_prefetch_combined_counter (&adjacency_counters, - cpu_index, adj_index1); - } - /* Don't adjust the buffer for ttl issue; icmp-error node wants * to see the IP headerr */ if (PREDICT_TRUE (error0 == IP4_ERROR_NONE)) @@ -2624,8 +2624,9 @@ ip4_rewrite_inline (vlib_main_t * vm, p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED; } - vlib_prefetch_combined_counter (&adjacency_counters, - cpu_index, adj_index0); + if (do_counters) + vlib_prefetch_combined_counter (&adjacency_counters, + cpu_index, adj_index0); /* Guess we are only writing on simple Ethernet header. */ vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t)); @@ -2641,10 +2642,11 @@ ip4_rewrite_inline (vlib_main_t * vm, rw_len0 = adj0[0].rewrite_header.data_bytes; vnet_buffer (p0)->ip.save_rewrite_length = rw_len0; - vlib_increment_combined_counter - (&adjacency_counters, - cpu_index, - adj_index0, 1, vlib_buffer_length_in_chain (vm, p0) + rw_len0); + if (do_counters) + vlib_increment_combined_counter + (&adjacency_counters, + cpu_index, adj_index0, 1, + vlib_buffer_length_in_chain (vm, p0) + rw_len0); /* Check MTU of outgoing interface. */ error0 = (vlib_buffer_length_in_chain (vm, p0) diff --git a/src/vnet/map/map.c b/src/vnet/map/map.c index 7006b1db..99305afa 100644 --- a/src/vnet/map/map.c +++ b/src/vnet/map/map.c @@ -1304,7 +1304,7 @@ show_map_stats_command_fn (vlib_main_t * vm, unformat_input_t * input, { which = cm - mm->domain_counters; - for (i = 0; i < vec_len (cm->maxi); i++) + for (i = 0; i < vlib_combined_counter_n_counters (cm); i++) { vlib_get_combined_counter (cm, i, &v); total_pkts[which] += v.packets; diff --git a/src/vnet/map/map_api.c b/src/vnet/map/map_api.c index 7febeb3d..d618e7a6 100644 --- a/src/vnet/map/map_api.c +++ b/src/vnet/map/map_api.c @@ -211,7 +211,7 @@ vl_api_map_summary_stats_t_handler (vl_api_map_summary_stats_t * mp) { which = cm - mm->domain_counters; - for (i = 0; i < vec_len (cm->maxi); i++) + for (i = 0; i < vlib_combined_counter_n_counters (cm); i++) { vlib_get_combined_counter (cm, i, &v); total_pkts[which] += v.packets; diff --git a/src/vnet/rewrite.c b/src/vnet/rewrite.c index c4a171c1..47fb74df 100644 --- a/src/vnet/rewrite.c +++ b/src/vnet/rewrite.c @@ -79,7 +79,7 @@ format_vnet_rewrite (u8 * s, va_list * args) if (NULL != si) s = format (s, "%U: ", format_vnet_sw_interface_name, vnm, si); else - s = format (s, "DELETED"); + s = format (s, "DELETED:%d", rw->sw_if_index); } /* Format rewrite string. */ diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index d8301fa6..8df40406 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -849,7 +849,7 @@ vl_api_vnet_get_summary_stats_t_handler (vl_api_vnet_get_summary_stats_t * mp) { which = cm - im->combined_sw_if_counters; - for (i = 0; i < vec_len (cm->maxi); i++) + for (i = 0; i < vlib_combined_counter_n_counters (cm); i++) { vlib_get_combined_counter (cm, i, &v); total_pkts[which] += v.packets; diff --git a/src/vpp/stats/stats.c b/src/vpp/stats/stats.c index c46d441a..1927da0b 100644 --- a/src/vpp/stats/stats.c +++ b/src/vpp/stats/stats.c @@ -134,7 +134,7 @@ do_simple_interface_counters (stats_main_t * sm) vlib_simple_counter_main_t *cm; u32 items_this_message = 0; u64 v, *vp = 0; - int i; + int i, n_counts; /* * Prevent interface registration from expanding / moving the vectors... 
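To summarize the reworked counter API above in one place, a hedged sketch of the validate/increment/read cycle; the counter main, the object index and the byte count are illustrative, while the function names and signatures are taken from the patch.

/* One (packets, bytes) pair per object, replicated per thread;
 * increments are plain stores, reads sum across threads. */
vlib_combined_counter_main_t example_counters = {
  .name = "example",
};

/* Make room for object index 'idx' on every thread. */
vlib_validate_combined_counter (&example_counters, idx);

/* Fast path: non-atomic add on this thread's copy. */
vlib_increment_combined_counter (&example_counters, cpu_index, idx,
                                 1 /* packets */ , n_bytes);

/* Control plane: sum the per-thread copies (barrier-sync the workers
 * first if an exact value is required). */
vlib_counter_t total;
vlib_get_combined_counter (&example_counters, idx, &total);
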
@@ -144,13 +144,13 @@ do_simple_interface_counters (stats_main_t * sm) vec_foreach (cm, im->sw_if_counters) { - - for (i = 0; i < vec_len (cm->maxi); i++) + n_counts = vlib_simple_counter_n_counters (cm); + for (i = 0; i < n_counts; i++) { if (mp == 0) { items_this_message = clib_min (SIMPLE_COUNTER_BATCH_SIZE, - vec_len (cm->maxi) - i); + n_counts - i); mp = vl_msg_api_alloc_as_if_client (sizeof (*mp) + items_this_message * sizeof (v)); @@ -189,19 +189,19 @@ do_combined_interface_counters (stats_main_t * sm) vlib_combined_counter_main_t *cm; u32 items_this_message = 0; vlib_counter_t v, *vp = 0; - int i; + int i, n_counts; vnet_interface_counter_lock (im); vec_foreach (cm, im->combined_sw_if_counters) { - - for (i = 0; i < vec_len (cm->maxi); i++) + n_counts = vlib_combined_counter_n_counters (cm); + for (i = 0; i < n_counts; i++) { if (mp == 0) { items_this_message = clib_min (COMBINED_COUNTER_BATCH_SIZE, - vec_len (cm->maxi) - i); + n_counts - i); mp = vl_msg_api_alloc_as_if_client (sizeof (*mp) + items_this_message * sizeof (v)); -- cgit 1.2.3-korg From f15866146adcf3273da6f29f05d42193c7af4b07 Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Thu, 30 Mar 2017 09:33:01 -0400 Subject: Fix more "symbol XXX found in none of the libs" warnings Change-Id: I4467f26da5bdcfd76e5914e0124a83b659757947 Signed-off-by: Dave Barach --- gmod/Makefile.am | 2 +- gmod/gmod/mod_vpp.c | 6 +++--- src/vat/main.c | 1 - src/vnet/interface_api.c | 1 + src/vpp/api/api.c | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) (limited to 'src/vpp/api') diff --git a/gmod/Makefile.am b/gmod/Makefile.am index e57f5788..643e170d 100644 --- a/gmod/Makefile.am +++ b/gmod/Makefile.am @@ -21,7 +21,7 @@ libgmodvpp_la_SOURCES = gmod/mod_vpp.c libgmodvpp_la_LDFLAGS = -module -avoid-version -libgmodvpp_la_LIBADD = -lsvm -lsvmdb -lvppinfra +libgmodvpp_la_LIBADD = -lsvm -lsvmdb -lvppinfra -lapr-1 gconfdir = $(prefix)/etc/conf.d gconf_DATA = gmod/vpp.conf diff --git a/gmod/gmod/mod_vpp.c b/gmod/gmod/mod_vpp.c index 572f9ef5..71479d2a 100644 --- a/gmod/gmod/mod_vpp.c +++ b/gmod/gmod/mod_vpp.c @@ -37,14 +37,14 @@ static int vpp_metric_init (apr_pool_t *p) int i; if (str_params) { - debug_msg("[mod_vpp]Received string params: %s", str_params); + clib_warning("[mod_vpp]Received string params: %s", str_params); } /* Multiple name/value pair parameters. 
*/ if (list_params) { - debug_msg("[mod_vpp]Received following params list: "); + clib_warning("[mod_vpp]Received following params list: "); params = (mmparam*) list_params->elts; for(i=0; i< list_params->nelts; i++) { - debug_msg("\tParam: %s = %s", params[i].name, params[i].value); + clib_warning("\tParam: %s = %s", params[i].name, params[i].value); } } diff --git a/src/vat/main.c b/src/vat/main.c index e01d15ec..9e8bb2fe 100644 --- a/src/vat/main.c +++ b/src/vat/main.c @@ -19,7 +19,6 @@ vat_main_t vat_main; #include -vpe_api_main_t vpe_api_main; void vat_suspend (vlib_main_t * vm, f64 interval) diff --git a/src/vnet/interface_api.c b/src/vnet/interface_api.c index 39c06271..0b4fa81a 100644 --- a/src/vnet/interface_api.c +++ b/src/vnet/interface_api.c @@ -46,6 +46,7 @@ #undef vl_printfun #include +vpe_api_main_t vpe_api_main; #define foreach_vpe_api_msg \ _(SW_INTERFACE_SET_FLAGS, sw_interface_set_flags) \ diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index 8df40406..7f6a125e 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -158,7 +158,7 @@ typedef enum } resolve_t; static vlib_node_registration_t vpe_resolver_process_node; -vpe_api_main_t vpe_api_main; +extern vpe_api_main_t vpe_api_main; static int arp_change_delete_callback (u32 pool_index, u8 * notused); static int nd_change_delete_callback (u32 pool_index, u8 * notused); -- cgit 1.2.3-korg From 20e1f2acd5d05a0a238ab8b8a870273799423e83 Mon Sep 17 00:00:00 2001 From: John Lo Date: Wed, 29 Mar 2017 13:35:43 -0400 Subject: Fix pid field endian in ARP/ND/DHCP event related API messages Make sure pid field in these API messages is stored in network order (it is also kept and used by VPP in network order). Change-Id: Id5d08e7a45b7e49d4b840a337458d99414d0b949 Signed-off-by: John Lo --- src/vat/api_format.c | 18 +++++++++--------- src/vpp/api/api.c | 4 ++-- src/vpp/api/custom_dump.c | 6 +++--- 3 files changed, 14 insertions(+), 14 deletions(-) (limited to 'src/vpp/api') diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 3b57ac61..fca2b37a 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -1295,9 +1295,9 @@ static void vl_api_ip4_arp_event_t_handler (vl_api_ip4_arp_event_t * mp) { u32 sw_if_index = ntohl (mp->sw_if_index); - errmsg ("arp %s event: address %U new mac %U sw_if_index %d", + errmsg ("arp %s event: pid %d address %U new mac %U sw_if_index %d\n", mp->mac_ip ? "mac/ip binding" : "address resolution", - format_ip4_address, &mp->address, + ntohl (mp->pid), format_ip4_address, &mp->address, format_ethernet_address, mp->new_mac, sw_if_index); } @@ -1311,9 +1311,9 @@ static void vl_api_ip6_nd_event_t_handler (vl_api_ip6_nd_event_t * mp) { u32 sw_if_index = ntohl (mp->sw_if_index); - errmsg ("ip6 nd %s event: address %U new mac %U sw_if_index %d", + errmsg ("ip6 nd %s event: pid %d address %U new mac %U sw_if_index %d\n", mp->mac_ip ? "mac/ip binding" : "address resolution", - format_ip6_address, mp->address, + ntohl (mp->pid), format_ip6_address, mp->address, format_ethernet_address, mp->new_mac, sw_if_index); } @@ -2025,7 +2025,7 @@ vl_api_dhcp_compl_event_t_handler (vl_api_dhcp_compl_event_t * mp) { errmsg ("DHCP compl event: pid %d %s hostname %s host_addr %U " "router_addr %U host_mac %U", - mp->pid, mp->is_ipv6 ? "ipv6" : "ipv4", mp->hostname, + ntohl (mp->pid), mp->is_ipv6 ? 
"ipv6" : "ipv4", mp->hostname, format_ip4_address, &mp->host_address, format_ip4_address, &mp->router_address, format_ethernet_address, mp->host_mac); @@ -8035,12 +8035,12 @@ api_dhcp_client_config (vat_main_t * vam) /* Construct the API message */ M (DHCP_CLIENT_CONFIG, mp); - mp->sw_if_index = ntohl (sw_if_index); + mp->sw_if_index = htonl (sw_if_index); clib_memcpy (mp->hostname, hostname, vec_len (hostname)); vec_free (hostname); mp->is_add = is_add; mp->want_dhcp_event = disable_event ? 0 : 1; - mp->pid = getpid (); + mp->pid = htonl (getpid ()); /* send it... */ S (mp); @@ -11833,7 +11833,7 @@ api_want_ip4_arp_events (vat_main_t * vam) M (WANT_IP4_ARP_EVENTS, mp); mp->enable_disable = enable_disable; - mp->pid = getpid (); + mp->pid = htonl (getpid ()); mp->address = address.as_u32; S (mp); @@ -11869,7 +11869,7 @@ api_want_ip6_nd_events (vat_main_t * vam) M (WANT_IP6_ND_EVENTS, mp); mp->enable_disable = enable_disable; - mp->pid = getpid (); + mp->pid = htonl (getpid ()); clib_memcpy (mp->address, &address, sizeof (ip6_address_t)); S (mp); diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index 7f6a125e..f169d7fc 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -2268,7 +2268,7 @@ format_arp_event (u8 * s, va_list * args) { vl_api_ip4_arp_event_t *event = va_arg (*args, vl_api_ip4_arp_event_t *); - s = format (s, "pid %d: ", event->pid); + s = format (s, "pid %d: ", ntohl (event->pid)); if (event->mac_ip) s = format (s, "bd mac/ip4 binding events"); else @@ -2281,7 +2281,7 @@ format_nd_event (u8 * s, va_list * args) { vl_api_ip6_nd_event_t *event = va_arg (*args, vl_api_ip6_nd_event_t *); - s = format (s, "pid %d: ", event->pid); + s = format (s, "pid %d: ", ntohl (event->pid)); if (event->mac_ip) s = format (s, "bd mac/ip6 binding events"); else diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c index a76840a2..1e841eff 100644 --- a/src/vpp/api/custom_dump.c +++ b/src/vpp/api/custom_dump.c @@ -792,7 +792,7 @@ static void *vl_api_dhcp_client_config_t_print s = format (s, "want_dhcp_event %d ", mp->want_dhcp_event); - s = format (s, "pid %d ", mp->pid); + s = format (s, "pid %d ", ntohl (mp->pid)); if (mp->is_add == 0) s = format (s, "del "); @@ -1710,7 +1710,7 @@ static void *vl_api_want_ip4_arp_events_t_print u8 *s; s = format (0, "SCRIPT: want_ip4_arp_events "); - s = format (s, "pid %d address %U ", mp->pid, + s = format (s, "pid %d address %U ", ntohl (mp->pid), format_ip4_address, &mp->address); if (mp->enable_disable == 0) s = format (s, "del "); @@ -1724,7 +1724,7 @@ static void *vl_api_want_ip6_nd_events_t_print u8 *s; s = format (0, "SCRIPT: want_ip6_nd_events "); - s = format (s, "pid %d address %U ", mp->pid, + s = format (s, "pid %d address %U ", ntohl (mp->pid), format_ip6_address, mp->address); if (mp->enable_disable == 0) s = format (s, "del "); -- cgit 1.2.3-korg From 8db1de83ec540e01bb0577b726770bbb2338edcb Mon Sep 17 00:00:00 2001 From: Eyal Bari Date: Fri, 31 Mar 2017 02:15:17 +0300 Subject: ARP/API:protect against identical registrations Change-Id: Ia3acf87d3e07a7d41c047869de504e1972334b55 Signed-off-by: Eyal Bari --- src/vnet/api_errno.h | 3 +- src/vnet/ethernet/arp.c | 93 ++++++++++++++++++++----------------------------- src/vpp/api/api.c | 15 ++++---- 3 files changed, 49 insertions(+), 62 deletions(-) (limited to 'src/vpp/api') diff --git a/src/vnet/api_errno.h b/src/vnet/api_errno.h index 74d39bdb..f3ffd2a6 100644 --- a/src/vnet/api_errno.h +++ b/src/vnet/api_errno.h @@ -104,7 +104,8 @@ _(BFD_EAGAIN, -111, "BFD object cannot be 
manipulated at this time") \ _(INVALID_GPE_MODE, -112, "Invalid GPE mode") \ _(LISP_GPE_ENTRIES_PRESENT, -113, "LISP GPE entries are present") \ _(ADDRESS_FOUND_FOR_INTERFACE, -114, "Address found for interface") \ -_(SESSION_CONNECT_FAIL, -115, "Session failed to connect") +_(SESSION_CONNECT_FAIL, -115, "Session failed to connect") \ +_(ENTRY_ALREADY_EXISTS, -116, "Entry already exists") typedef enum { diff --git a/src/vnet/ethernet/arp.c b/src/vnet/ethernet/arp.c index 711b7867..09ecd0cc 100644 --- a/src/vnet/ethernet/arp.c +++ b/src/vnet/ethernet/arp.c @@ -702,76 +702,59 @@ vnet_add_del_ip4_arp_change_event (vnet_main_t * vnm, { ethernet_arp_main_t *am = ðernet_arp_main; ip4_address_t *address = address_arg; - uword *p; + + /* Try to find an existing entry */ + u32 *first = (u32 *) hash_get (am->mac_changes_by_address, address->as_u32); + u32 *p = first; pending_resolution_t *mc; - void (*fp) (u32, u8 *) = data_callback; + while (p && *p != ~0) + { + mc = pool_elt_at_index (am->mac_changes, *p); + if (mc->node_index == node_index && mc->type_opaque == type_opaque + && mc->pid == pid) + break; + p = &mc->next_index; + } + int found = p && *p != ~0; if (is_add) { - pool_get (am->mac_changes, mc); + if (found) + return VNET_API_ERROR_ENTRY_ALREADY_EXISTS; - mc->next_index = ~0; - mc->node_index = node_index; - mc->type_opaque = type_opaque; - mc->data = data; - mc->data_callback = data_callback; - mc->pid = pid; + pool_get (am->mac_changes, mc); + *mc = (pending_resolution_t) + { + .next_index = ~0,.node_index = node_index,.type_opaque = + type_opaque,.data = data,.data_callback = data_callback,.pid = + pid,}; - p = hash_get (am->mac_changes_by_address, address->as_u32); + /* Insert new resolution at the end of the list */ + u32 new_idx = mc - am->mac_changes; if (p) - { - /* Insert new resolution at the head of the list */ - mc->next_index = p[0]; - hash_unset (am->mac_changes_by_address, address->as_u32); - } - - hash_set (am->mac_changes_by_address, address->as_u32, - mc - am->mac_changes); - return 0; + p[0] = new_idx; + else + hash_set (am->mac_changes_by_address, address->as_u32, new_idx); } else { - u32 index; - pending_resolution_t *mc_last = 0; - - p = hash_get (am->mac_changes_by_address, address->as_u32); - if (p == 0) + if (!found) return VNET_API_ERROR_NO_SUCH_ENTRY; - index = p[0]; + /* Clients may need to clean up pool entries, too */ + void (*fp) (u32, u8 *) = data_callback; + if (fp) + (*fp) (mc->data, 0 /* no new mac addrs */ ); - while (index != (u32) ~ 0) - { - mc = pool_elt_at_index (am->mac_changes, index); - if (mc->node_index == node_index && - mc->type_opaque == type_opaque && mc->pid == pid) - { - /* Clients may need to clean up pool entries, too */ - if (fp) - (*fp) (mc->data, 0 /* no new mac addrs */ ); - if (index == p[0]) - { - hash_unset (am->mac_changes_by_address, address->as_u32); - if (mc->next_index != ~0) - hash_set (am->mac_changes_by_address, address->as_u32, - mc->next_index); - pool_put (am->mac_changes, mc); - return 0; - } - else - { - ASSERT (mc_last); - mc_last->next_index = mc->next_index; - pool_put (am->mac_changes, mc); - return 0; - } - } - mc_last = mc; - index = mc->next_index; - } + /* Remove the entry from the list and delete the entry */ + *p = mc->next_index; + pool_put (am->mac_changes, mc); - return VNET_API_ERROR_NO_SUCH_ENTRY; + /* Remove from hash if we deleted the last entry */ + if (*p == ~0 && p == first) + hash_unset (am->mac_changes_by_address, address->as_u32); } + return 0; } /* Either we drop the packet or we send a reply 
to the sender. */ diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index f169d7fc..14ccd864 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -1574,6 +1574,14 @@ vl_api_want_ip4_arp_events_t_handler (vl_api_want_ip4_arp_events_t * mp) if (mp->enable_disable) { + rv = vnet_add_del_ip4_arp_change_event + (vnm, arp_change_data_callback, + mp->pid, &mp->address /* addr, in net byte order */ , + vpe_resolver_process_node.index, + IP4_ARP_EVENT, event - am->arp_events, 1 /* is_add */ ); + + if (rv) + goto out; pool_get (am->arp_events, event); memset (event, 0, sizeof (*event)); @@ -1584,12 +1592,6 @@ vl_api_want_ip4_arp_events_t_handler (vl_api_want_ip4_arp_events_t * mp) event->pid = mp->pid; if (mp->address == 0) event->mac_ip = 1; - - rv = vnet_add_del_ip4_arp_change_event - (vnm, arp_change_data_callback, - mp->pid, &mp->address /* addr, in net byte order */ , - vpe_resolver_process_node.index, - IP4_ARP_EVENT, event - am->arp_events, 1 /* is_add */ ); } else { @@ -1599,6 +1601,7 @@ vl_api_want_ip4_arp_events_t_handler (vl_api_want_ip4_arp_events_t * mp) vpe_resolver_process_node.index, IP4_ARP_EVENT, ~0 /* pool index */ , 0 /* is_add */ ); } +out: REPLY_MACRO (VL_API_WANT_IP4_ARP_EVENTS_REPLY); } -- cgit 1.2.3-korg From a3af337e06a79f7d1dacf42a319f241c907122fc Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Tue, 28 Mar 2017 03:49:52 -0700 Subject: MTRIE Optimisations 2 1) 16-8-8 stride. Reduce trie depth walk traded with increased memory in the top PLY. 2) separate the vector of protocol-independent (PI) fib_table_t with the vector of protocol dependent (PD) FIBs. PD FIBs are large structures, we don't want to burn the memory for ech PD type 3) Go straight to the PD FIB in the data-path thus avoiding an indirection through, e.g., a PLY pool. 
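For readers following the stride change, here is a minimal, self-contained C sketch of the lookup shape described above. It uses toy types only, not the ip4_fib_mtrie_16_ply_t / ip4_fib_mtrie_8_ply_t structures this patch actually adds: the embedded root ply consumes the first two bytes of the destination address in one step, and at most two further 8-bit plys are walked out of a shared pool, so the per-lookup indirection to a per-FIB ply pool disappears from the data path. It is a reading aid under those assumptions, not part of the commit.

/*
 * Illustrative sketch only: simplified stand-ins for the real mtrie types.
 * Leaf encoding mirrors the trie: odd = terminal (load-balance index),
 * even = pointer to the next 8-bit ply in a shared pool.
 */
#include <stdint.h>

#define PLY16_SLOTS (1 << 16)
#define PLY8_SLOTS  (1 << 8)

typedef uint32_t leaf_t;

typedef struct { leaf_t leaves[PLY16_SLOTS]; } ply16_t; /* root: 2 bytes wide */
typedef struct { leaf_t leaves[PLY8_SLOTS];  } ply8_t;  /* lower: 1 byte wide */

static inline int
leaf_is_terminal (leaf_t l)
{
  return l & 1;
}

/* Walk at most three plys: 16 bits, then 8, then 8. */
static uint32_t
mtrie_16_8_8_lookup (const ply16_t *root, const ply8_t *ply_pool,
                     const uint8_t addr[4])
{
  leaf_t l = root->leaves[(addr[0] << 8) | addr[1]]; /* bytes 0..1 in one step */

  if (!leaf_is_terminal (l))
    l = ply_pool[l >> 1].leaves[addr[2]];            /* byte 2 */
  if (!leaf_is_terminal (l))
    l = ply_pool[l >> 1].leaves[addr[3]];            /* byte 3 */

  return l >> 1;                                     /* load-balance index */
}
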
Change-Id: I800d1ed0b2049040d5da95213f3ed6b12bdd78b7 Signed-off-by: Neale Ranns --- src/vnet/cop/ip4_whitelist.c | 9 - src/vnet/dpo/load_balance.c | 6 +- src/vnet/dpo/lookup_dpo.c | 4 - src/vnet/fib/fib.c | 2 + src/vnet/fib/fib_entry.c | 4 +- src/vnet/fib/fib_path.c | 1 + src/vnet/fib/fib_table.c | 24 +- src/vnet/fib/fib_table.h | 12 - src/vnet/fib/fib_test.c | 6 +- src/vnet/fib/ip4_fib.c | 60 +++- src/vnet/fib/ip4_fib.h | 35 ++- src/vnet/fib/ip6_fib.c | 18 +- src/vnet/fib/ip6_fib.h | 2 +- src/vnet/fib/mpls_fib.c | 21 +- src/vnet/fib/mpls_fib.h | 28 +- src/vnet/ip/ip4.h | 29 +- src/vnet/ip/ip4_forward.c | 21 -- src/vnet/ip/ip4_mtrie.c | 611 +++++++++++++++++++++++++++-------------- src/vnet/ip/ip4_mtrie.h | 106 +++++-- src/vnet/ip/ip4_packet.h | 1 + src/vnet/ip/ip4_source_check.c | 8 - src/vnet/ip/ip6.h | 3 + src/vnet/ip/ip_api.c | 43 ++- src/vnet/mpls/interface.c | 1 + src/vnet/mpls/mpls.h | 26 +- src/vnet/mpls/mpls_api.c | 38 ++- src/vpp/api/api.c | 6 +- src/vpp/stats/stats.c | 8 +- 28 files changed, 720 insertions(+), 413 deletions(-) (limited to 'src/vpp/api') diff --git a/src/vnet/cop/ip4_whitelist.c b/src/vnet/cop/ip4_whitelist.c index ccb9dc03..6ef3d7d7 100644 --- a/src/vnet/cop/ip4_whitelist.c +++ b/src/vnet/cop/ip4_whitelist.c @@ -127,9 +127,6 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm, leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address); - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, - &ip0->src_address, 1); - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2); @@ -166,9 +163,6 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm, leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, - &ip1->src_address, 1); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2); @@ -263,9 +257,6 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm, leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address); - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, - &ip0->src_address, 1); - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2); diff --git a/src/vnet/dpo/load_balance.c b/src/vnet/dpo/load_balance.c index d5e98e4e..6b0eda0e 100644 --- a/src/vnet/dpo/load_balance.c +++ b/src/vnet/dpo/load_balance.c @@ -827,14 +827,18 @@ const static char* const * const load_balance_nodes[DPO_PROTO_NUM] = void load_balance_module_init (void) { + index_t lbi; + dpo_register(DPO_LOAD_BALANCE, &lb_vft, load_balance_nodes); /* * Special LB with index zero. we need to define this since the v4 mtrie * assumes an index of 0 implies the ply is empty. therefore all 'real' * adjs need a non-zero index. + * This should never be used, but just in case, stack it on a drop. 
*/ - load_balance_create(0, DPO_PROTO_IP4, 0); + lbi = load_balance_create(1, DPO_PROTO_IP4, 0); + load_balance_set_bucket(lbi, 0, drop_dpo_get(DPO_PROTO_IP4)); load_balance_map_module_init(); } diff --git a/src/vnet/dpo/lookup_dpo.c b/src/vnet/dpo/lookup_dpo.c index 3726c8fe..e94e871c 100644 --- a/src/vnet/dpo/lookup_dpo.c +++ b/src/vnet/dpo/lookup_dpo.c @@ -211,7 +211,6 @@ ip4_src_fib_lookup_one (u32 src_fib_index0, mtrie0 = &ip4_fib_get (src_fib_index0)->mtrie; leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, addr0); - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 1); leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 2); leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 3); @@ -235,9 +234,6 @@ ip4_src_fib_lookup_two (u32 src_fib_index0, leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, addr0); leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, addr1); - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 1); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 1); - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 2); leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 2); diff --git a/src/vnet/fib/fib.c b/src/vnet/fib/fib.c index 413f93e8..b430e113 100644 --- a/src/vnet/fib/fib.c +++ b/src/vnet/fib/fib.c @@ -28,6 +28,8 @@ fib_module_init (vlib_main_t * vm) return (error); if ((error = vlib_call_init_function (vm, adj_module_init))) return (error); + if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init))) + return (error); fib_entry_module_init(); fib_entry_src_module_init(); diff --git a/src/vnet/fib/fib_entry.c b/src/vnet/fib/fib_entry.c index 25005e11..6ac5461d 100644 --- a/src/vnet/fib/fib_entry.c +++ b/src/vnet/fib/fib_entry.c @@ -924,10 +924,10 @@ fib_entry_path_remove (fib_node_index_t fib_entry_index, /* * no more sources left. this entry is toast. */ - fib_entry_src_action_uninstall(fib_entry); fib_entry = fib_entry_post_flag_update_actions(fib_entry, source, bflags); + fib_entry_src_action_uninstall(fib_entry); return (FIB_ENTRY_SRC_FLAG_NONE); } @@ -1014,10 +1014,10 @@ fib_entry_special_remove (fib_node_index_t fib_entry_index, /* * no more sources left. this entry is toast. 
*/ - fib_entry_src_action_uninstall(fib_entry); fib_entry = fib_entry_post_flag_update_actions(fib_entry, source, bflags); + fib_entry_src_action_uninstall(fib_entry); return (FIB_ENTRY_SRC_FLAG_NONE); } diff --git a/src/vnet/fib/fib_path.c b/src/vnet/fib/fib_path.c index 3ed309f3..928a9d43 100644 --- a/src/vnet/fib/fib_path.c +++ b/src/vnet/fib/fib_path.c @@ -32,6 +32,7 @@ #include #include #include +#include /** * Enurmeration of path types diff --git a/src/vnet/fib/fib_table.c b/src/vnet/fib/fib_table.c index 7818d02e..6c3162e7 100644 --- a/src/vnet/fib/fib_table.c +++ b/src/vnet/fib/fib_table.c @@ -47,7 +47,7 @@ fib_table_lookup_i (fib_table_t *fib_table, switch (prefix->fp_proto) { case FIB_PROTOCOL_IP4: - return (ip4_fib_table_lookup(&fib_table->v4, + return (ip4_fib_table_lookup(ip4_fib_get(fib_table->ft_index), &prefix->fp_addr.ip4, prefix->fp_len)); case FIB_PROTOCOL_IP6: @@ -55,7 +55,7 @@ fib_table_lookup_i (fib_table_t *fib_table, &prefix->fp_addr.ip6, prefix->fp_len)); case FIB_PROTOCOL_MPLS: - return (mpls_fib_table_lookup(&fib_table->mpls, + return (mpls_fib_table_lookup(mpls_fib_get(fib_table->ft_index), prefix->fp_label, prefix->fp_eos)); } @@ -76,7 +76,7 @@ fib_table_lookup_exact_match_i (const fib_table_t *fib_table, switch (prefix->fp_proto) { case FIB_PROTOCOL_IP4: - return (ip4_fib_table_lookup_exact_match(&fib_table->v4, + return (ip4_fib_table_lookup_exact_match(ip4_fib_get(fib_table->ft_index), &prefix->fp_addr.ip4, prefix->fp_len)); case FIB_PROTOCOL_IP6: @@ -84,7 +84,7 @@ fib_table_lookup_exact_match_i (const fib_table_t *fib_table, &prefix->fp_addr.ip6, prefix->fp_len)); case FIB_PROTOCOL_MPLS: - return (mpls_fib_table_lookup(&fib_table->mpls, + return (mpls_fib_table_lookup(mpls_fib_get(fib_table->ft_index), prefix->fp_label, prefix->fp_eos)); } @@ -148,7 +148,7 @@ fib_table_entry_remove (fib_table_t *fib_table, switch (prefix->fp_proto) { case FIB_PROTOCOL_IP4: - ip4_fib_table_entry_remove(&fib_table->v4, + ip4_fib_table_entry_remove(ip4_fib_get(fib_table->ft_index), &prefix->fp_addr.ip4, prefix->fp_len); break; @@ -158,7 +158,7 @@ fib_table_entry_remove (fib_table_t *fib_table, prefix->fp_len); break; case FIB_PROTOCOL_MPLS: - mpls_fib_table_entry_remove(&fib_table->mpls, + mpls_fib_table_entry_remove(mpls_fib_get(fib_table->ft_index), prefix->fp_label, prefix->fp_eos); break; @@ -208,7 +208,7 @@ fib_table_entry_insert (fib_table_t *fib_table, switch (prefix->fp_proto) { case FIB_PROTOCOL_IP4: - ip4_fib_table_entry_insert(&fib_table->v4, + ip4_fib_table_entry_insert(ip4_fib_get(fib_table->ft_index), &prefix->fp_addr.ip4, prefix->fp_len, fib_entry_index); @@ -220,7 +220,7 @@ fib_table_entry_insert (fib_table_t *fib_table, fib_entry_index); break; case FIB_PROTOCOL_MPLS: - mpls_fib_table_entry_insert(&fib_table->mpls, + mpls_fib_table_entry_insert(mpls_fib_get(fib_table->ft_index), prefix->fp_label, prefix->fp_eos, fib_entry_index); @@ -270,7 +270,9 @@ fib_table_fwding_dpo_remove (u32 fib_index, return (ip4_fib_table_fwding_dpo_remove(ip4_fib_get(fib_index), &prefix->fp_addr.ip4, prefix->fp_len, - dpo)); + dpo, + fib_table_get_less_specific(fib_index, + prefix))); case FIB_PROTOCOL_IP6: return (ip6_fib_table_fwding_dpo_remove(fib_index, &prefix->fp_addr.ip6, @@ -1034,13 +1036,13 @@ fib_table_destroy (fib_table_t *fib_table) switch (fib_table->ft_proto) { case FIB_PROTOCOL_IP4: - ip4_fib_table_destroy(&fib_table->v4); + ip4_fib_table_destroy(fib_table->ft_index); break; case FIB_PROTOCOL_IP6: ip6_fib_table_destroy(fib_table->ft_index); break; case 
FIB_PROTOCOL_MPLS: - mpls_fib_table_destroy(&fib_table->mpls); + mpls_fib_table_destroy(fib_table->ft_index); break; } } diff --git a/src/vnet/fib/fib_table.h b/src/vnet/fib/fib_table.h index e7e66acb..b310aea6 100644 --- a/src/vnet/fib/fib_table.h +++ b/src/vnet/fib/fib_table.h @@ -28,18 +28,6 @@ */ typedef struct fib_table_t_ { - /** - * A union of the protocol specific FIBs that provide the - * underlying LPM mechanism. - * This element is first in the struct so that it is in the - * first cache line. - */ - union { - ip4_fib_t v4; - ip6_fib_t v6; - mpls_fib_t mpls; - }; - /** * Which protocol this table serves. Used to switch on the union above. */ diff --git a/src/vnet/fib/fib_test.c b/src/vnet/fib/fib_test.c index 1a9cce24..92141ddf 100644 --- a/src/vnet/fib/fib_test.c +++ b/src/vnet/fib/fib_test.c @@ -40,8 +40,6 @@ fformat(stderr, "FAIL:%d: " _comment "\n", \ __LINE__, ##_args); \ } else { \ - fformat(stderr, "PASS:%d: " _comment "\n", \ - __LINE__, ##_args); \ } \ _evald; \ }) @@ -5727,7 +5725,7 @@ fib_test_label (void) &a_o_10_10_11_1, &adj_o_10_10_11_2), "1.1.1.1/32 LB 2 buckets via: " - "adj over 10.10.11.1", + "adj over 10.10.11.1, " "adj-v4 over 10.10.11.2"); fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID, @@ -5738,7 +5736,7 @@ fib_test_label (void) &a_o_10_10_11_1, &adj_o_10_10_11_2), "24001/eos LB 2 buckets via: " - "adj over 10.10.11.1", + "adj over 10.10.11.1, " "adj-v4 over 10.10.11.2"); fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID, diff --git a/src/vnet/fib/ip4_fib.c b/src/vnet/fib/ip4_fib.c index a7915620..98d4e52f 100644 --- a/src/vnet/fib/ip4_fib.c +++ b/src/vnet/fib/ip4_fib.c @@ -104,29 +104,35 @@ static u32 ip4_create_fib_with_table_id (u32 table_id) { fib_table_t *fib_table; + ip4_fib_t *v4_fib; pool_get_aligned(ip4_main.fibs, fib_table, CLIB_CACHE_LINE_BYTES); memset(fib_table, 0, sizeof(*fib_table)); + pool_get_aligned(ip4_main.v4_fibs, v4_fib, CLIB_CACHE_LINE_BYTES); + + ASSERT((fib_table - ip4_main.fibs) == + (v4_fib - ip4_main.v4_fibs)); + fib_table->ft_proto = FIB_PROTOCOL_IP4; fib_table->ft_index = - fib_table->v4.index = + v4_fib->index = (fib_table - ip4_main.fibs); hash_set (ip4_main.fib_index_by_table_id, table_id, fib_table->ft_index); fib_table->ft_table_id = - fib_table->v4.table_id = + v4_fib->table_id = table_id; fib_table->ft_flow_hash_config = - fib_table->v4.flow_hash_config = + v4_fib->flow_hash_config = IP_FLOW_HASH_DEFAULT; - fib_table->v4.fwd_classify_table_index = ~0; - fib_table->v4.rev_classify_table_index = ~0; + v4_fib->fwd_classify_table_index = ~0; + v4_fib->rev_classify_table_index = ~0; fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_IP4); - ip4_mtrie_init(&fib_table->v4.mtrie); + ip4_mtrie_init(&v4_fib->mtrie); /* * add the special entries into the new FIB @@ -151,9 +157,10 @@ ip4_create_fib_with_table_id (u32 table_id) } void -ip4_fib_table_destroy (ip4_fib_t *fib) +ip4_fib_table_destroy (u32 fib_index) { - fib_table_t *fib_table = (fib_table_t*)fib; + fib_table_t *fib_table = pool_elt_at_index(ip4_main.fibs, fib_index); + ip4_fib_t *v4_fib = pool_elt_at_index(ip4_main.v4_fibs, fib_index); int ii; /* @@ -185,6 +192,10 @@ ip4_fib_table_destroy (ip4_fib_t *fib) { hash_unset (ip4_main.fib_index_by_table_id, fib_table->ft_table_id); } + + ip4_mtrie_free(&v4_fib->mtrie); + + pool_put(ip4_main.v4_fibs, v4_fib); pool_put(ip4_main.fibs, fib_table); } @@ -367,16 +378,33 @@ ip4_fib_table_fwding_dpo_update (ip4_fib_t *fib, u32 len, const dpo_id_t *dpo) { - ip4_fib_mtrie_add_del_route(fib, *addr, len, dpo->dpoi_index, 0); // ADD + 
ip4_fib_mtrie_route_add(&fib->mtrie, addr, len, dpo->dpoi_index); } void ip4_fib_table_fwding_dpo_remove (ip4_fib_t *fib, const ip4_address_t *addr, u32 len, - const dpo_id_t *dpo) + const dpo_id_t *dpo, + u32 cover_index) { - ip4_fib_mtrie_add_del_route(fib, *addr, len, dpo->dpoi_index, 1); // DELETE + fib_prefix_t cover_prefix = { + .fp_len = 0, + }; + const dpo_id_t *cover_dpo; + + /* + * We need to pass the MTRIE the LB index and address length of the + * covering prefix, so it can fill the plys with the correct replacement + * for the entry being removed + */ + fib_entry_get_prefix(cover_index, &cover_prefix); + cover_dpo = fib_entry_contribute_ip_forwarding(cover_index); + + ip4_fib_mtrie_route_del(&fib->mtrie, + addr, len, dpo->dpoi_index, + cover_prefix.fp_len, + cover_dpo->dpoi_index); } void @@ -498,7 +526,7 @@ ip4_show_fib (vlib_main_t * vm, pool_foreach (fib_table, im4->fibs, ({ - ip4_fib_t *fib = &fib_table->v4; + ip4_fib_t *fib = pool_elt_at_index(im4->v4_fibs, fib_table->ft_index); if (table_id >= 0 && table_id != (int)fib->table_id) continue; @@ -523,6 +551,11 @@ ip4_show_fib (vlib_main_t * vm, } continue; } + if (mtrie) + { + vlib_cli_output (vm, "%U", format_ip4_fib_mtrie, &fib->mtrie); + continue; + } if (!matching) { @@ -532,9 +565,6 @@ ip4_show_fib (vlib_main_t * vm, { ip4_fib_table_show_one(fib, vm, &matching_address, matching_mask); } - - if (mtrie) - vlib_cli_output (vm, "%U", format_ip4_fib_mtrie, &fib->mtrie); })); return 0; diff --git a/src/vnet/fib/ip4_fib.h b/src/vnet/fib/ip4_fib.h index 243fd77f..4cf9e58a 100644 --- a/src/vnet/fib/ip4_fib.h +++ b/src/vnet/fib/ip4_fib.h @@ -34,6 +34,33 @@ #include #include #include +#include + +typedef struct ip4_fib_t_ +{ + /** + * Mtrie for fast lookups. Hash is used to maintain overlapping prefixes. + * First member so it's in the first cacheline. + */ + ip4_fib_mtrie_t mtrie; + + /* Hash table for each prefix length mapping. */ + uword *fib_entry_by_dst_address[33]; + + /* Table ID (hash key) for this FIB. */ + u32 table_id; + + /* Index into FIB vector. 
*/ + u32 index; + + /* flow hash configuration */ + flow_hash_config_t flow_hash_config; + + /* N-tuple classifier indices */ + u32 fwd_classify_table_index; + u32 rev_classify_table_index; + +} ip4_fib_t; extern fib_node_index_t ip4_fib_table_lookup(const ip4_fib_t *fib, const ip4_address_t *addr, @@ -50,7 +77,7 @@ extern void ip4_fib_table_entry_insert(ip4_fib_t *fib, const ip4_address_t *addr, u32 len, fib_node_index_t fib_entry_index); -extern void ip4_fib_table_destroy(ip4_fib_t *fib); +extern void ip4_fib_table_destroy(u32 fib_index); extern void ip4_fib_table_fwding_dpo_update(ip4_fib_t *fib, const ip4_address_t *addr, @@ -60,7 +87,8 @@ extern void ip4_fib_table_fwding_dpo_update(ip4_fib_t *fib, extern void ip4_fib_table_fwding_dpo_remove(ip4_fib_t *fib, const ip4_address_t *addr, u32 len, - const dpo_id_t *dpo); + const dpo_id_t *dpo, + fib_node_index_t cover_index); extern u32 ip4_fib_table_lookup_lb (ip4_fib_t *fib, const ip4_address_t * dst); @@ -79,7 +107,7 @@ extern void ip4_fib_table_walk(ip4_fib_t *fib, static inline ip4_fib_t * ip4_fib_get (u32 index) { - return (&(pool_elt_at_index(ip4_main.fibs, index)->v4)); + return (pool_elt_at_index(ip4_main.v4_fibs, index)); } always_inline u32 @@ -134,7 +162,6 @@ ip4_fib_forwarding_lookup (u32 fib_index, mtrie = &ip4_fib_get(fib_index)->mtrie; leaf = ip4_fib_mtrie_lookup_step_one (mtrie, addr); - leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 1); leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 2); leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 3); diff --git a/src/vnet/fib/ip6_fib.c b/src/vnet/fib/ip6_fib.c index 343ff55e..0ee029d3 100644 --- a/src/vnet/fib/ip6_fib.c +++ b/src/vnet/fib/ip6_fib.c @@ -55,22 +55,29 @@ static u32 create_fib_with_table_id (u32 table_id) { fib_table_t *fib_table; + ip6_fib_t *v6_fib; pool_get_aligned(ip6_main.fibs, fib_table, CLIB_CACHE_LINE_BYTES); + pool_get_aligned(ip6_main.v6_fibs, v6_fib, CLIB_CACHE_LINE_BYTES); + memset(fib_table, 0, sizeof(*fib_table)); + memset(v6_fib, 0, sizeof(*v6_fib)); + ASSERT((fib_table - ip6_main.fibs) == + (v6_fib - ip6_main.v6_fibs)); + fib_table->ft_proto = FIB_PROTOCOL_IP6; fib_table->ft_index = - fib_table->v6.index = - (fib_table - ip6_main.fibs); + v6_fib->index = + (fib_table - ip6_main.fibs); hash_set(ip6_main.fib_index_by_table_id, table_id, fib_table->ft_index); fib_table->ft_table_id = - fib_table->v6.table_id = + v6_fib->table_id = table_id; fib_table->ft_flow_hash_config = - fib_table->v6.flow_hash_config = + v6_fib->flow_hash_config = IP_FLOW_HASH_DEFAULT; vnet_ip6_fib_init(fib_table->ft_index); @@ -188,6 +195,7 @@ ip6_fib_table_destroy (u32 fib_index) { hash_unset (ip6_main.fib_index_by_table_id, fib_table->ft_table_id); } + pool_put_index(ip6_main.v6_fibs, fib_table->ft_index); pool_put(ip6_main.fibs, fib_table); } @@ -620,7 +628,7 @@ ip6_show_fib (vlib_main_t * vm, pool_foreach (fib_table, im6->fibs, ({ - fib = &(fib_table->v6); + fib = pool_elt_at_index(im6->v6_fibs, fib_table->ft_index); if (table_id >= 0 && table_id != (int)fib->table_id) continue; if (fib_index != ~0 && fib_index != (int)fib->index) diff --git a/src/vnet/fib/ip6_fib.h b/src/vnet/fib/ip6_fib.h index af864a75..e2f28452 100644 --- a/src/vnet/fib/ip6_fib.h +++ b/src/vnet/fib/ip6_fib.h @@ -115,7 +115,7 @@ static inline ip6_fib_t * ip6_fib_get (fib_node_index_t index) { ASSERT(!pool_is_free_index(ip6_main.fibs, index)); - return (&pool_elt_at_index (ip6_main.fibs, index)->v6); + return (pool_elt_at_index (ip6_main.v6_fibs, index)); } static inline diff --git 
a/src/vnet/fib/mpls_fib.c b/src/vnet/fib/mpls_fib.c index 5cd0fd23..4b2b76ea 100644 --- a/src/vnet/fib/mpls_fib.c +++ b/src/vnet/fib/mpls_fib.c @@ -97,11 +97,15 @@ mpls_fib_create_with_table_id (u32 table_id) int i; pool_get_aligned(mpls_main.fibs, fib_table, CLIB_CACHE_LINE_BYTES); + pool_get_aligned(mpls_main.mpls_fibs, mf, CLIB_CACHE_LINE_BYTES); + + ASSERT((fib_table - mpls_main.fibs) == + (mf - mpls_main.mpls_fibs)); + memset(fib_table, 0, sizeof(*fib_table)); fib_table->ft_proto = FIB_PROTOCOL_MPLS; - fib_table->ft_index = - (fib_table - mpls_main.fibs); + fib_table->ft_index = (fib_table - mpls_main.fibs); hash_set (mpls_main.fib_index_by_table_id, table_id, fib_table->ft_index); @@ -109,8 +113,6 @@ mpls_fib_create_with_table_id (u32 table_id) table_id; fib_table->ft_flow_hash_config = MPLS_FLOW_HASH_DEFAULT; - fib_table->v4.fwd_classify_table_index = ~0; - fib_table->v4.rev_classify_table_index = ~0; fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_MPLS); @@ -122,7 +124,6 @@ mpls_fib_create_with_table_id (u32 table_id) drop_dpo_get(DPO_PROTO_MPLS)); } - mf = &fib_table->mpls; mf->mf_entries = hash_create(0, sizeof(fib_node_index_t)); for (i = 0; i < MPLS_FIB_DB_SIZE; i++) { @@ -241,9 +242,10 @@ mpls_fib_table_create_and_lock (void) } void -mpls_fib_table_destroy (mpls_fib_t *mf) +mpls_fib_table_destroy (u32 fib_index) { - fib_table_t *fib_table = (fib_table_t*)mf; + fib_table_t *fib_table = pool_elt_at_index(mpls_main.fibs, fib_index); + mpls_fib_t *mf = pool_elt_at_index(mpls_main.mpls_fibs, fib_index); fib_prefix_t prefix = { .fp_proto = FIB_PROTOCOL_MPLS, }; @@ -274,6 +276,7 @@ mpls_fib_table_destroy (mpls_fib_t *mf) } hash_free(mf->mf_entries); + pool_put(mpls_main.mpls_fibs, mf); pool_put(mpls_main.fibs, fib_table); } @@ -436,11 +439,11 @@ mpls_fib_show (vlib_main_t * vm, if (MPLS_LABEL_INVALID == label) { - mpls_fib_table_show_all(&(fib_table->mpls), vm); + mpls_fib_table_show_all(mpls_fib_get(fib_table->ft_index), vm); } else { - mpls_fib_table_show_one(&(fib_table->mpls), label, vm); + mpls_fib_table_show_one(mpls_fib_get(fib_table->ft_index), label, vm); } })); diff --git a/src/vnet/fib/mpls_fib.h b/src/vnet/fib/mpls_fib.h index 779decaa..78a61a14 100644 --- a/src/vnet/fib/mpls_fib.h +++ b/src/vnet/fib/mpls_fib.h @@ -25,10 +25,33 @@ #include #include +#define MPLS_FIB_DEFAULT_TABLE_ID 0 + +/** + * Type exposure is to allow the DP fast/inlined access + */ +#define MPLS_FIB_KEY_SIZE 21 +#define MPLS_FIB_DB_SIZE (1 << (MPLS_FIB_KEY_SIZE-1)) + +typedef struct mpls_fib_t_ +{ + /** + * A hash table of entries. 21 bit key + * Hash table for reduced memory footprint + */ + uword * mf_entries; + + /** + * The load-balance indices keyed by 21 bit label+eos bit. + * A flat array for maximum lookup performace. 
+ */ + index_t mf_lbs[MPLS_FIB_DB_SIZE]; +} mpls_fib_t; + static inline mpls_fib_t* mpls_fib_get (fib_node_index_t index) { - return (&(pool_elt_at_index(mpls_main.fibs, index)->mpls)); + return (pool_elt_at_index(mpls_main.mpls_fibs, index)); } extern u32 mpls_fib_table_find_or_create_and_lock(u32 table_id); @@ -56,8 +79,7 @@ extern void mpls_fib_table_entry_insert(mpls_fib_t *mf, mpls_label_t label, mpls_eos_bit_t eos, fib_node_index_t fei); -extern void mpls_fib_table_destroy(mpls_fib_t *mf); - +extern void mpls_fib_table_destroy(u32 fib_index); extern void mpls_fib_forwarding_table_update(mpls_fib_t *mf, diff --git a/src/vnet/ip/ip4.h b/src/vnet/ip/ip4.h index 4e075d0f..71640def 100644 --- a/src/vnet/ip/ip4.h +++ b/src/vnet/ip/ip4.h @@ -40,34 +40,10 @@ #ifndef included_ip_ip4_h #define included_ip_ip4_h -#include #include #include #include -typedef struct ip4_fib_t -{ - /* Hash table for each prefix length mapping. */ - uword *fib_entry_by_dst_address[33]; - - /* Mtrie for fast lookups. Hash is used to maintain overlapping prefixes. */ - ip4_fib_mtrie_t mtrie; - - /* Table ID (hash key) for this FIB. */ - u32 table_id; - - /* Index into FIB vector. */ - u32 index; - - /* flow hash configuration */ - flow_hash_config_t flow_hash_config; - - /* N-tuple classifier indices */ - u32 fwd_classify_table_index; - u32 rev_classify_table_index; - -} ip4_fib_t; - typedef struct ip4_mfib_t { /* Hash table for each prefix length mapping. */ @@ -111,6 +87,9 @@ typedef struct ip4_main_t /** Vector of FIBs. */ struct fib_table_t_ *fibs; + /** Vector of MTries. */ + struct ip4_fib_t_ *v4_fibs; + /** Vector of MFIBs. */ struct mfib_table_t_ *mfibs; @@ -284,8 +263,6 @@ serialize_function_t serialize_vnet_ip4_main, unserialize_vnet_ip4_main; int vnet_set_ip4_flow_hash (u32 table_id, flow_hash_config_t flow_hash_config); -void ip4_mtrie_init (ip4_fib_mtrie_t * m); - int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index, u32 table_index); diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index ef6dded5..ee1703e7 100644 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -182,7 +182,6 @@ ip4_lookup_inline (vlib_main_t * vm, mtrie2 = &ip4_fib_get (fib_index2)->mtrie; mtrie3 = &ip4_fib_get (fib_index3)->mtrie; - leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0); leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, dst_addr1); leaf2 = ip4_fib_mtrie_lookup_step_one (mtrie2, dst_addr2); @@ -194,14 +193,6 @@ ip4_lookup_inline (vlib_main_t * vm, tcp2 = (void *) (ip2 + 1); tcp3 = (void *) (ip3 + 1); - if (!lookup_for_responses_to_locally_received_packets) - { - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1); - leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 1); - leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 1); - } - if (!lookup_for_responses_to_locally_received_packets) { leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2); @@ -363,9 +354,6 @@ ip4_lookup_inline (vlib_main_t * vm, tcp0 = (void *) (ip0 + 1); - if (!lookup_for_responses_to_locally_received_packets) - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1); - if (!lookup_for_responses_to_locally_received_packets) leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2); @@ -1622,11 +1610,6 @@ ip4_local_inline (vlib_main_t * vm, good_tcp_udp0 |= is_udp0 && udp0->checksum == 0; good_tcp_udp1 |= is_udp1 && udp1->checksum == 0; - leaf0 = - ip4_fib_mtrie_lookup_step 
(mtrie0, leaf0, &ip0->src_address, 1); - leaf1 = - ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1); - /* Verify UDP length. */ ip_len0 = clib_net_to_host_u16 (ip0->length); ip_len1 = clib_net_to_host_u16 (ip1->length); @@ -1812,9 +1795,6 @@ ip4_local_inline (vlib_main_t * vm, /* Don't verify UDP checksum for packets with explicit zero checksum. */ good_tcp_udp0 |= is_udp0 && udp0->checksum == 0; - leaf0 = - ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1); - /* Verify UDP length. */ ip_len0 = clib_net_to_host_u16 (ip0->length); udp_len0 = clib_net_to_host_u16 (udp0->length); @@ -2913,7 +2893,6 @@ ip4_lookup_validate (ip4_address_t * a, u32 fib_index0) mtrie0 = &ip4_fib_get (fib_index0)->mtrie; leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a); - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1); leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2); leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3); diff --git a/src/vnet/ip/ip4_mtrie.c b/src/vnet/ip/ip4_mtrie.c index 317d8f10..adc95125 100644 --- a/src/vnet/ip/ip4_mtrie.c +++ b/src/vnet/ip/ip4_mtrie.c @@ -38,10 +38,17 @@ */ #include -#include +#include +#include + + +/** + * Global pool of IPv4 8bit PLYs + */ +ip4_fib_mtrie_8_ply_t *ip4_ply_pool; always_inline u32 -ip4_fib_mtrie_leaf_is_non_empty (ip4_fib_mtrie_ply_t * p, u8 dst_byte) +ip4_fib_mtrie_leaf_is_non_empty (ip4_fib_mtrie_8_ply_t * p, u8 dst_byte) { /* * It's 'non-empty' if the length of the leaf stored is greater than the @@ -84,61 +91,83 @@ ip4_fib_mtrie_leaf_set_next_ply_index (u32 i) return l; } -static void -ply_init (ip4_fib_mtrie_ply_t * p, - ip4_fib_mtrie_leaf_t init, u32 prefix_len, u32 ply_base_len) -{ - /* - * A leaf is 'empty' if it represents a leaf from the covering PLY - * i.e. if the prefix length of the leaf is less than or equal to - * the prefix length of the PLY - */ - p->n_non_empty_leafs = (prefix_len > ply_base_len ? - ARRAY_LEN (p->leaves) : 0); - memset (p->dst_address_bits_of_leaves, prefix_len, - sizeof (p->dst_address_bits_of_leaves)); - p->dst_address_bits_base = ply_base_len; - - /* Initialize leaves. 
*/ -#ifdef CLIB_HAVE_VEC128 - { - u32x4 *l, init_x4; - #ifndef __ALTIVEC__ - init_x4 = u32x4_splat (init); +#define PLY_X4_SPLAT_INIT(init_x4, init) \ + init_x4 = u32x4_splat (init); #else - { - u32x4_union_t y; - y.as_u32[0] = init; - y.as_u32[1] = init; - y.as_u32[2] = init; - y.as_u32[3] = init; - init_x4 = y.as_u32x4; - } +#define PLY_X4_SPLAT_INIT(init_x4, init) \ +{ \ + u32x4_union_t y; \ + y.as_u32[0] = init; \ + y.as_u32[1] = init; \ + y.as_u32[2] = init; \ + y.as_u32[3] = init; \ + init_x4 = y.as_u32x4; \ +} #endif - for (l = p->leaves_as_u32x4; - l < p->leaves_as_u32x4 + ARRAY_LEN (p->leaves_as_u32x4); l += 4) - { - l[0] = init_x4; - l[1] = init_x4; - l[2] = init_x4; - l[3] = init_x4; - } - } +#ifdef CLIB_HAVE_VEC128 +#define PLY_INIT_LEAVES(p) \ +{ \ + u32x4 *l, init_x4; \ + \ + PLY_X4_SPLAT_INIT(init_x4, init); \ + for (l = p->leaves_as_u32x4; \ + l < p->leaves_as_u32x4 + ARRAY_LEN (p->leaves_as_u32x4); \ + l += 4) \ + { \ + l[0] = init_x4; \ + l[1] = init_x4; \ + l[2] = init_x4; \ + l[3] = init_x4; \ + } \ +} #else - { - u32 *l; - - for (l = p->leaves; l < p->leaves + ARRAY_LEN (p->leaves); l += 4) - { - l[0] = init; - l[1] = init; - l[2] = init; - l[3] = init; - } - } +#define PLY_INIT_LEAVES(p) \ +{ \ + u32 *l; \ + \ + for (l = p->leaves; l < p->leaves + ARRAY_LEN (p->leaves); l += 4) \ + { \ + l[0] = init; \ + l[1] = init; \ + l[2] = init; \ + l[3] = init; \ + } \ +} #endif + +#define PLY_INIT(p, init, prefix_len, ply_base_len) \ +{ \ + /* \ + * A leaf is 'empty' if it represents a leaf from the covering PLY \ + * i.e. if the prefix length of the leaf is less than or equal to \ + * the prefix length of the PLY \ + */ \ + p->n_non_empty_leafs = (prefix_len > ply_base_len ? \ + ARRAY_LEN (p->leaves) : 0); \ + memset (p->dst_address_bits_of_leaves, prefix_len, \ + sizeof (p->dst_address_bits_of_leaves)); \ + p->dst_address_bits_base = ply_base_len; \ + \ + /* Initialize leaves. */ \ + PLY_INIT_LEAVES(p); \ +} + +static void +ply_8_init (ip4_fib_mtrie_8_ply_t * p, + ip4_fib_mtrie_leaf_t init, uword prefix_len, u32 ply_base_len) +{ + PLY_INIT (p, init, prefix_len, ply_base_len); +} + +static void +ply_16_init (ip4_fib_mtrie_16_ply_t * p, + ip4_fib_mtrie_leaf_t init, uword prefix_len) +{ + memset (p->dst_address_bits_of_leaves, prefix_len, + sizeof (p->dst_address_bits_of_leaves)); + PLY_INIT_LEAVES (p); } static ip4_fib_mtrie_leaf_t @@ -146,49 +175,43 @@ ply_create (ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t init_leaf, u32 leaf_prefix_len, u32 ply_base_len) { - ip4_fib_mtrie_ply_t *p; + ip4_fib_mtrie_8_ply_t *p; /* Get cache aligned ply. */ - pool_get_aligned (m->ply_pool, p, sizeof (p[0])); + pool_get_aligned (ip4_ply_pool, p, CLIB_CACHE_LINE_BYTES); - ply_init (p, init_leaf, leaf_prefix_len, ply_base_len); - return ip4_fib_mtrie_leaf_set_next_ply_index (p - m->ply_pool); + ply_8_init (p, init_leaf, leaf_prefix_len, ply_base_len); + return ip4_fib_mtrie_leaf_set_next_ply_index (p - ip4_ply_pool); } -always_inline ip4_fib_mtrie_ply_t * +always_inline ip4_fib_mtrie_8_ply_t * get_next_ply_for_leaf (ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t l) { uword n = ip4_fib_mtrie_leaf_get_next_ply_index (l); - /* It better not be the root ply. 
*/ - ASSERT (n != 0); - return pool_elt_at_index (m->ply_pool, n); + + return pool_elt_at_index (ip4_ply_pool, n); } -static void -ply_free (ip4_fib_mtrie_t * m, ip4_fib_mtrie_ply_t * p) +void +ip4_mtrie_free (ip4_fib_mtrie_t * m) { - uword i, is_root; - - is_root = p - m->ply_pool == 0; - - for (i = 0; i < ARRAY_LEN (p->leaves); i++) + /* the root ply is embedded so the is nothing to do, + * the assumption being that the IP4 FIB table has emptied the trie + * before deletion. + */ +#if CLIB_DEBUG > 0 + int i; + for (i = 0; i < ARRAY_LEN (m->root_ply.leaves); i++) { - ip4_fib_mtrie_leaf_t l = p->leaves[i]; - if (ip4_fib_mtrie_leaf_is_next_ply (l)) - ply_free (m, get_next_ply_for_leaf (m, l)); + ASSERT (!ip4_fib_mtrie_leaf_is_next_ply (m->root_ply.leaves[i])); } - - if (is_root) - ply_init (p, IP4_FIB_MTRIE_LEAF_EMPTY, /* prefix_len */ 0, 0); - else - pool_put (m->ply_pool, p); +#endif } void -ip4_fib_free (ip4_fib_mtrie_t * m) +ip4_mtrie_init (ip4_fib_mtrie_t * m) { - ip4_fib_mtrie_ply_t *root_ply = pool_elt_at_index (m->ply_pool, 0); - ply_free (m, root_ply); + ply_16_init (&m->root_ply, IP4_FIB_MTRIE_LEAF_EMPTY, 0); } typedef struct @@ -202,7 +225,7 @@ typedef struct static void set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m, - ip4_fib_mtrie_ply_t * ply, + ip4_fib_mtrie_8_ply_t * ply, ip4_fib_mtrie_leaf_t new_leaf, uword new_leaf_dst_address_bits) { @@ -218,7 +241,8 @@ set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m, /* Recurse into sub plies. */ if (!ip4_fib_mtrie_leaf_is_terminal (old_leaf)) { - ip4_fib_mtrie_ply_t *sub_ply = get_next_ply_for_leaf (m, old_leaf); + ip4_fib_mtrie_8_ply_t *sub_ply = + get_next_ply_for_leaf (m, old_leaf); set_ply_with_more_specific_leaf (m, sub_ply, new_leaf, new_leaf_dst_address_bits); } @@ -237,16 +261,20 @@ set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m, static void set_leaf (ip4_fib_mtrie_t * m, - ip4_fib_mtrie_set_unset_leaf_args_t * a, + const ip4_fib_mtrie_set_unset_leaf_args_t * a, u32 old_ply_index, u32 dst_address_byte_index) { ip4_fib_mtrie_leaf_t old_leaf, new_leaf; i32 n_dst_bits_next_plies; u8 dst_byte; + ip4_fib_mtrie_8_ply_t *old_ply; + + old_ply = pool_elt_at_index (ip4_ply_pool, old_ply_index); ASSERT (a->dst_address_length >= 0 && a->dst_address_length <= 32); ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8)); + /* how many bits of the destination address are in the next PLY */ n_dst_bits_next_plies = a->dst_address_length - BITS (u8) * (dst_address_byte_index + 1); @@ -255,30 +283,36 @@ set_leaf (ip4_fib_mtrie_t * m, /* Number of bits next plies <= 0 => insert leaves this ply. */ if (n_dst_bits_next_plies <= 0) { + /* The mask length of the address to insert maps to this ply */ uword i, n_dst_bits_this_ply, old_leaf_is_terminal; + /* The number of bits, and hence slots/buckets, we will fill */ n_dst_bits_this_ply = clib_min (8, -n_dst_bits_next_plies); ASSERT ((a->dst_address.as_u8[dst_address_byte_index] & pow2_mask (n_dst_bits_this_ply)) == 0); + /* Starting at the value of the byte at this section of the v4 address + * fill the buckets/slots of the ply */ for (i = dst_byte; i < dst_byte + (1 << n_dst_bits_this_ply); i++) { - ip4_fib_mtrie_ply_t *old_ply, *new_ply; - - old_ply = pool_elt_at_index (m->ply_pool, old_ply_index); + ip4_fib_mtrie_8_ply_t *new_ply; old_leaf = old_ply->leaves[i]; old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf); - /* Is leaf to be inserted more specific? 
*/ if (a->dst_address_length >= old_ply->dst_address_bits_of_leaves[i]) { + /* The new leaf is more or equally specific than the one currently + * occupying the slot */ new_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index); if (old_leaf_is_terminal) { + /* The current leaf is terminal, we can replace it with + * the new one */ old_ply->n_non_empty_leafs -= ip4_fib_mtrie_leaf_is_non_empty (old_ply, i); + old_ply->dst_address_bits_of_leaves[i] = a->dst_address_length; __sync_val_compare_and_swap (&old_ply->leaves[i], old_leaf, @@ -292,32 +326,42 @@ set_leaf (ip4_fib_mtrie_t * m, } else { - /* Existing leaf points to another ply. We need to place new_leaf into all - more specific slots. */ + /* Existing leaf points to another ply. We need to place + * new_leaf into all more specific slots. */ new_ply = get_next_ply_for_leaf (m, old_leaf); set_ply_with_more_specific_leaf (m, new_ply, new_leaf, a->dst_address_length); } } - else if (!old_leaf_is_terminal) { + /* The current leaf is less specific and not termial (i.e. a ply), + * recurse on down the trie */ new_ply = get_next_ply_for_leaf (m, old_leaf); - set_leaf (m, a, new_ply - m->ply_pool, + set_leaf (m, a, new_ply - ip4_ply_pool, dst_address_byte_index + 1); } + /* + * else + * the route we are adding is less specific than the leaf currently + * occupying this slot. leave it there + */ } } else { - ip4_fib_mtrie_ply_t *old_ply, *new_ply; + /* The address to insert requires us to move down at a lower level of + * the trie - recurse on down */ + ip4_fib_mtrie_8_ply_t *new_ply; u8 ply_base_len; ply_base_len = 8 * (dst_address_byte_index + 1); - old_ply = pool_elt_at_index (m->ply_pool, old_ply_index); + old_leaf = old_ply->leaves[dst_byte]; + if (ip4_fib_mtrie_leaf_is_terminal (old_leaf)) { + /* There is a leaf occupying the slot. Replace it with a new ply */ old_ply->n_non_empty_leafs -= ip4_fib_mtrie_leaf_is_non_empty (old_ply, dst_byte); @@ -328,28 +372,143 @@ set_leaf (ip4_fib_mtrie_t * m, new_ply = get_next_ply_for_leaf (m, new_leaf); /* Refetch since ply_create may move pool. */ - old_ply = pool_elt_at_index (m->ply_pool, old_ply_index); + old_ply = pool_elt_at_index (ip4_ply_pool, old_ply_index); __sync_val_compare_and_swap (&old_ply->leaves[dst_byte], old_leaf, new_leaf); ASSERT (old_ply->leaves[dst_byte] == new_leaf); old_ply->dst_address_bits_of_leaves[dst_byte] = ply_base_len; - /* Account for the ply we just created. */ - old_ply->n_non_empty_leafs += 1; + old_ply->n_non_empty_leafs += + ip4_fib_mtrie_leaf_is_non_empty (old_ply, dst_byte); ASSERT (old_ply->n_non_empty_leafs >= 0); } else new_ply = get_next_ply_for_leaf (m, old_leaf); - set_leaf (m, a, new_ply - m->ply_pool, dst_address_byte_index + 1); + set_leaf (m, a, new_ply - ip4_ply_pool, dst_address_byte_index + 1); + } +} + +static void +set_root_leaf (ip4_fib_mtrie_t * m, + const ip4_fib_mtrie_set_unset_leaf_args_t * a) +{ + ip4_fib_mtrie_leaf_t old_leaf, new_leaf; + ip4_fib_mtrie_16_ply_t *old_ply; + i32 n_dst_bits_next_plies; + u16 dst_byte; + + old_ply = &m->root_ply; + + ASSERT (a->dst_address_length >= 0 && a->dst_address_length <= 32); + + /* how many bits of the destination address are in the next PLY */ + n_dst_bits_next_plies = a->dst_address_length - BITS (u16); + + dst_byte = a->dst_address.as_u16[0]; + + /* Number of bits next plies <= 0 => insert leaves this ply. 
*/ + if (n_dst_bits_next_plies <= 0) + { + /* The mask length of the address to insert maps to this ply */ + uword i, n_dst_bits_this_ply, old_leaf_is_terminal; + + /* The number of bits, and hence slots/buckets, we will fill */ + n_dst_bits_this_ply = 16 - a->dst_address_length; + ASSERT ((clib_host_to_net_u16 (a->dst_address.as_u16[0]) & + pow2_mask (n_dst_bits_this_ply)) == 0); + + /* Starting at the value of the byte at this section of the v4 address + * fill the buckets/slots of the ply */ + for (i = 0; i < (1 << n_dst_bits_this_ply); i++) + { + ip4_fib_mtrie_8_ply_t *new_ply; + u16 slot; + + slot = clib_net_to_host_u16 (dst_byte); + slot += i; + slot = clib_host_to_net_u16 (slot); + + old_leaf = old_ply->leaves[slot]; + old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf); + + if (a->dst_address_length >= + old_ply->dst_address_bits_of_leaves[slot]) + { + /* The new leaf is more or equally specific than the one currently + * occupying the slot */ + new_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index); + + if (old_leaf_is_terminal) + { + /* The current leaf is terminal, we can replace it with + * the new one */ + old_ply->dst_address_bits_of_leaves[slot] = + a->dst_address_length; + __sync_val_compare_and_swap (&old_ply->leaves[slot], + old_leaf, new_leaf); + ASSERT (old_ply->leaves[slot] == new_leaf); + } + else + { + /* Existing leaf points to another ply. We need to place + * new_leaf into all more specific slots. */ + new_ply = get_next_ply_for_leaf (m, old_leaf); + set_ply_with_more_specific_leaf (m, new_ply, new_leaf, + a->dst_address_length); + } + } + else if (!old_leaf_is_terminal) + { + /* The current leaf is less specific and not termial (i.e. a ply), + * recurse on down the trie */ + new_ply = get_next_ply_for_leaf (m, old_leaf); + set_leaf (m, a, new_ply - ip4_ply_pool, 2); + } + /* + * else + * the route we are adding is less specific than the leaf currently + * occupying this slot. leave it there + */ + } + } + else + { + /* The address to insert requires us to move down at a lower level of + * the trie - recurse on down */ + ip4_fib_mtrie_8_ply_t *new_ply; + u8 ply_base_len; + + ply_base_len = 16; + + old_leaf = old_ply->leaves[dst_byte]; + + if (ip4_fib_mtrie_leaf_is_terminal (old_leaf)) + { + /* There is a leaf occupying the slot. Replace it with a new ply */ + new_leaf = ply_create (m, old_leaf, + clib_max (old_ply->dst_address_bits_of_leaves + [dst_byte], ply_base_len), + ply_base_len); + new_ply = get_next_ply_for_leaf (m, new_leaf); + + __sync_val_compare_and_swap (&old_ply->leaves[dst_byte], old_leaf, + new_leaf); + ASSERT (old_ply->leaves[dst_byte] == new_leaf); + old_ply->dst_address_bits_of_leaves[dst_byte] = ply_base_len; + } + else + new_ply = get_next_ply_for_leaf (m, old_leaf); + + set_leaf (m, a, new_ply - ip4_ply_pool, 2); } } static uword unset_leaf (ip4_fib_mtrie_t * m, - ip4_fib_mtrie_set_unset_leaf_args_t * a, - ip4_fib_mtrie_ply_t * old_ply, u32 dst_address_byte_index) + const ip4_fib_mtrie_set_unset_leaf_args_t * a, + ip4_fib_mtrie_8_ply_t * old_ply, u32 dst_address_byte_index) { ip4_fib_mtrie_leaf_t old_leaf, del_leaf; i32 n_dst_bits_next_plies; @@ -397,7 +556,7 @@ unset_leaf (ip4_fib_mtrie_t * m, ASSERT (old_ply->n_non_empty_leafs >= 0); if (old_ply->n_non_empty_leafs == 0 && dst_address_byte_index > 0) { - pool_put (m->ply_pool, old_ply); + pool_put (ip4_ply_pool, old_ply); /* Old ply was deleted. 
*/ return 1; } @@ -419,106 +578,120 @@ unset_leaf (ip4_fib_mtrie_t * m, return 0; } -void -ip4_mtrie_init (ip4_fib_mtrie_t * m) +static void +unset_root_leaf (ip4_fib_mtrie_t * m, + const ip4_fib_mtrie_set_unset_leaf_args_t * a) { - ip4_fib_mtrie_leaf_t root; - memset (m, 0, sizeof (m[0])); - root = ply_create (m, IP4_FIB_MTRIE_LEAF_EMPTY, 0, 0); - ASSERT (ip4_fib_mtrie_leaf_get_next_ply_index (root) == 0); -} + ip4_fib_mtrie_leaf_t old_leaf, del_leaf; + i32 n_dst_bits_next_plies; + i32 i, n_dst_bits_this_ply, old_leaf_is_terminal; + u16 dst_byte; + ip4_fib_mtrie_16_ply_t *old_ply; -void -ip4_fib_mtrie_add_del_route (ip4_fib_t * fib, - ip4_address_t dst_address, - u32 dst_address_length, - u32 adj_index, u32 is_del) -{ - ip4_fib_mtrie_t *m = &fib->mtrie; - ip4_fib_mtrie_ply_t *root_ply; - ip4_fib_mtrie_set_unset_leaf_args_t a; - ip4_main_t *im = &ip4_main; + ASSERT (a->dst_address_length >= 0 && a->dst_address_length <= 32); - ASSERT (m->ply_pool != 0); + old_ply = &m->root_ply; + n_dst_bits_next_plies = a->dst_address_length - BITS (u16); - root_ply = pool_elt_at_index (m->ply_pool, 0); + dst_byte = a->dst_address.as_u16[0]; - /* Honor dst_address_length. Fib masks are in network byte order */ - dst_address.as_u32 &= im->fib_masks[dst_address_length]; - a.dst_address = dst_address; - a.dst_address_length = dst_address_length; - a.adj_index = adj_index; + n_dst_bits_this_ply = (n_dst_bits_next_plies <= 0 ? + (16 - a->dst_address_length) : 0); - if (!is_del) - { - set_leaf (m, &a, /* ply_index */ 0, /* dst_address_byte_index */ 0); - } - else + del_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index); + + /* Starting at the value of the byte at this section of the v4 address + * fill the buckets/slots of the ply */ + for (i = 0; i < (1 << n_dst_bits_this_ply); i++) { - ip4_main_t *im = &ip4_main; + u16 slot; + + slot = clib_net_to_host_u16 (dst_byte); + slot += i; + slot = clib_host_to_net_u16 (slot); - if (dst_address_length) + old_leaf = old_ply->leaves[slot]; + old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf); + + if (old_leaf == del_leaf + || (!old_leaf_is_terminal + && unset_leaf (m, a, get_next_ply_for_leaf (m, old_leaf), 2))) { - word i; + old_ply->leaves[slot] = + ip4_fib_mtrie_leaf_set_adj_index (a->cover_adj_index); + old_ply->dst_address_bits_of_leaves[slot] = a->cover_address_length; + } + } +} - /* If the ply was not deleted, then we need to fill the - * bucket just reset will the leaf from the less specfic - * cover. - * Find next less specific route and insert into mtrie. */ - for (i = dst_address_length - 1; i >= 0; i--) - { - uword *p; - index_t lbi; - ip4_address_t key; +void +ip4_fib_mtrie_route_add (ip4_fib_mtrie_t * m, + const ip4_address_t * dst_address, + u32 dst_address_length, u32 adj_index) +{ + ip4_fib_mtrie_set_unset_leaf_args_t a; + ip4_main_t *im = &ip4_main; - if (!fib->fib_entry_by_dst_address[i]) - continue; + /* Honor dst_address_length. 
Fib masks are in network byte order */ + a.dst_address.as_u32 = (dst_address->as_u32 & + im->fib_masks[dst_address_length]); + a.dst_address_length = dst_address_length; + a.adj_index = adj_index; - key.as_u32 = dst_address.as_u32 & im->fib_masks[i]; - p = hash_get (fib->fib_entry_by_dst_address[i], key.as_u32); - if (p) - { - lbi = fib_entry_contribute_ip_forwarding (p[0])->dpoi_index; - if (INDEX_INVALID == lbi) - continue; + set_root_leaf (m, &a); +} - a.cover_adj_index = lbi; - a.cover_address_length = i; +void +ip4_fib_mtrie_route_del (ip4_fib_mtrie_t * m, + const ip4_address_t * dst_address, + u32 dst_address_length, + u32 adj_index, + u32 cover_address_length, u32 cover_adj_index) +{ + ip4_fib_mtrie_set_unset_leaf_args_t a; + ip4_main_t *im = &ip4_main; - break; - } - } - } - else - { - a.cover_adj_index = 0; - a.cover_address_length = 0; - } + /* Honor dst_address_length. Fib masks are in network byte order */ + a.dst_address.as_u32 = (dst_address->as_u32 & + im->fib_masks[dst_address_length]); + a.dst_address_length = dst_address_length; + a.adj_index = adj_index; + a.cover_adj_index = cover_adj_index; + a.cover_address_length = cover_address_length; - /* the top level ply is never removed, so we can ignore the return code */ - unset_leaf (m, &a, root_ply, 0); - } + /* the top level ply is never removed */ + unset_root_leaf (m, &a); } /* Returns number of bytes of memory used by mtrie. */ static uword -mtrie_memory_usage (ip4_fib_mtrie_t * m, ip4_fib_mtrie_ply_t * p) +mtrie_ply_memory_usage (ip4_fib_mtrie_t * m, ip4_fib_mtrie_8_ply_t * p) { uword bytes, i; - if (!p) - { - if (pool_is_free_index (m->ply_pool, 0)) - return 0; - p = pool_elt_at_index (m->ply_pool, 0); - } - bytes = sizeof (p[0]); for (i = 0; i < ARRAY_LEN (p->leaves); i++) { ip4_fib_mtrie_leaf_t l = p->leaves[i]; if (ip4_fib_mtrie_leaf_is_next_ply (l)) - bytes += mtrie_memory_usage (m, get_next_ply_for_leaf (m, l)); + bytes += mtrie_ply_memory_usage (m, get_next_ply_for_leaf (m, l)); + } + + return bytes; +} + +/* Returns number of bytes of memory used by mtrie. 
*/ +static uword +mtrie_memory_usage (ip4_fib_mtrie_t * m) +{ + uword bytes, i; + + bytes = sizeof (*m); + for (i = 0; i < ARRAY_LEN (m->root_ply.leaves); i++) + { + ip4_fib_mtrie_leaf_t l = m->root_ply.leaves[i]; + if (ip4_fib_mtrie_leaf_is_next_ply (l)) + bytes += mtrie_ply_memory_usage (m, get_next_ply_for_leaf (m, l)); } return bytes; @@ -536,47 +709,49 @@ format_ip4_fib_mtrie_leaf (u8 * s, va_list * va) return s; } +#define FORMAT_PLY(s, _p, _i, _base_address, _ply_max_len, _indent) \ +({ \ + u32 a, ia_length; \ + ip4_address_t ia; \ + ip4_fib_mtrie_leaf_t _l = p->leaves[(_i)]; \ + \ + a = (_base_address) + ((_i) << (32 - (_ply_max_len))); \ + ia.as_u32 = clib_host_to_net_u32 (a); \ + ia_length = (_p)->dst_address_bits_of_leaves[(_i)]; \ + s = format (s, "\n%U%20U %U", \ + format_white_space, (_indent) + 2, \ + format_ip4_address_and_length, &ia, ia_length, \ + format_ip4_fib_mtrie_leaf, _l); \ + \ + if (ip4_fib_mtrie_leaf_is_next_ply (_l)) \ + s = format (s, "\n%U%U", \ + format_white_space, (_indent) + 2, \ + format_ip4_fib_mtrie_ply, m, a, \ + ip4_fib_mtrie_leaf_get_next_ply_index (_l)); \ + s; \ +}) + static u8 * format_ip4_fib_mtrie_ply (u8 * s, va_list * va) { ip4_fib_mtrie_t *m = va_arg (*va, ip4_fib_mtrie_t *); u32 base_address = va_arg (*va, u32); u32 ply_index = va_arg (*va, u32); - u32 dst_address_byte_index = va_arg (*va, u32); - ip4_fib_mtrie_ply_t *p; - uword i, indent; + ip4_fib_mtrie_8_ply_t *p; + uword indent; + int i; - p = pool_elt_at_index (m->ply_pool, ply_index); + p = pool_elt_at_index (ip4_ply_pool, ply_index); indent = format_get_indent (s); - s = - format (s, "ply index %d, %d non-empty leaves", ply_index, - p->n_non_empty_leafs); + s = format (s, "ply index %d, %d non-empty leaves", ply_index, + p->n_non_empty_leafs); + for (i = 0; i < ARRAY_LEN (p->leaves); i++) { - ip4_fib_mtrie_leaf_t l = p->leaves[i]; - if (ip4_fib_mtrie_leaf_is_non_empty (p, i)) { - u32 a, ia_length; - ip4_address_t ia; - - a = base_address + (i << (24 - 8 * dst_address_byte_index)); - ia.as_u32 = clib_host_to_net_u32 (a); - if (ip4_fib_mtrie_leaf_is_terminal (l)) - ia_length = p->dst_address_bits_of_leaves[i]; - else - ia_length = 8 * (1 + dst_address_byte_index); - s = format (s, "\n%U%20U %U", - format_white_space, indent + 2, - format_ip4_address_and_length, &ia, ia_length, - format_ip4_fib_mtrie_leaf, l); - - if (ip4_fib_mtrie_leaf_is_next_ply (l)) - s = format (s, "\n%U%U", - format_white_space, indent + 2, - format_ip4_fib_mtrie_ply, m, a, - ip4_fib_mtrie_leaf_get_next_ply_index (l), - dst_address_byte_index + 1); + FORMAT_PLY (s, p, i, base_address, + p->dst_address_bits_base + 8, indent); } } @@ -587,22 +762,44 @@ u8 * format_ip4_fib_mtrie (u8 * s, va_list * va) { ip4_fib_mtrie_t *m = va_arg (*va, ip4_fib_mtrie_t *); + ip4_fib_mtrie_16_ply_t *p; + u32 base_address = 0; + int i; - s = format (s, "%d plies, memory usage %U", - pool_elts (m->ply_pool), - format_memory_size, mtrie_memory_usage (m, 0)); + s = format (s, "%d plies, memory usage %U\n", + pool_elts (ip4_ply_pool), + format_memory_size, mtrie_memory_usage (m)); + s = format (s, "root-ply"); + p = &m->root_ply; - if (pool_elts (m->ply_pool) > 0) + for (i = 0; i < ARRAY_LEN (p->leaves); i++) { - ip4_address_t base_address; - base_address.as_u32 = 0; - s = - format (s, "\n %U", format_ip4_fib_mtrie_ply, m, base_address, 0, 0); + u16 slot; + + slot = clib_host_to_net_u16 (i); + + if (p->dst_address_bits_of_leaves[slot] > 0) + { + FORMAT_PLY (s, p, slot, base_address, 16, 2); + } } return s; } +static clib_error_t * 
+ip4_mtrie_module_init (vlib_main_t * vm) +{ + /* Burn one ply so index 0 is taken */ + CLIB_UNUSED (ip4_fib_mtrie_8_ply_t * p); + + pool_get (ip4_ply_pool, p); + + return (NULL); +} + +VLIB_INIT_FUNCTION (ip4_mtrie_module_init); + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/ip/ip4_mtrie.h b/src/vnet/ip/ip4_mtrie.h index 128195d3..be262c2c 100644 --- a/src/vnet/ip/ip4_mtrie.h +++ b/src/vnet/ip/ip4_mtrie.h @@ -47,16 +47,43 @@ /* ip4 fib leafs: 4 ply 8-8-8-8 mtrie. 1 + 2*adj_index for terminal leaves. - 0 + 2*next_ply_index for non-terminals. + 0 + 2*next_ply_index for non-terminals, i.e. PLYs 1 => empty (adjacency index of zero is special miss adjacency). */ typedef u32 ip4_fib_mtrie_leaf_t; #define IP4_FIB_MTRIE_LEAF_EMPTY (1 + 2*0) +/** + * @brief the 16 way stride that is the top PLY of the mtrie + * We do not maintain the count of 'real' leaves in this PLY, since + * it is never removed. The FIB will destroy the mtrie and the ply once + * the FIB is destroyed. + */ +#define PLY_16_SIZE (1<<16) +typedef struct ip4_fib_mtrie_16_ply_t_ +{ + /** + * The leaves/slots/buckets to be filed with leafs + */ + union + { + ip4_fib_mtrie_leaf_t leaves[PLY_16_SIZE]; + +#ifdef CLIB_HAVE_VEC128 + u32x4 leaves_as_u32x4[PLY_16_SIZE / 4]; +#endif + }; + + /** + * Prefix length for terminal leaves. + */ + u8 dst_address_bits_of_leaves[PLY_16_SIZE]; +} ip4_fib_mtrie_16_ply_t; + /** * @brief One ply of the 4 ply mtrie fib. */ -typedef struct +typedef struct ip4_fib_mtrie_8_ply_t_ { /** * The leaves/slots/buckets to be filed with leafs @@ -90,34 +117,72 @@ typedef struct /* Pad to cache line boundary. */ u8 pad[CLIB_CACHE_LINE_BYTES - 2 * sizeof (i32)]; } -ip4_fib_mtrie_ply_t; +ip4_fib_mtrie_8_ply_t; -STATIC_ASSERT (0 == sizeof (ip4_fib_mtrie_ply_t) % CLIB_CACHE_LINE_BYTES, +STATIC_ASSERT (0 == sizeof (ip4_fib_mtrie_8_ply_t) % CLIB_CACHE_LINE_BYTES, "IP4 Mtrie ply cache line"); +/** + * @brief The mutiway-TRIE. + * There is no data associated with the mtrie apart from the top PLY + */ typedef struct { - /* Pool of plies. Index zero is root ply. */ - ip4_fib_mtrie_ply_t *ply_pool; + /** + * Embed the PLY with the mtrie struct. This means that the Data-plane + * 'get me the mtrie' returns the first ply, and not an indirect 'pointer' + * to it. therefore no cachline misses in the data-path. + */ + ip4_fib_mtrie_16_ply_t root_ply; } ip4_fib_mtrie_t; -void ip4_fib_mtrie_init (ip4_fib_mtrie_t * m); +/** + * @brief Initialise an mtrie + */ +void ip4_mtrie_init (ip4_fib_mtrie_t * m); -struct ip4_fib_t; +/** + * @brief Free an mtrie, It must be emty when free'd + */ +void ip4_mtrie_free (ip4_fib_mtrie_t * m); -void ip4_fib_mtrie_add_del_route (struct ip4_fib_t *f, - ip4_address_t dst_address, - u32 dst_address_length, - u32 adj_index, u32 is_del); +/** + * @brief Add a route/rntry to the mtrie + */ +void ip4_fib_mtrie_route_add (ip4_fib_mtrie_t * m, + const ip4_address_t * dst_address, + u32 dst_address_length, u32 adj_index); +/** + * @brief remove a route/rntry to the mtrie + */ +void ip4_fib_mtrie_route_del (ip4_fib_mtrie_t * m, + const ip4_address_t * dst_address, + u32 dst_address_length, + u32 adj_index, + u32 cover_address_length, u32 cover_adj_index); +/** + * @brief Format/display the contents of the mtrie + */ format_function_t format_ip4_fib_mtrie; +/** + * @brief A global pool of 8bit stride plys + */ +extern ip4_fib_mtrie_8_ply_t *ip4_ply_pool; + +/** + * Is the leaf terminal (i.e. an LB index) or non-terminak (i.e. 
a PLY index) + */ always_inline u32 ip4_fib_mtrie_leaf_is_terminal (ip4_fib_mtrie_leaf_t n) { return n & 1; } +/** + * From the stored slot value extract the LB index value + */ always_inline u32 ip4_fib_mtrie_leaf_get_adj_index (ip4_fib_mtrie_leaf_t n) { @@ -125,35 +190,38 @@ ip4_fib_mtrie_leaf_get_adj_index (ip4_fib_mtrie_leaf_t n) return n >> 1; } -/* Lookup step. Processes 1 byte of 4 byte ip4 address. */ +/** + * @brief Lookup step. Processes 1 byte of 4 byte ip4 address. + */ always_inline ip4_fib_mtrie_leaf_t ip4_fib_mtrie_lookup_step (const ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t current_leaf, const ip4_address_t * dst_address, u32 dst_address_byte_index) { - ip4_fib_mtrie_ply_t *ply; + ip4_fib_mtrie_8_ply_t *ply; + uword current_is_terminal = ip4_fib_mtrie_leaf_is_terminal (current_leaf); if (!current_is_terminal) { - ply = m->ply_pool + (current_leaf >> 1); + ply = ip4_ply_pool + (current_leaf >> 1); return (ply->leaves[dst_address->as_u8[dst_address_byte_index]]); } return current_leaf; } -/* Lookup step. Processes 1 byte of 4 byte ip4 address. */ +/** + * @brief Lookup step number 1. Processes 2 bytes of 4 byte ip4 address. + */ always_inline ip4_fib_mtrie_leaf_t ip4_fib_mtrie_lookup_step_one (const ip4_fib_mtrie_t * m, const ip4_address_t * dst_address) { ip4_fib_mtrie_leaf_t next_leaf; - ip4_fib_mtrie_ply_t *ply; - ply = m->ply_pool; - next_leaf = ply->leaves[dst_address->as_u8[0]]; + next_leaf = m->root_ply.leaves[dst_address->as_u16[0]]; return next_leaf; } diff --git a/src/vnet/ip/ip4_packet.h b/src/vnet/ip/ip4_packet.h index b2c1fcd4..1ff9fbdb 100644 --- a/src/vnet/ip/ip4_packet.h +++ b/src/vnet/ip/ip4_packet.h @@ -52,6 +52,7 @@ typedef union u32 data_u32; /* Aliases. */ u8 as_u8[4]; + u16 as_u16[2]; u32 as_u32; } ip4_address_t; diff --git a/src/vnet/ip/ip4_source_check.c b/src/vnet/ip/ip4_source_check.c index 7c2b7be8..6831066e 100644 --- a/src/vnet/ip/ip4_source_check.c +++ b/src/vnet/ip/ip4_source_check.c @@ -165,11 +165,6 @@ ip4_source_check_inline (vlib_main_t * vm, leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address); leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address); - leaf0 = - ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1); - leaf1 = - ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1); - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2); leaf1 = @@ -248,9 +243,6 @@ ip4_source_check_inline (vlib_main_t * vm, leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address); - leaf0 = - ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1); - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2); diff --git a/src/vnet/ip/ip6.h b/src/vnet/ip/ip6.h index 8fa9a479..bf7ec7d5 100644 --- a/src/vnet/ip/ip6.h +++ b/src/vnet/ip/ip6.h @@ -153,6 +153,9 @@ typedef struct ip6_main_t /* Pool of FIBs. */ struct fib_table_t_ *fibs; + /* Pool of V6 FIBs. */ + ip6_fib_t *v6_fibs; + /** Vector of MFIBs. 
*/ struct mfib_table_t_ *mfibs; diff --git a/src/vnet/ip/ip_api.c b/src/vnet/ip/ip_api.c index e3a1fee8..b9f1782b 100644 --- a/src/vnet/ip/ip_api.c +++ b/src/vnet/ip/ip_api.c @@ -240,6 +240,21 @@ send_ip_fib_details (vpe_api_main_t * am, vl_msg_api_send_shmem (q, (u8 *) & mp); } +typedef struct vl_api_ip_fib_dump_walk_ctx_t_ +{ + fib_node_index_t *feis; +} vl_api_ip_fib_dump_walk_ctx_t; + +static int +vl_api_ip_fib_dump_walk (fib_node_index_t fei, void *arg) +{ + vl_api_ip_fib_dump_walk_ctx_t *ctx = arg; + + vec_add1 (ctx->feis, fei); + + return (1); +} + static void vl_api_ip_fib_dump_t_handler (vl_api_ip_fib_dump_t * mp) { @@ -247,12 +262,13 @@ vl_api_ip_fib_dump_t_handler (vl_api_ip_fib_dump_t * mp) unix_shared_memory_queue_t *q; ip4_main_t *im = &ip4_main; fib_table_t *fib_table; - fib_node_index_t lfei, *lfeip, *lfeis = NULL; - mpls_label_t key; + fib_node_index_t *lfeip; fib_prefix_t pfx; u32 fib_index; fib_route_path_encode_t *api_rpaths; - int i; + vl_api_ip_fib_dump_walk_ctx_t ctx = { + .feis = NULL, + }; q = vl_api_client_index_to_input_queue (mp->client_index); if (q == 0) @@ -261,19 +277,16 @@ vl_api_ip_fib_dump_t_handler (vl_api_ip_fib_dump_t * mp) /* *INDENT-OFF* */ pool_foreach (fib_table, im->fibs, ({ - for (i = 0; i < ARRAY_LEN (fib_table->v4.fib_entry_by_dst_address); i++) - { - hash_foreach(key, lfei, fib_table->v4.fib_entry_by_dst_address[i], - ({ - vec_add1(lfeis, lfei); - })); - } + fib_table_walk(fib_table->ft_index, + FIB_PROTOCOL_IP4, + vl_api_ip_fib_dump_walk, + &ctx); })); /* *INDENT-ON* */ - vec_sort_with_function (lfeis, fib_entry_cmp_for_sort); + vec_sort_with_function (ctx.feis, fib_entry_cmp_for_sort); - vec_foreach (lfeip, lfeis) + vec_foreach (lfeip, ctx.feis) { fib_entry_get_prefix (*lfeip, &pfx); fib_index = fib_entry_get_fib_index (*lfeip); @@ -286,7 +299,7 @@ vl_api_ip_fib_dump_t_handler (vl_api_ip_fib_dump_t * mp) vec_free (api_rpaths); } - vec_free (lfeis); + vec_free (ctx.feis); } static void @@ -377,10 +390,10 @@ api_ip6_fib_table_get_all (unix_shared_memory_queue_t * q, { vpe_api_main_t *am = &vpe_api_main; ip6_main_t *im6 = &ip6_main; - ip6_fib_t *fib = &fib_table->v6; fib_node_index_t *fib_entry_index; api_ip6_fib_show_ctx_t ctx = { - .fib_index = fib->index,.entries = NULL, + .fib_index = fib_table->ft_index, + .entries = NULL, }; fib_route_path_encode_t *api_rpaths; fib_prefix_t pfx; diff --git a/src/vnet/mpls/interface.c b/src/vnet/mpls/interface.c index f631dc76..a085aaa2 100644 --- a/src/vnet/mpls/interface.c +++ b/src/vnet/mpls/interface.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/src/vnet/mpls/mpls.h b/src/vnet/mpls/mpls.h index 300f2cfd..b0125e60 100644 --- a/src/vnet/mpls/mpls.h +++ b/src/vnet/mpls/mpls.h @@ -30,29 +30,6 @@ typedef enum { MPLS_N_ERROR, } mpls_error_t; -#define MPLS_FIB_DEFAULT_TABLE_ID 0 - -/** - * Type exposure is to allow the DP fast/inlined access - */ -#define MPLS_FIB_KEY_SIZE 21 -#define MPLS_FIB_DB_SIZE (1 << (MPLS_FIB_KEY_SIZE-1)) - -typedef struct mpls_fib_t_ -{ - /** - * A hash table of entries. 21 bit key - * Hash table for reduced memory footprint - */ - uword * mf_entries; - - /** - * The load-balance indeices keyed by 21 bit label+eos bit. - * A flat array for maximum lookup performace. 
- */ - index_t mf_lbs[MPLS_FIB_DB_SIZE]; -} mpls_fib_t; - /** * @brief Definition of a callback for receiving MPLS interface state change * notifications @@ -67,6 +44,9 @@ typedef struct { /** A pool of all the MPLS FIBs */ struct fib_table_t_ *fibs; + /** A pool of all the MPLS FIBs */ + struct mpls_fib_t_ *mpls_fibs; + /** A hash table to lookup the mpls_fib by table ID */ uword *fib_index_by_table_id; diff --git a/src/vnet/mpls/mpls_api.c b/src/vnet/mpls/mpls_api.c index a36a5046..f1aef6c9 100644 --- a/src/vnet/mpls/mpls_api.c +++ b/src/vnet/mpls/mpls_api.c @@ -26,6 +26,7 @@ #include #include #include +#include #include @@ -369,6 +370,21 @@ send_mpls_fib_details (vpe_api_main_t * am, vl_msg_api_send_shmem (q, (u8 *) & mp); } +typedef struct vl_api_mpls_fib_dump_table_walk_ctx_t_ +{ + fib_node_index_t *lfeis; +} vl_api_mpls_fib_dump_table_walk_ctx_t; + +static int +vl_api_mpls_fib_dump_table_walk (fib_node_index_t fei, void *arg) +{ + vl_api_mpls_fib_dump_table_walk_ctx_t *ctx = arg; + + vec_add1 (ctx->lfeis, fei); + + return (1); +} + static void vl_api_mpls_fib_dump_t_handler (vl_api_mpls_fib_dump_t * mp) { @@ -376,28 +392,30 @@ vl_api_mpls_fib_dump_t_handler (vl_api_mpls_fib_dump_t * mp) unix_shared_memory_queue_t *q; mpls_main_t *mm = &mpls_main; fib_table_t *fib_table; - fib_node_index_t lfei, *lfeip, *lfeis = NULL; - mpls_label_t key; + mpls_fib_t *mpls_fib; + fib_node_index_t *lfeip = NULL; fib_prefix_t pfx; u32 fib_index; fib_route_path_encode_t *api_rpaths; + vl_api_mpls_fib_dump_table_walk_ctx_t ctx = { + .lfeis = NULL, + }; q = vl_api_client_index_to_input_queue (mp->client_index); if (q == 0) return; /* *INDENT-OFF* */ - pool_foreach (fib_table, mm->fibs, + pool_foreach (mpls_fib, mm->mpls_fibs, ({ - hash_foreach(key, lfei, fib_table->mpls.mf_entries, - ({ - vec_add1(lfeis, lfei); - })); + mpls_fib_table_walk (mpls_fib, + vl_api_mpls_fib_dump_table_walk, + &ctx); })); /* *INDENT-ON* */ - vec_sort_with_function (lfeis, fib_entry_cmp_for_sort); + vec_sort_with_function (ctx.lfeis, fib_entry_cmp_for_sort); - vec_foreach (lfeip, lfeis) + vec_foreach (lfeip, ctx.lfeis) { fib_entry_get_prefix (*lfeip, &pfx); fib_index = fib_entry_get_fib_index (*lfeip); @@ -410,7 +428,7 @@ vl_api_mpls_fib_dump_t_handler (vl_api_mpls_fib_dump_t * mp) vec_free (api_rpaths); } - vec_free (lfeis); + vec_free (ctx.lfeis); } /* diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index 14ccd864..09ae8b8f 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -896,9 +896,10 @@ ip4_reset_fib_t_handler (vl_api_reset_fib_t * mp) /* *INDENT-OFF* */ pool_foreach (fib_table, im4->fibs, ({ - fib = &fib_table->v4; vnet_sw_interface_t * si; + fib = pool_elt_at_index (im4->v4_fibs, fib_table->ft_index); + if (fib->table_id != target_fib_id) continue; @@ -964,7 +965,8 @@ ip6_reset_fib_t_handler (vl_api_reset_fib_t * mp) pool_foreach (fib_table, im6->fibs, ({ vnet_sw_interface_t * si; - fib = &(fib_table->v6); + + fib = pool_elt_at_index (im6->v6_fibs, fib_table->ft_index); if (fib->table_id != target_fib_id) continue; diff --git a/src/vpp/stats/stats.c b/src/vpp/stats/stats.c index 1927da0b..042d02e2 100644 --- a/src/vpp/stats/stats.c +++ b/src/vpp/stats/stats.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #define STATS_DEBUG 0 @@ -576,6 +577,7 @@ do_ip4_fibs (stats_main_t * sm) static ip4_route_t *routes; ip4_route_t *r; fib_table_t *fib; + ip4_fib_t *v4_fib; ip_lookup_main_t *lm = &im4->lookup_main; static uword *results; vl_api_vnet_ip4_fib_counters_t *mp = 0; @@ -592,6 +594,8 @@ again: while 
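Both the IP and MPLS dump handlers above are reworked to the same shape: instead of open-coding the per-protocol hash iteration, the handler passes a callback plus a small context struct to a table-walk routine, and the callback simply accumulates FIB entry indices for later sorting and replying. The fragment below reduces that pattern to a stand-alone C sketch, with plain malloc/realloc standing in for vec_add1 and invented names throughout; assume, as in the callbacks above, that a non-zero return means keep walking.

/*
 * Stand-alone sketch of the walk/context pattern used by the dump handlers
 * above.  Not the real fib_table_walk API; an illustration only.
 */
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>

typedef uint32_t index_t;

typedef struct
{
  index_t *entries;
  size_t n_entries;
} dump_walk_ctx_t;

/* non-zero return means "keep walking", mirroring the callbacks above */
typedef int (*walk_cb_t) (index_t entry_index, void *arg);

static void
table_walk (const index_t * table, size_t n, walk_cb_t cb, void *arg)
{
  size_t i;
  for (i = 0; i < n; i++)
    if (!cb (table[i], arg))
      break;
}

static int
collect_cb (index_t entry_index, void *arg)
{
  dump_walk_ctx_t *ctx = arg;
  ctx->entries = realloc (ctx->entries,
                          (ctx->n_entries + 1) * sizeof (index_t));
  ctx->entries[ctx->n_entries++] = entry_index;
  return 1;
}

int
main (void)
{
  index_t fib[] = { 7, 3, 11, 5 };
  dump_walk_ctx_t ctx = { .entries = NULL, .n_entries = 0 };
  size_t i;

  table_walk (fib, 4, collect_cb, &ctx);
  for (i = 0; i < ctx.n_entries; i++)
    printf ("would send details for entry %u\n", (unsigned) ctx.entries[i]);
  free (ctx.entries);
  return 0;
}

Keeping the accumulation state in a context struct rather than in file-scope statics is what lets one walk routine serve both the IPv4 and MPLS dump paths.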
((fib - im4->fibs) < start_at_fib_index) continue; + v4_fib = pool_elt_at_index (im4->v4_fibs, fib->ft_index); + if (mp == 0) { items_this_message = IP4_FIB_COUNTER_BATCH_SIZE; @@ -615,9 +619,9 @@ again: vec_reset_length (routes); vec_reset_length (results); - for (i = 0; i < ARRAY_LEN (fib->v4.fib_entry_by_dst_address); i++) + for (i = 0; i < ARRAY_LEN (v4_fib->fib_entry_by_dst_address); i++) { - uword *hash = fib->v4.fib_entry_by_dst_address[i]; + uword *hash = v4_fib->fib_entry_by_dst_address[i]; hash_pair_t *p; ip4_route_t x; -- cgit 1.2.3-korg From f9f4065354d665c9b23931afbc23106549cf1d16 Mon Sep 17 00:00:00 2001 From: Eyal Bari Date: Sat, 1 Apr 2017 03:55:08 +0300 Subject: ARP/ND/API:fix arp reg + nd no identical regs fixed arp registration without allocating the event added protection against identical ND registrations Change-Id: I4e2db4913b35d895d8686ada1f0818920f276ad6 Signed-off-by: Eyal Bari --- src/vnet/ip/ip6_neighbor.c | 93 +++++++++++++++++++--------------------------- src/vpp/api/api.c | 31 +++++++++++----- 2 files changed, 59 insertions(+), 65 deletions(-) (limited to 'src/vpp/api') diff --git a/src/vnet/ip/ip6_neighbor.c b/src/vnet/ip/ip6_neighbor.c index 3d48c402..b1a03546 100644 --- a/src/vnet/ip/ip6_neighbor.c +++ b/src/vnet/ip/ip6_neighbor.c @@ -3910,76 +3910,59 @@ vnet_add_del_ip6_nd_change_event (vnet_main_t * vnm, { ip6_neighbor_main_t *nm = &ip6_neighbor_main; ip6_address_t *address = address_arg; - uword *p; + + /* Try to find an existing entry */ + u32 *first = (u32 *) mhash_get (&nm->mac_changes_by_address, address); + u32 *p = first; pending_resolution_t *mc; - void (*fp) (u32, u8 *) = data_callback; + while (p && *p != ~0) + { + mc = pool_elt_at_index (nm->mac_changes, *p); + if (mc->node_index == node_index && mc->type_opaque == type_opaque + && mc->pid == pid) + break; + p = &mc->next_index; + } + int found = p && *p != ~0; if (is_add) { - pool_get (nm->mac_changes, mc); + if (found) + return VNET_API_ERROR_ENTRY_ALREADY_EXISTS; - mc->next_index = ~0; - mc->node_index = node_index; - mc->type_opaque = type_opaque; - mc->data = data; - mc->data_callback = data_callback; - mc->pid = pid; + pool_get (nm->mac_changes, mc); + *mc = (pending_resolution_t) + { + .next_index = ~0,.node_index = node_index,.type_opaque = + type_opaque,.data = data,.data_callback = data_callback,.pid = + pid,}; - p = mhash_get (&nm->mac_changes_by_address, address); + /* Insert new resolution at the end of the list */ + u32 new_idx = mc - nm->mac_changes; if (p) - { - /* Insert new resolution at the head of the list */ - mc->next_index = p[0]; - mhash_unset (&nm->mac_changes_by_address, address, 0); - } - - mhash_set (&nm->mac_changes_by_address, address, - mc - nm->mac_changes, 0); - return 0; + p[0] = new_idx; + else + mhash_set (&nm->mac_changes_by_address, address, new_idx, 0); } else { - u32 index; - pending_resolution_t *mc_last = 0; - - p = mhash_get (&nm->mac_changes_by_address, address); - if (p == 0) + if (!found) return VNET_API_ERROR_NO_SUCH_ENTRY; - index = p[0]; + /* Clients may need to clean up pool entries, too */ + void (*fp) (u32, u8 *) = data_callback; + if (fp) + (*fp) (mc->data, 0 /* no new mac addrs */ ); - while (index != (u32) ~ 0) - { - mc = pool_elt_at_index (nm->mac_changes, index); - if (mc->node_index == node_index && - mc->type_opaque == type_opaque && mc->pid == pid) - { - /* Clients may need to clean up pool entries, too */ - if (fp) - (*fp) (mc->data, 0 /* no new mac addrs */ ); - if (index == p[0]) - { - mhash_unset (&nm->mac_changes_by_address, 
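A second recurring change, visible in the reset-fib and stats hunks above, is that handlers stop reaching into fib_table->v4 / ->v6 / ->mpls and instead fetch the protocol-specific FIB from a parallel pool (im4->v4_fibs, im6->v6_fibs, mm->mpls_fibs) using the table's own ft_index. The toy below shows only that shared-index idea; the _toy types and plain arrays are invented stand-ins for the VPP pools.

/*
 * Stand-alone toy of the parallel-pool layout: the generic table and the
 * protocol-specific FIB live in separate pools that share one index.
 */
#include <stdio.h>

typedef struct
{
  unsigned ft_index;  /* index shared with the per-protocol pool */
  unsigned table_id;
} fib_table_toy_t;

typedef struct
{
  unsigned table_id;
  unsigned n_entries;
} ip4_fib_toy_t;

static fib_table_toy_t fibs[4];   /* generic tables, all protocols */
static ip4_fib_toy_t v4_fibs[4];  /* v4 details, same index space  */

int
main (void)
{
  fibs[2] = (fib_table_toy_t) { .ft_index = 2, .table_id = 100 };
  v4_fibs[2] = (ip4_fib_toy_t) { .table_id = 100, .n_entries = 42 };

  /* the lookup pattern used by ip4_reset_fib_t_handler and do_ip4_fibs */
  fib_table_toy_t *fib_table = &fibs[2];
  ip4_fib_toy_t *v4_fib = &v4_fibs[fib_table->ft_index];

  printf ("table %u has %u entries\n", v4_fib->table_id, v4_fib->n_entries);
  return 0;
}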
address, 0); - if (mc->next_index != ~0) - mhash_set (&nm->mac_changes_by_address, address, - mc->next_index, 0); - pool_put (nm->mac_changes, mc); - return 0; - } - else - { - ASSERT (mc_last); - mc_last->next_index = mc->next_index; - pool_put (nm->mac_changes, mc); - return 0; - } - } - mc_last = mc; - index = mc->next_index; - } + /* Remove the entry from the list and delete the entry */ + *p = mc->next_index; + pool_put (nm->mac_changes, mc); - return VNET_API_ERROR_NO_SUCH_ENTRY; + /* Remove from hash if we deleted the last entry */ + if (*p == ~0 && p == first) + mhash_unset (&nm->mac_changes_by_address, address, 0); } + return 0; } int diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index 09ae8b8f..22410fcc 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -199,7 +199,7 @@ int ip4_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp); int ip6_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp); -void +static void handle_ip4_arp_event (u32 pool_index) { vpe_api_main_t *vam = &vpe_api_main; @@ -1571,11 +1571,12 @@ vl_api_want_ip4_arp_events_t_handler (vl_api_want_ip4_arp_events_t * mp) vpe_api_main_t *am = &vpe_api_main; vnet_main_t *vnm = vnet_get_main (); vl_api_want_ip4_arp_events_reply_t *rmp; - vl_api_ip4_arp_event_t *event; int rv; if (mp->enable_disable) { + vl_api_ip4_arp_event_t *event; + pool_get (am->arp_events, event); rv = vnet_add_del_ip4_arp_change_event (vnm, arp_change_data_callback, mp->pid, &mp->address /* addr, in net byte order */ , @@ -1583,8 +1584,10 @@ vl_api_want_ip4_arp_events_t_handler (vl_api_want_ip4_arp_events_t * mp) IP4_ARP_EVENT, event - am->arp_events, 1 /* is_add */ ); if (rv) - goto out; - pool_get (am->arp_events, event); + { + pool_put (am->arp_events, event); + goto out; + } memset (event, 0, sizeof (*event)); event->_vl_msg_id = ntohs (VL_API_IP4_ARP_EVENT); @@ -1613,12 +1616,24 @@ vl_api_want_ip6_nd_events_t_handler (vl_api_want_ip6_nd_events_t * mp) vpe_api_main_t *am = &vpe_api_main; vnet_main_t *vnm = vnet_get_main (); vl_api_want_ip6_nd_events_reply_t *rmp; - vl_api_ip6_nd_event_t *event; int rv; if (mp->enable_disable) { + vl_api_ip6_nd_event_t *event; pool_get (am->nd_events, event); + + rv = vnet_add_del_ip6_nd_change_event + (vnm, nd_change_data_callback, + mp->pid, mp->address /* addr, in net byte order */ , + vpe_resolver_process_node.index, + IP6_ND_EVENT, event - am->nd_events, 1 /* is_add */ ); + + if (rv) + { + pool_put (am->nd_events, event); + goto out; + } memset (event, 0, sizeof (*event)); event->_vl_msg_id = ntohs (VL_API_IP6_ND_EVENT); @@ -1629,11 +1644,6 @@ vl_api_want_ip6_nd_events_t_handler (vl_api_want_ip6_nd_events_t * mp) if (ip6_address_is_zero ((ip6_address_t *) mp->address)) event->mac_ip = 1; - rv = vnet_add_del_ip6_nd_change_event - (vnm, nd_change_data_callback, - mp->pid, mp->address /* addr, in net byte order */ , - vpe_resolver_process_node.index, - IP6_ND_EVENT, event - am->nd_events, 1 /* is_add */ ); } else { @@ -1643,6 +1653,7 @@ vl_api_want_ip6_nd_events_t_handler (vl_api_want_ip6_nd_events_t * mp) vpe_resolver_process_node.index, IP6_ND_EVENT, ~0 /* pool index */ , 0 /* is_add */ ); } +out: REPLY_MACRO (VL_API_WANT_IP6_ND_EVENTS_REPLY); } -- cgit 1.2.3-korg From 8328534a2734b64e5da791007fd346c54cdb4d32 Mon Sep 17 00:00:00 2001 From: Eyal Bari Date: Tue, 4 Apr 2017 04:23:52 +0300 Subject: VXLAN:fix api dump flipped src<->dst Change-Id: I48bf6c46d78773669e76dc4749be3b1af80782f4 Signed-off-by: Eyal Bari --- src/vpp/api/custom_dump.c | 7 ++----- 1 file changed, 2 insertions(+), 5 
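The rewrite of vnet_add_del_ip6_nd_change_event above replaces the head-insert plus two-branch delete with a single "pointer to the slot that holds the index" walk: the same u32 *p points either at the hash bucket's value or at a previous entry's next_index, so duplicate detection, append and unlink all work without special-casing the head of the list. The toy below models just that trick with fixed arrays in place of VPP's pool and mhash (reg_t, reg_add, reg_del are invented names); pool-entry reclamation and the hash cleanup on the last delete are left out for brevity.

/*
 * Stand-alone sketch of a de-duplicated registration list chained by index,
 * manipulated through a pointer to the slot holding the current index.
 */
#include <stdint.h>
#include <stdio.h>

#define EMPTY (~0u)

typedef struct
{
  uint32_t pid;         /* the identity we de-duplicate on      */
  uint32_t next_index;  /* next pool index in the per-key chain */
} reg_t;

static reg_t pool[8];
static uint32_t n_used;
static uint32_t head = EMPTY;  /* stands in for the mhash bucket */

static uint32_t *
find_slot (uint32_t pid)
{
  uint32_t *p = &head;
  while (*p != EMPTY && pool[*p].pid != pid)
    p = &pool[*p].next_index;
  return p;  /* points at the slot that would hold this pid's index */
}

static int
reg_add (uint32_t pid)
{
  uint32_t *p = find_slot (pid);
  if (*p != EMPTY)
    return -1;                 /* identical registration exists */
  pool[n_used] = (reg_t) { .pid = pid, .next_index = EMPTY };
  *p = n_used++;               /* append at the end of the list */
  return 0;
}

static int
reg_del (uint32_t pid)
{
  uint32_t *p = find_slot (pid);
  if (*p == EMPTY)
    return -1;                 /* no such entry                 */
  *p = pool[*p].next_index;    /* unlink, head or middle alike  */
  return 0;
}

int
main (void)
{
  reg_add (100);
  reg_add (200);
  printf ("duplicate add: %d\n", reg_add (100));  /* -1 */
  printf ("delete head:   %d\n", reg_del (100));  /*  0 */
  printf ("delete again:  %d\n", reg_del (100));  /* -1 */
  return 0;
}

The same allocate-first, roll-back-on-error discipline shows up in the want_ip4_arp_events and want_ip6_nd_events handlers above, which now pool_put the freshly allocated event if the registration call fails.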
deletions(-) (limited to 'src/vpp/api') diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c index 1e841eff..fd4f7fef 100644 --- a/src/vpp/api/custom_dump.c +++ b/src/vpp/api/custom_dump.c @@ -1384,8 +1384,8 @@ static void *vl_api_vxlan_add_del_tunnel_t_print u8 *s; s = format (0, "SCRIPT: vxlan_add_del_tunnel "); - ip46_address_t src = to_ip46 (mp->is_ipv6, mp->dst_address); - ip46_address_t dst = to_ip46 (mp->is_ipv6, mp->src_address); + ip46_address_t src = to_ip46 (mp->is_ipv6, mp->src_address); + ip46_address_t dst = to_ip46 (mp->is_ipv6, mp->dst_address); u8 is_grp = ip46_address_is_multicast (&dst); char *dst_name = is_grp ? "group" : "dst"; @@ -1404,9 +1404,6 @@ static void *vl_api_vxlan_add_del_tunnel_t_print s = format (s, "vni %d ", ntohl (mp->vni)); - if (mp->is_add == 0) - s = format (s, "del "); - if (mp->is_add == 0) s = format (s, "del "); -- cgit 1.2.3-korg From fead670ae0a8323d8cd14e2cfad1fb8c25a48a99 Mon Sep 17 00:00:00 2001 From: Eyal Bari Date: Tue, 4 Apr 2017 04:46:32 +0300 Subject: BD/API:add bridge_domain_set_mac_age api Change-Id: Ic2d33b31ba88f6d9602a22439865637d98cf4a33 Signed-off-by: Eyal Bari --- src/vat/api_format.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++- src/vnet/l2/l2.api | 24 +++++++++++++++ src/vnet/l2/l2_api.c | 33 +++++++++++++++----- src/vnet/l2/l2_bd.c | 3 +- src/vnet/l2/l2_input.h | 5 +++ src/vpp/api/custom_dump.c | 23 +++++++++++--- 6 files changed, 150 insertions(+), 15 deletions(-) (limited to 'src/vpp/api') diff --git a/src/vat/api_format.c b/src/vat/api_format.c index fca2b37a..06884eb1 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -1458,6 +1458,39 @@ static void vl_api_control_ping_reply_t_handler_json vam->result_ready = 1; } +static void + vl_api_bridge_domain_set_mac_age_reply_t_handler + (vl_api_bridge_domain_set_mac_age_reply_t * mp) +{ + vat_main_t *vam = &vat_main; + i32 retval = ntohl (mp->retval); + if (vam->async_mode) + { + vam->async_errors += (retval < 0); + } + else + { + vam->retval = retval; + vam->result_ready = 1; + } +} + +static void vl_api_bridge_domain_set_mac_age_reply_t_handler_json + (vl_api_bridge_domain_set_mac_age_reply_t * mp) +{ + vat_main_t *vam = &vat_main; + vat_json_node_t node; + + vat_json_init_object (&node); + vat_json_object_add_int (&node, "retval", ntohl (mp->retval)); + + vat_json_print (vam->ofp, &node); + vat_json_free (&node); + + vam->retval = ntohl (mp->retval); + vam->result_ready = 1; +} + static void vl_api_l2_flags_reply_t_handler (vl_api_l2_flags_reply_t * mp) { @@ -4285,6 +4318,7 @@ _(SW_INTERFACE_SET_L2_BRIDGE_REPLY, \ _(BRIDGE_DOMAIN_ADD_DEL_REPLY, bridge_domain_add_del_reply) \ _(BRIDGE_DOMAIN_DETAILS, bridge_domain_details) \ _(BRIDGE_DOMAIN_SW_IF_DETAILS, bridge_domain_sw_if_details) \ +_(BRIDGE_DOMAIN_SET_MAC_AGE_REPLY, bridge_domain_set_mac_age_reply) \ _(L2FIB_ADD_DEL_REPLY, l2fib_add_del_reply) \ _(L2_FLAGS_REPLY, l2_flags_reply) \ _(BRIDGE_FLAGS_REPLY, bridge_flags_reply) \ @@ -6030,6 +6064,46 @@ api_l2fib_add_del (vat_main_t * vam) return (vam->retval); } +static int +api_bridge_domain_set_mac_age (vat_main_t * vam) +{ + unformat_input_t *i = vam->input; + vl_api_bridge_domain_set_mac_age_t *mp; + u32 bd_id = ~0; + u32 mac_age = 0; + int ret; + + /* Parse args required to build the message */ + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "bd_id %d", &bd_id)); + else if (unformat (i, "mac-age %d", &mac_age)); + else + break; + } + + if (bd_id == ~0) + { + errmsg ("missing bridge domain"); + return -99; 
+ } + + if (mac_age > 255) + { + errmsg ("mac age must be less than 256 "); + return -99; + } + + M (BRIDGE_DOMAIN_SET_MAC_AGE, mp); + + mp->bd_id = htonl (bd_id); + mp->mac_age = (u8) mac_age; + + S (mp); + W (ret); + return ret; +} + static int api_l2_flags (vat_main_t * vam) { @@ -18419,8 +18493,9 @@ _(sw_interface_set_l2_bridge, \ " | sw_if_index bd_id \n" \ "[shg ] [bvi]\n" \ "enable | disable") \ +_(bridge_domain_set_mac_age, "bd_id mac-age 0-255\n")\ _(bridge_domain_add_del, \ - "bd_id [flood 1|0] [uu-flood 1|0] [forward 1|0] [learn 1|0] [arp-term 1|0] [del]\n") \ + "bd_id [flood 1|0] [uu-flood 1|0] [forward 1|0] [learn 1|0] [arp-term 1|0] [mac-age 0-255] [del]\n") \ _(bridge_domain_dump, "[bd_id ]\n") \ _(l2fib_add_del, \ "mac bd_id [del] | sw_if | sw_if_index [static] [filter] [bvi] [count ]\n") \ diff --git a/src/vnet/l2/l2.api b/src/vnet/l2/l2.api index 061990c0..81b75858 100644 --- a/src/vnet/l2/l2.api +++ b/src/vnet/l2/l2.api @@ -146,6 +146,30 @@ define l2_flags_reply u32 resulting_feature_bitmap; }; +/** \brief L2 bridge domain set mac age + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param bd_id - the bridge domain to create + @param mac_age - mac aging time in min, 0 for disabled +*/ +define bridge_domain_set_mac_age +{ + u32 client_index; + u32 context; + u32 bd_id; + u8 mac_age; +}; + +/** \brief Set bridge domain response + @param context - sender context, to match reply w/ request + @param retval - return code for the set l2 bits request +*/ +define bridge_domain_set_mac_age_reply +{ + u32 context; + i32 retval; +}; + /** \brief L2 bridge domain add or delete request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/l2/l2_api.c b/src/vnet/l2/l2_api.c index a985852c..ffcc7901 100644 --- a/src/vnet/l2/l2_api.c +++ b/src/vnet/l2/l2_api.c @@ -54,7 +54,8 @@ _(BRIDGE_DOMAIN_ADD_DEL, bridge_domain_add_del) \ _(BRIDGE_DOMAIN_DUMP, bridge_domain_dump) \ _(BRIDGE_FLAGS, bridge_flags) \ _(L2_INTERFACE_VLAN_TAG_REWRITE, l2_interface_vlan_tag_rewrite) \ -_(L2_INTERFACE_PBB_TAG_REWRITE, l2_interface_pbb_tag_rewrite) +_(L2_INTERFACE_PBB_TAG_REWRITE, l2_interface_pbb_tag_rewrite) \ +_(BRIDGE_DOMAIN_SET_MAC_AGE, bridge_domain_set_mac_age) static void send_l2_xconnect_details (unix_shared_memory_queue_t * q, u32 context, @@ -244,17 +245,13 @@ vl_api_l2_flags_t_handler (vl_api_l2_flags_t * mp) { vl_api_l2_flags_reply_t *rmp; int rv = 0; - u32 sw_if_index = ntohl (mp->sw_if_index); - u32 flags = ntohl (mp->feature_bitmap); u32 rbm = 0; VALIDATE_SW_IF_INDEX (mp); -#define _(a,b) \ - if (flags & L2INPUT_FEAT_ ## a) \ - rbm = l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_ ## a, mp->is_set); - foreach_l2input_feat; -#undef _ + u32 sw_if_index = ntohl (mp->sw_if_index); + u32 flags = ntohl (mp->feature_bitmap) & L2INPUT_VALID_MASK; + rbm = l2input_intf_bitmap_enable (sw_if_index, flags, mp->is_set); BAD_SW_IF_INDEX_LABEL; @@ -266,6 +263,26 @@ vl_api_l2_flags_t_handler (vl_api_l2_flags_t * mp) /* *INDENT-ON* */ } +static void +vl_api_bridge_domain_set_mac_age_t_handler (vl_api_bridge_domain_set_mac_age_t + * mp) +{ + vlib_main_t *vm = vlib_get_main (); + bd_main_t *bdm = &bd_main; + vl_api_bridge_domain_set_mac_age_reply_t *rmp; + int rv = 0; + u32 bd_id = ntohl (mp->bd_id); + uword *p = hash_get (bdm->bd_index_by_bd_id, bd_id); + if (p == 0) + { + rv = VNET_API_ERROR_NO_SUCH_ENTRY; + goto out; + } + bd_set_mac_age (vm, 
*p, mp->mac_age); +out: + REPLY_MACRO (VL_API_BRIDGE_DOMAIN_SET_MAC_AGE_REPLY); +} + static void vl_api_bridge_domain_add_del_t_handler (vl_api_bridge_domain_add_del_t * mp) { diff --git a/src/vnet/l2/l2_bd.c b/src/vnet/l2/l2_bd.c index 0375998c..a222fec9 100644 --- a/src/vnet/l2/l2_bd.c +++ b/src/vnet/l2/l2_bd.c @@ -284,8 +284,7 @@ bd_set_mac_age (vlib_main_t * vm, u32 bd_index, u8 age) /* check if there is at least one bd with mac aging enabled */ vec_foreach (bd_config, l2input_main.bd_configs) - if (bd_config->bd_id != ~0 && bd_config->mac_age != 0) - enable = 1; + enable |= bd_config->bd_id != ~0 && bd_config->mac_age != 0; vlib_process_signal_event (vm, l2fib_mac_age_scanner_process_node.index, enable ? L2_MAC_AGE_PROCESS_EVENT_START : diff --git a/src/vnet/l2/l2_input.h b/src/vnet/l2/l2_input.h index 262f75c7..a2ade8d8 100644 --- a/src/vnet/l2/l2_input.h +++ b/src/vnet/l2/l2_input.h @@ -117,6 +117,11 @@ typedef enum foreach_l2input_feat #undef _ L2INPUT_N_FEAT, + L2INPUT_VALID_MASK = +#define _(sym,str) L2INPUT_FEAT_##sym##_BIT | + foreach_l2input_feat +#undef _ + 0, } l2input_feat_t; /* Feature bit masks */ diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c index fd4f7fef..b3fd781e 100644 --- a/src/vpp/api/custom_dump.c +++ b/src/vpp/api/custom_dump.c @@ -200,7 +200,7 @@ static void *vl_api_sw_interface_set_vxlan_bypass_t_print s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); if (mp->is_ipv6) - s = format (s, "ip6"); + s = format (s, "ip6 "); if (mp->enable) s = format (s, "enable "); @@ -260,9 +260,9 @@ static void *vl_api_bridge_domain_add_del_t_print if (mp->is_add) { - s = format (s, "flood %d uu-flood %d forward %d learn %d arp-term %d", - mp->flood, mp->uu_flood, mp->forward, mp->learn, - mp->arp_term); + s = format (s, "flood %d uu-flood %d ", mp->flood, mp->uu_flood); + s = format (s, "forward %d learn %d ", mp->forward, mp->learn); + s = format (s, "arp-term %d mac-age %d", mp->arp_term, mp->mac_age); } else s = format (s, "del "); @@ -270,6 +270,20 @@ static void *vl_api_bridge_domain_add_del_t_print FINISH; } +static void *vl_api_bridge_domain_set_mac_age_t_print + (vl_api_bridge_domain_set_mac_age_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: bridge_domain_set_mac_age "); + + s = format (s, "bd_id %d ", ntohl (mp->bd_id)); + + s = format (s, "mac-age %d", mp->mac_age); + + FINISH; +} + static void *vl_api_bridge_domain_dump_t_print (vl_api_bridge_domain_dump_t * mp, void *handle) { @@ -2948,6 +2962,7 @@ _(CLASSIFY_ADD_DEL_SESSION, classify_add_del_session) \ _(SW_INTERFACE_SET_L2_BRIDGE, sw_interface_set_l2_bridge) \ _(BRIDGE_DOMAIN_ADD_DEL, bridge_domain_add_del) \ _(BRIDGE_DOMAIN_DUMP, bridge_domain_dump) \ +_(BRIDGE_DOMAIN_SET_MAC_AGE, bridge_domain_set_mac_age) \ _(CLASSIFY_SET_INTERFACE_IP_TABLE, classify_set_interface_ip_table) \ _(CLASSIFY_SET_INTERFACE_L2_TABLES, classify_set_interface_l2_tables) \ _(ADD_NODE_NEXT, add_node_next) \ -- cgit 1.2.3-korg From f24991cab4a899f9e36e7f12ca61c591fedde249 Mon Sep 17 00:00:00 2001 From: Eyal Bari Date: Wed, 5 Apr 2017 05:33:21 +0300 Subject: L2FIB:add l2fib_flush_bd l2fib_flush_int apis Change-Id: I0a6989c6963956f3e60e8c50835c57845fccef8c Signed-off-by: Eyal Bari --- src/vat/api_format.c | 74 +++++++++++++++++++++++++++++++++++++++++++++-- src/vnet/l2/l2.api | 44 ++++++++++++++++++++++++++++ src/vnet/l2/l2_api.c | 38 ++++++++++++++++++++++++ src/vpp/api/custom_dump.c | 26 +++++++++++++++++ 4 files changed, 180 insertions(+), 2 deletions(-) (limited to 'src/vpp/api') diff --git 
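The l2_input.h hunk above adds L2INPUT_VALID_MASK by expanding the same feature list a second time inside the enum, so the l2_flags handler can mask a client-supplied bitmap down to known features with one AND instead of the per-feature macro loop it used before. Below is a stand-alone miniature of that X-macro construction with a made-up three-feature list; the 1<<bit mask building and the TOY_ names are for the example only, the real list and naming live in l2_input.h.

/*
 * Miniature of building both per-feature bits and an all-features mask
 * from one X-macro list, inside a single enum.
 */
#include <stdio.h>

#define foreach_toy_feat \
_(LEARN)                 \
_(FWD)                   \
_(FLOOD)

typedef enum
{
#define _(sym) TOY_FEAT_##sym##_BIT,
  foreach_toy_feat
#undef _
  TOY_N_FEAT,
  TOY_VALID_MASK =
#define _(sym) (1 << TOY_FEAT_##sym##_BIT) |
    foreach_toy_feat
#undef _
    0,
} toy_feat_t;

int
main (void)
{
  /* unknown high bits in a client-supplied bitmap are masked away */
  unsigned requested = 0xff;
  printf ("valid 0x%x accepted 0x%x\n",
          (unsigned) TOY_VALID_MASK, requested & TOY_VALID_MASK);
  return 0;
}

Because the mask is generated from the same list as the bits, a newly added feature is accepted by the handler without touching it again.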
a/src/vat/api_format.c b/src/vat/api_format.c index 090d990b..61b8e1d8 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -4151,6 +4151,8 @@ _(sw_interface_set_l2_bridge_reply) \ _(bridge_domain_add_del_reply) \ _(sw_interface_set_l2_xconnect_reply) \ _(l2fib_add_del_reply) \ +_(l2fib_flush_int_reply) \ +_(l2fib_flush_bd_reply) \ _(ip_add_del_route_reply) \ _(ip_mroute_add_del_reply) \ _(mpls_route_add_del_reply) \ @@ -4320,6 +4322,8 @@ _(BRIDGE_DOMAIN_DETAILS, bridge_domain_details) \ _(BRIDGE_DOMAIN_SW_IF_DETAILS, bridge_domain_sw_if_details) \ _(BRIDGE_DOMAIN_SET_MAC_AGE_REPLY, bridge_domain_set_mac_age_reply) \ _(L2FIB_ADD_DEL_REPLY, l2fib_add_del_reply) \ +_(L2FIB_FLUSH_INT_REPLY, l2fib_flush_int_reply) \ +_(L2FIB_FLUSH_BD_REPLY, l2fib_flush_bd_reply) \ _(L2_FLAGS_REPLY, l2_flags_reply) \ _(BRIDGE_FLAGS_REPLY, bridge_flags_reply) \ _(TAP_CONNECT_REPLY, tap_connect_reply) \ @@ -5914,6 +5918,70 @@ api_bridge_domain_add_del (vat_main_t * vam) return ret; } +static int +api_l2fib_flush_bd (vat_main_t * vam) +{ + unformat_input_t *i = vam->input; + vl_api_l2fib_flush_bd_t *mp; + u32 bd_id = ~0; + int ret; + + /* Parse args required to build the message */ + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "bd_id %d", &bd_id)); + else + break; + } + + if (bd_id == ~0) + { + errmsg ("missing bridge domain"); + return -99; + } + + M (L2FIB_FLUSH_BD, mp); + + mp->bd_id = htonl (bd_id); + + S (mp); + W (ret); + return ret; +} + +static int +api_l2fib_flush_int (vat_main_t * vam) +{ + unformat_input_t *i = vam->input; + vl_api_l2fib_flush_int_t *mp; + u32 sw_if_index = ~0; + int ret; + + /* Parse args required to build the message */ + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "sw_if_index %d", &sw_if_index)); + else + if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index)); + else + break; + } + + if (sw_if_index == ~0) + { + errmsg ("missing interface name or sw_if_index"); + return -99; + } + + M (L2FIB_FLUSH_INT, mp); + + mp->sw_if_index = ntohl (sw_if_index); + + S (mp); + W (ret); + return ret; +} + static int api_l2fib_add_del (vat_main_t * vam) { @@ -18542,15 +18610,17 @@ _(sw_interface_set_l2_xconnect, \ "rx | rx_sw_if_index tx | tx_sw_if_index \n" \ "enable | disable") \ _(sw_interface_set_l2_bridge, \ - " | sw_if_index bd_id \n" \ + "{ | sw_if_index } bd_id \n" \ "[shg ] [bvi]\n" \ "enable | disable") \ -_(bridge_domain_set_mac_age, "bd_id mac-age 0-255\n")\ +_(bridge_domain_set_mac_age, "bd_id mac-age 0-255") \ _(bridge_domain_add_del, \ "bd_id [flood 1|0] [uu-flood 1|0] [forward 1|0] [learn 1|0] [arp-term 1|0] [mac-age 0-255] [del]\n") \ _(bridge_domain_dump, "[bd_id ]\n") \ _(l2fib_add_del, \ "mac bd_id [del] | sw_if | sw_if_index [static] [filter] [bvi] [count ]\n") \ +_(l2fib_flush_bd, "bd_id ") \ +_(l2fib_flush_int, " | sw_if_index ") \ _(l2_flags, \ "sw_if | sw_if_index [learn] [forward] [uu-flood] [flood]\n") \ _(bridge_flags, \ diff --git a/src/vnet/l2/l2.api b/src/vnet/l2/l2.api index 81b75858..c23eebec 100644 --- a/src/vnet/l2/l2.api +++ b/src/vnet/l2/l2.api @@ -86,6 +86,50 @@ define l2_fib_clear_table_reply i32 retval; }; +/** \brief L2 FIB flush bridge domain entries + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param bd_id - the entry's bridge domain id +*/ +define l2fib_flush_bd +{ + u32 client_index; + u32 context; + u32 bd_id; +}; + +/** \brief L2 FIB flush bridge domain entries response + 
@param context - sender context, to match reply w/ request + @param retval - return code for the request +*/ +define l2fib_flush_bd_reply +{ + u32 context; + i32 retval; +}; + +/** \brief L2 FIB flush interface entries + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param bd_id - the entry's bridge domain id +*/ +define l2fib_flush_int +{ + u32 client_index; + u32 context; + u32 sw_if_index; +}; + +/** \brief L2 FIB flush interface entries response + @param context - sender context, to match reply w/ request + @param retval - return code for the request +*/ +define l2fib_flush_int_reply +{ + u32 context; + i32 retval; +}; + /** \brief L2 FIB add entry request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/l2/l2_api.c b/src/vnet/l2/l2_api.c index ffcc7901..026f1706 100644 --- a/src/vnet/l2/l2_api.c +++ b/src/vnet/l2/l2_api.c @@ -48,6 +48,8 @@ _(L2_XCONNECT_DUMP, l2_xconnect_dump) \ _(L2_FIB_CLEAR_TABLE, l2_fib_clear_table) \ _(L2_FIB_TABLE_DUMP, l2_fib_table_dump) \ +_(L2FIB_FLUSH_INT, l2fib_flush_int) \ +_(L2FIB_FLUSH_BD, l2fib_flush_bd) \ _(L2FIB_ADD_DEL, l2fib_add_del) \ _(L2_FLAGS, l2_flags) \ _(BRIDGE_DOMAIN_ADD_DEL, bridge_domain_add_del) \ @@ -240,6 +242,42 @@ vl_api_l2fib_add_del_t_handler (vl_api_l2fib_add_del_t * mp) REPLY_MACRO (VL_API_L2FIB_ADD_DEL_REPLY); } +static void +vl_api_l2fib_flush_int_t_handler (vl_api_l2fib_flush_int_t * mp) +{ + int rv = 0; + vlib_main_t *vm = vlib_get_main (); + vl_api_l2fib_flush_int_reply_t *rmp; + + VALIDATE_SW_IF_INDEX (mp); + + u32 sw_if_index = ntohl (mp->sw_if_index); + l2fib_flush_int_mac (vm, sw_if_index); + + BAD_SW_IF_INDEX_LABEL; + REPLY_MACRO (VL_API_L2FIB_FLUSH_INT_REPLY); +} + +static void +vl_api_l2fib_flush_bd_t_handler (vl_api_l2fib_flush_bd_t * mp) +{ + int rv = 0; + vlib_main_t *vm = vlib_get_main (); + bd_main_t *bdm = &bd_main; + vl_api_l2fib_flush_bd_reply_t *rmp; + + u32 bd_id = ntohl (mp->bd_id); + uword *p = hash_get (bdm->bd_index_by_bd_id, bd_id); + if (p == 0) + { + rv = VNET_API_ERROR_NO_SUCH_ENTRY; + goto out; + } + l2fib_flush_bd_mac (vm, *p); +out: + REPLY_MACRO (VL_API_L2FIB_FLUSH_BD_REPLY); +} + static void vl_api_l2_flags_t_handler (vl_api_l2_flags_t * mp) { diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c index b3fd781e..000fe0d4 100644 --- a/src/vpp/api/custom_dump.c +++ b/src/vpp/api/custom_dump.c @@ -298,6 +298,30 @@ static void *vl_api_bridge_domain_dump_t_print FINISH; } +static void *vl_api_l2fib_flush_bd_t_print + (vl_api_l2fib_flush_bd_t * mp, void *handle) +{ + u8 *s; + u32 bd_id = ntohl (mp->bd_id); + + s = format (0, "SCRIPT: l2fib_flush_bd "); + s = format (s, "bd_id %d ", bd_id); + + FINISH; +} + +static void *vl_api_l2fib_flush_int_t_print + (vl_api_l2fib_flush_int_t * mp, void *handle) +{ + u8 *s; + u32 sw_if_index = ntohl (mp->sw_if_index); + + s = format (0, "SCRIPT: l2fib_flush_int "); + s = format (s, "sw_if_index %d ", sw_if_index); + + FINISH; +} + static void *vl_api_l2fib_add_del_t_print (vl_api_l2fib_add_del_t * mp, void *handle) { @@ -2955,6 +2979,8 @@ _(SR_POLICY_MOD, sr_policy_mod) \ _(SR_POLICY_DEL, sr_policy_del) \ _(SW_INTERFACE_SET_L2_XCONNECT, sw_interface_set_l2_xconnect) \ _(L2FIB_ADD_DEL, l2fib_add_del) \ +_(L2FIB_FLUSH_BD, l2fib_flush_bd) \ +_(L2FIB_FLUSH_INT, l2fib_flush_int) \ _(L2_FLAGS, l2_flags) \ _(BRIDGE_FLAGS, bridge_flags) \ _(CLASSIFY_ADD_DEL_TABLE, classify_add_del_table) \ -- cgit 
1.2.3-korg From b1352ed0ac39aaa7be7542275d1d43fa64ab28ac Mon Sep 17 00:00:00 2001 From: Eyal Bari Date: Fri, 7 Apr 2017 23:14:17 +0300 Subject: BD:unify bridge domain creation code Change-Id: I29082e7a0c556069180a157e55b3698cf8cd38c7 Signed-off-by: Eyal Bari --- src/vnet/api_errno.h | 4 +- src/vnet/l2/l2_api.c | 63 +++++++---------------------- src/vnet/l2/l2_bd.c | 92 ++++++++++++++++++++----------------------- src/vnet/l2/l2_bd.h | 37 ++++++++++++----- src/vnet/lisp-gpe/interface.c | 7 ++-- src/vpp/api/api.c | 12 +++--- 6 files changed, 97 insertions(+), 118 deletions(-) (limited to 'src/vpp/api') diff --git a/src/vnet/api_errno.h b/src/vnet/api_errno.h index e939404b..0d5b2227 100644 --- a/src/vnet/api_errno.h +++ b/src/vnet/api_errno.h @@ -107,7 +107,9 @@ _(ADDRESS_FOUND_FOR_INTERFACE, -114, "Address found for interface") \ _(SESSION_CONNECT_FAIL, -115, "Session failed to connect") \ _(ENTRY_ALREADY_EXISTS, -116, "Entry already exists") \ _(SVM_SEGMENT_CREATE_FAIL, -117, "svm segment create fail") \ -_(APPLICATION_NOT_ATTACHED, -118, "application not attached") +_(APPLICATION_NOT_ATTACHED, -118, "application not attached") \ +_(BD_ALREADY_EXISTS, -119, "Bridge domain already exists") \ +_(BD_IN_USE, -120, "Bridge domain has member interfaces") typedef enum { diff --git a/src/vnet/l2/l2_api.c b/src/vnet/l2/l2_api.c index 026f1706..5a3c8dc2 100644 --- a/src/vnet/l2/l2_api.c +++ b/src/vnet/l2/l2_api.c @@ -324,54 +324,20 @@ out: static void vl_api_bridge_domain_add_del_t_handler (vl_api_bridge_domain_add_del_t * mp) { - vlib_main_t *vm = vlib_get_main (); - bd_main_t *bdm = &bd_main; - vl_api_bridge_domain_add_del_reply_t *rmp; - int rv = 0; - u32 enable_flags = 0, disable_flags = 0; - u32 bd_id = ntohl (mp->bd_id); - u32 bd_index; - - if (mp->is_add) - { - bd_index = bd_find_or_add_bd_index (bdm, bd_id); - - if (mp->flood) - enable_flags |= L2_FLOOD; - else - disable_flags |= L2_FLOOD; - - if (mp->uu_flood) - enable_flags |= L2_UU_FLOOD; - else - disable_flags |= L2_UU_FLOOD; - - if (mp->forward) - enable_flags |= L2_FWD; - else - disable_flags |= L2_FWD; - - if (mp->arp_term) - enable_flags |= L2_ARP_TERM; - else - disable_flags |= L2_ARP_TERM; - - if (mp->learn) - enable_flags |= L2_LEARN; - else - disable_flags |= L2_LEARN; - - if (enable_flags) - bd_set_flags (vm, bd_index, enable_flags, 1 /* enable */ ); - - if (disable_flags) - bd_set_flags (vm, bd_index, disable_flags, 0 /* disable */ ); - - bd_set_mac_age (vm, bd_index, mp->mac_age); - } - else - rv = bd_delete_bd_index (bdm, bd_id); + l2_bridge_domain_add_del_args_t a = { + .is_add = mp->is_add, + .flood = mp->flood, + .uu_flood = mp->uu_flood, + .forward = mp->forward, + .learn = mp->learn, + .arp_term = mp->arp_term, + .mac_age = mp->mac_age, + .bd_id = ntohl (mp->bd_id), + }; + + int rv = bd_add_del (&a); + vl_api_bridge_domain_add_del_reply_t *rmp; REPLY_MACRO (VL_API_BRIDGE_DOMAIN_ADD_DEL_REPLY); } @@ -436,7 +402,8 @@ vl_api_bridge_domain_dump_t_handler (vl_api_bridge_domain_dump_t * mp) bd_id = ntohl (mp->bd_id); - bd_index = (bd_id == ~0) ? 0 : bd_find_or_add_bd_index (bdm, bd_id); + bd_index = (bd_id == ~0) ? 0 : bd_find_index (bdm, bd_id); + ASSERT (bd_index != ~0); end = (bd_id == ~0) ? 
vec_len (l2im->bd_configs) : bd_index + 1; for (; bd_index < end; bd_index++) { diff --git a/src/vnet/l2/l2_bd.c b/src/vnet/l2/l2_bd.c index 9d7a43d3..cfaf4c99 100644 --- a/src/vnet/l2/l2_bd.c +++ b/src/vnet/l2/l2_bd.c @@ -50,42 +50,35 @@ bd_main_t bd_main; void bd_validate (l2_bridge_domain_t * bd_config) { - if (!bd_is_valid (bd_config)) - { - bd_config->feature_bitmap = ~L2INPUT_FEAT_ARP_TERM; - bd_config->bvi_sw_if_index = ~0; - bd_config->members = 0; - bd_config->flood_count = 0; - bd_config->tun_master_count = 0; - bd_config->tun_normal_count = 0; - bd_config->mac_by_ip4 = 0; - bd_config->mac_by_ip6 = hash_create_mem (0, sizeof (ip6_address_t), - sizeof (uword)); - } + if (bd_is_valid (bd_config)) + return; + bd_config->feature_bitmap = ~L2INPUT_FEAT_ARP_TERM; + bd_config->bvi_sw_if_index = ~0; + bd_config->members = 0; + bd_config->flood_count = 0; + bd_config->tun_master_count = 0; + bd_config->tun_normal_count = 0; + bd_config->mac_by_ip4 = 0; + bd_config->mac_by_ip6 = hash_create_mem (0, sizeof (ip6_address_t), + sizeof (uword)); } u32 -bd_find_or_add_bd_index (bd_main_t * bdm, u32 bd_id) +bd_find_index (bd_main_t * bdm, u32 bd_id) { - uword *p; - u32 rv; - - if (bd_id == ~0) - { - bd_id = 0; - while (hash_get (bdm->bd_index_by_bd_id, bd_id)) - bd_id++; - } - else - { - p = hash_get (bdm->bd_index_by_bd_id, bd_id); - if (p) - return (p[0]); - } + u32 *p = (u32 *) hash_get (bdm->bd_index_by_bd_id, bd_id); + if (!p) + return ~0; + return p[0]; +} - rv = clib_bitmap_first_clear (bdm->bd_index_bitmap); +u32 +bd_add_bd_index (bd_main_t * bdm, u32 bd_id) +{ + ASSERT (!hash_get (bdm->bd_index_by_bd_id, bd_id)); + u32 rv = clib_bitmap_first_clear (bdm->bd_index_bitmap); - /* mark this index busy */ + /* mark this index taken */ bdm->bd_index_bitmap = clib_bitmap_set (bdm->bd_index_bitmap, rv, 1); hash_set (bdm->bd_index_by_bd_id, bd_id, rv); @@ -96,21 +89,14 @@ bd_find_or_add_bd_index (bd_main_t * bdm, u32 bd_id) return rv; } -int -bd_delete_bd_index (bd_main_t * bdm, u32 bd_id) +static int +bd_delete (bd_main_t * bdm, u32 bd_index) { - uword *p; - u32 bd_index; - - p = hash_get (bdm->bd_index_by_bd_id, bd_id); - if (p == 0) - return VNET_API_ERROR_NO_SUCH_ENTRY; - - bd_index = p[0]; + u32 bd_id = l2input_main.bd_configs[bd_index].bd_id; + hash_unset (bdm->bd_index_by_bd_id, bd_id); /* mark this index clear */ bdm->bd_index_bitmap = clib_bitmap_set (bdm->bd_index_bitmap, bd_index, 0); - hash_unset (bdm->bd_index_by_bd_id, bd_id); l2input_main.bd_configs[bd_index].bd_id = ~0; l2input_main.bd_configs[bd_index].feature_bitmap = 0; @@ -202,14 +188,13 @@ clib_error_t * l2bd_init (vlib_main_t * vm) { bd_main_t *bdm = &bd_main; - u32 bd_index; bdm->bd_index_by_bd_id = hash_create (0, sizeof (uword)); /* * create a dummy bd with bd_id of 0 and bd_index of 0 with feature set * to packet drop only. Thus, packets received from any L2 interface with * uninitialized bd_index of 0 can be dropped safely. 
*/ - bd_index = bd_find_or_add_bd_index (bdm, 0); + u32 bd_index = bd_add_bd_index (bdm, 0); ASSERT (bd_index == 0); l2input_main.bd_configs[0].feature_bitmap = L2INPUT_FEAT_DROP; @@ -1087,16 +1072,16 @@ bd_add_del (l2_bridge_domain_add_del_args_t * a) { bd_main_t *bdm = &bd_main; vlib_main_t *vm = bdm->vlib_main; - u32 enable_flags = 0, disable_flags = 0; - u32 bd_index = ~0; int rv = 0; + u32 bd_index = bd_find_index (bdm, a->bd_id); if (a->is_add) { - bd_index = bd_find_or_add_bd_index (bdm, a->bd_id); - if (bd_index == ~0) - return bd_index; + if (bd_index != ~0) + return VNET_API_ERROR_BD_ALREADY_EXISTS; + bd_index = bd_add_bd_index (bdm, a->bd_id); + u32 enable_flags = 0, disable_flags = 0; if (a->flood) enable_flags |= L2_FLOOD; else @@ -1131,7 +1116,13 @@ bd_add_del (l2_bridge_domain_add_del_args_t * a) bd_set_mac_age (vm, bd_index, a->mac_age); } else - rv = bd_delete_bd_index (bdm, a->bd_id); + { + if (bd_index == ~0) + return VNET_API_ERROR_NO_SUCH_ENTRY; + if (vec_len (l2input_main.bd_configs[bd_index].members)) + return VNET_API_ERROR_BD_IN_USE; + rv = bd_delete (bdm, bd_index); + } return rv; } @@ -1215,6 +1206,9 @@ bd_add_del_command_fn (vlib_main_t * vm, unformat_input_t * input, if (is_add) vlib_cli_output (vm, "bridge-domain %d", bd_id); break; + case VNET_API_ERROR_BD_IN_USE: + error = clib_error_return (0, "bridge domain in use - remove members"); + goto done; case VNET_API_ERROR_NO_SUCH_ENTRY: error = clib_error_return (0, "bridge domain id does not exist"); goto done; diff --git a/src/vnet/l2/l2_bd.h b/src/vnet/l2/l2_bd.h index 83733411..e502d497 100644 --- a/src/vnet/l2/l2_bd.h +++ b/src/vnet/l2/l2_bd.h @@ -128,28 +128,47 @@ u32 bd_remove_member (l2_bridge_domain_t * bd_config, u32 sw_if_index); u32 bd_set_flags (vlib_main_t * vm, u32 bd_index, u32 flags, u32 enable); void bd_set_mac_age (vlib_main_t * vm, u32 bd_index, u8 age); +int bd_add_del (l2_bridge_domain_add_del_args_t * args); /** - * \brief Get or create a bridge domain. + * \brief Get a bridge domain. + * + * Get a bridge domain with the given bridge domain ID. + * + * \param bdm bd_main pointer. + * \param bd_id The bridge domain ID + * \return The bridge domain index in \c l2input_main->l2_bridge_domain_t vector. + */ +u32 bd_find_index (bd_main_t * bdm, u32 bd_id); + +/** + * \brief Create a bridge domain. * - * Get or create a bridge domain with the given bridge domain ID. + * Create a bridge domain with the given bridge domain ID * * \param bdm bd_main pointer. - * \param bd_id The bridge domain ID or ~0 if an arbitrary unused bridge domain should be used. * \return The bridge domain index in \c l2input_main->l2_bridge_domain_t vector. */ -u32 bd_find_or_add_bd_index (bd_main_t * bdm, u32 bd_id); +u32 bd_add_bd_index (bd_main_t * bdm, u32 bd_id); /** - * \brief Delete a bridge domain. + * \brief Get or create a bridge domain. * - * Delete an existing bridge domain with the given bridge domain ID. + * Get a bridge domain with the given bridge domain ID, if one exists, otherwise + * create one with the given ID, or the first unused ID if the given ID is ~0.. * * \param bdm bd_main pointer. - * \param bd_id The bridge domain ID. - * \return 0 on success and -1 if the bridge domain does not exist. + * \param bd_id The bridge domain ID + * \return The bridge domain index in \c l2input_main->l2_bridge_domain_t vector. 
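Pulling the l2_bd.c hunks above together, the consolidated bd_add_del enforces a simple contract: an add fails if the bridge-domain id already exists, and a delete fails if the id does not exist or the domain still has member interfaces. The stand-alone toy below restates that contract with invented names and a plain array in place of the bd_id hash, index bitmap and bd_configs vector; the error values are placeholders for the corresponding VNET_API_ERROR_* codes.

/*
 * Toy restatement of the add/delete rules; not the real bd_add_del.
 */
#include <stdio.h>

enum
{
  TOY_OK = 0,
  TOY_ALREADY_EXISTS = -1,  /* cf. VNET_API_ERROR_BD_ALREADY_EXISTS */
  TOY_NO_SUCH_ENTRY = -2,   /* cf. VNET_API_ERROR_NO_SUCH_ENTRY     */
  TOY_IN_USE = -3,          /* cf. VNET_API_ERROR_BD_IN_USE         */
};

typedef struct
{
  unsigned bd_id;
  unsigned n_members;
  int in_use;
} toy_bd_t;

static toy_bd_t bds[8];

static int
toy_bd_find (unsigned bd_id)
{
  int i;
  for (i = 0; i < 8; i++)
    if (bds[i].in_use && bds[i].bd_id == bd_id)
      return i;
  return -1;
}

static int
toy_bd_add_del (unsigned bd_id, int is_add)
{
  int i = toy_bd_find (bd_id);

  if (is_add)
    {
      if (i >= 0)
        return TOY_ALREADY_EXISTS;
      for (i = 0; i < 8; i++)
        if (!bds[i].in_use)
          {
            bds[i] = (toy_bd_t) { .bd_id = bd_id, .in_use = 1 };
            return TOY_OK;
          }
      return TOY_NO_SUCH_ENTRY;  /* table full; toy simplification */
    }

  if (i < 0)
    return TOY_NO_SUCH_ENTRY;
  if (bds[i].n_members)
    return TOY_IN_USE;           /* remove member interfaces first */
  bds[i].in_use = 0;
  return TOY_OK;
}

int
main (void)
{
  printf ("%d\n", toy_bd_add_del (10, 1));  /*  0              */
  printf ("%d\n", toy_bd_add_del (10, 1));  /* already exists  */
  bds[toy_bd_find (10)].n_members = 2;
  printf ("%d\n", toy_bd_add_del (10, 0));  /* in use          */
  bds[toy_bd_find (10)].n_members = 0;
  printf ("%d\n", toy_bd_add_del (10, 0));  /*  0              */
  return 0;
}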
*/ -int bd_delete_bd_index (bd_main_t * bdm, u32 bd_id); +static inline u32 +bd_find_or_add_bd_index (bd_main_t * bdm, u32 bd_id) +{ + u32 bd_index = bd_find_index (bdm, bd_id); + if (bd_index == ~0) + return bd_add_bd_index (bdm, bd_id); + return bd_index; +} u32 bd_add_del_ip_mac (u32 bd_index, u8 * ip_addr, u8 * mac_addr, u8 is_ip6, u8 is_add); diff --git a/src/vnet/lisp-gpe/interface.c b/src/vnet/lisp-gpe/interface.c index 4760f448..0598c048 100644 --- a/src/vnet/lisp-gpe/interface.c +++ b/src/vnet/lisp-gpe/interface.c @@ -705,11 +705,10 @@ void lisp_gpe_del_l2_iface (lisp_gpe_main_t * lgm, u32 vni, u32 bd_id) { tunnel_lookup_t *l2_ifaces = &lgm->l2_ifaces; - u16 bd_index; - uword *hip; - bd_index = bd_find_or_add_bd_index (&bd_main, bd_id); - hip = hash_get (l2_ifaces->hw_if_index_by_dp_table, bd_index); + u32 bd_index = bd_find_index (&bd_main, bd_id); + ASSERT (bd_index != ~0); + uword *hip = hash_get (l2_ifaces->hw_if_index_by_dp_table, bd_index); if (hip == 0) { diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index 22410fcc..f1b6877f 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -410,21 +410,19 @@ static void bd_main_t *bdm = &bd_main; vl_api_sw_interface_set_l2_bridge_reply_t *rmp; int rv = 0; - u32 rx_sw_if_index = ntohl (mp->rx_sw_if_index); - u32 bd_id = ntohl (mp->bd_id); - u32 bd_index; - u32 bvi = mp->bvi; - u8 shg = mp->shg; vlib_main_t *vm = vlib_get_main (); vnet_main_t *vnm = vnet_get_main (); VALIDATE_RX_SW_IF_INDEX (mp); + u32 rx_sw_if_index = ntohl (mp->rx_sw_if_index); - bd_index = bd_find_or_add_bd_index (bdm, bd_id); if (mp->enable) { - //VALIDATE_TX_SW_IF_INDEX(mp); + u32 bd_id = ntohl (mp->bd_id); + u32 bd_index = bd_find_or_add_bd_index (bdm, bd_id); + u32 bvi = mp->bvi; + u8 shg = mp->shg; rv = set_int_l2_mode (vm, vnm, MODE_L2_BRIDGE, rx_sw_if_index, bd_index, bvi, shg, 0); } -- cgit 1.2.3-korg From 11b8dbf78af49d270a0e72abe7dea73eec30d85f Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Mon, 24 Apr 2017 10:46:54 -0400 Subject: "autoreply" flag: autogenerate standard xxx_reply_t messages Change-Id: I72298aaae7d172082ece3a8edea4217c11b28d79 Signed-off-by: Dave Barach --- src/examples/sample-plugin/sample/sample.api | 10 +- src/plugins/acl/acl.api | 60 +--- src/plugins/dpdk/api/dpdk.api | 35 +- src/plugins/flowperpkt/flowperpkt.api | 23 +- .../export-vxlan-gpe/vxlan_gpe_ioam_export.api | 10 +- src/plugins/ioam/export/ioam_export.api | 10 +- src/plugins/ioam/ip6/ioam_cache.api | 10 +- src/plugins/ioam/lib-pot/pot.api | 34 +- src/plugins/ioam/lib-trace/trace.api | 26 +- src/plugins/ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api | 82 +---- src/plugins/lb/lb.api | 21 +- src/plugins/memif/memif.api | 12 +- src/plugins/snat/snat.api | 88 +---- src/tools/vppapigen/gram.y | 3 +- src/tools/vppapigen/lex.c | 57 +++- src/tools/vppapigen/lex.h | 1 + src/tools/vppapigen/node.c | 5 + src/tools/vppapigen/node.h | 2 + src/vlibmemory/memclnt.api | 7 +- src/vlibmemory/memory_vlib.c | 8 +- src/vnet/bfd/bfd.api | 132 +------- src/vnet/classify/classify.api | 37 +-- src/vnet/cop/cop.api | 28 +- src/vnet/devices/af_packet/af_packet.api | 12 +- src/vnet/devices/netmap/netmap.api | 24 +- src/vnet/devices/virtio/vhost_user.api | 24 +- src/vnet/dhcp/dhcp.api | 38 +-- src/vnet/flow/flow.api | 32 +- src/vnet/interface.api | 108 +----- src/vnet/ip/ip.api | 108 +----- src/vnet/ipsec/ipsec.api | 224 ++----------- src/vnet/l2/l2.api | 96 +----- src/vnet/l2tp/l2tp.api | 28 +- src/vnet/lisp-cp/lisp.api | 164 +-------- src/vnet/lisp-cp/one.api | 185 +---------- 
src/vnet/lisp-gpe/lisp_gpe.api | 48 +-- src/vnet/map/map.api | 22 +- src/vnet/mpls/mpls.api | 26 +- src/vnet/session/session.api | 68 +--- src/vnet/span/span.api | 10 +- src/vnet/sr/sr.api | 60 +--- src/vnet/unix/tap.api | 12 +- src/vnet/vxlan/vxlan.api | 12 +- src/vpp/api/vpe.api | 367 ++------------------- 44 files changed, 271 insertions(+), 2098 deletions(-) (limited to 'src/vpp/api') diff --git a/src/examples/sample-plugin/sample/sample.api b/src/examples/sample-plugin/sample/sample.api index f99cdb38..d565c0b1 100644 --- a/src/examples/sample-plugin/sample/sample.api +++ b/src/examples/sample-plugin/sample/sample.api @@ -16,7 +16,7 @@ /* Define a simple binary API to control the feature */ -define sample_macswap_enable_disable { +autoreply define sample_macswap_enable_disable { /* Client identifier, set from api_main.my_client_index */ u32 client_index; @@ -29,11 +29,3 @@ define sample_macswap_enable_disable { /* Interface handle */ u32 sw_if_index; }; - -define sample_macswap_enable_disable_reply { - /* From the request */ - u32 context; - - /* Return value, zero means all OK */ - i32 retval; -}; diff --git a/src/plugins/acl/acl.api b/src/plugins/acl/acl.api index d981338d..3b334113 100644 --- a/src/plugins/acl/acl.api +++ b/src/plugins/acl/acl.api @@ -161,24 +161,13 @@ define acl_add_replace_reply @param acl_index - ACL index to delete */ -manual_print define acl_del +autoreply manual_print define acl_del { u32 client_index; u32 context; u32 acl_index; }; -/** \brief Reply to delete the ACL - @param context - returned sender context, to match reply w/ request - @param retval 0 - no error -*/ - -define acl_del_reply -{ - u32 context; - i32 retval; -}; - /* acl_interface_add_del(_reply) to be deprecated in lieu of acl_interface_set_acl_list */ /** \brief Use acl_interface_set_acl_list instead Append/remove an ACL index to/from the list of ACLs checked for an interface @@ -190,7 +179,7 @@ define acl_del_reply @param acl_index - index of ACL for the operation */ -manual_print define acl_interface_add_del +autoreply manual_print define acl_interface_add_del { u32 client_index; u32 context; @@ -204,17 +193,6 @@ manual_print define acl_interface_add_del u32 acl_index; }; -/** \brief Reply to alter the ACL list - @param context - returned sender context, to match reply w/ request - @param retval 0 - no error -*/ - -define acl_interface_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set the vector of input/output ACLs checked for an interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -224,7 +202,7 @@ define acl_interface_add_del_reply @param acls - vector of ACL indices */ -manual_print define acl_interface_set_acl_list +autoreply manual_print define acl_interface_set_acl_list { u32 client_index; u32 context; @@ -239,12 +217,6 @@ manual_print define acl_interface_set_acl_list @param retval 0 - no error */ -define acl_interface_set_acl_list_reply -{ - u32 context; - i32 retval; -}; - /** \brief Dump the specific ACL contents or all of the ACLs' contents @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -341,24 +313,13 @@ define macip_acl_add_reply @param acl_index - MACIP ACL index to delete */ -manual_print define macip_acl_del +autoreply manual_print define macip_acl_del { u32 client_index; u32 context; u32 acl_index; }; -/** \brief Reply to delete the MACIP ACL - @param context - returned sender context, to match reply w/ 
request - @param retval 0 - no error -*/ - -define macip_acl_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Add or delete a MACIP ACL to/from interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -367,7 +328,7 @@ define macip_acl_del_reply @param acl_index - MACIP ACL index */ -manual_print define macip_acl_interface_add_del +autoreply manual_print define macip_acl_interface_add_del { u32 client_index; u32 context; @@ -377,17 +338,6 @@ manual_print define macip_acl_interface_add_del u32 acl_index; }; -/** \brief Reply to apply/unapply the MACIP ACL - @param context - returned sender context, to match reply w/ request - @param retval 0 - no error -*/ - -define macip_acl_interface_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Dump one or all defined MACIP ACLs @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/plugins/dpdk/api/dpdk.api b/src/plugins/dpdk/api/dpdk.api index 21215d45..d43f8a36 100644 --- a/src/plugins/dpdk/api/dpdk.api +++ b/src/plugins/dpdk/api/dpdk.api @@ -21,7 +21,7 @@ @param pipe - pipe ID within its subport @param profile - pipe profile ID */ -define sw_interface_set_dpdk_hqos_pipe { +autoreply define sw_interface_set_dpdk_hqos_pipe { u32 client_index; u32 context; u32 sw_if_index; @@ -30,15 +30,6 @@ define sw_interface_set_dpdk_hqos_pipe { u32 profile; }; -/** \brief DPDK interface HQoS pipe profile set reply - @param context - sender context, to match reply w/ request - @param retval - request return code -*/ -define sw_interface_set_dpdk_hqos_pipe_reply { - u32 context; - i32 retval; -}; - /** \brief DPDK interface HQoS subport parameters set request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -49,7 +40,7 @@ define sw_interface_set_dpdk_hqos_pipe_reply { @param tc_rate - subport traffic class 0 .. 3 rates (measured in bytes/second) @param tc_period - enforcement period for rates (measured in milliseconds) */ -define sw_interface_set_dpdk_hqos_subport { +autoreply define sw_interface_set_dpdk_hqos_subport { u32 client_index; u32 context; u32 sw_if_index; @@ -60,15 +51,6 @@ define sw_interface_set_dpdk_hqos_subport { u32 tc_period; }; -/** \brief DPDK interface HQoS subport parameters set reply - @param context - sender context, to match reply w/ request - @param retval - request return code -*/ -define sw_interface_set_dpdk_hqos_subport_reply { - u32 context; - i32 retval; -}; - /** \brief DPDK interface HQoS tctbl entry set request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -77,7 +59,7 @@ define sw_interface_set_dpdk_hqos_subport_reply { @param tc - traffic class (0 .. 3) @param queue - traffic class queue (0 .. 
3) */ -define sw_interface_set_dpdk_hqos_tctbl { +autoreply define sw_interface_set_dpdk_hqos_tctbl { u32 client_index; u32 context; u32 sw_if_index; @@ -86,18 +68,9 @@ define sw_interface_set_dpdk_hqos_tctbl { u32 queue; }; -/** \brief DPDK interface HQoS tctbl entry set reply - @param context - sender context, to match reply w/ request - @param retval - request return code -*/ -define sw_interface_set_dpdk_hqos_tctbl_reply { - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") * End: */ - \ No newline at end of file + diff --git a/src/plugins/flowperpkt/flowperpkt.api b/src/plugins/flowperpkt/flowperpkt.api index 1cf62c54..3ff92dca 100644 --- a/src/plugins/flowperpkt/flowperpkt.api +++ b/src/plugins/flowperpkt/flowperpkt.api @@ -12,7 +12,7 @@ @param is_ipv6 - if non-zero the address is ipv6, else ipv4 @param sw_if_index - index of the interface */ -manual_print define flowperpkt_tx_interface_add_del +autoreply manual_print define flowperpkt_tx_interface_add_del { /* Client identifier, set from api_main.my_client_index */ u32 client_index; @@ -28,20 +28,7 @@ manual_print define flowperpkt_tx_interface_add_del u32 sw_if_index; }; -/** \brief Reply to enable/disable per-packet IPFIX recording messages - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define flowperpkt_tx_interface_add_del_reply -{ - /* From the request */ - u32 context; - - /* Return value, zero means all OK */ - i32 retval; -}; - -define flowperpkt_params +autoreply define flowperpkt_params { u32 client_index; u32 context; @@ -51,9 +38,3 @@ define flowperpkt_params u32 active_timer; /* ~0 is off, 0 is default */ u32 passive_timer; /* ~0 is off, 0 is default */ }; - -define flowperpkt_params_reply -{ - u32 context; - i32 retval; -}; diff --git a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api index 7b17c3f7..caa97e6e 100644 --- a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api +++ b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api @@ -16,7 +16,7 @@ /* Define a simple binary API to control the feature */ -define vxlan_gpe_ioam_export_enable_disable { +autoreply define vxlan_gpe_ioam_export_enable_disable { /* Client identifier, set from api_main.my_client_index */ u32 client_index; @@ -32,11 +32,3 @@ define vxlan_gpe_ioam_export_enable_disable { /* Src ip address */ }; - -define vxlan_gpe_ioam_export_enable_disable_reply { - /* From the request */ - u32 context; - - /* Return value, zero means all OK */ - i32 retval; -}; \ No newline at end of file diff --git a/src/plugins/ioam/export/ioam_export.api b/src/plugins/ioam/export/ioam_export.api index f22d9fc8..bb830561 100644 --- a/src/plugins/ioam/export/ioam_export.api +++ b/src/plugins/ioam/export/ioam_export.api @@ -16,7 +16,7 @@ /* Define a simple binary API to control the feature */ -define ioam_export_ip6_enable_disable { +autoreply define ioam_export_ip6_enable_disable { /* Client identifier, set from api_main.my_client_index */ u32 client_index; @@ -32,11 +32,3 @@ define ioam_export_ip6_enable_disable { /* Src ip address */ }; - -define ioam_export_ip6_enable_disable_reply { - /* From the request */ - u32 context; - - /* Return value, zero means all OK */ - i32 retval; -}; diff --git a/src/plugins/ioam/ip6/ioam_cache.api b/src/plugins/ioam/ip6/ioam_cache.api index de50d57d..dd9c0186 100644 --- a/src/plugins/ioam/ip6/ioam_cache.api +++ b/src/plugins/ioam/ip6/ioam_cache.api @@ -16,7 +16,7 
@@ /* API to control ioam caching */ -define ioam_cache_ip6_enable_disable { +autoreply define ioam_cache_ip6_enable_disable { /* Client identifier, set from api_main.my_client_index */ u32 client_index; @@ -27,11 +27,3 @@ define ioam_cache_ip6_enable_disable { u8 is_disable; }; - -define ioam_cache_ip6_enable_disable_reply { - /* From the request */ - u32 context; - - /* Return value, zero means all OK */ - i32 retval; -}; diff --git a/src/plugins/ioam/lib-pot/pot.api b/src/plugins/ioam/lib-pot/pot.api index fa2fc126..c377cde0 100644 --- a/src/plugins/ioam/lib-pot/pot.api +++ b/src/plugins/ioam/lib-pot/pot.api @@ -27,7 +27,7 @@ @param list_name_len - length of the name of this profile list @param list_name - name of this profile list */ -define pot_profile_add { +autoreply define pot_profile_add { u32 client_index; u32 context; u8 id; @@ -42,22 +42,12 @@ define pot_profile_add { u8 list_name[0]; }; -/** \brief Proof of Transit profile add / del response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define pot_profile_add_reply { - u32 context; - i32 retval; -}; - - /** \brief Proof of Transit(POT): Activate POT profile in the list @param id - id of the profile @param list_name_len - length of the name of this profile list @param list_name - name of this profile list */ -define pot_profile_activate { +autoreply define pot_profile_activate { u32 client_index; u32 context; u8 id; @@ -65,37 +55,19 @@ define pot_profile_activate { u8 list_name[0]; }; -/** \brief Proof of Transit profile activate response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define pot_profile_activate_reply { - u32 context; - i32 retval; -}; - /** \brief Delete POT Profile @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param list_name_len - length of the name of the profile list @param list_name - name of profile list to delete */ -define pot_profile_del { +autoreply define pot_profile_del { u32 client_index; u32 context; u8 list_name_len; u8 list_name[0]; }; -/** \brief Proof of Transit profile add / del response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define pot_profile_del_reply { - u32 context; - i32 retval; -}; - /** \brief Show POT Profiles @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/plugins/ioam/lib-trace/trace.api b/src/plugins/ioam/lib-trace/trace.api index cb958325..2f45c6e2 100644 --- a/src/plugins/ioam/lib-trace/trace.api +++ b/src/plugins/ioam/lib-trace/trace.api @@ -22,7 +22,7 @@ @param trace_tsp- Timestamp resolution @param app_data - Application specific opaque */ -define trace_profile_add { +autoreply define trace_profile_add { u32 client_index; u32 context; u8 trace_type; @@ -32,37 +32,15 @@ define trace_profile_add { u32 app_data; }; -/** \brief Trace profile add / del response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define trace_profile_add_reply { - u32 context; - i32 retval; -}; - - - /** \brief Delete trace Profile @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request */ -define trace_profile_del { +autoreply define trace_profile_del { u32 client_index; u32 context; }; -/** \brief Trace profile add / del response 
- @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define trace_profile_del_reply { - u32 context; - i32 retval; -}; - - - /** \brief Show trace Profile @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api b/src/plugins/ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api index 056529a4..a6761f07 100644 --- a/src/plugins/ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api +++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api @@ -24,7 +24,7 @@ @param trace_enable - iOAM Trace enabled or not flag */ -define vxlan_gpe_ioam_enable { +autoreply define vxlan_gpe_ioam_enable { u32 client_index; u32 context; u16 id; @@ -33,38 +33,18 @@ define vxlan_gpe_ioam_enable { u8 trace_enable; }; -/** \brief iOAM Over VxLAN-GPE - Set iOAM transport for VXLAN-GPE reply - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define vxlan_gpe_ioam_enable_reply { - u32 context; - i32 retval; -}; - - /** \brief iOAM for VxLAN-GPE disable @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param id - profile id */ -define vxlan_gpe_ioam_disable +autoreply define vxlan_gpe_ioam_disable { u32 client_index; u32 context; u16 id; }; -/** \brief vxlan_gpe_ioam disable response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define vxlan_gpe_ioam_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief Enable iOAM for a VNI (VXLAN-GPE) @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -73,7 +53,7 @@ define vxlan_gpe_ioam_disable_reply @param remote - IPv4/6 Address of the remote VTEP */ -define vxlan_gpe_ioam_vni_enable { +autoreply define vxlan_gpe_ioam_vni_enable { u32 client_index; u32 context; u32 vni; @@ -82,18 +62,6 @@ define vxlan_gpe_ioam_vni_enable { u8 is_ipv6; }; -/** \brief Reply to enable iOAM for a VNI (VXLAN-GPE) - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param retval - return value for request - -*/ -define vxlan_gpe_ioam_vni_enable_reply { - u32 client_index; - u32 context; - i32 retval; -}; - /** \brief Disable iOAM for a VNI (VXLAN-GPE) @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -102,7 +70,7 @@ define vxlan_gpe_ioam_vni_enable_reply { @param remote - IPv4/6 Address of the remote VTEP */ -define vxlan_gpe_ioam_vni_disable { +autoreply define vxlan_gpe_ioam_vni_disable { u32 client_index; u32 context; u32 vni; @@ -111,19 +79,6 @@ define vxlan_gpe_ioam_vni_disable { u8 is_ipv6; }; -/** \brief Reply to disable iOAM for a VNI (VXLAN-GPE) - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param retval - return value for request - -*/ -define vxlan_gpe_ioam_vni_disable_reply { - u32 client_index; - u32 context; - i32 retval; -}; - - /** \brief Enable iOAM for a VXLAN-GPE transit @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -131,7 +86,7 @@ define vxlan_gpe_ioam_vni_disable_reply { @param outer_fib_index- FIB index */ -define vxlan_gpe_ioam_transit_enable { +autoreply define 
vxlan_gpe_ioam_transit_enable { u32 client_index; u32 context; u32 outer_fib_index; @@ -139,18 +94,6 @@ define vxlan_gpe_ioam_transit_enable { u8 is_ipv6; }; -/** \brief Reply to enable iOAM for VXLAN-GPE transit - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param retval - return value for request - -*/ -define vxlan_gpe_ioam_transit_enable_reply { - u32 client_index; - u32 context; - i32 retval; -}; - /** \brief Disable iOAM for VXLAN-GPE transit @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -158,7 +101,7 @@ define vxlan_gpe_ioam_transit_enable_reply { @param outer_fib_index- FIB index */ -define vxlan_gpe_ioam_transit_disable { +autoreply define vxlan_gpe_ioam_transit_disable { u32 client_index; u32 context; u32 outer_fib_index; @@ -166,16 +109,3 @@ define vxlan_gpe_ioam_transit_disable { u8 is_ipv6; }; -/** \brief Reply to disable iOAM for VXLAN-GPE transit - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param retval - return value for request - -*/ -define vxlan_gpe_ioam_transit_disable_reply { - u32 client_index; - u32 context; - i32 retval; -}; - - diff --git a/src/plugins/lb/lb.api b/src/plugins/lb/lb.api index 39ee3c8f..32cc669b 100644 --- a/src/plugins/lb/lb.api +++ b/src/plugins/lb/lb.api @@ -8,7 +8,7 @@ @param flow_timeout - Time in seconds after which, if no packet is received for a given flow, the flow is removed from the established flow table. */ -define lb_conf +autoreply define lb_conf { u32 client_index; u32 context; @@ -18,11 +18,6 @@ define lb_conf u32 flow_timeout; }; -define lb_conf_reply { - u32 context; - i32 retval; -}; - /** \brief Add a virtual address (or prefix) @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -33,7 +28,7 @@ define lb_conf_reply { for this VIP (must be power of 2). @param is_del - The VIP should be removed. */ -define lb_add_del_vip { +autoreply define lb_add_del_vip { u32 client_index; u32 context; u8 ip_prefix[16]; @@ -43,11 +38,6 @@ define lb_add_del_vip { u8 is_del; }; -define lb_add_del_vip_reply { - u32 context; - i32 retval; -}; - /** \brief Add an application server for a given VIP @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -56,7 +46,7 @@ define lb_add_del_vip_reply { @param as_address - The application server address (IPv4 in lower order 32 bits). @param is_del - The AS should be removed. 
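Every request converted to autoreply in this patch loses its hand-written reply; vppapigen now synthesizes a <name>_reply message carrying only a context and a retval. As a rough, hedged illustration (not part of this diff, and assuming the usual vl_api_*_t naming of generated C structs), the reply synthesized for lb_add_del_as would reach C code approximately as:

/* Illustrative sketch only: approximate C rendering of the autoreply-
 * generated lb_add_del_as_reply.  Struct name and packing are assumptions. */
typedef struct
{
  u16 _vl_msg_id;   /* message id, filled in when the reply is sent */
  u32 context;      /* echoed from the lb_add_del_as request */
  i32 retval;       /* zero on success, negative API error otherwise */
} vl_api_lb_add_del_as_reply_t;

Clients match the echoed context against their request and treat a non-zero retval as an error, exactly as with the hand-written replies removed throughout this change.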
*/ -define lb_add_del_as { +autoreply define lb_add_del_as { u32 client_index; u32 context; u8 vip_ip_prefix[16]; @@ -64,8 +54,3 @@ define lb_add_del_as { u8 as_address[16]; u8 is_del; }; - -define lb_add_del_as_reply { - u32 context; - i32 retval; -}; diff --git a/src/plugins/memif/memif.api b/src/plugins/memif/memif.api index 6f946421..95e016c3 100644 --- a/src/plugins/memif/memif.api +++ b/src/plugins/memif/memif.api @@ -57,7 +57,7 @@ define memif_create_reply @param context - sender context, to match reply w/ request @param sw_if_index - software index of the interface to delete */ -define memif_delete +autoreply define memif_delete { u32 client_index; u32 context; @@ -65,16 +65,6 @@ define memif_delete u32 sw_if_index; }; -/** \brief Delete host-interface response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define memif_delete_reply -{ - u32 context; - i32 retval; -}; - /** \brief Memory interface details structure @param context - sender context, to match reply w/ request (memif_dump) @param sw_if_index - index of the interface diff --git a/src/plugins/snat/snat.api b/src/plugins/snat/snat.api index 9689f5f9..573b6753 100644 --- a/src/plugins/snat/snat.api +++ b/src/plugins/snat/snat.api @@ -29,7 +29,7 @@ @param vrf_id - VRF id of tenant, ~0 means independent of VRF @param is_add - 1 if add, 0 if delete */ -define snat_add_address_range { +autoreply define snat_add_address_range { u32 client_index; u32 context; u8 is_ip4; @@ -39,15 +39,6 @@ define snat_add_address_range { u8 is_add; }; -/** \brief Add S-NAT address range reply - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define snat_add_address_range_reply { - u32 context; - i32 retval; -}; - /** \brief Dump S-NAT addresses @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -77,7 +68,7 @@ define snat_address_details { @param is_inside - 1 if inside, 0 if outside @param sw_if_index - software index of the interface */ -define snat_interface_add_del_feature { +autoreply define snat_interface_add_del_feature { u32 client_index; u32 context; u8 is_add; @@ -85,15 +76,6 @@ define snat_interface_add_del_feature { u32 sw_if_index; }; -/** \brief Enable/disable S-NAT feature on the interface reply - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define snat_interface_add_del_feature_reply { - u32 context; - i32 retval; -}; - /** \brief Dump interfaces with S-NAT feature @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -130,7 +112,7 @@ define snat_interface_details { used) @param vfr_id - VRF ID */ -define snat_add_static_mapping { +autoreply define snat_add_static_mapping { u32 client_index; u32 context; u8 is_add; @@ -145,15 +127,6 @@ define snat_add_static_mapping { u32 vrf_id; }; -/** \brief Add/delete S-NAT static mapping reply - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define snat_add_static_mapping_reply { - u32 context; - i32 retval; -}; - /** \brief Dump S-NAT static mappings @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -257,21 +230,12 @@ define snat_show_config_reply @param context - sender context, to match reply w/ request @param worker_mask - S-NAT workers mask */ -define snat_set_workers { +autoreply 
define snat_set_workers { u32 client_index; u32 context; u64 worker_mask; }; -/** \brief Set S-NAT workers reply - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define snat_set_workers_reply { - u32 context; - i32 retval; -}; - /** \brief Dump S-NAT workers @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -300,7 +264,7 @@ define snat_worker_details { @param is_add - 1 if add, 0 if delete @param sw_if_index - software index of the interface */ -define snat_add_del_interface_addr { +autoreply define snat_add_del_interface_addr { u32 client_index; u32 context; u8 is_add; @@ -308,15 +272,6 @@ define snat_add_del_interface_addr { u32 sw_if_index; }; -/** \brief Add/delete S-NAT pool address from specific interfce reply - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define snat_add_del_interface_addr_reply { - u32 context; - i32 retval; -}; - /** \brief Dump S-NAT pool addresses interfaces @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -342,7 +297,7 @@ define snat_interface_addr_details { @param src_port - source port number @param enable - 1 if enable, 0 if disable */ -define snat_ipfix_enable_disable { +autoreply define snat_ipfix_enable_disable { u32 client_index; u32 context; u32 domain_id; @@ -350,15 +305,6 @@ define snat_ipfix_enable_disable { u8 enable; }; -/** \brief Enable/disable S-NAT IPFIX logging reply - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define snat_ipfix_enable_disable_reply { - u32 context; - i32 retval; -}; - /** \brief Dump S-NAT users @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -437,7 +383,7 @@ define snat_user_session_details { @param out_addr - outside IP address @param out_addr - outside IP address prefix length */ -define snat_add_det_map { +autoreply define snat_add_det_map { u32 client_index; u32 context; u8 is_add; @@ -449,15 +395,6 @@ define snat_add_det_map { u8 out_plen; }; -/** \brief Add/delete S-NAT deterministic mapping reply - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define snat_add_det_map_reply { - u32 context; - i32 retval; -}; - /** \brief Get outside address and port range from inside address @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -556,7 +493,7 @@ define snat_det_map_details { @param tcp_transitory - TCP transitory timeout (default 240sec) @param icmp - ICMP timeout (default 60sec) */ -define snat_det_set_timeouts { +autoreply define snat_det_set_timeouts { u32 client_index; u32 context; u32 udp; @@ -565,15 +502,6 @@ define snat_det_set_timeouts { u32 icmp; }; -/** \brief Set values of timeouts for deterministic NAT reply - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define snat_det_set_timeouts_reply { - u32 context; - i32 retval; -}; - /** \brief Get values of timeouts for deterministic NAT (seconds) @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/tools/vppapigen/gram.y b/src/tools/vppapigen/gram.y index de26af8d..9cea6023 100644 --- a/src/tools/vppapigen/gram.y +++ b/src/tools/vppapigen/gram.y @@ -38,7 +38,7 @@ void 
 generate (YYSTYPE);
 %token NAME RPAR LPAR SEMI LBRACK RBRACK NUMBER PRIMTYPE BARF
 %token TPACKED DEFINE LCURLY RCURLY STRING UNION
 %token HELPER_STRING COMMA
-%token NOVERSION MANUAL_PRINT MANUAL_ENDIAN TYPEONLY DONT_TRACE
+%token NOVERSION MANUAL_PRINT MANUAL_ENDIAN TYPEONLY DONT_TRACE AUTOREPLY
 %%
@@ -64,6 +64,7 @@ flag:
 | MANUAL_ENDIAN {$$ = $1;}
 | DONT_TRACE {$$ = $1;}
 | TYPEONLY {$$ = $1;}
+| AUTOREPLY {$$ = $1;}
 ;
 defn: DEFINE NAME LCURLY defbody RCURLY SEMI
diff --git a/src/tools/vppapigen/lex.c b/src/tools/vppapigen/lex.c
index 733942ad..e6358143 100644
--- a/src/tools/vppapigen/lex.c
+++ b/src/tools/vppapigen/lex.c
@@ -27,6 +27,9 @@
 #include "lex.h"
 #include "node.h"
 #include "tools/vppapigen/gram.h"
+#include
+#include
+#include
 
 FILE *ifp, *ofp, *pythonfp, *jsonfp;
 char *vlib_app_name = "vpp";
@@ -38,6 +41,9 @@ int current_filename_allocated;
 unsigned long input_crc;
 unsigned long message_crc;
 int yydebug;
+char *push_input_fifo;
+char saved_ungetc_char;
+char have_ungetc_char;
 
 /*
  * lexer variable definitions
@@ -469,9 +475,50 @@ static char namebuf [MAXNAME];
 
 static inline char getc_char (FILE *ifp)
 {
+    char rv;
+
+    if (have_ungetc_char) {
+        have_ungetc_char = 0;
+        return saved_ungetc_char;
+    }
+
+    if (clib_fifo_elts (push_input_fifo)) {
+        clib_fifo_sub1(push_input_fifo, rv);
+        return (rv & 0x7f);
+    }
     return ((char)(getc(ifp) & 0x7f));
 }
 
+u32 fe (char *fifo)
+{
+    return clib_fifo_elts (fifo);
+}
+
+static inline void
+ungetc_char (char c, FILE *ifp)
+{
+    saved_ungetc_char = c;
+    have_ungetc_char = 1;
+}
+
+void autoreply (void *np_arg)
+{
+    static u8 *s;
+    node_t *np = (node_t *)np_arg;
+    int i;
+
+    vec_reset_length (s);
+
+    s = format (0, " define %s_reply\n", (char *)(np->data[0]));
+    s = format (s, "{\n");
+    s = format (s, " u32 context;\n");
+    s = format (s, " i32 retval;\n");
+    s = format (s, "};\n");
+
+    for (i = 0; i < vec_len (s); i++)
+        clib_fifo_add1 (push_input_fifo, s[i]);
+}
+
 /*
  * yylex (well, yylex_1: The real yylex below does crc-hackery)
 */
@@ -595,7 +642,7 @@ static int yylex_1 (void)
             return (EOF);
         if (!isalnum (c) && c != '_') {
-            ungetc (c, ifp);
+            ungetc_char (c, ifp);
             namebuf [nameidx] = 0;
             the_lexer_state = START_STATE;
             return (name_check (namebuf, &yylval));
@@ -616,7 +663,7 @@ static int yylex_1 (void)
             return (EOF);
         if (!isdigit (c)) {
-            ungetc (c, ifp);
+            ungetc_char (c, ifp);
             namebuf [nameidx] = 0;
             the_lexer_state = START_STATE;
             yylval = (void *) atol(namebuf);
@@ -889,6 +936,7 @@ int yylex (void)
     case MANUAL_ENDIAN: code = 276; break;
     case TYPEONLY: code = 278; break;
     case DONT_TRACE: code = 279; break;
+    case AUTOREPLY: code = 280; break;
     case EOF: code = ~0; break;
 
     /* hysterical compatibility */
@@ -929,6 +977,7 @@ static struct keytab {
 } keytab [] =
 /* Keep the table sorted, binary search used below! */
 {
+    {"autoreply", NODE_AUTOREPLY},
     {"define", NODE_DEFINE},
     {"dont_trace", NODE_DONT_TRACE},
     {"f64", NODE_F64},
@@ -1005,6 +1054,10 @@ static int name_check (const char *s, YYSTYPE *token_value)
         *token_value = (YYSTYPE) NODE_FLAG_DONT_TRACE;
         return(DONT_TRACE);
+    case NODE_AUTOREPLY:
+        *token_value = (YYSTYPE) NODE_FLAG_AUTOREPLY;
+        return(AUTOREPLY);
+
     case NODE_NOVERSION:
         return(NOVERSION);
diff --git a/src/tools/vppapigen/lex.h b/src/tools/vppapigen/lex.h
index a0fdc735..275cf685 100644
--- a/src/tools/vppapigen/lex.h
+++ b/src/tools/vppapigen/lex.h
@@ -24,6 +24,7 @@
 extern int yylex (void);
 extern void yyerror (char *);
 extern int yyparse (void);
+extern void autoreply (void *);
 
 #ifndef YYSTYPE
 #define YYSTYPE void *
diff --git a/src/tools/vppapigen/node.c b/src/tools/vppapigen/node.c
index 359ac9c9..9f234037 100644
--- a/src/tools/vppapigen/node.c
+++ b/src/tools/vppapigen/node.c
@@ -1050,6 +1050,11 @@ YYSTYPE set_flags(YYSTYPE a1, YYSTYPE a2)
     flags = (int)(uword) a1;
     np->flags |= flags;
+
+    /* Generate a foo_reply_t right here */
+    if (flags & NODE_FLAG_AUTOREPLY)
+        autoreply(np);
+
     return (a2);
 }
 
 /*
diff --git a/src/tools/vppapigen/node.h b/src/tools/vppapigen/node.h
index 297d6036..65bd5d10 100644
--- a/src/tools/vppapigen/node.h
+++ b/src/tools/vppapigen/node.h
@@ -53,6 +53,7 @@ enum node_subclass { /* WARNING: indices must match the vft... */
     NODE_MANUAL_PRINT,
     NODE_MANUAL_ENDIAN,
     NODE_DONT_TRACE,
+    NODE_AUTOREPLY,
 };
 
 enum passid {
@@ -84,6 +85,7 @@ typedef struct node_ {
 #define NODE_FLAG_MANUAL_ENDIAN (1<<1)
 #define NODE_FLAG_TYPEONLY (1<<3)
 #define NODE_FLAG_DONT_TRACE (1<<4)
+#define NODE_FLAG_AUTOREPLY (1<<5)
 
 typedef struct node_vft_ {
     void (*print)(struct node_ *);
diff --git a/src/vlibmemory/memclnt.api b/src/vlibmemory/memclnt.api
index c38b483c..32e51407 100644
--- a/src/vlibmemory/memclnt.api
+++ b/src/vlibmemory/memclnt.api
@@ -72,7 +72,7 @@ define memclnt_read_timeout {
 /*
  * RPC
 */
-define rpc_call {
+autoreply define rpc_call {
     u32 client_index;
     u32 context;
     u64 function;
@@ -82,11 +82,6 @@ define rpc_call {
     u8 data[0];
 };
 
-define rpc_reply {
-    i32 retval;
-    u32 context;
-};
-
 /*
  * Lookup message-ID base by name
 */
diff --git a/src/vlibmemory/memory_vlib.c b/src/vlibmemory/memory_vlib.c
index 7a536ee8..43574dea 100644
--- a/src/vlibmemory/memory_vlib.c
+++ b/src/vlibmemory/memory_vlib.c
@@ -1275,7 +1275,7 @@ VLIB_CLI_COMMAND (cli_show_api_plugin_command, static) = {
 static void
 vl_api_rpc_call_t_handler (vl_api_rpc_call_t * mp)
 {
-  vl_api_rpc_reply_t *rmp;
+  vl_api_rpc_call_reply_t *rmp;
   int (*fp) (void *);
   i32 rv = 0;
   vlib_main_t *vm = vlib_get_main ();
@@ -1305,7 +1305,7 @@ vl_api_rpc_call_t_handler (vl_api_rpc_call_t * mp)
   if (q)
     {
       rmp = vl_msg_api_alloc_as_if_client (sizeof (*rmp));
-      rmp->_vl_msg_id = ntohs (VL_API_RPC_REPLY);
+      rmp->_vl_msg_id = ntohs (VL_API_RPC_CALL_REPLY);
       rmp->context = mp->context;
       rmp->retval = rv;
       vl_msg_api_send_shmem (q, (u8 *) & rmp);
@@ -1318,7 +1318,7 @@ vl_api_rpc_call_t_handler (vl_api_rpc_call_t * mp)
 }
 
 static void
-vl_api_rpc_reply_t_handler (vl_api_rpc_reply_t * mp)
+vl_api_rpc_call_reply_t_handler (vl_api_rpc_call_reply_t * mp)
 {
   clib_warning ("unimplemented");
 }
@@ -1415,7 +1415,7 @@ vl_api_trace_plugin_msg_ids_t_handler (vl_api_trace_plugin_msg_ids_t * mp)
 #define foreach_rpc_api_msg \
 _(RPC_CALL,rpc_call) \
-_(RPC_REPLY,rpc_reply)
+_(RPC_CALL_REPLY,rpc_call_reply)
 
 #define foreach_plugin_trace_msg \
 _(TRACE_PLUGIN_MSG_IDS,trace_plugin_msg_ids)
diff --git a/src/vnet/bfd/bfd.api b/src/vnet/bfd/bfd.api
index
2cdcfad3..7bcaa4c3 100644 --- a/src/vnet/bfd/bfd.api +++ b/src/vnet/bfd/bfd.api @@ -18,43 +18,23 @@ @param context - sender context, to match reply w/ request @param sw_if_index - interface to use as echo source */ -define bfd_udp_set_echo_source +autoreply define bfd_udp_set_echo_source { u32 client_index; u32 context; u32 sw_if_index; }; -/** \brief Set BFD feature response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define bfd_udp_set_echo_source_reply -{ - u32 context; - i32 retval; -}; - /** \brief Delete BFD echo source @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request */ -define bfd_udp_del_echo_source +autoreply define bfd_udp_del_echo_source { u32 client_index; u32 context; }; -/** \brief Delete BFD echo source response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define bfd_udp_del_echo_source_reply -{ - u32 context; - i32 retval; -}; - /** \brief Add UDP BFD session on interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -69,7 +49,7 @@ define bfd_udp_del_echo_source_reply @param bfd_key_id - key id sent out in BFD packets (if is_authenticated) @param conf_key_id - id of already configured key (if is_authenticated) */ -define bfd_udp_add +autoreply define bfd_udp_add { u32 client_index; u32 context; @@ -85,16 +65,6 @@ define bfd_udp_add u32 conf_key_id; }; -/** \brief Add UDP BFD session response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define bfd_udp_add_reply -{ - u32 context; - i32 retval; -}; - /** \brief Modify UDP BFD session on interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -106,7 +76,7 @@ define bfd_udp_add_reply @param is_ipv6 - local_addr, peer_addr are IPv6 if non-zero, otherwise IPv4 @param detect_mult - detect multiplier (# of packets missed before connection goes down) */ -define bfd_udp_mod +autoreply define bfd_udp_mod { u32 client_index; u32 context; @@ -119,16 +89,6 @@ define bfd_udp_mod u8 detect_mult; }; -/** \brief Modify UDP BFD session response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define bfd_udp_mod_reply -{ - u32 context; - i32 retval; -}; - /** \brief Delete UDP BFD session on interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -137,7 +97,7 @@ define bfd_udp_mod_reply @param peer_addr - peer address @param is_ipv6 - local_addr, peer_addr are IPv6 if non-zero, otherwise IPv4 */ -define bfd_udp_del +autoreply define bfd_udp_del { u32 client_index; u32 context; @@ -147,16 +107,6 @@ define bfd_udp_del u8 is_ipv6; }; -/** \brief Delete UDP BFD session response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define bfd_udp_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Get all BFD sessions @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -206,7 +156,7 @@ define bfd_udp_session_details @param is_ipv6 - local_addr, peer_addr are IPv6 if non-zero, otherwise IPv4 @param admin_up_down - set the admin state, 1 = up, 0 = down */ -define 
bfd_udp_session_set_flags +autoreply define bfd_udp_session_set_flags { u32 client_index; u32 context; @@ -217,23 +167,13 @@ define bfd_udp_session_set_flags u8 admin_up_down; }; -/** \brief Reply to bfd_udp_session_set_flags - @param context - sender context which was passed in the request - @param retval - return code of the set flags request -*/ -define bfd_udp_session_set_flags_reply -{ - u32 context; - i32 retval; -}; - /** \brief Register for BFD events @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param enable_disable - 1 => register for events, 0 => cancel registration @param pid - sender's pid */ -define want_bfd_events +autoreply define want_bfd_events { u32 client_index; u32 context; @@ -241,16 +181,6 @@ define want_bfd_events u32 pid; }; -/** \brief Reply for BFD events registration - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define want_bfd_events_reply -{ - u32 context; - i32 retval; -}; - /** \brief BFD UDP - add/replace key to configuration @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -259,7 +189,7 @@ define want_bfd_events_reply @param auth_type - authentication type (RFC 5880/4.1/Auth Type) @param key - key data */ -define bfd_auth_set_key +autoreply define bfd_auth_set_key { u32 client_index; u32 context; @@ -269,16 +199,6 @@ define bfd_auth_set_key u8 key[20]; }; -/** \brief BFD UDP - add/replace key reply - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define bfd_auth_set_key_reply -{ - u32 context; - i32 retval; -}; - /** \brief BFD UDP - delete key from configuration @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -286,23 +206,13 @@ define bfd_auth_set_key_reply @param key_len - length of key (must be non-zero) @param key - key data */ -define bfd_auth_del_key +autoreply define bfd_auth_del_key { u32 client_index; u32 context; u32 conf_key_id; }; -/** \brief BFD UDP - delete key reply - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define bfd_auth_del_key_reply -{ - u32 context; - i32 retval; -}; - /** \brief Get a list of configured authentication keys @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -338,7 +248,7 @@ define bfd_auth_keys_details @param bfd_key_id - key id sent out in BFD packets @param conf_key_id - id of already configured key */ -define bfd_udp_auth_activate +autoreply define bfd_udp_auth_activate { u32 client_index; u32 context; @@ -351,16 +261,6 @@ define bfd_udp_auth_activate u32 conf_key_id; }; -/** \brief BFD UDP - activate/change authentication reply - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define bfd_udp_auth_activate_reply -{ - u32 context; - i32 retval; -}; - /** \brief BFD UDP - deactivate authentication @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -370,7 +270,7 @@ define bfd_udp_auth_activate_reply @param is_ipv6 - local_addr, peer_addr are IPv6 if non-zero, otherwise IPv4 @param is_delayed - change is applied once peer applies the change (on first received non-authenticated packet) */ -define bfd_udp_auth_deactivate +autoreply define 
bfd_udp_auth_deactivate { u32 client_index; u32 context; @@ -381,16 +281,6 @@ define bfd_udp_auth_deactivate u8 is_delayed; }; -/** \brief BFD UDP - deactivate authentication reply - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define bfd_udp_auth_deactivate_reply -{ - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") diff --git a/src/vnet/classify/classify.api b/src/vnet/classify/classify.api index 51ebd6c8..cacb9bed 100644 --- a/src/vnet/classify/classify.api +++ b/src/vnet/classify/classify.api @@ -92,7 +92,7 @@ define classify_add_del_table_reply VRF id if action is 1 or 2. @param match[] - for add, match value for session, required */ -define classify_add_del_session +autoreply define classify_add_del_session { u32 client_index; u32 context; @@ -106,16 +106,6 @@ define classify_add_del_session u8 match[0]; }; -/** \brief Classify add / del session response - @param context - sender context, to match reply w/ request - @param retval - return code for the add/del session request -*/ -define classify_add_del_session_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set/unset policer classify interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -127,7 +117,7 @@ define classify_add_del_session_reply Note: User is recommeneded to use just one valid table_index per call. (ip4_table_index, ip6_table_index, or l2_table_index) */ -define policer_classify_set_interface +autoreply define policer_classify_set_interface { u32 client_index; u32 context; @@ -138,16 +128,6 @@ define policer_classify_set_interface u8 is_add; }; -/** \brief Set/unset policer classify interface response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define policer_classify_set_interface_reply -{ - u32 context; - i32 retval; -}; - /** \brief Get list of policer classify interfaces and tables @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -308,7 +288,7 @@ define classify_session_details Note: User is recommeneded to use just one valid table_index per call. 
(ip4_table_index, ip6_table_index, or l2_table_index) */ -define flow_classify_set_interface { +autoreply define flow_classify_set_interface { u32 client_index; u32 context; u32 sw_if_index; @@ -317,15 +297,6 @@ define flow_classify_set_interface { u8 is_add; }; -/** \brief Set/unset flow classify interface response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define flow_classify_set_interface_reply { - u32 context; - i32 retval; -}; - /** \brief Get list of flow classify interfaces and tables @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -353,4 +324,4 @@ define flow_classify_details { * eval: (c-set-style "gnu") * End: */ - \ No newline at end of file + diff --git a/src/vnet/cop/cop.api b/src/vnet/cop/cop.api index b34dae80..69316001 100644 --- a/src/vnet/cop/cop.api +++ b/src/vnet/cop/cop.api @@ -20,7 +20,7 @@ @param enable_disable - 1 => enable, 0 => disable */ -define cop_interface_enable_disable +autoreply define cop_interface_enable_disable { u32 client_index; u32 context; @@ -28,17 +28,6 @@ define cop_interface_enable_disable u8 enable_disable; }; -/** \brief cop: interface enable/disable junk filtration reply - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ - -define cop_interface_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief cop: enable/disable whitelist filtration features on an interface Note: the supplied fib_id must match in order to remove the feature! @@ -51,7 +40,7 @@ define cop_interface_enable_disable_reply @param default_cop - 1 => enable non-ip4, non-ip6 filtration 0=> disable it */ -define cop_whitelist_enable_disable +autoreply define cop_whitelist_enable_disable { u32 client_index; u32 context; @@ -62,17 +51,6 @@ define cop_whitelist_enable_disable u8 default_cop; }; -/** \brief cop: interface enable/disable junk filtration reply - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ - -define cop_whitelist_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief get_node_graph - get a copy of the vpp node graph including the current set of graph arcs. 
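Handler code for these converted messages keeps the same shape as before: allocate the (now generated) *_reply, echo the context, set retval, and send it back on the client's queue. A hedged sketch of that pattern for cop_interface_enable_disable, modelled on the vl_api_rpc_call_t_handler change earlier in this patch; the body is illustrative and not taken from this diff:

static void
vl_api_cop_interface_enable_disable_t_handler
  (vl_api_cop_interface_enable_disable_t * mp)
{
  vl_api_cop_interface_enable_disable_reply_t *rmp;
  unix_shared_memory_queue_t *q;
  i32 rv = 0;

  /* ... enable or disable the feature here, setting rv on failure ... */

  q = vl_api_client_index_to_input_queue (mp->client_index);
  if (!q)
    return;

  rmp = vl_msg_api_alloc (sizeof (*rmp));
  rmp->_vl_msg_id = ntohs (VL_API_COP_INTERFACE_ENABLE_DISABLE_REPLY);
  rmp->context = mp->context;   /* echo the sender context */
  rmp->retval = ntohl (rv);     /* return code for the request */
  vl_msg_api_send_shmem (q, (u8 *) & rmp);
}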
@@ -85,4 +63,4 @@ define cop_whitelist_enable_disable_reply * eval: (c-set-style "gnu") * End: */ - \ No newline at end of file + diff --git a/src/vnet/devices/af_packet/af_packet.api b/src/vnet/devices/af_packet/af_packet.api index 9fb2a207..8d40ad60 100644 --- a/src/vnet/devices/af_packet/af_packet.api +++ b/src/vnet/devices/af_packet/af_packet.api @@ -46,7 +46,7 @@ define af_packet_create_reply @param context - sender context, to match reply w/ request @param host_if_name - interface name */ -define af_packet_delete +autoreply define af_packet_delete { u32 client_index; u32 context; @@ -54,16 +54,6 @@ define af_packet_delete u8 host_if_name[64]; }; -/** \brief Delete host-interface response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define af_packet_delete_reply -{ - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") diff --git a/src/vnet/devices/netmap/netmap.api b/src/vnet/devices/netmap/netmap.api index 377ccffd..8dc698b9 100644 --- a/src/vnet/devices/netmap/netmap.api +++ b/src/vnet/devices/netmap/netmap.api @@ -22,7 +22,7 @@ @param is_pipe - is pipe @param is_master - 0=slave, 1=master */ -define netmap_create +autoreply define netmap_create { u32 client_index; u32 context; @@ -34,22 +34,12 @@ define netmap_create u8 is_master; }; -/** \brief Create netmap response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define netmap_create_reply -{ - u32 context; - i32 retval; -}; - /** \brief Delete netmap @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param netmap_if_name - interface name */ -define netmap_delete +autoreply define netmap_delete { u32 client_index; u32 context; @@ -57,16 +47,6 @@ define netmap_delete u8 netmap_if_name[64]; }; -/** \brief Delete netmap response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define netmap_delete_reply -{ - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") diff --git a/src/vnet/devices/virtio/vhost_user.api b/src/vnet/devices/virtio/vhost_user.api index 4f604e45..df7ce7ab 100644 --- a/src/vnet/devices/virtio/vhost_user.api +++ b/src/vnet/devices/virtio/vhost_user.api @@ -53,7 +53,7 @@ define create_vhost_user_if_reply @param sock_filename - unix socket filename, used to speak with frontend @param operation_mode - polling=0, interrupt=1, or adaptive=2 */ -define modify_vhost_user_if +autoreply define modify_vhost_user_if { u32 client_index; u32 context; @@ -65,36 +65,16 @@ define modify_vhost_user_if u8 operation_mode; }; -/** \brief vhost-user interface modify response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define modify_vhost_user_if_reply -{ - u32 context; - i32 retval; -}; - /** \brief vhost-user interface delete request @param client_index - opaque cookie to identify the sender */ -define delete_vhost_user_if +autoreply define delete_vhost_user_if { u32 client_index; u32 context; u32 sw_if_index; }; -/** \brief vhost-user interface delete response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define delete_vhost_user_if_reply -{ - u32 context; - i32 retval; -}; - /** \brief Vhost-user interface details structure (fix this) @param sw_if_index - index of the interface @param 
interface_name - name of interface diff --git a/src/vnet/dhcp/dhcp.api b/src/vnet/dhcp/dhcp.api index 2db85a79..eb0b070d 100644 --- a/src/vnet/dhcp/dhcp.api +++ b/src/vnet/dhcp/dhcp.api @@ -24,7 +24,7 @@ @param dhcp_server[] - server address @param dhcp_src_address[] - */ -define dhcp_proxy_config +autoreply define dhcp_proxy_config { u32 client_index; u32 context; @@ -36,16 +36,6 @@ define dhcp_proxy_config u8 dhcp_src_address[16]; }; -/** \brief DHCP Proxy config response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define dhcp_proxy_config_reply -{ - u32 context; - i32 retval; -}; - /** \brief DHCP Proxy set / unset vss request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -55,7 +45,7 @@ define dhcp_proxy_config_reply @param is_ipv6 - ip6 if non-zero, else ip4 @param is_add - set vss if non-zero, else delete */ -define dhcp_proxy_set_vss +autoreply define dhcp_proxy_set_vss { u32 client_index; u32 context; @@ -66,16 +56,6 @@ define dhcp_proxy_set_vss u8 is_add; }; -/** \brief DHCP proxy set / unset vss response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define dhcp_proxy_set_vss_reply -{ - u32 context; - i32 retval; -}; - /** \brief DHCP Client config add / del request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -86,7 +66,7 @@ define dhcp_proxy_set_vss_reply via dhcp_compl_event API message if non-zero @param pid - sender's pid */ -define dhcp_client_config +autoreply define dhcp_client_config { u32 client_index; u32 context; @@ -97,16 +77,6 @@ define dhcp_client_config u32 pid; }; -/** \brief DHCP Client config response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define dhcp_client_config_reply -{ - u32 context; - i32 retval; -}; - /** \brief Tell client about a DHCP completion event @param client_index - opaque cookie to identify the sender @param pid - client pid registered to receive notification @@ -162,4 +132,4 @@ manual_endian manual_print define dhcp_proxy_details * Local Variables: * eval: (c-set-style "gnu") * End: - */ \ No newline at end of file + */ diff --git a/src/vnet/flow/flow.api b/src/vnet/flow/flow.api index 0e0f99bf..1c5e8c5c 100644 --- a/src/vnet/flow/flow.api +++ b/src/vnet/flow/flow.api @@ -24,7 +24,7 @@ @param template_interval - number of seconds after which to resend template @param udp_checksum - UDP checksum calculation enable flag */ -define set_ipfix_exporter +autoreply define set_ipfix_exporter { u32 client_index; u32 context; @@ -37,15 +37,6 @@ define set_ipfix_exporter u8 udp_checksum; }; -/** \brief Reply to IPFIX exporter configure request - @param context - sender context which was passed in the request -*/ -define set_ipfix_exporter_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPFIX exporter dump request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -84,22 +75,13 @@ define ipfix_exporter_details @param domain_id - domain ID reported in IPFIX messages for classify stream @param src_port - source port of UDP session for classify stream */ -define set_ipfix_classify_stream { +autoreply define set_ipfix_classify_stream { u32 client_index; u32 context; u32 domain_id; u16 src_port; }; -/** \brief IPFIX classify stream 
configure response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define set_ipfix_classify_stream_reply { - u32 context; - i32 retval; -}; - /** \brief IPFIX classify stream dump request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -127,7 +109,7 @@ define ipfix_classify_stream_details { @param ip_version - version of IP used in the classifier table @param transport_protocol - transport protocol used in the classifier table or 255 for unspecified */ -define ipfix_classify_table_add_del { +autoreply define ipfix_classify_table_add_del { u32 client_index; u32 context; u32 table_id; @@ -136,14 +118,6 @@ define ipfix_classify_table_add_del { u8 is_add; }; -/** \brief IPFIX add classifier table response - @param context - sender context which was passed in the request -*/ -define ipfix_classify_table_add_del_reply { - u32 context; - i32 retval; -}; - /** \brief IPFIX classify tables dump request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/interface.api b/src/vnet/interface.api index 85fd73fb..9df63f18 100644 --- a/src/vnet/interface.api +++ b/src/vnet/interface.api @@ -6,7 +6,7 @@ @param link_up_down - Oper state sent on change event, not used in config. @param deleted - interface was deleted */ -define sw_interface_set_flags +autoreply define sw_interface_set_flags { u32 client_index; u32 context; @@ -17,23 +17,13 @@ define sw_interface_set_flags u8 deleted; }; -/** \brief Reply to sw_interface_set_flags - @param context - sender context which was passed in the request - @param retval - return code of the set flags request -*/ -define sw_interface_set_flags_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set interface MTU @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - index of the interface to set MTU on @param mtu - MTU */ -define sw_interface_set_mtu +autoreply define sw_interface_set_mtu { u32 client_index; u32 context; @@ -41,23 +31,13 @@ define sw_interface_set_mtu u16 mtu; }; -/** \brief Reply to sw_interface_set_mtu - @param context - sender context which was passed in the request - @param retval - return code of the set flags request -*/ -define sw_interface_set_mtu_reply -{ - u32 context; - i32 retval; -}; - /** \brief Register for interface events @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param enable_disable - 1 => register for events, 0 => cancel registration @param pid - sender's pid */ -define want_interface_events +autoreply define want_interface_events { u32 client_index; u32 context; @@ -65,16 +45,6 @@ define want_interface_events u32 pid; }; -/** \brief Reply for interface events registration - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define want_interface_events_reply -{ - u32 context; - i32 retval; -}; - /** \brief Interface details structure (fix this) @param sw_if_index - index of the interface @param sup_sw_if_index - index of parent interface if any, else same as sw_if_index @@ -184,7 +154,7 @@ define sw_interface_dump @param address_length - address length in bytes, 4 for ip4, 16 for ip6 @param address - array of address bytes */ -define sw_interface_add_del_address +autoreply define 
sw_interface_add_del_address { u32 client_index; u32 context; @@ -196,16 +166,6 @@ define sw_interface_add_del_address u8 address[16]; }; -/** \brief Reply to sw_interface_add_del_address - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define sw_interface_add_del_address_reply -{ - u32 context; - i32 retval; -}; - /** \brief Associate the specified interface with a fib table @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -213,7 +173,7 @@ define sw_interface_add_del_address_reply @param is_ipv6 - if non-zero ipv6, else ipv4 @param vrf_id - fib table/vrd id to associate the interface with */ -define sw_interface_set_table +autoreply define sw_interface_set_table { u32 client_index; u32 context; @@ -222,16 +182,6 @@ define sw_interface_set_table u32 vrf_id; }; -/** \brief Reply to sw_interface_set_table - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define sw_interface_set_table_reply -{ - u32 context; - i32 retval; -}; - /** \brief Get VRF id assigned to interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -280,7 +230,7 @@ define vnet_interface_counters @param unnumbered_sw_if_index - interface which will use the address @param is_add - if non-zero set the association, else unset it */ -define sw_interface_set_unnumbered +autoreply define sw_interface_set_unnumbered { u32 client_index; u32 context; @@ -289,38 +239,18 @@ define sw_interface_set_unnumbered u8 is_add; }; -/** \brief Set unnumbered interface add / del response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define sw_interface_set_unnumbered_reply -{ - u32 context; - i32 retval; -}; - /** \brief Clear interface statistics @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - index of the interface to clear statistics */ -define sw_interface_clear_stats +autoreply define sw_interface_clear_stats { u32 client_index; u32 context; u32 sw_if_index; }; -/** \brief Reply to sw_interface_clear_stats - @param context - sender context which was passed in the request - @param retval - return code of the set flags request -*/ -define sw_interface_clear_stats_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set / clear software interface tag @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -328,7 +258,7 @@ define sw_interface_clear_stats_reply @param add_del - 1 = add, 0 = delete @param tag - an ascii tag */ -define sw_interface_tag_add_del +autoreply define sw_interface_tag_add_del { u32 client_index; u32 context; @@ -337,23 +267,13 @@ define sw_interface_tag_add_del u8 tag[64]; }; -/** \brief Reply to set / clear software interface tag - @param context - sender context which was passed in the request - @param retval - return code for the request -*/ -define sw_interface_tag_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set an interface's MAC address @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - the interface whose MAC will be set @param mac_addr - the new MAC address */ -define sw_interface_set_mac_address +autoreply define 
sw_interface_set_mac_address { u32 client_index; u32 context; @@ -361,16 +281,6 @@ define sw_interface_set_mac_address u8 mac_address[6]; }; -/** \brief Reply to setting an interface MAC address request - @param context - sender context which was passed in the request - @param retval - return code for the request -*/ -define sw_interface_set_mac_address_reply -{ - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") diff --git a/src/vnet/ip/ip.api b/src/vnet/ip/ip.api index 6af1714f..7097a130 100644 --- a/src/vnet/ip/ip.api +++ b/src/vnet/ip/ip.api @@ -136,7 +136,7 @@ define ip_neighbor_details { @param mac_address - l2 address of the neighbor @param dst_address - ip4 or ip6 address of the neighbor */ -define ip_neighbor_add_del +autoreply define ip_neighbor_add_del { u32 client_index; u32 context; @@ -150,16 +150,6 @@ define ip_neighbor_add_del u8 dst_address[16]; }; -/** \brief Reply for IP Neighbor add / delete request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ip_neighbor_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set the ip flow hash config for a fib request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -172,7 +162,7 @@ define ip_neighbor_add_del_reply @param proto -if non-zero include proto in flow hash @param reverse - if non-zero include reverse in flow hash */ -define set_ip_flow_hash +autoreply define set_ip_flow_hash { u32 client_index; u32 context; @@ -186,16 +176,6 @@ define set_ip_flow_hash u8 reverse; }; -/** \brief Set the ip flow hash config for a fib response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define set_ip_flow_hash_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 router advertisement config request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -213,7 +193,7 @@ define set_ip_flow_hash_reply @param initial_count - @param initial_interval - */ -define sw_interface_ip6nd_ra_config +autoreply define sw_interface_ip6nd_ra_config { u32 client_index; u32 context; @@ -233,16 +213,6 @@ define sw_interface_ip6nd_ra_config u32 initial_interval; }; -/** \brief IPv6 router advertisement config response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define sw_interface_ip6nd_ra_config_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 router advertisement prefix config request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -272,7 +242,7 @@ define sw_interface_ip6nd_ra_config_reply preferred [ADDRCONF]. A value of all one bits (0xffffffff) represents infinity. 
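All of these .api conversions depend on the lexer changes earlier in the patch: autoreply() formats the reply definition as text and queues it in push_input_fifo, and getc_char() drains that queue before reading further from the .api file, so the generated define is parsed as if it had been written by hand. A self-contained sketch of the same push-back technique in plain C (deliberately independent of VPP's clib FIFO; all names here are illustrative):

#include <stdio.h>
#include <string.h>

static char pushback[4096];      /* synthesized text waiting to be lexed */
static size_t pb_head, pb_len;

static void
push_input (const char *s)
{
  size_t n = strlen (s);
  if (pb_len + n <= sizeof (pushback))
    {
      memcpy (pushback + pb_len, s, n);
      pb_len += n;
    }
}

static int
next_char (FILE * fp)
{
  /* Drain pushed-back text first, then fall through to the real input */
  if (pb_head < pb_len)
    return pushback[pb_head++] & 0x7f;
  pb_head = pb_len = 0;
  return getc (fp) & 0x7f;
}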
*/ -define sw_interface_ip6nd_ra_prefix +autoreply define sw_interface_ip6nd_ra_prefix { u32 client_index; u32 context; @@ -289,16 +259,6 @@ define sw_interface_ip6nd_ra_prefix u32 pref_lifetime; }; -/** \brief IPv6 router advertisement prefix config response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define sw_interface_ip6nd_ra_prefix_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 ND proxy config @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -306,7 +266,7 @@ define sw_interface_ip6nd_ra_prefix_reply @param address - The address of the host for which to proxy for @param is_add - Adding or deleting */ -define ip6nd_proxy_add_del +autoreply define ip6nd_proxy_add_del { u32 client_index; u32 context; @@ -315,16 +275,6 @@ define ip6nd_proxy_add_del u8 address[16]; }; -/** \brief IPv6 ND proxy response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define ip6nd_proxy_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 ND proxy details returned after request @param context - sender context, to match reply w/ request @param retval - return code for the request @@ -355,7 +305,7 @@ define ip6nd_proxy_dump @param sw_if_index - interface used to reach neighbor @param enable - if non-zero enable ip6 on interface, else disable */ -define sw_interface_ip6_enable_disable +autoreply define sw_interface_ip6_enable_disable { u32 client_index; u32 context; @@ -363,23 +313,13 @@ define sw_interface_ip6_enable_disable u8 enable; /* set to true if enable */ }; -/** \brief IPv6 interface enable / disable response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define sw_interface_ip6_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 set link local address on interface request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - interface to set link local on @param address[] - the new link local address */ -define sw_interface_ip6_set_link_local_address +autoreply define sw_interface_ip6_set_link_local_address { u32 client_index; u32 context; @@ -387,16 +327,6 @@ define sw_interface_ip6_set_link_local_address u8 address[16]; }; -/** \brief IPv6 set link local address on interface response - @param context - sender context, to match reply w/ request - @param retval - error code for the request -*/ -define sw_interface_ip6_set_link_local_address_reply -{ - u32 context; - i32 retval; -}; - /** \brief Add / del route request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -422,7 +352,7 @@ define sw_interface_ip6_set_link_local_address_reply @param next_hop_out_label_stack - the next-hop output label stack, outer most first @param next_hop_via_label - The next-hop is a resolved via a local label */ -define ip_add_del_route +autoreply define ip_add_del_route { u32 client_index; u32 context; @@ -452,16 +382,6 @@ define ip_add_del_route u32 next_hop_out_label_stack[next_hop_n_out_labels]; }; -/** \brief Reply for add / del route request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ip_add_del_route_reply -{ - u32 context; - i32 retval; -}; - /** \brief Add / del route request 
@param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -470,7 +390,7 @@ define ip_add_del_route_reply FIXME */ -define ip_mroute_add_del +autoreply define ip_mroute_add_del { u32 client_index; u32 context; @@ -488,16 +408,6 @@ define ip_mroute_add_del u8 src_address[16]; }; -/** \brief Reply for add / del mroute request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ip_mroute_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Dump IP multicast fib table @param client_index - opaque cookie to identify the sender */ diff --git a/src/vnet/ipsec/ipsec.api b/src/vnet/ipsec/ipsec.api index ef090f84..203c5272 100644 --- a/src/vnet/ipsec/ipsec.api +++ b/src/vnet/ipsec/ipsec.api @@ -20,7 +20,7 @@ @param spd_id - SPD instance id (control plane allocated) */ -define ipsec_spd_add_del +autoreply define ipsec_spd_add_del { u32 client_index; u32 context; @@ -28,17 +28,6 @@ define ipsec_spd_add_del u32 spd_id; }; -/** \brief Reply for IPsec: Add/delete Security Policy Database entry - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ - -define ipsec_spd_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPsec: Add/delete SPD from interface @param client_index - opaque cookie to identify the sender @@ -49,7 +38,7 @@ define ipsec_spd_add_del_reply */ -define ipsec_interface_add_del_spd +autoreply define ipsec_interface_add_del_spd { u32 client_index; u32 context; @@ -59,17 +48,6 @@ define ipsec_interface_add_del_spd u32 spd_id; }; -/** \brief Reply for IPsec: Add/delete SPD from interface - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ - -define ipsec_interface_add_del_spd_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPsec: Add/delete Security Policy Database entry See RFC 4301, 4.4.1.1 on how to match packet to selectors @@ -95,7 +73,7 @@ define ipsec_interface_add_del_spd_reply */ -define ipsec_spd_add_del_entry +autoreply define ipsec_spd_add_del_entry { u32 client_index; u32 context; @@ -125,17 +103,6 @@ define ipsec_spd_add_del_entry u32 sa_id; }; -/** \brief Reply for IPsec: Add/delete Security Policy Database entry - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ - -define ipsec_spd_add_del_entry_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPsec: Add/delete Security Association Database entry @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -167,7 +134,7 @@ define ipsec_spd_add_del_entry_reply IPsec tunnel address copy mode (to support GDOI) */ -define ipsec_sad_add_del_entry +autoreply define ipsec_sad_add_del_entry { u32 client_index; u32 context; @@ -195,17 +162,6 @@ define ipsec_sad_add_del_entry u8 tunnel_dst_address[16]; }; -/** \brief Reply for IPsec: Add/delete Security Association Database entry - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ - -define ipsec_sad_add_del_entry_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPsec: Update Security Association keys @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -219,7 +175,7 @@ define ipsec_sad_add_del_entry_reply @param integrity_key - integrity keying material */ -define ipsec_sa_set_key +autoreply 
define ipsec_sa_set_key { u32 client_index; u32 context; @@ -233,17 +189,6 @@ define ipsec_sa_set_key u8 integrity_key[128]; }; -/** \brief Reply for IPsec: Update Security Association keys - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ - -define ipsec_sa_set_key_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Add/delete profile @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -251,7 +196,7 @@ define ipsec_sa_set_key_reply @param name - IKEv2 profile name @param is_add - Add IKEv2 profile if non-zero, else delete */ -define ikev2_profile_add_del +autoreply define ikev2_profile_add_del { u32 client_index; u32 context; @@ -260,16 +205,6 @@ define ikev2_profile_add_del u8 is_add; }; -/** \brief Reply for IKEv2: Add/delete profile - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_profile_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Set IKEv2 profile authentication method @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -280,7 +215,7 @@ define ikev2_profile_add_del_reply @param data_len - Authentication data length @param data - Authentication data (for rsa-sig cert file path) */ -define ikev2_profile_set_auth +autoreply define ikev2_profile_set_auth { u32 client_index; u32 context; @@ -292,16 +227,6 @@ define ikev2_profile_set_auth u8 data[0]; }; -/** \brief Reply for IKEv2: Set IKEv2 profile authentication method - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_profile_set_auth_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Set IKEv2 profile local/remote identification @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -312,7 +237,7 @@ define ikev2_profile_set_auth_reply @param data_len - Identification data length @param data - Identification data */ -define ikev2_profile_set_id +autoreply define ikev2_profile_set_id { u32 client_index; u32 context; @@ -324,16 +249,6 @@ define ikev2_profile_set_id u8 data[0]; }; -/** \brief Reply for IKEv2: - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_profile_set_id_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Set IKEv2 profile traffic selector parameters @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -346,7 +261,7 @@ define ikev2_profile_set_id_reply @param start_addr - The smallest address included in traffic selector @param end_addr - The largest address included in traffic selector */ -define ikev2_profile_set_ts +autoreply define ikev2_profile_set_ts { u32 client_index; u32 context; @@ -360,23 +275,13 @@ define ikev2_profile_set_ts u32 end_addr; }; -/** \brief Reply for IKEv2: Set IKEv2 profile traffic selector parameters - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_profile_set_ts_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Set IKEv2 local RSA private key @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param key_file - Key file absolute path */ -define ikev2_set_local_key 
+autoreply define ikev2_set_local_key { u32 client_index; u32 context; @@ -384,16 +289,6 @@ define ikev2_set_local_key u8 key_file[256]; }; -/** \brief Reply for IKEv2: Set IKEv2 local key - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_set_local_key_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Set IKEv2 responder interface and IP address @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -402,7 +297,7 @@ define ikev2_set_local_key_reply @param sw_if_index - interface index @param address - interface address */ -define ikev2_set_responder +autoreply define ikev2_set_responder { u32 client_index; u32 context; @@ -412,17 +307,6 @@ define ikev2_set_responder u8 address[4]; }; -/** \brief Reply for IKEv2: Set IKEv2 responder interface and IP address - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_set_responder_reply -{ - u32 context; - i32 retval; -}; - - /** \brief IKEv2: Set IKEv2 IKE transforms in SA_INIT proposal (RFC 7296) @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -434,7 +318,7 @@ define ikev2_set_responder_reply @param dh_group - Diffie-Hellman group */ -define ikev2_set_ike_transforms +autoreply define ikev2_set_ike_transforms { u32 client_index; u32 context; @@ -446,16 +330,6 @@ define ikev2_set_ike_transforms u32 dh_group; }; -/** \brief Reply for IKEv2: Set IKEv2 IKE transforms - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_set_ike_transforms_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Set IKEv2 ESP transforms in SA_INIT proposal (RFC 7296) @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -467,7 +341,7 @@ define ikev2_set_ike_transforms_reply @param dh_group - Diffie-Hellman group */ -define ikev2_set_esp_transforms +autoreply define ikev2_set_esp_transforms { u32 client_index; u32 context; @@ -479,16 +353,6 @@ define ikev2_set_esp_transforms u32 dh_group; }; -/** \brief Reply for IKEv2: Set IKEv2 ESP transforms - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_set_esp_transforms_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Set Child SA lifetime, limited by time and/or data @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -500,7 +364,7 @@ define ikev2_set_esp_transforms_reply @param lifetime_maxdata - SA maximum life time in bytes (0 to disable) */ -define ikev2_set_sa_lifetime +autoreply define ikev2_set_sa_lifetime { u32 client_index; u32 context; @@ -512,16 +376,6 @@ define ikev2_set_sa_lifetime u64 lifetime_maxdata; }; -/** \brief Reply for IKEv2: Set Child SA lifetime - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_set_sa_lifetime_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Initiate the SA_INIT exchange @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -529,7 +383,7 @@ define ikev2_set_sa_lifetime_reply @param name - IKEv2 profile name */ -define ikev2_initiate_sa_init +autoreply define ikev2_initiate_sa_init { 
u32 client_index; u32 context; @@ -537,16 +391,6 @@ define ikev2_initiate_sa_init u8 name[64]; }; -/** \brief Reply for IKEv2: Initiate the SA_INIT exchange - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_initiate_sa_init_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Initiate the delete IKE SA exchange @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -554,7 +398,7 @@ define ikev2_initiate_sa_init_reply @param ispi - IKE SA initiator SPI */ -define ikev2_initiate_del_ike_sa +autoreply define ikev2_initiate_del_ike_sa { u32 client_index; u32 context; @@ -562,16 +406,6 @@ define ikev2_initiate_del_ike_sa u64 ispi; }; -/** \brief Reply for IKEv2: Initiate the delete IKE SA exchange - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_initiate_del_ike_sa_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Initiate the delete Child SA exchange @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -579,7 +413,7 @@ define ikev2_initiate_del_ike_sa_reply @param ispi - Child SA initiator SPI */ -define ikev2_initiate_del_child_sa +autoreply define ikev2_initiate_del_child_sa { u32 client_index; u32 context; @@ -587,16 +421,6 @@ define ikev2_initiate_del_child_sa u32 ispi; }; -/** \brief Reply for IKEv2: Initiate the delete Child SA exchange - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_initiate_del_child_sa_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Initiate the rekey Child SA exchange @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -604,7 +428,7 @@ define ikev2_initiate_del_child_sa_reply @param ispi - Child SA initiator SPI */ -define ikev2_initiate_rekey_child_sa +autoreply define ikev2_initiate_rekey_child_sa { u32 client_index; u32 context; @@ -612,16 +436,6 @@ define ikev2_initiate_rekey_child_sa u32 ispi; }; -/** \brief Reply for IKEv2: Initiate the rekey Child SA exchange - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_initiate_rekey_child_sa_reply -{ - u32 context; - i32 retval; -}; - /** \brief Dump ipsec policy database data @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -682,4 +496,4 @@ define ipsec_spd_details { * eval: (c-set-style "gnu") * End: */ - \ No newline at end of file + diff --git a/src/vnet/l2/l2.api b/src/vnet/l2/l2.api index c23eebec..db42d635 100644 --- a/src/vnet/l2/l2.api +++ b/src/vnet/l2/l2.api @@ -70,66 +70,36 @@ define l2_fib_table_dump @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request */ -define l2_fib_clear_table +autoreply define l2_fib_clear_table { u32 client_index; u32 context; }; -/** \brief L2 fib clear table response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2_fib_clear_table_reply -{ - u32 context; - i32 retval; -}; - /** \brief L2 FIB flush bridge domain entries @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param bd_id - the entry's bridge 
domain id */ -define l2fib_flush_bd +autoreply define l2fib_flush_bd { u32 client_index; u32 context; u32 bd_id; }; -/** \brief L2 FIB flush bridge domain entries response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2fib_flush_bd_reply -{ - u32 context; - i32 retval; -}; - /** \brief L2 FIB flush interface entries @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param bd_id - the entry's bridge domain id */ -define l2fib_flush_int +autoreply define l2fib_flush_int { u32 client_index; u32 context; u32 sw_if_index; }; -/** \brief L2 FIB flush interface entries response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2fib_flush_int_reply -{ - u32 context; - i32 retval; -}; - /** \brief L2 FIB add entry request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -140,7 +110,7 @@ define l2fib_flush_int_reply @param static_mac - @param filter_mac - */ -define l2fib_add_del +autoreply define l2fib_add_del { u32 client_index; u32 context; @@ -153,16 +123,6 @@ define l2fib_add_del u8 bvi_mac; }; -/** \brief L2 FIB add entry response - @param context - sender context, to match reply w/ request - @param retval - return code for the add l2fib entry request -*/ -define l2fib_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set L2 flags request !!! TODO - need more info, feature bits in l2_input.h @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -196,7 +156,7 @@ define l2_flags_reply @param bd_id - the bridge domain to create @param mac_age - mac aging time in min, 0 for disabled */ -define bridge_domain_set_mac_age +autoreply define bridge_domain_set_mac_age { u32 client_index; u32 context; @@ -204,16 +164,6 @@ define bridge_domain_set_mac_age u8 mac_age; }; -/** \brief Set bridge domain response - @param context - sender context, to match reply w/ request - @param retval - return code for the set l2 bits request -*/ -define bridge_domain_set_mac_age_reply -{ - u32 context; - i32 retval; -}; - /** \brief L2 bridge domain add or delete request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -226,7 +176,7 @@ define bridge_domain_set_mac_age_reply @param mac_age - mac aging time in min, 0 for disabled @param is_add - add or delete flag */ -define bridge_domain_add_del +autoreply define bridge_domain_add_del { u32 client_index; u32 context; @@ -240,16 +190,6 @@ define bridge_domain_add_del u8 is_add; }; -/** \brief L2 bridge domain add or delete response - @param context - sender context, to match reply w/ request - @param retval - return code for the set bridge flags request -*/ -define bridge_domain_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief L2 bridge domain request operational state details @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -337,7 +277,7 @@ define bridge_flags_reply @param tag1 - Needed for any push or translate vtr op @param tag2 - Needed for any push 2 or translate x-2 vtr ops */ -define l2_interface_vlan_tag_rewrite +autoreply define l2_interface_vlan_tag_rewrite { u32 client_index; u32 context; @@ -348,16 +288,6 @@ define l2_interface_vlan_tag_rewrite u32 
tag2; // second pushed tag }; -/** \brief L2 interface vlan tag rewrite response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2_interface_vlan_tag_rewrite_reply -{ - u32 context; - i32 retval; -}; - /** \brief L2 interface pbb tag rewrite configure request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -370,7 +300,7 @@ define l2_interface_vlan_tag_rewrite_reply @param b_vlanid - B-tag vlanid, needed for any push or translate qinq vtr op @param i_sid - I-tag service id, needed for any push or translate qinq vtr op */ -define l2_interface_pbb_tag_rewrite +autoreply define l2_interface_pbb_tag_rewrite { u32 client_index; u32 context; @@ -383,16 +313,6 @@ define l2_interface_pbb_tag_rewrite u32 i_sid; }; -/** \brief L2 interface pbb tag rewrite response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2_interface_pbb_tag_rewrite_reply -{ - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") diff --git a/src/vnet/l2tp/l2tp.api b/src/vnet/l2tp/l2tp.api index 5a5a5a48..4587a807 100644 --- a/src/vnet/l2tp/l2tp.api +++ b/src/vnet/l2tp/l2tp.api @@ -52,7 +52,7 @@ define l2tpv3_create_tunnel_reply u32 sw_if_index; }; -define l2tpv3_set_tunnel_cookies +autoreply define l2tpv3_set_tunnel_cookies { u32 client_index; u32 context; @@ -61,16 +61,6 @@ define l2tpv3_set_tunnel_cookies u64 new_remote_cookie; }; -/** \brief L2TP tunnel set cookies response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2tpv3_set_tunnel_cookies_reply -{ - u32 context; - i32 retval; -}; - define sw_if_l2tpv3_tunnel_details { u32 context; @@ -91,7 +81,7 @@ define sw_if_l2tpv3_tunnel_dump u32 context; }; -define l2tpv3_interface_enable_disable +autoreply define l2tpv3_interface_enable_disable { u32 client_index; u32 context; @@ -99,13 +89,7 @@ define l2tpv3_interface_enable_disable u32 sw_if_index; }; -define l2tpv3_interface_enable_disable_reply -{ - u32 context; - i32 retval; -}; - -define l2tpv3_set_lookup_key +autoreply define l2tpv3_set_lookup_key { u32 client_index; u32 context; @@ -113,12 +97,6 @@ define l2tpv3_set_lookup_key u8 key; }; -define l2tpv3_set_lookup_key_reply -{ - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") diff --git a/src/vnet/lisp-cp/lisp.api b/src/vnet/lisp-cp/lisp.api index a50a5ccb..8bed71b3 100644 --- a/src/vnet/lisp-cp/lisp.api +++ b/src/vnet/lisp-cp/lisp.api @@ -59,7 +59,7 @@ define lisp_add_del_locator_set_reply @param priority - priority of the lisp locator @param weight - weight of the lisp locator */ -define lisp_add_del_locator +autoreply define lisp_add_del_locator { u32 client_index; u32 context; @@ -70,16 +70,6 @@ define lisp_add_del_locator u8 weight; }; -/** \brief Reply for locator add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_add_del_locator_reply -{ - u32 context; - i32 retval; -}; - /** \brief add or delete lisp eid-table @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -98,7 +88,7 @@ define lisp_add_del_locator_reply HMAC_SHA_256_128 2 @param key - secret key */ -define lisp_add_del_local_eid +autoreply define lisp_add_del_local_eid { u32 client_index; u32 context; @@ -112,16 +102,6 
@@ define lisp_add_del_local_eid u8 key[64]; }; -/** \brief Reply for local_eid add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_add_del_local_eid_reply -{ - u32 context; - i32 retval; -}; - /** \brief Add/delete map server @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -129,7 +109,7 @@ define lisp_add_del_local_eid_reply @param is_ipv6 - if non-zero the address is ipv6, else ipv4 @param ip_address - map server IP address */ -define lisp_add_del_map_server +autoreply define lisp_add_del_map_server { u32 client_index; u32 context; @@ -138,16 +118,6 @@ define lisp_add_del_map_server u8 ip_address[16]; }; -/** \brief Reply for lisp_add_del_map_server - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_add_del_map_server_reply -{ - u32 context; - i32 retval; -}; - /** \brief add or delete map-resolver @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -155,7 +125,7 @@ define lisp_add_del_map_server_reply @param is_ipv6 - if non-zero the address is ipv6, else ipv4 @param ip_address - array of address bytes */ -define lisp_add_del_map_resolver +autoreply define lisp_add_del_map_resolver { u32 client_index; u32 context; @@ -164,45 +134,25 @@ define lisp_add_del_map_resolver u8 ip_address[16]; }; -/** \brief Reply for map_resolver add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_add_del_map_resolver_reply -{ - u32 context; - i32 retval; -}; - /** \brief enable or disable LISP feature @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_en - enable protocol if non-zero, else disable */ -define lisp_enable_disable +autoreply define lisp_enable_disable { u32 client_index; u32 context; u8 is_en; }; -/** \brief Reply for gpe enable/disable - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief configure or disable LISP PITR node @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param ls_name - locator set name @param is_add - add locator set if non-zero, else disable pitr */ -define lisp_pitr_set_locator_set +autoreply define lisp_pitr_set_locator_set { u32 client_index; u32 context; @@ -210,16 +160,6 @@ define lisp_pitr_set_locator_set u8 ls_name[64]; }; -/** \brief Reply for lisp_pitr_set_locator_set - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_pitr_set_locator_set_reply -{ - u32 context; - i32 retval; -}; - /** \brief configure or disable use of PETR @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -227,7 +167,7 @@ define lisp_pitr_set_locator_set_reply @param address - PETR IP address @param is_add - add locator set if non-zero, else disable pitr */ -define lisp_use_petr +autoreply define lisp_use_petr { u32 client_index; u32 context; @@ -236,16 +176,6 @@ define lisp_use_petr u8 is_add; }; -/** \brief Reply for lisp_pitr_set_locator_set - @param context - returned sender context, to match reply w/ request - @param retval - return 
code -*/ -define lisp_use_petr_reply -{ - u32 context; - i32 retval; -}; - /** \brief Request for LISP PETR status @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -298,45 +228,25 @@ define show_lisp_rloc_probe_state_reply @param context - sender context, to match reply w/ request @param is_enable - enable if non-zero; disable otherwise */ -define lisp_rloc_probe_enable_disable +autoreply define lisp_rloc_probe_enable_disable { u32 client_index; u32 context; u8 is_enabled; }; -/** \brief Reply for lisp_rloc_probe_enable_disable - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_rloc_probe_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief enable/disable LISP map-register @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_enable - enable if non-zero; disable otherwise */ -define lisp_map_register_enable_disable +autoreply define lisp_map_register_enable_disable { u32 client_index; u32 context; u8 is_enabled; }; -/** \brief Reply for lisp_map_register_enable_disable - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_map_register_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief Get state of LISP map-register @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -366,23 +276,13 @@ define show_lisp_map_register_state_reply 0 - destination only 1 - source/destaination */ -define lisp_map_request_mode +autoreply define lisp_map_request_mode { u32 client_index; u32 context; u8 mode; }; -/** \brief Reply for lisp_map_request_mode - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_map_request_mode_reply -{ - u32 context; - i32 retval; -}; - /** \brief Request for LISP map-request mode @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -430,7 +330,7 @@ typeonly manual_endian manual_print define remote_locator @param rloc_num - number of remote locators @param rlocs - remote locator records */ -manual_print manual_endian define lisp_add_del_remote_mapping +autoreply manual_print manual_endian define lisp_add_del_remote_mapping { u32 client_index; u32 context; @@ -448,16 +348,6 @@ manual_print manual_endian define lisp_add_del_remote_mapping vl_api_remote_locator_t rlocs[rloc_num]; }; -/** \brief Reply for lisp_add_del_remote_mapping - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_add_del_remote_mapping_reply -{ - u32 context; - i32 retval; -}; - /** \brief add or delete LISP adjacency adjacency @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -470,7 +360,7 @@ define lisp_add_del_remote_mapping_reply @param reid - remote EID @param leid - local EID */ -define lisp_add_del_adjacency +autoreply define lisp_add_del_adjacency { u32 client_index; u32 context; @@ -483,23 +373,13 @@ define lisp_add_del_adjacency u8 leid_len; }; -/** \brief Reply for lisp_add_del_adjacency - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_add_del_adjacency_reply -{ - u32 context; - i32 retval; -}; - /** 
\brief add or delete map request itr rlocs @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_add - add address if non-zero, else delete @param locator_set_name - locator set name */ -define lisp_add_del_map_request_itr_rlocs +autoreply define lisp_add_del_map_request_itr_rlocs { u32 client_index; u32 context; @@ -512,12 +392,6 @@ define lisp_add_del_map_request_itr_rlocs @param retval - return code */ -define lisp_add_del_map_request_itr_rlocs_reply -{ - u32 context; - i32 retval; -}; - /** \brief map/unmap vni/bd_index to vrf @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -525,7 +399,7 @@ define lisp_add_del_map_request_itr_rlocs_reply @param dp_table - virtual network id/bridge domain index @param vrf - vrf */ -define lisp_eid_table_add_del_map +autoreply define lisp_eid_table_add_del_map { u32 client_index; u32 context; @@ -535,16 +409,6 @@ define lisp_eid_table_add_del_map u8 is_l2; }; -/** \brief Reply for lisp_eid_table_add_del_map - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_eid_table_add_del_map_reply -{ - u32 context; - i32 retval; -}; - /** \brief Request for map lisp locator status @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/lisp-cp/one.api b/src/vnet/lisp-cp/one.api index ca82f694..2fa1edf6 100644 --- a/src/vnet/lisp-cp/one.api +++ b/src/vnet/lisp-cp/one.api @@ -59,7 +59,7 @@ define one_add_del_locator_set_reply @param priority - priority of the locator @param weight - weight of the locator */ -define one_add_del_locator +autoreply define one_add_del_locator { u32 client_index; u32 context; @@ -70,16 +70,6 @@ define one_add_del_locator u8 weight; }; -/** \brief Reply for locator add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_add_del_locator_reply -{ - u32 context; - i32 retval; -}; - /** \brief add or delete ONE eid-table @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -98,7 +88,7 @@ define one_add_del_locator_reply HMAC_SHA_256_128 2 @param key - secret key */ -define one_add_del_local_eid +autoreply define one_add_del_local_eid { u32 client_index; u32 context; @@ -112,16 +102,6 @@ define one_add_del_local_eid u8 key[64]; }; -/** \brief Reply for local_eid add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_add_del_local_eid_reply -{ - u32 context; - i32 retval; -}; - /** \brief Add/delete map server @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -129,7 +109,7 @@ define one_add_del_local_eid_reply @param is_ipv6 - if non-zero the address is ipv6, else ipv4 @param ip_address - map server IP address */ -define one_add_del_map_server +autoreply define one_add_del_map_server { u32 client_index; u32 context; @@ -138,16 +118,6 @@ define one_add_del_map_server u8 ip_address[16]; }; -/** \brief Reply for one_add_del_map_server - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_add_del_map_server_reply -{ - u32 context; - i32 retval; -}; - /** \brief add or delete map-resolver @param client_index - opaque cookie to 
identify the sender @param context - sender context, to match reply w/ request @@ -155,7 +125,7 @@ define one_add_del_map_server_reply @param is_ipv6 - if non-zero the address is ipv6, else ipv4 @param ip_address - array of address bytes */ -define one_add_del_map_resolver +autoreply define one_add_del_map_resolver { u32 client_index; u32 context; @@ -164,45 +134,25 @@ define one_add_del_map_resolver u8 ip_address[16]; }; -/** \brief Reply for map_resolver add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_add_del_map_resolver_reply -{ - u32 context; - i32 retval; -}; - /** \brief enable or disable ONE feature @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_en - enable protocol if non-zero, else disable */ -define one_enable_disable +autoreply define one_enable_disable { u32 client_index; u32 context; u8 is_en; }; -/** \brief Reply for gpe enable/disable - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief configure or disable ONE PITR node @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param ls_name - locator set name @param is_add - add locator set if non-zero, else disable pitr */ -define one_pitr_set_locator_set +autoreply define one_pitr_set_locator_set { u32 client_index; u32 context; @@ -210,16 +160,6 @@ define one_pitr_set_locator_set u8 ls_name[64]; }; -/** \brief Reply for one_pitr_set_locator_set - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_pitr_set_locator_set_reply -{ - u32 context; - i32 retval; -}; - /** \brief configure or disable use of PETR @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -227,7 +167,7 @@ define one_pitr_set_locator_set_reply @param address - PETR IP address @param is_add - add locator set if non-zero, else disable PETR */ -define one_use_petr +autoreply define one_use_petr { u32 client_index; u32 context; @@ -236,16 +176,6 @@ define one_use_petr u8 is_add; }; -/** \brief Reply for one_use_petr - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_use_petr_reply -{ - u32 context; - i32 retval; -}; - /** \brief Request for ONE PETR status @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -298,45 +228,25 @@ define show_one_rloc_probe_state_reply @param context - sender context, to match reply w/ request @param is_enable - enable if non-zero; disable otherwise */ -define one_rloc_probe_enable_disable +autoreply define one_rloc_probe_enable_disable { u32 client_index; u32 context; u8 is_enabled; }; -/** \brief Reply for one_rloc_probe_enable_disable - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_rloc_probe_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief enable/disable ONE map-register @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_enable - enable if non-zero; disable otherwise */ -define one_map_register_enable_disable +autoreply define one_map_register_enable_disable 
{ u32 client_index; u32 context; u8 is_enabled; }; -/** \brief Reply for one_map_register_enable_disable - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_map_register_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief Get state of ONE map-register @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -366,23 +276,13 @@ define show_one_map_register_state_reply 0 - destination only 1 - source/destaination */ -define one_map_request_mode +autoreply define one_map_request_mode { u32 client_index; u32 context; u8 mode; }; -/** \brief Reply for one_map_request_mode - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_map_request_mode_reply -{ - u32 context; - i32 retval; -}; - /** \brief Request for ONE map-request mode @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -430,7 +330,7 @@ typeonly manual_endian manual_print define one_remote_locator @param rloc_num - number of remote locators @param rlocs - remote locator records */ -manual_print manual_endian define one_add_del_remote_mapping +autoreply manual_print manual_endian define one_add_del_remote_mapping { u32 client_index; u32 context; @@ -448,16 +348,6 @@ manual_print manual_endian define one_add_del_remote_mapping vl_api_one_remote_locator_t rlocs[rloc_num]; }; -/** \brief Reply for one_add_del_remote_mapping - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_add_del_remote_mapping_reply -{ - u32 context; - i32 retval; -}; - /** \brief add or delete ONE adjacency adjacency @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -470,7 +360,7 @@ define one_add_del_remote_mapping_reply @param reid - remote EID @param leid - local EID */ -define one_add_del_adjacency +autoreply define one_add_del_adjacency { u32 client_index; u32 context; @@ -483,23 +373,13 @@ define one_add_del_adjacency u8 leid_len; }; -/** \brief Reply for one_add_del_adjacency - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_add_del_adjacency_reply -{ - u32 context; - i32 retval; -}; - /** \brief add or delete map request itr rlocs @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_add - add address if non-zero, else delete @param locator_set_name - locator set name */ -define one_add_del_map_request_itr_rlocs +autoreply define one_add_del_map_request_itr_rlocs { u32 client_index; u32 context; @@ -507,17 +387,6 @@ define one_add_del_map_request_itr_rlocs u8 locator_set_name[64]; }; -/** \brief Reply for one_add_del_map_request_itr_rlocs - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ - -define one_add_del_map_request_itr_rlocs_reply -{ - u32 context; - i32 retval; -}; - /** \brief map/unmap vni/bd_index to vrf @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -525,7 +394,7 @@ define one_add_del_map_request_itr_rlocs_reply @param dp_table - virtual network id/bridge domain index @param vrf - vrf */ -define one_eid_table_add_del_map +autoreply define one_eid_table_add_del_map { u32 client_index; 
u32 context; @@ -535,16 +404,6 @@ define one_eid_table_add_del_map u8 is_l2; }; -/** \brief Reply for one_eid_table_add_del_map - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_eid_table_add_del_map_reply -{ - u32 context; - i32 retval; -}; - /** \brief Request for map one locator status @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -901,31 +760,19 @@ define one_stats_details u32 bytes; }; -define one_stats_flush +autoreply define one_stats_flush { u32 client_index; u32 context; }; -define one_stats_flush_reply -{ - u32 context; - i32 retval; -}; - -define one_stats_enable_disable +autoreply define one_stats_enable_disable { u32 client_index; u32 context; u8 is_en; }; -define one_stats_enable_disable_reply -{ - u32 context; - i32 retval; -}; - define show_one_stats_enable_disable { u32 client_index; diff --git a/src/vnet/lisp-gpe/lisp_gpe.api b/src/vnet/lisp-gpe/lisp_gpe.api index 43a6a6cd..f79d18c1 100644 --- a/src/vnet/lisp-gpe/lisp_gpe.api +++ b/src/vnet/lisp-gpe/lisp_gpe.api @@ -43,7 +43,7 @@ typeonly manual_print manual_endian define gpe_locator @param loc_num - number of locators @param locs - array of remote locators */ -manual_print manual_endian define gpe_add_del_fwd_entry +autoreply manual_print manual_endian define gpe_add_del_fwd_entry { u32 client_index; u32 context; @@ -60,44 +60,24 @@ manual_print manual_endian define gpe_add_del_fwd_entry vl_api_gpe_locator_t locs[loc_num]; }; -/** \brief Reply for gpe_fwd_entry add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define gpe_add_del_fwd_entry_reply -{ - u32 context; - i32 retval; -}; - /** \brief enable or disable gpe protocol @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_en - enable protocol if non-zero, else disable */ -define gpe_enable_disable +autoreply define gpe_enable_disable { u32 client_index; u32 context; u8 is_en; }; -/** \brief Reply for gpe enable/disable - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define gpe_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief add or delete gpe_iface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_add - add address if non-zero, else delete */ -define gpe_add_del_iface +autoreply define gpe_add_del_iface { u32 client_index; u32 context; @@ -107,16 +87,6 @@ define gpe_add_del_iface u32 vni; }; -/** \brief Reply for gpe_iface add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define gpe_add_del_iface_reply -{ - u32 context; - i32 retval; -}; - define gpe_fwd_entries_get { u32 client_index; @@ -163,23 +133,13 @@ manual_endian manual_print define gpe_fwd_entry_path_details @param context - sender context, to match reply w/ request @param mode - LISP (value 0) or VXLAN (value 1) */ -define gpe_set_encap_mode +autoreply define gpe_set_encap_mode { u32 client_index; u32 context; u8 mode; }; -/** \brief Reply for set_encap_mode - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define gpe_set_encap_mode_reply -{ - u32 context; - i32 retval; -}; - /** \brief get GPE encapsulation mode @param client_index - opaque cookie to identify the 
sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/map/map.api b/src/vnet/map/map.api index 4e4be85e..d68f13f0 100644 --- a/src/vnet/map/map.api +++ b/src/vnet/map/map.api @@ -62,22 +62,13 @@ define map_add_domain_reply @param context - sender context, to match reply w/ request @param index - MAP Domain index */ -define map_del_domain +autoreply define map_del_domain { u32 client_index; u32 context; u32 index; }; -/** \brief Reply for MAP domain del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define map_del_domain_reply -{ - u32 context; - i32 retval; -}; /** \brief Add or Delete MAP rule from a domain (Only used for shared IPv4 per subscriber) @param client_index - opaque cookie to identify the sender @@ -87,7 +78,7 @@ define map_del_domain_reply @param ip6_dst - MAP CE IPv6 address @param psid - Rule PSID */ -define map_add_del_rule +autoreply define map_add_del_rule { u32 client_index; u32 context; @@ -97,15 +88,6 @@ define map_add_del_rule u16 psid; }; -/** \brief Reply for MAP rule add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define map_add_del_rule_reply -{ - u32 context; - i32 retval; -}; /** \brief Get list of map domains @param client_index - opaque cookie to identify the sender diff --git a/src/vnet/mpls/mpls.api b/src/vnet/mpls/mpls.api index a1e1270a..c8a3ffb7 100644 --- a/src/vnet/mpls/mpls.api +++ b/src/vnet/mpls/mpls.api @@ -26,7 +26,7 @@ @param mb_address_length - Length of IP prefix @param mb_address[16] - IP prefix/ */ -define mpls_ip_bind_unbind +autoreply define mpls_ip_bind_unbind { u32 client_index; u32 context; @@ -40,16 +40,6 @@ define mpls_ip_bind_unbind u8 mb_address[16]; }; -/** \brief Reply for MPLS IP bind/unbind request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define mpls_ip_bind_unbind_reply -{ - u32 context; - i32 retval; -}; - /** \brief MPLS tunnel Add / del route @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -172,7 +162,7 @@ manual_endian manual_print define mpls_tunnel_details @param mr_next_hop_out_label_stack - the next-hop output label stack, outer most first @param next_hop_via_label - The next-hop is a resolved via a local label */ -define mpls_route_add_del +autoreply define mpls_route_add_del { u32 client_index; u32 context; @@ -199,16 +189,6 @@ define mpls_route_add_del u32 mr_next_hop_out_label_stack[mr_next_hop_n_out_labels]; }; -/** \brief Reply for MPLS route add / del request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define mpls_route_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Dump MPLS fib table @param client_index - opaque cookie to identify the sender */ @@ -240,4 +220,4 @@ manual_endian manual_print define mpls_fib_details * eval: (c-set-style "gnu") * End: */ - \ No newline at end of file + diff --git a/src/vnet/session/session.api b/src/vnet/session/session.api index e207e46f..4aef09da 100644 --- a/src/vnet/session/session.api +++ b/src/vnet/session/session.api @@ -49,26 +49,17 @@ define application_attach_reply { @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request */ - define application_detach { +autoreply define application_detach { u32 client_index; u32 context; }; - /** \brief detach 
reply - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define application_detach_reply { - u32 context; - i32 retval; -}; - /** \brief vpp->client, please map an additional shared memory segment @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param segment_name - */ -define map_another_segment { +autoreply define map_another_segment { u32 client_index; u32 context; u32 segment_size; @@ -83,7 +74,7 @@ define map_another_segment { "tcp://::/0/80" [ipv6] etc. @param options - socket options, fifo sizes, etc. */ -define bind_uri { +autoreply define bind_uri { u32 client_index; u32 context; u32 accept_cookie; @@ -97,7 +88,7 @@ define bind_uri { "tcp://::/0/80" [ipv6], etc. @param options - socket options, fifo sizes, etc. */ -define unbind_uri { +autoreply define unbind_uri { u32 client_index; u32 context; u8 uri[128]; @@ -122,24 +113,6 @@ define connect_uri { u64 options[16]; }; -/** \brief Bind reply - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define bind_uri_reply { - u32 context; - i32 retval; -}; - -/** \brief unbind reply - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define unbind_uri_reply { - u32 context; - i32 retval; -}; - /** \brief vpp->client, connect reply @param context - sender context, to match reply w/ request @param retval - return code for the request @@ -165,15 +138,6 @@ define connect_uri_reply { u8 segment_name[128]; }; -/** \brief client->vpp - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define map_another_segment_reply { - u32 context; - i32 retval; -}; - /** \brief vpp->client, accept this session @param context - sender context, to match reply w/ request @param listener_handle - tells client which listener this pertains to @@ -290,7 +254,7 @@ define bind_sock { @param context - sender context, to match reply w/ request @param handle - bind handle obtained from bind reply */ -define unbind_sock { +autoreply define unbind_sock { u32 client_index; u32 context; u64 handle; @@ -339,15 +303,6 @@ define bind_sock_reply { u8 segment_name[128]; }; -/** \brief unbind reply - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define unbind_sock_reply { - u32 context; - i32 retval; -}; - /** \brief vpp/server->client, connect reply @param context - sender context, to match reply w/ request @param retval - return code for the request @@ -378,23 +333,14 @@ define connect_sock_reply { @param context - sender context, to match reply w/ request @param is_enable - disable session layer if 0, enable otherwise */ -define session_enable_disable { +autoreply define session_enable_disable { u32 client_index; u32 context; u8 is_enable; }; -/** \brief Reply for session enable/disable - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define session_enable_disable_reply { - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") * End: - */ \ No newline at end of file + */ diff --git a/src/vnet/span/span.api b/src/vnet/span/span.api index 4babdd83..914fd8d0 100644 --- a/src/vnet/span/span.api +++ b/src/vnet/span/span.api @@ -21,7 +21,7 @@ @param sw_if_index_to - interface where the traffic is mirrored @param state - 0 = 
disabled, 1 = rx enabled, 2 = tx enabled, 3 tx & rx enabled */ -define sw_interface_span_enable_disable { +autoreply define sw_interface_span_enable_disable { u32 client_index; u32 context; u32 sw_if_index_from; @@ -29,14 +29,6 @@ define sw_interface_span_enable_disable { u8 state; }; -/** \brief Reply to SPAN enable/disable request - @param context - sender context which was passed in the request -*/ -define sw_interface_span_enable_disable_reply { - u32 context; - i32 retval; -}; - /** \brief SPAN dump request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/sr/sr.api b/src/vnet/sr/sr.api index 5feadcb0..9e900741 100644 --- a/src/vnet/sr/sr.api +++ b/src/vnet/sr/sr.api @@ -25,7 +25,7 @@ @param fib_table FIB table in which we should install the localsid entry @param nh_addr Next Hop IPv4/IPv6 address. Only for L2/L3 xconnect. */ -define sr_localsid_add_del +autoreply define sr_localsid_add_del { u32 client_index; u32 context; @@ -39,16 +39,6 @@ define sr_localsid_add_del u8 nh_addr[16]; }; -/** \brief IPv6 SR LocalSID add/del request response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define sr_localsid_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 SR policy add @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -59,7 +49,7 @@ define sr_localsid_add_del_reply @param fib_table is the VRF where to install the FIB entry for the BSID @param segments is a vector of IPv6 address composing the segment list */ -define sr_policy_add +autoreply define sr_policy_add { u32 client_index; u32 context; @@ -72,16 +62,6 @@ define sr_policy_add u8 segments[0]; }; -/** \brief IPv6 SR Policy add request response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define sr_policy_add_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 SR policy modification @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -94,7 +74,7 @@ define sr_policy_add_reply @param weight is the weight of the sid list. optional. @param is_encap Mode. Encapsulation or SRH insertion. 
*/ -define sr_policy_mod +autoreply define sr_policy_mod { u32 client_index; u32 context; @@ -108,23 +88,13 @@ define sr_policy_mod u8 segments[0]; }; -/** \brief IPv6 SR Policy modification request response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define sr_policy_mod_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 SR policy deletion @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param bsid is the bindingSID of the SR Policy @param index is the index of the SR policy */ -define sr_policy_del +autoreply define sr_policy_del { u32 client_index; u32 context; @@ -132,16 +102,6 @@ define sr_policy_del u32 sr_policy_index; }; -/** \brief IPv6 SR Policy deletion request response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define sr_policy_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 SR steering add/del @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -154,7 +114,7 @@ define sr_policy_del_reply @param sw_if_index is the incoming interface for L2 traffic @param traffic_type describes the type of traffic */ -define sr_steering_add_del +autoreply define sr_steering_add_del { u32 client_index; u32 context; @@ -168,16 +128,6 @@ define sr_steering_add_del u8 traffic_type; }; -/** \brief IPv6 SR steering add/del request response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define sr_steering_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Dump the list of SR LocalSIDs @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/unix/tap.api b/src/vnet/unix/tap.api index 1fd0bb09..d9fba371 100644 --- a/src/vnet/unix/tap.api +++ b/src/vnet/unix/tap.api @@ -93,23 +93,13 @@ define tap_modify_reply @param context - sender context, to match reply w/ request @param sw_if_index - interface index of existing tap interface */ -define tap_delete +autoreply define tap_delete { u32 client_index; u32 context; u32 sw_if_index; }; -/** \brief Reply for tap delete request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define tap_delete_reply -{ - u32 context; - i32 retval; -}; - /** \brief Dump tap interfaces request */ define sw_interface_tap_dump { diff --git a/src/vnet/vxlan/vxlan.api b/src/vnet/vxlan/vxlan.api index 048220fb..6c331a58 100644 --- a/src/vnet/vxlan/vxlan.api +++ b/src/vnet/vxlan/vxlan.api @@ -61,7 +61,7 @@ define vxlan_tunnel_details @param is_ipv6 - if non-zero, enable ipv6-vxlan-bypass, else ipv4-vxlan-bypass @param enable - if non-zero enable, else disable */ -define sw_interface_set_vxlan_bypass +autoreply define sw_interface_set_vxlan_bypass { u32 client_index; u32 context; @@ -69,13 +69,3 @@ define sw_interface_set_vxlan_bypass u8 is_ipv6; u8 enable; }; - -/** \brief Interface set vxlan-bypass response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define sw_interface_set_vxlan_bypass_reply -{ - u32 context; - i32 retval; -}; \ No newline at end of file diff --git a/src/vpp/api/vpe.api b/src/vpp/api/vpe.api index a4ba180d..7c07c822 100644 --- a/src/vpp/api/vpe.api +++ b/src/vpp/api/vpe.api @@ -80,7 +80,7 @@ define 
create_vlan_subif_reply @param sw_if_index - index of the interface @param enable - if non-zero enable, else disable */ -define sw_interface_set_mpls_enable +autoreply define sw_interface_set_mpls_enable { u32 client_index; u32 context; @@ -88,16 +88,6 @@ define sw_interface_set_mpls_enable u8 enable; }; -/** \brief Reply for MPLS state on an interface - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define sw_interface_set_mpls_enable_reply -{ - u32 context; - i32 retval; -}; - /** \brief Proxy ARP add / del request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -106,7 +96,7 @@ define sw_interface_set_mpls_enable_reply @param low_address[4] - Low address of the Proxy ARP range @param hi_address[4] - High address of the Proxy ARP range */ -define proxy_arp_add_del +autoreply define proxy_arp_add_del { u32 client_index; u32 context; @@ -116,23 +106,13 @@ define proxy_arp_add_del u8 hi_address[4]; }; -/** \brief Reply for proxy arp add / del request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define proxy_arp_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Proxy ARP add / del request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - Which interface to enable / disable Proxy Arp on @param enable_disable - 1 to enable Proxy ARP on interface, 0 to disable */ -define proxy_arp_intfc_enable_disable +autoreply define proxy_arp_intfc_enable_disable { u32 client_index; u32 context; @@ -141,23 +121,13 @@ define proxy_arp_intfc_enable_disable u8 enable_disable; }; -/** \brief Reply for Proxy ARP interface enable / disable request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define proxy_arp_intfc_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief Reset VRF (remove all routes etc) request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_ipv6 - 1 for IPv6 neighbor, 0 for IPv4 @param vrf_id - ID of th FIB table / VRF to reset */ -define reset_vrf +autoreply define reset_vrf { u32 client_index; u32 context; @@ -165,16 +135,6 @@ define reset_vrf u32 vrf_id; }; -/** \brief Reply for Reset VRF request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define reset_vrf_reply -{ - u32 context; - i32 retval; -}; - /** \brief Is Address Reachable request - DISABLED @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -200,7 +160,7 @@ define is_address_reachable @param enable_disable - 1 = enable stats, 0 = disable @param pid - pid of process requesting stats updates */ -define want_stats +autoreply define want_stats { u32 client_index; u32 context; @@ -208,16 +168,6 @@ define want_stats u32 pid; }; -/** \brief Reply for Want Stats request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define want_stats_reply -{ - u32 context; - i32 retval; -}; - typeonly manual_print manual_endian define ip4_fib_counter { u32 address; @@ -331,7 +281,7 @@ define oam_event @param enable_disable- enable if non-zero, else disable @param pid - pid of the requesting process */ -define want_oam_events +autoreply 
define want_oam_events { u32 client_index; u32 context; @@ -339,16 +289,6 @@ define want_oam_events u32 pid; }; -/** \brief Want OAM events response - @param context - sender context, to match reply w/ request - @param retval - return code for the want oam stats request -*/ -define want_oam_events_reply -{ - u32 context; - i32 retval; -}; - /** \brief OAM add / del target request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -357,7 +297,7 @@ define want_oam_events_reply @param dst_address[] - destination address of the target @param is_add - add target if non-zero, else delete */ -define oam_add_del +autoreply define oam_add_del { u32 client_index; u32 context; @@ -367,23 +307,13 @@ define oam_add_del u8 is_add; }; -/** \brief OAM add / del target response - @param context - sender context, to match reply w/ request - @param retval - return code of the request -*/ -define oam_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Reset fib table request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param vrf_id - vrf/table id of the fib table to reset @param is_ipv6 - an ipv6 fib to reset if non-zero, else ipv4 */ -define reset_fib +autoreply define reset_fib { u32 client_index; u32 context; @@ -391,16 +321,6 @@ define reset_fib u8 is_ipv6; }; -/** \brief Reset fib response - @param context - sender context, to match reply w/ request - @param retval - return code for the reset bfib request -*/ -define reset_fib_reply -{ - u32 context; - i32 retval; -}; - /** \brief Create loopback interface request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -458,23 +378,13 @@ define create_loopback_instance_reply @param context - sender context, to match reply w/ request @param sw_if_index - sw index of the interface that was created */ -define delete_loopback +autoreply define delete_loopback { u32 client_index; u32 context; u32 sw_if_index; }; -/** \brief Delete loopback interface response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define delete_loopback_reply -{ - u32 context; - i32 retval; -}; - /** \brief Control ping from client to api server request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -543,7 +453,7 @@ define cli_inband_reply @param is_ipv6 - neighbor limit if non-zero, else ARP limit @param arp_neighbor_limit - the new limit, defaults are ~ 50k */ -define set_arp_neighbor_limit +autoreply define set_arp_neighbor_limit { u32 client_index; u32 context; @@ -551,16 +461,6 @@ define set_arp_neighbor_limit u32 arp_neighbor_limit; }; -/** \brief Set max allowed ARP or ip6 neighbor entries response - @param context - sender context, to match reply w/ request - @param retval - return code for request -*/ -define set_arp_neighbor_limit_reply -{ - u32 context; - i32 retval; -}; - /** \brief L2 interface patch add / del request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -568,7 +468,7 @@ define set_arp_neighbor_limit_reply @param tx_sw_if_index - transmit side interface @param is_add - if non-zero set up the interface patch, else remove it */ -define l2_patch_add_del +autoreply define l2_patch_add_del { u32 client_index; u32 context; @@ -577,23 +477,13 @@ 
define l2_patch_add_del u8 is_add; }; -/** \brief L2 interface patch add / del response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2_patch_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Interface set vpath request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - interface used to reach neighbor @param enable - if non-zero enable, else disable */ -define sw_interface_set_vpath +autoreply define sw_interface_set_vpath { u32 client_index; u32 context; @@ -601,16 +491,6 @@ define sw_interface_set_vpath u8 enable; }; -/** \brief Interface set vpath response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define sw_interface_set_vpath_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set L2 XConnect between two interfaces request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -618,7 +498,7 @@ define sw_interface_set_vpath_reply @param tx_sw_if_index - Transmit interface index @param enable - enable xconnect if not 0, else set to L3 mode */ -define sw_interface_set_l2_xconnect +autoreply define sw_interface_set_l2_xconnect { u32 client_index; u32 context; @@ -627,16 +507,6 @@ define sw_interface_set_l2_xconnect u8 enable; }; -/** \brief Set L2 XConnect response - @param context - sender context, to match reply w/ request - @param retval - L2 XConnect request return code -*/ -define sw_interface_set_l2_xconnect_reply -{ - u32 context; - i32 retval; -}; - /** \brief Interface bridge mode request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -646,7 +516,7 @@ define sw_interface_set_l2_xconnect_reply @param shg - Shared horizon group, for bridge mode only @param enable - Enable beige mode if not 0, else set to L3 mode */ -define sw_interface_set_l2_bridge +autoreply define sw_interface_set_l2_bridge { u32 client_index; u32 context; @@ -657,16 +527,6 @@ define sw_interface_set_l2_bridge u8 enable; }; -/** \brief Interface bridge mode response - @param context - sender context, to match reply w/ request - @param retval - Bridge mode request return code -*/ -define sw_interface_set_l2_bridge_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set bridge domain ip to mac entry request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -676,7 +536,7 @@ define sw_interface_set_l2_bridge_reply @param mac_address - MAC address @param */ -define bd_ip_mac_add_del +autoreply define bd_ip_mac_add_del { u32 client_index; u32 context; @@ -687,16 +547,6 @@ define bd_ip_mac_add_del u8 mac_address[6]; }; -/** \brief Set bridge domain ip to mac entry response - @param context - sender context, to match reply w/ request - @param retval - return code for the set bridge flags request -*/ -define bd_ip_mac_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set/unset the classification table for an interface request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -704,7 +554,7 @@ define bd_ip_mac_add_del_reply @param sw_if_index - interface to associate with the table @param table_index - index of the table, if ~0 unset the table */ -define classify_set_interface_ip_table 
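
A note on scope, with an illustrative sketch (not a hunk from this patch): only requests whose replies carry nothing beyond a return code are converted to autoreply. Replies that return data, such as create_loopback above (left unconverted while delete_loopback is converted), keep their explicit definitions; the field list below is reconstructed from the adjacent doc comment and the usual context/retval convention:

    /* create_loopback returns the new interface index, so its reply
       stays handwritten and the request keeps a plain define */
    define create_loopback_reply
    {
      u32 context;
      i32 retval;
      u32 sw_if_index;
    };
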
+autoreply define classify_set_interface_ip_table { u32 client_index; u32 context; @@ -713,16 +563,6 @@ define classify_set_interface_ip_table u32 table_index; /* ~0 => off */ }; -/** \brief Set/unset interface classification table response - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define classify_set_interface_ip_table_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set/unset l2 classification tables for an interface request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -731,7 +571,7 @@ define classify_set_interface_ip_table_reply @param ip6_table_index - ip6 index @param other_table_index - other index */ -define classify_set_interface_l2_tables +autoreply define classify_set_interface_l2_tables { u32 client_index; u32 context; @@ -743,16 +583,6 @@ define classify_set_interface_l2_tables u8 is_input; }; -/** \brief Set/unset l2 classification tables for an interface response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define classify_set_interface_l2_tables_reply -{ - u32 context; - i32 retval; -}; - /** \brief Get node index using name request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -809,7 +639,7 @@ define add_node_next_reply @param sw_if_index - interface to enable/disable filtering on @param enable_disable - if non-zero enable filtering, else disable */ -define l2_interface_efp_filter +autoreply define l2_interface_efp_filter { u32 client_index; u32 context; @@ -817,16 +647,6 @@ define l2_interface_efp_filter u32 enable_disable; }; -/** \brief L2 interface ethernet flow point filtering response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2_interface_efp_filter_reply -{ - u32 context; - i32 retval; -}; - define create_subif { u32 client_index; @@ -882,7 +702,7 @@ define show_version_reply }; /* Gross kludge, DGMS */ -define interface_name_renumber +autoreply define interface_name_renumber { u32 client_index; u32 context; @@ -890,12 +710,6 @@ define interface_name_renumber u32 new_show_dev_instance; }; -define interface_name_renumber_reply -{ - u32 context; - i32 retval; -}; - /** \brief Register for ip4 arp resolution events @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -903,7 +717,7 @@ define interface_name_renumber_reply @param pid - sender's pid @param address - the exact ip4 address of interest */ -define want_ip4_arp_events +autoreply define want_ip4_arp_events { u32 client_index; u32 context; @@ -912,16 +726,6 @@ define want_ip4_arp_events u32 address; }; -/** \brief Reply for interface events registration - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define want_ip4_arp_events_reply -{ - u32 context; - i32 retval; -}; - /** \brief Tell client about an ip4 arp resolution event @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -949,7 +753,7 @@ define ip4_arp_event @param pid - sender's pid @param address - the exact ip6 address of interest */ -define want_ip6_nd_events +autoreply define want_ip6_nd_events { u32 client_index; u32 context; @@ -958,16 +762,6 @@ define want_ip6_nd_events u8 address[16]; }; -/** \brief Reply for 
ip6 nd resolution events registration - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define want_ip6_nd_events_reply -{ - u32 context; - i32 retval; -}; - /** \brief Tell client about an ip6 nd resolution or mac/ip event @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -999,7 +793,7 @@ define ip6_nd_event Note: User is recommeneded to use just one valid table_index per call. (ip4_table_index, ip6_table_index, or l2_table_index) */ -define input_acl_set_interface +autoreply define input_acl_set_interface { u32 client_index; u32 context; @@ -1010,16 +804,6 @@ define input_acl_set_interface u8 is_add; }; -/** \brief Set/unset input ACL interface response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define input_acl_set_interface_reply -{ - u32 context; - i32 retval; -}; - define get_node_graph { u32 client_index; @@ -1048,7 +832,7 @@ define get_node_graph_reply @param pow_enable - Proof of Work enabled or not flag @param trace_enable - iOAM Trace enabled or not flag */ -define ioam_enable +autoreply define ioam_enable { u32 client_index; u32 context; @@ -1060,38 +844,18 @@ define ioam_enable u32 node_id; }; -/** \brief iOAM Trace profile add / del response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define ioam_enable_reply -{ - u32 context; - i32 retval; -}; - /** \brief iOAM disable @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param index - MAP Domain index */ -define ioam_disable +autoreply define ioam_disable { u32 client_index; u32 context; u16 id; }; -/** \brief iOAM disable response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define ioam_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief Query relative index via node names @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -1149,7 +913,7 @@ define pg_create_interface_reply @param count - number of packets to be captured @param pcap_file - pacp file name to store captured packets */ -define pg_capture +autoreply define pg_capture { u32 client_index; u32 context; @@ -1160,23 +924,13 @@ define pg_capture u8 pcap_file_name[pcap_name_length]; }; -/** \brief PacketGenerator capture packets response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define pg_capture_reply -{ - u32 context; - i32 retval; -}; - /** \brief Enable / disable packet generator request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_enabled - 1 if enabling streams, 0 if disabling @param stream - stream name to be enable/disabled, if not specified handle all streams */ -define pg_enable_disable +autoreply define pg_enable_disable { u32 client_index; u32 context; @@ -1185,16 +939,6 @@ define pg_enable_disable u8 stream_name[stream_name_length]; }; -/** \brief Reply for enable / disable packet generator - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define pg_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief Configure IP source and L4 port-range check @param client_index - opaque 
cookie to identify the sender @param context - sender context, to match reply w/ request @@ -1208,7 +952,7 @@ define pg_enable_disable_reply @param vrf_id - fib table/vrf id to associate the source and port-range check with @note To specify a single port set low_port and high_port entry the same */ -define ip_source_and_port_range_check_add_del +autoreply define ip_source_and_port_range_check_add_del { u32 client_index; u32 context; @@ -1222,16 +966,6 @@ define ip_source_and_port_range_check_add_del u32 vrf_id; }; -/** \brief Configure IP source and L4 port-range check reply - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ip_source_and_port_range_check_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set interface source and L4 port-range request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -1239,7 +973,7 @@ define ip_source_and_port_range_check_add_del_reply @param tcp_vrf_id - VRF associated with source and TCP port-range check @param udp_vrf_id - VRF associated with source and TCP port-range check */ -define ip_source_and_port_range_check_interface_add_del +autoreply define ip_source_and_port_range_check_interface_add_del { u32 client_index; u32 context; @@ -1251,36 +985,17 @@ define ip_source_and_port_range_check_interface_add_del u32 udp_out_vrf_id; }; -/** \brief Set interface source and L4 port-range response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define ip_source_and_port_range_check_interface_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Delete sub interface request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - sw index of the interface that was created by create_subif */ -define delete_subif { +autoreply define delete_subif { u32 client_index; u32 context; u32 sw_if_index; }; -/** \brief Delete sub interface response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define delete_subif_reply { - u32 context; - i32 retval; -}; - /** \brief Punt traffic to the host @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -1289,7 +1004,7 @@ define delete_subif_reply { @param l4_protocol - L4 protocol to be punted, only UDP (0x11) is supported @param l4_port - TCP/UDP port to be punted */ -define punt { +autoreply define punt { u32 client_index; u32 context; u8 is_add; @@ -1298,23 +1013,13 @@ define punt { u16 l4_port; }; -/** \brief Reply to the punt request - @param context - sender context which was passed in the request - @param retval - return code of punt request -*/ -define punt_reply -{ - u32 context; - i32 retval; -}; - /** \brief Feature path enable/disable request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - the interface @param enable - 1 = on, 0 = off */ -define feature_enable_disable { +autoreply define feature_enable_disable { u32 client_index; u32 context; u32 sw_if_index; @@ -1323,16 +1028,6 @@ define feature_enable_disable { u8 feature_name[64]; }; -/** \brief Reply to the eature path enable/disable request - @param context - sender context which was passed in the request - @param retval - return code for the request -*/ 
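Every *_reply message deleted in this file carried only an echoed context plus a retval; marking the request `autoreply` asks the VPP API generator to emit that same reply automatically. As a rough, non-authoritative sketch (field layout taken from the deleted definitions; the leading _vl_msg_id prefix is assumed to be added by the generator), the implied reply for e.g. oam_add_del is equivalent to:

    #include <stdint.h>

    /* hypothetical C view of the generated vl_api_oam_add_del_reply_t */
    typedef struct
    {
      uint16_t _vl_msg_id;  /* assumption: message-id prefix added by the API generator */
      uint32_t context;     /* sender context, echoed to match reply w/ request */
      int32_t retval;       /* return code for the request */
    } vl_api_oam_add_del_reply_t;
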
-define feature_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") -- cgit 1.2.3-korg From 227038a444b98f922b4a4f44b85ae60f9ee86e1c Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Fri, 21 Apr 2017 01:07:59 -0700 Subject: IP Flow Hash Config fixes - the flow hash config is (and was) cached on the load-balance object so the fib_table_t struct is not used a switch time. Therefore changes to the table's flow hash config need to be propagated to all load-balances and hance all FIB entries in the table. - enable API for setting the IPv6 table flow hash config - use only the hash config in the fib_table_t object and not on the ipX_fib_t - add tests. Change-Id: Ib804c11162c6d4972c764957562c372f663e05d4 Signed-off-by: Neale Ranns --- src/vnet/fib/fib_entry.c | 29 +++++++++ src/vnet/fib/fib_entry.h | 2 + src/vnet/fib/fib_table.c | 54 +++++++++++++--- src/vnet/fib/fib_table.h | 19 ++++++ src/vnet/fib/fib_test.c | 23 +++++++ src/vnet/fib/ip4_fib.c | 12 +--- src/vnet/fib/ip4_fib.h | 6 -- src/vnet/fib/ip6_fib.c | 12 +--- src/vnet/fib/ip6_fib.h | 2 - src/vnet/fib/mpls_fib.c | 18 +----- src/vnet/fib/mpls_fib.h | 7 ++- src/vnet/ip/ip4_forward.c | 5 +- src/vnet/ip/ip6.h | 3 - src/vnet/ip/ip6_forward.c | 12 ++-- src/vnet/ip/ip_api.c | 12 +++- src/vnet/mpls/mpls_tunnel.c | 18 +++++- src/vpp/api/api.c | 3 + test/test_ip4.py | 137 ++++++++++++++++++++++++++++++++++++++++ test/test_ip6.py | 149 ++++++++++++++++++++++++++++++++++++++++++++ test/vpp_papi_provider.py | 19 ++++++ 20 files changed, 468 insertions(+), 74 deletions(-) (limited to 'src/vpp/api') diff --git a/src/vnet/fib/fib_entry.c b/src/vnet/fib/fib_entry.c index 6f811aa1..29f5b359 100644 --- a/src/vnet/fib/fib_entry.c +++ b/src/vnet/fib/fib_entry.c @@ -1390,6 +1390,35 @@ fib_entry_is_resolved (fib_node_index_t fib_entry_index) } } +void +fib_entry_set_flow_hash_config (fib_node_index_t fib_entry_index, + flow_hash_config_t hash_config) +{ + fib_entry_t *fib_entry; + + fib_entry = fib_entry_get(fib_entry_index); + + /* + * pass the hash-config on to the load-balance object where it is cached. + * we can ignore LBs in the delegate chains, since they will not be of the + * correct protocol type (i.e. they are not IP) + * There's no way, nor need, to change the hash config for MPLS. 
+ */ + if (dpo_id_is_valid(&fib_entry->fe_lb)) + { + load_balance_t *lb; + + ASSERT(DPO_LOAD_BALANCE == fib_entry->fe_lb.dpoi_type); + + lb = load_balance_get(fib_entry->fe_lb.dpoi_index); + + /* + * atomic update for packets in flight + */ + lb->lb_hash_config = hash_config; + } +} + static int fib_ip4_address_compare (const ip4_address_t * a1, const ip4_address_t * a2) diff --git a/src/vnet/fib/fib_entry.h b/src/vnet/fib/fib_entry.h index b17a0b64..2196079b 100644 --- a/src/vnet/fib/fib_entry.h +++ b/src/vnet/fib/fib_entry.h @@ -533,6 +533,8 @@ extern int fib_entry_is_sourced(fib_node_index_t fib_entry_index, extern fib_node_index_t fib_entry_get_path_list(fib_node_index_t fib_entry_index); extern int fib_entry_is_resolved(fib_node_index_t fib_entry_index); +extern void fib_entry_set_flow_hash_config(fib_node_index_t fib_entry_index, + flow_hash_config_t hash_config); extern void fib_entry_module_init(void); diff --git a/src/vnet/fib/fib_table.c b/src/vnet/fib/fib_table.c index ff428049..d50f17f1 100644 --- a/src/vnet/fib/fib_table.c +++ b/src/vnet/fib/fib_table.c @@ -945,18 +945,52 @@ flow_hash_config_t fib_table_get_flow_hash_config (u32 fib_index, fib_protocol_t proto) { - switch (proto) - { - case FIB_PROTOCOL_IP4: - return (ip4_fib_table_get_flow_hash_config(fib_index)); - case FIB_PROTOCOL_IP6: - return (ip6_fib_table_get_flow_hash_config(fib_index)); - case FIB_PROTOCOL_MPLS: - return (mpls_fib_table_get_flow_hash_config(fib_index)); - } - return (0); + fib_table_t *fib; + + fib = fib_table_get(fib_index, proto); + + return (fib->ft_flow_hash_config); } +/** + * @brief Table set flow hash config context. + */ +typedef struct fib_table_set_flow_hash_config_ctx_t_ +{ + /** + * the flow hash config to set + */ + flow_hash_config_t hash_config; +} fib_table_set_flow_hash_config_ctx_t; + +static int +fib_table_set_flow_hash_config_cb (fib_node_index_t fib_entry_index, + void *arg) +{ + fib_table_set_flow_hash_config_ctx_t *ctx = arg; + + fib_entry_set_flow_hash_config(fib_entry_index, ctx->hash_config); + + return (1); +} + +void +fib_table_set_flow_hash_config (u32 fib_index, + fib_protocol_t proto, + flow_hash_config_t hash_config) +{ + fib_table_set_flow_hash_config_ctx_t ctx = { + .hash_config = hash_config, + }; + fib_table_t *fib; + + fib = fib_table_get(fib_index, proto); + fib->ft_flow_hash_config = hash_config; + + fib_table_walk(fib_index, proto, + fib_table_set_flow_hash_config_cb, + &ctx); +} u32 fib_table_get_table_id_for_sw_if_index (fib_protocol_t proto, diff --git a/src/vnet/fib/fib_table.h b/src/vnet/fib/fib_table.h index f24d28b7..21773342 100644 --- a/src/vnet/fib/fib_table.h +++ b/src/vnet/fib/fib_table.h @@ -665,6 +665,25 @@ extern u32 fib_table_create_and_lock(fib_protocol_t proto, extern flow_hash_config_t fib_table_get_flow_hash_config(u32 fib_index, fib_protocol_t proto); +/** + * @brief + * Set the flow hash configured used by the table + * + * @param fib_index + * The index of the FIB + * + * @paran proto + * The protocol of the FIB (and thus the entries therein) + * + * @param hash_config + * The flow-hash config to set + * + * @return none + */ +extern void fib_table_set_flow_hash_config(u32 fib_index, + fib_protocol_t proto, + flow_hash_config_t hash_config); + /** * @brief * Take a reference counting lock on the table diff --git a/src/vnet/fib/fib_test.c b/src/vnet/fib/fib_test.c index d3bdfa35..ddea6b86 100644 --- a/src/vnet/fib/fib_test.c +++ b/src/vnet/fib/fib_test.c @@ -3832,6 +3832,29 @@ fib_test_v4 (void) fib_table_entry_delete(fib_index, 
&pfx_10_10_10_127_s_32, FIB_SOURCE_ADJ); + /* + * change the table's flow-hash config - expect the update to propagete to + * the entries' load-balance objects + */ + flow_hash_config_t old_hash_config, new_hash_config; + + old_hash_config = fib_table_get_flow_hash_config(fib_index, + FIB_PROTOCOL_IP4); + new_hash_config = (IP_FLOW_HASH_SRC_ADDR | + IP_FLOW_HASH_DST_ADDR); + + fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_1_s_32); + dpo = fib_entry_contribute_ip_forwarding(fei); + lb = load_balance_get(dpo->dpoi_index); + FIB_TEST((lb->lb_hash_config == old_hash_config), + "Table and LB hash config match: %U", + format_ip_flow_hash_config, lb->lb_hash_config); + + fib_table_set_flow_hash_config(fib_index, FIB_PROTOCOL_IP4, new_hash_config); + + FIB_TEST((lb->lb_hash_config == new_hash_config), + "Table and LB newhash config match: %U", + format_ip_flow_hash_config, lb->lb_hash_config); /* * CLEANUP diff --git a/src/vnet/fib/ip4_fib.c b/src/vnet/fib/ip4_fib.c index 8e92d851..878b4dbf 100644 --- a/src/vnet/fib/ip4_fib.c +++ b/src/vnet/fib/ip4_fib.c @@ -124,9 +124,7 @@ ip4_create_fib_with_table_id (u32 table_id) fib_table->ft_table_id = v4_fib->table_id = table_id; - fib_table->ft_flow_hash_config = - v4_fib->flow_hash_config = - IP_FLOW_HASH_DEFAULT; + fib_table->ft_flow_hash_config = IP_FLOW_HASH_DEFAULT; v4_fib->fwd_classify_table_index = ~0; v4_fib->rev_classify_table_index = ~0; @@ -233,12 +231,6 @@ ip4_fib_table_get_index_for_sw_if_index (u32 sw_if_index) return (ip4_main.fib_index_by_sw_if_index[sw_if_index]); } -flow_hash_config_t -ip4_fib_table_get_flow_hash_config (u32 fib_index) -{ - return (ip4_fib_get(fib_index)->flow_hash_config); -} - /* * ip4_fib_table_lookup_exact_match * @@ -542,7 +534,7 @@ ip4_show_fib (vlib_main_t * vm, vlib_cli_output (vm, "%U, fib_index %d, flow hash: %U", format_fib_table_name, fib->index, FIB_PROTOCOL_IP4, fib->index, - format_ip_flow_hash_config, fib->flow_hash_config); + format_ip_flow_hash_config, fib_table->ft_flow_hash_config); /* Show summary? */ if (! verbose) diff --git a/src/vnet/fib/ip4_fib.h b/src/vnet/fib/ip4_fib.h index 4cf9e58a..006163b4 100644 --- a/src/vnet/fib/ip4_fib.h +++ b/src/vnet/fib/ip4_fib.h @@ -53,9 +53,6 @@ typedef struct ip4_fib_t_ /* Index into FIB vector. 
*/ u32 index; - /* flow hash configuration */ - flow_hash_config_t flow_hash_config; - /* N-tuple classifier indices */ u32 fwd_classify_table_index; u32 rev_classify_table_index; @@ -149,9 +146,6 @@ u32 ip4_fib_index_from_table_id (u32 table_id) extern u32 ip4_fib_table_get_index_for_sw_if_index(u32 sw_if_index); -extern flow_hash_config_t ip4_fib_table_get_flow_hash_config(u32 fib_index); - - always_inline index_t ip4_fib_forwarding_lookup (u32 fib_index, const ip4_address_t * addr) diff --git a/src/vnet/fib/ip6_fib.c b/src/vnet/fib/ip6_fib.c index d00f4c55..e046b349 100644 --- a/src/vnet/fib/ip6_fib.c +++ b/src/vnet/fib/ip6_fib.c @@ -74,9 +74,7 @@ create_fib_with_table_id (u32 table_id) fib_table->ft_table_id = v6_fib->table_id = table_id; - fib_table->ft_flow_hash_config = - v6_fib->flow_hash_config = - IP_FLOW_HASH_DEFAULT; + fib_table->ft_flow_hash_config = IP_FLOW_HASH_DEFAULT; vnet_ip6_fib_init(fib_table->ft_index); fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_IP6); @@ -390,12 +388,6 @@ u32 ip6_fib_table_fwding_lookup_with_if_index (ip6_main_t * im, return ip6_fib_table_fwding_lookup(im, fib_index, dst); } -flow_hash_config_t -ip6_fib_table_get_flow_hash_config (u32 fib_index) -{ - return (ip6_fib_get(fib_index)->flow_hash_config); -} - u32 ip6_fib_table_get_index_for_sw_if_index (u32 sw_if_index) { @@ -643,7 +635,7 @@ ip6_show_fib (vlib_main_t * vm, vlib_cli_output (vm, "%s, fib_index %d, flow hash: %U", fib_table->ft_desc, fib->index, - format_ip_flow_hash_config, fib->flow_hash_config); + format_ip_flow_hash_config, fib_table->ft_flow_hash_config); /* Show summary? */ if (! verbose) diff --git a/src/vnet/fib/ip6_fib.h b/src/vnet/fib/ip6_fib.h index e2f28452..2bf8ef78 100644 --- a/src/vnet/fib/ip6_fib.h +++ b/src/vnet/fib/ip6_fib.h @@ -133,7 +133,5 @@ u32 ip6_fib_index_from_table_id (u32 table_id) extern u32 ip6_fib_table_get_index_for_sw_if_index(u32 sw_if_index); -extern flow_hash_config_t ip6_fib_table_get_flow_hash_config(u32 fib_index); - #endif diff --git a/src/vnet/fib/mpls_fib.c b/src/vnet/fib/mpls_fib.c index 19f9f3c1..ca6271fe 100644 --- a/src/vnet/fib/mpls_fib.c +++ b/src/vnet/fib/mpls_fib.c @@ -61,11 +61,6 @@ */ static index_t mpls_fib_drop_dpo_index = INDEX_INVALID; -/** - * FIXME - */ -#define MPLS_FLOW_HASH_DEFAULT 0 - static inline u32 mpls_fib_entry_mk_key (mpls_label_t label, mpls_eos_bit_t eos) @@ -109,10 +104,8 @@ mpls_fib_create_with_table_id (u32 table_id) hash_set (mpls_main.fib_index_by_table_id, table_id, fib_table->ft_index); - fib_table->ft_table_id = - table_id; - fib_table->ft_flow_hash_config = - MPLS_FLOW_HASH_DEFAULT; + fib_table->ft_table_id = table_id; + fib_table->ft_flow_hash_config = MPLS_FLOW_HASH_DEFAULT; fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_MPLS); @@ -350,13 +343,6 @@ mpls_fib_forwarding_table_reset (mpls_fib_t *mf, mf->mf_lbs[key] = mpls_fib_drop_dpo_index; } -flow_hash_config_t -mpls_fib_table_get_flow_hash_config (u32 fib_index) -{ - // FIXME. 
- return (0); -} - void mpls_fib_table_walk (mpls_fib_t *mpls_fib, fib_table_walk_fn_t fn, diff --git a/src/vnet/fib/mpls_fib.h b/src/vnet/fib/mpls_fib.h index 78a61a14..dfb8b7fc 100644 --- a/src/vnet/fib/mpls_fib.h +++ b/src/vnet/fib/mpls_fib.h @@ -33,6 +33,11 @@ #define MPLS_FIB_KEY_SIZE 21 #define MPLS_FIB_DB_SIZE (1 << (MPLS_FIB_KEY_SIZE-1)) +/** + * There are no options for controlling the MPLS flow hash + */ +#define MPLS_FLOW_HASH_DEFAULT 0 + typedef struct mpls_fib_t_ { /** @@ -130,6 +135,4 @@ mpls_fib_table_get_index_for_sw_if_index (u32 sw_if_index) return (mm->fib_index_by_sw_if_index[sw_if_index]); } -extern flow_hash_config_t mpls_fib_table_get_flow_hash_config(u32 fib_index); - #endif diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index 697d2169..d85f76d4 100644 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -3020,7 +3020,6 @@ VLIB_CLI_COMMAND (lookup_test_command, static) = int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config) { - ip4_fib_t *fib; u32 fib_index; fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id); @@ -3028,9 +3027,9 @@ vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config) if (~0 == fib_index) return VNET_API_ERROR_NO_SUCH_FIB; - fib = ip4_fib_get (fib_index); + fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4, + flow_hash_config); - fib->flow_hash_config = flow_hash_config; return 0; } diff --git a/src/vnet/ip/ip6.h b/src/vnet/ip/ip6.h index bf7ec7d5..d623c95f 100644 --- a/src/vnet/ip/ip6.h +++ b/src/vnet/ip/ip6.h @@ -71,9 +71,6 @@ typedef struct /* Index into FIB vector. */ u32 index; - - /* flow hash configuration */ - flow_hash_config_t flow_hash_config; } ip6_fib_t; typedef struct ip6_mfib_t diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c index 3bc07d0e..0ad96d0b 100644 --- a/src/vnet/ip/ip6_forward.c +++ b/src/vnet/ip/ip6_forward.c @@ -267,11 +267,10 @@ ip6_lookup_inline (vlib_main_t * vm, (vnet_buffer (p0)->sw_if_index[VLIB_TX] == (u32) ~ 0) ? 
fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX]; - flow_hash_config0 = ip6_fib_get (fib_index0)->flow_hash_config; - lbi0 = ip6_fib_table_fwding_lookup (im, fib_index0, dst_addr0); lb0 = load_balance_get (lbi0); + flow_hash_config0 = lb0->lb_hash_config; vnet_buffer (p0)->ip.flow_hash = 0; ASSERT (lb0->lb_n_buckets > 0); @@ -3156,7 +3155,6 @@ VLIB_CLI_COMMAND (test_link_command, static) = int vnet_set_ip6_flow_hash (u32 table_id, u32 flow_hash_config) { - ip6_fib_t *fib; u32 fib_index; fib_index = fib_table_find (FIB_PROTOCOL_IP6, table_id); @@ -3164,10 +3162,10 @@ vnet_set_ip6_flow_hash (u32 table_id, u32 flow_hash_config) if (~0 == fib_index) return VNET_API_ERROR_NO_SUCH_FIB; - fib = ip6_fib_get (fib_index); + fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP6, + flow_hash_config); - fib->flow_hash_config = flow_hash_config; - return 1; + return 0; } static clib_error_t * @@ -3199,7 +3197,7 @@ set_ip6_flow_hash_command_fn (vlib_main_t * vm, rv = vnet_set_ip6_flow_hash (table_id, flow_hash_config); switch (rv) { - case 1: + case 0: break; case -1: diff --git a/src/vnet/ip/ip_api.c b/src/vnet/ip/ip_api.c index 9c9cb4a4..2680d601 100644 --- a/src/vnet/ip/ip_api.c +++ b/src/vnet/ip/ip_api.c @@ -1336,9 +1336,17 @@ static void set_ip6_flow_hash (vl_api_set_ip_flow_hash_t * mp) { vl_api_set_ip_flow_hash_reply_t *rmp; - int rv = VNET_API_ERROR_UNIMPLEMENTED; + int rv; + u32 table_id; + flow_hash_config_t flow_hash_config = 0; + + table_id = ntohl (mp->vrf_id); + +#define _(a,b) if (mp->a) flow_hash_config |= b; + foreach_flow_hash_bit; +#undef _ - clib_warning ("unimplemented..."); + rv = vnet_set_ip6_flow_hash (table_id, flow_hash_config); REPLY_MACRO (VL_API_SET_IP_FLOW_HASH_REPLY); } diff --git a/src/vnet/mpls/mpls_tunnel.c b/src/vnet/mpls/mpls_tunnel.c index 1254dd9d..457d48eb 100644 --- a/src/vnet/mpls/mpls_tunnel.c +++ b/src/vnet/mpls/mpls_tunnel.c @@ -24,6 +24,7 @@ #include #include #include +#include /** * @brief pool of tunnel instances @@ -200,9 +201,20 @@ mpls_tunnel_mk_lb (mpls_tunnel_t *mt, { flow_hash_config_t fhc; - fhc = 0; // FIXME - /* fhc = fib_table_get_flow_hash_config(fib_entry->fe_fib_index, */ - /* dpo_proto_to_fib(lb_proto)); */ + switch (linkt) + { + case VNET_LINK_MPLS: + fhc = MPLS_FLOW_HASH_DEFAULT; + break; + case VNET_LINK_IP4: + case VNET_LINK_IP6: + fhc = IP_FLOW_HASH_DEFAULT; + break; + default: + fhc = 0; + break; + } + dpo_set(dpo_lb, DPO_LOAD_BALANCE, lb_proto, diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index f1b6877f..9c230574 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -971,6 +971,9 @@ ip6_reset_fib_t_handler (vl_api_reset_fib_t * mp) vec_reset_length (sw_if_indices_to_shut); + /* Set the flow hash for this fib to the default */ + vnet_set_ip6_flow_hash (fib->table_id, IP_FLOW_HASH_DEFAULT); + /* Shut down interfaces in this FIB / clean out intfc routes */ pool_foreach (si, im->sw_interfaces, ({ diff --git a/test/test_ip4.py b/test/test_ip4.py index ed364b62..3fe61e26 100644 --- a/test/test_ip4.py +++ b/test/test_ip4.py @@ -766,5 +766,142 @@ class TestIPSubNets(VppTestCase): rx = self.pg1.get_capture(1) +class TestIPLoadBalance(VppTestCase): + """ IPv4 Load-Balancing """ + + def setUp(self): + super(TestIPLoadBalance, self).setUp() + + self.create_pg_interfaces(range(5)) + + for i in self.pg_interfaces: + i.admin_up() + i.config_ip4() + i.resolve_arp() + + def tearDown(self): + super(TestIPLoadBalance, self).tearDown() + for i in self.pg_interfaces: + i.unconfig_ip4() + i.admin_down() + + def 
send_and_expect_load_balancing(self, input, pkts, outputs): + input.add_stream(pkts) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + for oo in outputs: + rx = oo._get_capture(1) + self.assertNotEqual(0, len(rx)) + + def test_ip_load_balance(self): + """ IP Load-Balancing """ + + # + # An array of packets that differ only in the destination port + # + port_pkts = [] + + # + # An array of packets that differ only in the source address + # + src_pkts = [] + + for ii in range(65): + port_pkts.append((Ether(src=self.pg0.remote_mac, + dst=self.pg0.local_mac) / + IP(dst="10.0.0.1", src="20.0.0.1") / + UDP(sport=1234, dport=1234 + ii) / + Raw('\xa5' * 100))) + src_pkts.append((Ether(src=self.pg0.remote_mac, + dst=self.pg0.local_mac) / + IP(dst="10.0.0.1", src="20.0.0.%d" % ii) / + UDP(sport=1234, dport=1234) / + Raw('\xa5' * 100))) + + route_10_0_0_1 = VppIpRoute(self, "10.0.0.1", 32, + [VppRoutePath(self.pg1.remote_ip4, + self.pg1.sw_if_index), + VppRoutePath(self.pg2.remote_ip4, + self.pg2.sw_if_index)]) + route_10_0_0_1.add_vpp_config() + + # + # inject the packet on pg0 - expect load-balancing across the 2 paths + # - since the default hash config is to use IP src,dst and port + # src,dst + # We are not going to ensure equal amounts of packets across each link, + # since the hash algorithm is statistical and therefore this can never + # be guaranteed. But wuth 64 different packets we do expect some + # balancing. So instead just ensure there is traffic on each link. + # + self.send_and_expect_load_balancing(self.pg0, port_pkts, + [self.pg1, self.pg2]) + self.send_and_expect_load_balancing(self.pg0, src_pkts, + [self.pg1, self.pg2]) + + # + # change the flow hash config so it's only IP src,dst + # - now only the stream with differing source address will + # load-balance + # + self.vapi.set_ip_flow_hash(0, src=1, dst=1, sport=0, dport=0) + + self.send_and_expect_load_balancing(self.pg0, src_pkts, + [self.pg1, self.pg2]) + + self.pg0.add_stream(port_pkts) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx = self.pg2.get_capture(len(port_pkts)) + + # + # change the flow hash config back to defaults + # + self.vapi.set_ip_flow_hash(0, src=1, dst=1, sport=1, dport=1) + + # + # Recursive prefixes + # - testing that 2 stages of load-balancing occurs and there is no + # polarisation (i.e. 
only 2 of 4 paths are used) + # + port_pkts = [] + src_pkts = [] + + for ii in range(257): + port_pkts.append((Ether(src=self.pg0.remote_mac, + dst=self.pg0.local_mac) / + IP(dst="1.1.1.1", src="20.0.0.1") / + UDP(sport=1234, dport=1234 + ii) / + Raw('\xa5' * 100))) + src_pkts.append((Ether(src=self.pg0.remote_mac, + dst=self.pg0.local_mac) / + IP(dst="1.1.1.1", src="20.0.0.%d" % ii) / + UDP(sport=1234, dport=1234) / + Raw('\xa5' * 100))) + + route_10_0_0_2 = VppIpRoute(self, "10.0.0.2", 32, + [VppRoutePath(self.pg3.remote_ip4, + self.pg3.sw_if_index), + VppRoutePath(self.pg4.remote_ip4, + self.pg4.sw_if_index)]) + route_10_0_0_2.add_vpp_config() + + route_1_1_1_1 = VppIpRoute(self, "1.1.1.1", 32, + [VppRoutePath("10.0.0.2", 0xffffffff), + VppRoutePath("10.0.0.1", 0xffffffff)]) + route_1_1_1_1.add_vpp_config() + + # + # inject the packet on pg0 - expect load-balancing across all 4 paths + # + self.vapi.cli("clear trace") + self.send_and_expect_load_balancing(self.pg0, port_pkts, + [self.pg1, self.pg2, + self.pg3, self.pg4]) + self.send_and_expect_load_balancing(self.pg0, src_pkts, + [self.pg1, self.pg2, + self.pg3, self.pg4]) + if __name__ == '__main__': unittest.main(testRunner=VppTestRunner) diff --git a/test/test_ip6.py b/test/test_ip6.py index 3ba09230..ebeffe20 100644 --- a/test/test_ip6.py +++ b/test/test_ip6.py @@ -1133,5 +1133,154 @@ class TestIPDisabled(VppTestCase): self.send_and_assert_no_replies(self.pg1, pm, "IPv6 disabled") +class TestIP6LoadBalance(VppTestCase): + """ IPv6 Load-Balancing """ + + def setUp(self): + super(TestIP6LoadBalance, self).setUp() + + self.create_pg_interfaces(range(5)) + + for i in self.pg_interfaces: + i.admin_up() + i.config_ip6() + i.resolve_ndp() + + def tearDown(self): + super(TestIP6LoadBalance, self).tearDown() + for i in self.pg_interfaces: + i.unconfig_ip6() + i.admin_down() + + def send_and_expect_load_balancing(self, input, pkts, outputs): + input.add_stream(pkts) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + for oo in outputs: + rx = oo._get_capture(1) + self.assertNotEqual(0, len(rx)) + + def test_ip6_load_balance(self): + """ IPv6 Load-Balancing """ + + # + # An array of packets that differ only in the destination port + # + port_pkts = [] + + # + # An array of packets that differ only in the source address + # + src_pkts = [] + + for ii in range(65): + port_pkts.append((Ether(src=self.pg0.remote_mac, + dst=self.pg0.local_mac) / + IPv6(dst="3000::1", src="3000:1::1") / + UDP(sport=1234, dport=1234 + ii) / + Raw('\xa5' * 100))) + src_pkts.append((Ether(src=self.pg0.remote_mac, + dst=self.pg0.local_mac) / + IPv6(dst="3000::1", src="3000:1::%d" % ii) / + UDP(sport=1234, dport=1234) / + Raw('\xa5' * 100))) + + route_3000_1 = VppIpRoute(self, "3000::1", 128, + [VppRoutePath(self.pg1.remote_ip6, + self.pg1.sw_if_index, + is_ip6=1), + VppRoutePath(self.pg2.remote_ip6, + self.pg2.sw_if_index, + is_ip6=1)], + is_ip6=1) + route_3000_1.add_vpp_config() + + # + # inject the packet on pg0 - expect load-balancing across the 2 paths + # - since the default hash config is to use IP src,dst and port + # src,dst + # We are not going to ensure equal amounts of packets across each link, + # since the hash algorithm is statistical and therefore this can never + # be guaranteed. But wuth 64 different packets we do expect some + # balancing. So instead just ensure there is traffic on each link. 
+ # + self.send_and_expect_load_balancing(self.pg0, port_pkts, + [self.pg1, self.pg2]) + self.send_and_expect_load_balancing(self.pg0, src_pkts, + [self.pg1, self.pg2]) + + # + # change the flow hash config so it's only IP src,dst + # - now only the stream with differing source address will + # load-balance + # + self.vapi.set_ip_flow_hash(0, is_ip6=1, src=1, dst=1, sport=0, dport=0) + + self.send_and_expect_load_balancing(self.pg0, src_pkts, + [self.pg1, self.pg2]) + + self.pg0.add_stream(port_pkts) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx = self.pg2.get_capture(len(port_pkts)) + + # + # change the flow hash config back to defaults + # + self.vapi.set_ip_flow_hash(0, is_ip6=1, src=1, dst=1, sport=1, dport=1) + + # + # Recursive prefixes + # - testing that 2 stages of load-balancing occurs and there is no + # polarisation (i.e. only 2 of 4 paths are used) + # + port_pkts = [] + src_pkts = [] + + for ii in range(257): + port_pkts.append((Ether(src=self.pg0.remote_mac, + dst=self.pg0.local_mac) / + IPv6(dst="4000::1", src="4000:1::1") / + UDP(sport=1234, dport=1234 + ii) / + Raw('\xa5' * 100))) + src_pkts.append((Ether(src=self.pg0.remote_mac, + dst=self.pg0.local_mac) / + IPv6(dst="4000::1", src="4000:1::%d" % ii) / + UDP(sport=1234, dport=1234) / + Raw('\xa5' * 100))) + + route_3000_2 = VppIpRoute(self, "3000::2", 128, + [VppRoutePath(self.pg3.remote_ip6, + self.pg3.sw_if_index, + is_ip6=1), + VppRoutePath(self.pg4.remote_ip6, + self.pg4.sw_if_index, + is_ip6=1)], + is_ip6=1) + route_3000_2.add_vpp_config() + + route_4000_1 = VppIpRoute(self, "4000::1", 128, + [VppRoutePath("3000::1", + 0xffffffff, + is_ip6=1), + VppRoutePath("3000::2", + 0xffffffff, + is_ip6=1)], + is_ip6=1) + route_4000_1.add_vpp_config() + + # + # inject the packet on pg0 - expect load-balancing across all 4 paths + # + self.vapi.cli("clear trace") + self.send_and_expect_load_balancing(self.pg0, port_pkts, + [self.pg1, self.pg2, + self.pg3, self.pg4]) + self.send_and_expect_load_balancing(self.pg0, src_pkts, + [self.pg1, self.pg2, + self.pg3, self.pg4]) + + if __name__ == '__main__': unittest.main(testRunner=VppTestRunner) diff --git a/test/vpp_papi_provider.py b/test/vpp_papi_provider.py index d94c0cb6..83c4a83b 100644 --- a/test/vpp_papi_provider.py +++ b/test/vpp_papi_provider.py @@ -277,6 +277,25 @@ class VppPapiProvider(object): {'sw_if_index': sw_if_index, 'suppress': suppress}) + def set_ip_flow_hash(self, + table_id, + src=1, + dst=1, + sport=1, + dport=1, + proto=1, + reverse=0, + is_ip6=0): + return self.api(self.papi.set_ip_flow_hash, + {'vrf_id': table_id, + 'src': src, + 'dst': dst, + 'dport': dport, + 'sport': sport, + 'proto': proto, + 'reverse': reverse, + 'is_ipv6': is_ip6}) + def ip6_nd_proxy(self, address, sw_if_index, is_del=0): return self.api(self.papi.ip6nd_proxy_add_del, {'address': address, -- cgit 1.2.3-korg From 1c7d4858369881ac4cc287c4fa16eff2e9890c1c Mon Sep 17 00:00:00 2001 From: Jon Loeliger Date: Tue, 2 May 2017 11:06:23 -0500 Subject: Prevent Bridge Domain operations on BD 0. The default bridge domain, 0, is created automatically with static features. It should be modified by neither the CLI nor the API. So add tests for, and reject any operation on BD 0. The new API error message BD_NOT_MODIFIABLE is returned in such cases. 
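In practice the rejection is an early test on the bridge-domain id in every handler and CLI path touched below; a minimal standalone sketch of that guard (the helper name is hypothetical, the error value mirrors the api_errno.h hunk below):

    /* reject any operation that targets the static default bridge domain */
    #define VNET_API_ERROR_BD_NOT_MODIFIABLE (-121)

    static int
    bd_id_check (unsigned int bd_id)
    {
      if (bd_id == 0)       /* BD 0 is created at init and must stay untouched */
        return VNET_API_ERROR_BD_NOT_MODIFIABLE;
      return 0;             /* caller proceeds with its hash lookup / modification */
    }
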
Change-Id: Iaf3dd80c4f43cf41689ca55756a0a3525420cd12 Signed-off-by: Jon Loeliger --- src/vnet/api_errno.h | 3 ++- src/vnet/l2/l2_api.c | 19 ++++++++++++++++++- src/vnet/l2/l2_bd.c | 45 ++++++++++++++++++++++++++++++++++++++++++++- src/vpp/api/api.c | 6 ++++++ 4 files changed, 70 insertions(+), 3 deletions(-) (limited to 'src/vpp/api') diff --git a/src/vnet/api_errno.h b/src/vnet/api_errno.h index b87c197f..e694fbf1 100644 --- a/src/vnet/api_errno.h +++ b/src/vnet/api_errno.h @@ -110,7 +110,8 @@ _(SVM_SEGMENT_CREATE_FAIL, -117, "svm segment create fail") \ _(APPLICATION_NOT_ATTACHED, -118, "application not attached") \ _(BD_ALREADY_EXISTS, -119, "Bridge domain already exists") \ _(BD_IN_USE, -120, "Bridge domain has member interfaces") \ -_(UNSUPPORTED, -121, "Unsupported") +_(BD_NOT_MODIFIABLE, -121, "Default bridge domain 0 can be neither deleted nor modified") \ +_(UNSUPPORTED, -122, "Unsupported") typedef enum { diff --git a/src/vnet/l2/l2_api.c b/src/vnet/l2/l2_api.c index 5a3c8dc2..8cc7c794 100644 --- a/src/vnet/l2/l2_api.c +++ b/src/vnet/l2/l2_api.c @@ -310,7 +310,15 @@ vl_api_bridge_domain_set_mac_age_t_handler (vl_api_bridge_domain_set_mac_age_t vl_api_bridge_domain_set_mac_age_reply_t *rmp; int rv = 0; u32 bd_id = ntohl (mp->bd_id); - uword *p = hash_get (bdm->bd_index_by_bd_id, bd_id); + uword *p; + + if (bd_id == 0) + { + rv = VNET_API_ERROR_BD_NOT_MODIFIABLE; + goto out; + } + + p = hash_get (bdm->bd_index_by_bd_id, bd_id); if (p == 0) { rv = VNET_API_ERROR_NO_SUCH_ENTRY; @@ -401,10 +409,13 @@ vl_api_bridge_domain_dump_t_handler (vl_api_bridge_domain_dump_t * mp) return; bd_id = ntohl (mp->bd_id); + if (bd_id == 0) + return; bd_index = (bd_id == ~0) ? 0 : bd_find_index (bdm, bd_id); ASSERT (bd_index != ~0); end = (bd_id == ~0) ? vec_len (l2im->bd_configs) : bd_index + 1; + for (; bd_index < end; bd_index++) { bd_config = l2input_bd_config_from_index (l2im, bd_index); @@ -437,6 +448,12 @@ vl_api_bridge_flags_t_handler (vl_api_bridge_flags_t * mp) u32 flags = ntohl (mp->feature_bitmap); uword *p; + if (bd_id == 0) + { + rv = VNET_API_ERROR_BD_NOT_MODIFIABLE; + goto out; + } + p = hash_get (bdm->bd_index_by_bd_id, bd_id); if (p == 0) { diff --git a/src/vnet/l2/l2_bd.c b/src/vnet/l2/l2_bd.c index 7c55789b..4ebbb547 100644 --- a/src/vnet/l2/l2_bd.c +++ b/src/vnet/l2/l2_bd.c @@ -304,6 +304,10 @@ bd_learn (vlib_main_t * vm, goto done; } + if (bd_id == 0) + return clib_error_return (0, + "No operations on the default bridge domain are supported"); + p = hash_get (bdm->bd_index_by_bd_id, bd_id); if (p == 0) @@ -369,6 +373,10 @@ bd_fwd (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) goto done; } + if (bd_id == 0) + return clib_error_return (0, + "No operations on the default bridge domain are supported"); + p = hash_get (bdm->bd_index_by_bd_id, bd_id); if (p == 0) @@ -436,6 +444,10 @@ bd_flood (vlib_main_t * vm, goto done; } + if (bd_id == 0) + return clib_error_return (0, + "No operations on the default bridge domain are supported"); + p = hash_get (bdm->bd_index_by_bd_id, bd_id); if (p == 0) @@ -502,6 +514,10 @@ bd_uu_flood (vlib_main_t * vm, goto done; } + if (bd_id == 0) + return clib_error_return (0, + "No operations on the default bridge domain are supported"); + p = hash_get (bdm->bd_index_by_bd_id, bd_id); if (p == 0) @@ -568,6 +584,10 @@ bd_arp_term (vlib_main_t * vm, goto done; } + if (bd_id == 0) + return clib_error_return (0, + "No operations on the default bridge domain are supported"); + p = hash_get (bdm->bd_index_by_bd_id, bd_id); if (p) bd_index = 
*p; @@ -607,6 +627,10 @@ bd_mac_age (vlib_main_t * vm, goto done; } + if (bd_id == 0) + return clib_error_return (0, + "No operations on the default bridge domain are supported"); + p = hash_get (bdm->bd_index_by_bd_id, bd_id); if (p == 0) @@ -780,6 +804,10 @@ bd_arp_entry (vlib_main_t * vm, goto done; } + if (bd_id == 0) + return clib_error_return (0, + "No operations on the default bridge domain are supported"); + p = hash_get (bdm->bd_index_by_bd_id, bd_id); if (p) @@ -900,7 +928,7 @@ bd_show (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) u32 bd_id = ~0; uword *p; - start = 0; + start = 1; end = vec_len (l2input_main.bd_configs); if (unformat (input, "%d", &bd_id)) @@ -914,6 +942,10 @@ bd_show (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) if (unformat (input, "arp")) arp = 1; + if (bd_id == 0) + return clib_error_return (0, + "No operations on the default bridge domain are supported"); + p = hash_get (bdm->bd_index_by_bd_id, bd_id); if (p) bd_index = *p; @@ -1125,6 +1157,8 @@ bd_add_del (l2_bridge_domain_add_del_args_t * a) { if (bd_index == ~0) return VNET_API_ERROR_NO_SUCH_ENTRY; + if (bd_index == 0) + return VNET_API_ERROR_BD_NOT_MODIFIABLE; if (vec_len (l2input_main.bd_configs[bd_index].members)) return VNET_API_ERROR_BD_IN_USE; rv = bd_delete (bdm, bd_index); @@ -1188,6 +1222,12 @@ bd_add_del_command_fn (vlib_main_t * vm, unformat_input_t * input, goto done; } + if (bd_id == 0) + { + error = clib_error_return (0, "bridge domain 0 can not be modified"); + goto done; + } + if (mac_age > 255) { error = clib_error_return (0, "mac age must be less than 256"); @@ -1218,6 +1258,9 @@ bd_add_del_command_fn (vlib_main_t * vm, unformat_input_t * input, case VNET_API_ERROR_NO_SUCH_ENTRY: error = clib_error_return (0, "bridge domain id does not exist"); goto done; + case VNET_API_ERROR_BD_NOT_MODIFIABLE: + error = clib_error_return (0, "bridge domain 0 can not be modified"); + goto done; default: error = clib_error_return (0, "bd_add_del returned %d", rv); goto done; diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index 9c230574..baf45d5c 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -446,6 +446,12 @@ vl_api_bd_ip_mac_add_del_t_handler (vl_api_bd_ip_mac_add_del_t * mp) u32 bd_index; uword *p; + if (bd_id == 0) + { + rv = VNET_API_ERROR_BD_NOT_MODIFIABLE; + goto out; + } + p = hash_get (bdm->bd_index_by_bd_id, bd_id); if (p == 0) { -- cgit 1.2.3-korg From e72be39cd0f498178fd62dfc0a0b0daa2b633f62 Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Wed, 26 Apr 2017 13:59:20 -0700 Subject: A sprinkling of const in vlibmemory/api.h and friends Change-Id: I953ebb37eeec7de0c4a6b00258c3c67a83cbc020 Signed-off-by: Neale Ranns --- src/svm/svm.c | 6 +++--- src/svm/svm.h | 10 +++++----- src/svm/svmdb.h | 2 +- src/vlibapi/api.h | 11 ++++++----- src/vlibapi/api_shared.c | 4 ++-- src/vlibmemory/api.h | 20 ++++++++++---------- src/vlibmemory/memory_client.c | 16 +++++++++------- src/vlibmemory/memory_shared.c | 4 ++-- src/vlibmemory/memory_vlib.c | 4 ++-- src/vpp/api/api_main.c | 2 +- 10 files changed, 41 insertions(+), 38 deletions(-) (limited to 'src/vpp/api') diff --git a/src/svm/svm.c b/src/svm/svm.c index e4ca98e1..97add5a7 100644 --- a/src/svm/svm.c +++ b/src/svm/svm.c @@ -796,7 +796,7 @@ svm_region_init (void) } void -svm_region_init_chroot (char *root_path) +svm_region_init_chroot (const char *root_path) { svm_map_region_args_t _a, *a = &_a; @@ -813,7 +813,7 @@ svm_region_init_chroot (char *root_path) } void -svm_region_init_chroot_uid_gid 
(char *root_path, int uid, int gid) +svm_region_init_chroot_uid_gid (const char *root_path, int uid, int gid) { svm_map_region_args_t _a, *a = &_a; @@ -1151,7 +1151,7 @@ svm_client_scan_this_region_nolock (svm_region_t * rp) * Scan svm regions for dead clients */ void -svm_client_scan (char *root_path) +svm_client_scan (const char *root_path) { int i, j; svm_main_region_t *mp; diff --git a/src/svm/svm.h b/src/svm/svm.h index 0b87dbcb..06797fa1 100644 --- a/src/svm/svm.h +++ b/src/svm/svm.h @@ -69,8 +69,8 @@ typedef struct svm_region_ typedef struct svm_map_region_args_ { - char *root_path; /* NULL means use the truly global arena */ - char *name; + const char *root_path; /* NULL means use the truly global arena */ + const char *name; u64 baseva; u64 size; u64 pvt_heap_size; @@ -115,12 +115,12 @@ typedef struct void *svm_region_find_or_create (svm_map_region_args_t * a); void svm_region_init (void); -void svm_region_init_chroot (char *root_path); -void svm_region_init_chroot_uid_gid (char *root_path, int uid, int gid); +void svm_region_init_chroot (const char *root_path); +void svm_region_init_chroot_uid_gid (const char *root_path, int uid, int gid); void svm_region_init_args (svm_map_region_args_t * a); void svm_region_exit (void); void svm_region_unmap (void *rp_arg); -void svm_client_scan (char *root_path); +void svm_client_scan (const char *root_path); void svm_client_scan_this_region_nolock (svm_region_t * rp); u8 *shm_name_from_svm_map_region_args (svm_map_region_args_t * a); diff --git a/src/svm/svmdb.h b/src/svm/svmdb.h index e02628a0..e35be8aa 100644 --- a/src/svm/svmdb.h +++ b/src/svm/svmdb.h @@ -83,7 +83,7 @@ typedef struct typedef struct { - char *root_path; + const char *root_path; uword size; u32 uid; u32 gid; diff --git a/src/vlibapi/api.h b/src/vlibapi/api.h index a62fa644..7538050e 100644 --- a/src/vlibapi/api.h +++ b/src/vlibapi/api.h @@ -127,7 +127,7 @@ typedef struct void (**msg_cleanup_handlers) (void *); void (**msg_endian_handlers) (void *); void (**msg_print_handlers) (void *, void *); - char **msg_names; + const char **msg_names; u8 *message_bounce; u8 *is_mp_safe; struct ring_alloc_ *arings; @@ -195,8 +195,8 @@ typedef struct /* client side message index hash table */ uword *msg_index_by_name_and_crc; - char *region_name; - char *root_path; + const char *region_name; + const char *root_path; /* Replay in progress? 
*/ int replay_in_progress; @@ -276,8 +276,9 @@ void vl_msg_api_register_pd_handler (void *handler, int vl_msg_api_pd_handler (void *mp, int rv); void vl_msg_api_set_first_available_msg_id (u16 first_avail); -u16 vl_msg_api_get_msg_ids (char *name, int n); -void vl_msg_api_add_msg_name_crc (api_main_t * am, char *string, u32 id); +u16 vl_msg_api_get_msg_ids (const char *name, int n); +void vl_msg_api_add_msg_name_crc (api_main_t * am, const char *string, + u32 id); u32 vl_api_get_msg_index (u8 * name_and_crc); /* node_serialize.c prototypes */ diff --git a/src/vlibapi/api_shared.c b/src/vlibapi/api_shared.c index 6774e3dd..0817f38e 100644 --- a/src/vlibapi/api_shared.c +++ b/src/vlibapi/api_shared.c @@ -828,7 +828,7 @@ vl_msg_api_set_first_available_msg_id (u16 first_avail) } u16 -vl_msg_api_get_msg_ids (char *name, int n) +vl_msg_api_get_msg_ids (const char *name, int n) { api_main_t *am = &api_main; u8 *name_copy; @@ -872,7 +872,7 @@ vl_msg_api_get_msg_ids (char *name, int n) } void -vl_msg_api_add_msg_name_crc (api_main_t * am, char *string, u32 id) +vl_msg_api_add_msg_name_crc (api_main_t * am, const char *string, u32 id) { uword *p; diff --git a/src/vlibmemory/api.h b/src/vlibmemory/api.h index 8e44c20d..c195e181 100644 --- a/src/vlibmemory/api.h +++ b/src/vlibmemory/api.h @@ -123,20 +123,20 @@ void *vl_msg_api_alloc_or_null (int nbytes); void *vl_msg_api_alloc_as_if_client (int nbytes); void *vl_msg_api_alloc_as_if_client_or_null (int nbytes); void vl_msg_api_free (void *a); -int vl_map_shmem (char *region_name, int is_vlib); +int vl_map_shmem (const char *region_name, int is_vlib); void vl_register_mapped_shmem_region (svm_region_t * rp); void vl_unmap_shmem (void); void vl_msg_api_send_shmem (unix_shared_memory_queue_t * q, u8 * elem); void vl_msg_api_send_shmem_nolock (unix_shared_memory_queue_t * q, u8 * elem); void vl_msg_api_send (vl_api_registration_t * rp, u8 * elem); -int vl_client_connect (char *name, int ctx_quota, int input_queue_size); +int vl_client_connect (const char *name, int ctx_quota, int input_queue_size); void vl_client_disconnect (void); unix_shared_memory_queue_t *vl_api_client_index_to_input_queue (u32 index); vl_api_registration_t *vl_api_client_index_to_registration (u32 index); -int vl_client_api_map (char *region_name); +int vl_client_api_map (const char *region_name); void vl_client_api_unmap (void); -void vl_set_memory_region_name (char *name); -void vl_set_memory_root_path (char *root_path); +void vl_set_memory_region_name (const char *name); +void vl_set_memory_root_path (const char *root_path); void vl_set_memory_uid (int uid); void vl_set_memory_gid (int gid); void vl_set_global_memory_baseva (u64 baseva); @@ -146,12 +146,12 @@ void vl_set_global_pvt_heap_size (u64 size); void vl_set_api_pvt_heap_size (u64 size); void vl_enable_disable_memory_api (vlib_main_t * vm, int yesno); void vl_client_disconnect_from_vlib (void); -int vl_client_connect_to_vlib (char *svm_name, char *client_name, - int rx_queue_size); -int vl_client_connect_to_vlib_no_rx_pthread (char *svm_name, - char *client_name, +int vl_client_connect_to_vlib (const char *svm_name, + const char *client_name, int rx_queue_size); +int vl_client_connect_to_vlib_no_rx_pthread (const char *svm_name, + const char *client_name, int rx_queue_size); -u16 vl_client_get_first_plugin_msg_id (char *plugin_name); +u16 vl_client_get_first_plugin_msg_id (const char *plugin_name); void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length); diff --git a/src/vlibmemory/memory_client.c 
b/src/vlibmemory/memory_client.c index 25b06f65..d48a4fa1 100644 --- a/src/vlibmemory/memory_client.c +++ b/src/vlibmemory/memory_client.c @@ -155,7 +155,7 @@ noop_handler (void *notused) } int -vl_client_connect (char *name, int ctx_quota, int input_queue_size) +vl_client_connect (const char *name, int ctx_quota, int input_queue_size) { svm_region_t *svm; vl_api_memclnt_create_t *mp; @@ -326,7 +326,7 @@ _(MEMCLNT_DELETE_REPLY, memclnt_delete_reply) int -vl_client_api_map (char *region_name) +vl_client_api_map (const char *region_name) { int rv; @@ -354,7 +354,8 @@ vl_client_api_unmap (void) } static int -connect_to_vlib_internal (char *svm_name, char *client_name, +connect_to_vlib_internal (const char *svm_name, + const char *client_name, int rx_queue_size, int want_pthread) { int rv = 0; @@ -388,15 +389,16 @@ connect_to_vlib_internal (char *svm_name, char *client_name, } int -vl_client_connect_to_vlib (char *svm_name, char *client_name, - int rx_queue_size) +vl_client_connect_to_vlib (const char *svm_name, + const char *client_name, int rx_queue_size) { return connect_to_vlib_internal (svm_name, client_name, rx_queue_size, 1 /* want pthread */ ); } int -vl_client_connect_to_vlib_no_rx_pthread (char *svm_name, char *client_name, +vl_client_connect_to_vlib_no_rx_pthread (const char *svm_name, + const char *client_name, int rx_queue_size) { return connect_to_vlib_internal (svm_name, client_name, rx_queue_size, @@ -437,7 +439,7 @@ static void vl_api_get_first_msg_id_reply_t_handler } u16 -vl_client_get_first_plugin_msg_id (char *plugin_name) +vl_client_get_first_plugin_msg_id (const char *plugin_name) { vl_api_get_first_msg_id_t *mp; api_main_t *am = &api_main; diff --git a/src/vlibmemory/memory_shared.c b/src/vlibmemory/memory_shared.c index 6cea5df9..aea90330 100644 --- a/src/vlibmemory/memory_shared.c +++ b/src/vlibmemory/memory_shared.c @@ -257,7 +257,7 @@ vl_msg_api_free_nolock (void *a) } void -vl_set_memory_root_path (char *name) +vl_set_memory_root_path (const char *name) { api_main_t *am = &api_main; @@ -321,7 +321,7 @@ vl_set_api_pvt_heap_size (u64 size) } int -vl_map_shmem (char *region_name, int is_vlib) +vl_map_shmem (const char *region_name, int is_vlib) { svm_map_region_args_t _a, *a = &_a; svm_region_t *vlib_rp, *root_rp; diff --git a/src/vlibmemory/memory_vlib.c b/src/vlibmemory/memory_vlib.c index 43574dea..29a5c2c2 100644 --- a/src/vlibmemory/memory_vlib.c +++ b/src/vlibmemory/memory_vlib.c @@ -361,7 +361,7 @@ _(GET_FIRST_MSG_ID, get_first_msg_id) * vl_api_init */ static int -memory_api_init (char *region_name) +memory_api_init (const char *region_name) { int rv; vl_msg_api_msg_config_t cfg; @@ -1202,7 +1202,7 @@ vlibmemory_init (vlib_main_t * vm) VLIB_INIT_FUNCTION (vlibmemory_init); void -vl_set_memory_region_name (char *name) +vl_set_memory_region_name (const char *name) { api_main_t *am = &api_main; diff --git a/src/vpp/api/api_main.c b/src/vpp/api/api_main.c index d48e4eff..ac09cd15 100644 --- a/src/vpp/api/api_main.c +++ b/src/vpp/api/api_main.c @@ -211,7 +211,7 @@ api_cli_output (void *notused, const char *fmt, ...) 
} u16 -vl_client_get_first_plugin_msg_id (char *plugin_name) +vl_client_get_first_plugin_msg_id (const char *plugin_name) { api_main_t *am = &api_main; vl_api_msg_range_t *rp; -- cgit 1.2.3-korg From 816f437d943688f67d61fb6b9708eff59432b2ee Mon Sep 17 00:00:00 2001 From: Filip Tehlar Date: Wed, 26 Apr 2017 16:09:06 +0200 Subject: Fix vnet unit tests Change-Id: Ibe55e4399c6b78d83268d7c49ed498cab7bfdb43 Signed-off-by: Filip Tehlar --- build-data/packages/vpp.mk | 4 +++ src/tests/vnet/lisp-cp/test_cp_serdes.c | 51 ++++++++++++++++++++++++++++++-- src/tests/vnet/lisp-cp/test_lisp_types.c | 45 +++++++++++++++++++++------- src/tests/vnet/lisp-gpe/test.c | 18 ----------- src/vlib/buffer.h | 12 ++++++++ src/vnet.am | 13 -------- src/vnet/lisp-cp/control.c | 14 ++++----- src/vnet/lisp-cp/control.h | 9 ++++++ src/vnet/lisp-cp/lisp_msg_serdes.c | 8 +++++ src/vnet/lisp-cp/lisp_types.c | 31 ++++++++++--------- src/vpp.am | 1 - src/vpp/api/test_client.c | 11 ------- src/vppinfra/test_tw_timer.c | 8 ++--- 13 files changed, 142 insertions(+), 83 deletions(-) delete mode 100644 src/tests/vnet/lisp-gpe/test.c (limited to 'src/vpp/api') diff --git a/build-data/packages/vpp.mk b/build-data/packages/vpp.mk index 64eb0d89..1acc59b2 100644 --- a/build-data/packages/vpp.mk +++ b/build-data/packages/vpp.mk @@ -30,3 +30,7 @@ ifeq ($($(PLATFORM)_uses_dpdk_mlx5_pmd),yes) vpp_configure_args += --with-dpdk-mlx5-pmd endif endif + +ifeq ($($(PLATFORM)_enable_tests),yes) +vpp_configure_args += --enable-tests +endif diff --git a/src/tests/vnet/lisp-cp/test_cp_serdes.c b/src/tests/vnet/lisp-cp/test_cp_serdes.c index 0766bee1..8e8c8455 100644 --- a/src/tests/vnet/lisp-cp/test_cp_serdes.c +++ b/src/tests/vnet/lisp-cp/test_cp_serdes.c @@ -21,9 +21,6 @@ #include #include -/* FIXME */ -#include - #define _assert(e) \ error = CLIB_ERROR_ASSERT (e); \ if (error) \ @@ -489,6 +486,53 @@ done: return error; } +static vlib_buffer_t * +create_buffer (u8 * data, u32 data_len) +{ + vlib_buffer_t *b; + + u8 *buf_data = clib_mem_alloc(500); + memset (buf_data, 0, 500); + b = (vlib_buffer_t *)buf_data; + + u8 * p = vlib_buffer_put_uninit (b, data_len); + clib_memcpy (p, data, data_len); + + return b; +} + +static clib_error_t * +test_lisp_parse_map_reply () +{ + clib_error_t * error = 0; + u8 map_reply_data[] = + { + 0x00, 0x00, 0x00, 0x01, /* type; rsvd; mapping count */ + 0x00, 0x00, 0x00, 0x00, + }; + vlib_buffer_t *b = create_buffer (map_reply_data, sizeof (map_reply_data)); + map_records_arg_t *mrecs = parse_map_reply (b); + _assert (0 == mrecs); + clib_mem_free (b); + + u8 map_reply_data2[] = + { + 0x00, 0x00, 0x00, 0x01, /* type; rsvd */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, /* nonce */ + + /* 1. 
record - incomplete */ + 0x01, 0x02, 0x03, 0x04, /* record TTL */ + 0x01, /* locator count */ + }; + b = create_buffer (map_reply_data2, sizeof (map_reply_data2)); + mrecs = parse_map_reply (b); + _assert (0 == mrecs); +done: + clib_mem_free (b); + return error; +} + static clib_error_t * test_lisp_parse_lcaf () { @@ -610,6 +654,7 @@ done: _(lisp_msg_push_ecm) \ _(lisp_msg_parse) \ _(lisp_msg_parse_mapping_record) \ + _(lisp_parse_map_reply) \ _(lisp_parse_lcaf) \ _(lisp_map_register) diff --git a/src/tests/vnet/lisp-cp/test_lisp_types.c b/src/tests/vnet/lisp-cp/test_lisp_types.c index fa34a3c6..21575015 100644 --- a/src/tests/vnet/lisp-cp/test_lisp_types.c +++ b/src/tests/vnet/lisp-cp/test_lisp_types.c @@ -18,9 +18,6 @@ #include #include -/* FIXME */ -#include - #define _assert(e) \ error = CLIB_ERROR_ASSERT (e); \ if (error) \ @@ -265,7 +262,6 @@ done: } #endif -#if 0 /* uncomment this once VNI is supported */ static clib_error_t * test_write_mac_in_lcaf (void) { clib_error_t * error = 0; @@ -276,13 +272,12 @@ static clib_error_t * test_write_mac_in_lcaf (void) gid_address_t g = { .mac = {0x1, 0x2, 0x3, 0x4, 0x5, 0x6}, - .vni = 0x30, + .vni = 0x01020304, .vni_mask = 0x10, .type = GID_ADDR_MAC, }; u16 len = gid_address_put (b, &g); - _assert (8 == len); u8 expected[] = { @@ -290,20 +285,20 @@ static clib_error_t * test_write_mac_in_lcaf (void) 0x00, /* reserved1 */ 0x00, /* flags */ 0x02, /* LCAF type = Instance ID */ - 0x20, /* IID/VNI mask len */ - 0x00, 0x0a, /* length */ + 0x10, /* IID/IID mask len */ + 0x00, 0x0c, /* length */ 0x01, 0x02, 0x03, 0x04, /* Instance ID / VNI */ - 0x00, 0x06, /* AFI = MAC */ + 0x40, 0x05, /* AFI = MAC */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06 /* MAC */ - } + }; + _assert (sizeof (expected) == len); _assert (0 == memcmp (expected, b, len)); done: clib_mem_free (b); return error; } -#endif static clib_error_t * test_mac_address_write (void) { @@ -417,6 +412,32 @@ done: return error; } +static clib_error_t * +test_src_dst_deser_bad_afi (void) +{ + clib_error_t * error = 0; + + u8 expected_data[] = + { + 0x40, 0x03, 0x00, 0x00, /* AFI = LCAF, reserved1, flags */ + 0x0c, 0x00, 0x00, 0x14, /* LCAF type = source/dest key, rsvd, length */ + 0x00, 0x00, 0x00, 0x00, /* reserved; source-ML, Dest-ML */ + + 0xde, 0xad, /* AFI = bad value */ + 0x11, 0x22, 0x33, 0x44, + 0x55, 0x66, /* source */ + + 0x40, 0x05, /* AFI = MAC */ + 0x10, 0x21, 0x32, 0x43, + 0x54, 0x65, /* destination */ + }; + + gid_address_t p; + _assert (~0 == gid_address_parse (expected_data, &p)); +done: + return error; +} + static clib_error_t * test_src_dst_serdes (void) { @@ -537,6 +558,8 @@ done: _(mac_address_write) \ _(gid_address_write) \ _(src_dst_serdes) \ + _(write_mac_in_lcaf) \ + _(src_dst_deser_bad_afi) \ _(src_dst_with_vni_serdes) int run_tests (void) diff --git a/src/tests/vnet/lisp-gpe/test.c b/src/tests/vnet/lisp-gpe/test.c deleted file mode 100644 index dde633ae..00000000 --- a/src/tests/vnet/lisp-gpe/test.c +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -int main(int argc, char **argv) { - return 0; -} diff --git a/src/vlib/buffer.h b/src/vlib/buffer.h index 69c8c7cc..18e2437d 100644 --- a/src/vlib/buffer.h +++ b/src/vlib/buffer.h @@ -205,6 +205,18 @@ vlib_buffer_advance (vlib_buffer_t * b, word l) b->current_length -= l; } +/** \brief Check if there is enough space in buffer to advance + + @param b - (vlib_buffer_t *) pointer to the buffer + @param l - (word) size to check + @return - 0 if there is less space than 'l' in buffer +*/ +always_inline u8 +vlib_buffer_has_space (vlib_buffer_t * b, word l) +{ + return b->current_length >= l; +} + /** \brief Reset current header & length to state they were in when packet was received. diff --git a/src/vnet.am b/src/vnet.am index 25b84616..9f3aedba 100644 --- a/src/vnet.am +++ b/src/vnet.am @@ -662,19 +662,6 @@ nobase_include_HEADERS += \ API_FILES += vnet/lisp-gpe/lisp_gpe.api -if ENABLE_TESTS -TESTS += test_test - -test_test_SOURCES = tests/vnet/lisp-gpe/test.c - -test_test_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG - -test_test_LDADD = $(LIBOBJS) - -noinst_PROGRAMS += $(TESTS) -check_PROGRAMS += $(TESTS) -endif - ######################################## # DHCP client ######################################## diff --git a/src/vnet/lisp-cp/control.c b/src/vnet/lisp-cp/control.c index 6408b297..c0093301 100644 --- a/src/vnet/lisp-cp/control.c +++ b/src/vnet/lisp-cp/control.c @@ -44,13 +44,6 @@ typedef struct u8 smr_invoked; } map_request_args_t; -typedef struct -{ - u64 nonce; - u8 is_rloc_probe; - mapping_t *mappings; -} map_records_arg_t; - u8 vnet_lisp_get_map_request_mode (void) { @@ -3485,7 +3478,7 @@ done: vec_free (itr_rlocs); } -static map_records_arg_t * +map_records_arg_t * parse_map_reply (vlib_buffer_t * b) { locator_t probed; @@ -3501,6 +3494,11 @@ parse_map_reply (vlib_buffer_t * b) mrep_hdr = vlib_buffer_get_current (b); a->nonce = MREP_NONCE (mrep_hdr); a->is_rloc_probe = MREP_RLOC_PROBE (mrep_hdr); + if (!vlib_buffer_has_space (b, sizeof (*mrep_hdr))) + { + clib_mem_free (a); + return 0; + } vlib_buffer_pull (b, sizeof (*mrep_hdr)); for (i = 0; i < MREP_REC_COUNT (mrep_hdr); i++) diff --git a/src/vnet/lisp-cp/control.h b/src/vnet/lisp-cp/control.h index eae8a184..cb98eb09 100644 --- a/src/vnet/lisp-cp/control.h +++ b/src/vnet/lisp-cp/control.h @@ -273,6 +273,13 @@ typedef struct u8 key_id; } vnet_lisp_add_del_mapping_args_t; +typedef struct +{ + u64 nonce; + u8 is_rloc_probe; + mapping_t *mappings; +} map_records_arg_t; + int vnet_lisp_map_cache_add_del (vnet_lisp_add_del_mapping_args_t * a, u32 * map_index); @@ -332,6 +339,8 @@ int vnet_lisp_map_register_enable_disable (u8 is_enable); u8 vnet_lisp_map_register_state_get (void); u8 vnet_lisp_rloc_probe_state_get (void); +map_records_arg_t *parse_map_reply (vlib_buffer_t * b); + always_inline mapping_t * lisp_get_petr_mapping (lisp_cp_main_t * lcm) { diff --git a/src/vnet/lisp-cp/lisp_msg_serdes.c b/src/vnet/lisp-cp/lisp_msg_serdes.c index eee1885c..6c0a7219 100644 --- a/src/vnet/lisp-cp/lisp_msg_serdes.c +++ b/src/vnet/lisp-cp/lisp_msg_serdes.c @@ -312,6 +312,8 @@ lisp_msg_parse_loc (vlib_buffer_t * b, locator_t * loc) if (len == ~0) return ~0; + if (!vlib_buffer_has_space (b, sizeof (len))) + return ~0; vlib_buffer_pull (b, len); return len; @@ -326,6 +328,9 @@ lisp_msg_parse_mapping_record (vlib_buffer_t * b, gid_address_t * eid, int i = 0, len = 0, llen = 0; h = vlib_buffer_get_current (b); + if (!vlib_buffer_has_space 
(b, sizeof (mapping_record_hdr_t))) + return ~0; + vlib_buffer_pull (b, sizeof (mapping_record_hdr_t)); memset (eid, 0, sizeof (*eid)); @@ -333,6 +338,9 @@ lisp_msg_parse_mapping_record (vlib_buffer_t * b, gid_address_t * eid, if (len == ~0) return len; + if (!vlib_buffer_has_space (b, sizeof (len))) + return ~0; + vlib_buffer_pull (b, len); if (GID_ADDR_IP_PREFIX == gid_address_type (eid)) gid_address_ippref_len (eid) = MAP_REC_EID_PLEN (h); diff --git a/src/vnet/lisp-cp/lisp_types.c b/src/vnet/lisp-cp/lisp_types.c index ad3a4bdf..31a80081 100644 --- a/src/vnet/lisp-cp/lisp_types.c +++ b/src/vnet/lisp-cp/lisp_types.c @@ -657,12 +657,19 @@ fid_addr_parse (u8 * p, fid_address_t * a) return ip_address_parse (p, afi, ip_addr); case FID_ADDR_NSH: - ASSERT (0); break; } return ~0; } +#define INC(dst, exp) \ +do { \ + u16 _sum = (exp); \ + if ((u16)~0 == _sum) \ + return ~0; \ + dst += _sum; \ +} while (0); + u16 sd_parse (u8 * p, void *a) { @@ -677,8 +684,8 @@ sd_parse (u8 * p, void *a) sd_hdr = (lcaf_src_dst_hdr_t *) (p + size); size += sizeof (sd_hdr[0]); - size += fid_addr_parse (p + size, src); - size += fid_addr_parse (p + size, dst); + INC (size, fid_addr_parse (p + size, src)); + INC (size, fid_addr_parse (p + size, dst)); if (fid_addr_type (src) == FID_ADDR_IP_PREF) { @@ -704,7 +711,7 @@ try_parse_src_dst_lcaf (u8 * p, gid_address_t * a) if (LCAF_SOURCE_DEST != lcaf_type (&lcaf)) return ~0; - size += sd_parse (p + size, a); + INC (size, sd_parse (p + size, a)); return size; } @@ -724,13 +731,10 @@ vni_parse (u8 * p, void *a) u16 afi = clib_net_to_host_u16 (*((u16 *) (p + size))); if (LISP_AFI_LCAF == afi) { - u16 len = try_parse_src_dst_lcaf (p + size, g); - if ((u16) ~ 0 == len) - return ~0; - size += len; + INC (size, try_parse_src_dst_lcaf (p + size, g)); } else - size += gid_address_parse (p + size, g); + INC (size, gid_address_parse (p + size, g)); return size; } @@ -757,7 +761,7 @@ lcaf_parse (void *offset, gid_address_t * addr) clib_warning ("Unsupported LCAF type: %u", type); return ~0; } - size += (*lcaf_parse_fcts[type]) (offset + size, lcaf); + INC (size, (*lcaf_parse_fcts[type]) (offset + size, lcaf)); return sizeof (u16) + size; } @@ -1419,10 +1423,9 @@ u32 gid_address_parse (u8 * offset, gid_address_t * a) { lisp_afi_e afi; - int len = 0; + u16 len = 0; - if (!a) - return 0; + ASSERT (a); /* NOTE: since gid_address_parse may be called by vni_parse, we can't 0 * the gid address here */ @@ -1458,7 +1461,7 @@ gid_address_parse (u8 * offset, gid_address_t * a) clib_warning ("LISP AFI %d not supported!", afi); return ~0; } - return len; + return (len == (u16) ~ 0) ? 
~0 : len; } void diff --git a/src/vpp.am b/src/vpp.am index 8cdc60d9..d8b3e4ec 100644 --- a/src/vpp.am +++ b/src/vpp.am @@ -100,7 +100,6 @@ bin_test_ha_SOURCES = \ bin_test_ha_LDADD = \ libvlibmemoryclient.la \ - libvlibapi.la \ libsvm.la \ libvppinfra.la \ -lpthread -lm -lrt diff --git a/src/vpp/api/test_client.c b/src/vpp/api/test_client.c index 551bdab9..231b3c18 100644 --- a/src/vpp/api/test_client.c +++ b/src/vpp/api/test_client.c @@ -541,13 +541,6 @@ static void vl_api_create_loopback_instance_reply_t_handler ntohl (mp->retval), ntohl (mp->sw_if_index)); } -static void -vl_api_sr_tunnel_add_del_reply_t_handler (vl_api_sr_tunnel_add_del_reply_t * - mp) -{ - fformat (stdout, "sr tunnel add/del reply %d\n", ntohl (mp->retval)); -} - static void vl_api_l2_patch_add_del_reply_t_handler (vl_api_l2_patch_add_del_reply_t * mp) { @@ -949,7 +942,6 @@ add_ip4_neighbor (test_main_t * tm, int add_del) mp->_vl_msg_id = ntohs (VL_API_IP_NEIGHBOR_ADD_DEL); mp->client_index = tm->my_client_index; mp->context = 0xdeadbeef; - mp->vrf_id = ntohl (11); mp->sw_if_index = ntohl (6); mp->is_add = add_del; @@ -972,7 +964,6 @@ add_ip6_neighbor (test_main_t * tm, int add_del) mp->_vl_msg_id = ntohs (VL_API_IP_NEIGHBOR_ADD_DEL); mp->client_index = tm->my_client_index; mp->context = 0xdeadbeef; - mp->vrf_id = ntohl (11); mp->sw_if_index = ntohl (6); mp->is_add = add_del; mp->is_ipv6 = 1; @@ -1055,9 +1046,7 @@ dhcp_set_proxy (test_main_t * tm, int ipv6) mp->_vl_msg_id = ntohs (VL_API_DHCP_PROXY_CONFIG); mp->client_index = tm->my_client_index; mp->context = 0xdeadbeef; - mp->vrf_id = ntohl (0); mp->is_ipv6 = ipv6; - mp->insert_circuit_id = 1; mp->is_add = 1; mp->dhcp_server[0] = 0x20; mp->dhcp_server[1] = 0x01; diff --git a/src/vppinfra/test_tw_timer.c b/src/vppinfra/test_tw_timer.c index a0287b89..26499509 100644 --- a/src/vppinfra/test_tw_timer.c +++ b/src/vppinfra/test_tw_timer.c @@ -138,7 +138,7 @@ test2_single (tw_timer_test_main_t * tm) tw_timer_wheel_init_2t_1w_2048sl (&tm->single_wheel, expired_timer_single_callback, - 1.0 /* timer interval */ ); + 1.0 /* timer interval */ , ~0); /* Prime offset */ initial_wheel_offset = 757; @@ -266,7 +266,7 @@ test2_double (tw_timer_test_main_t * tm) tw_timer_wheel_init_16t_2w_512sl (&tm->double_wheel, expired_timer_double_callback, - 1.0 /* timer interval */ ); + 1.0 /* timer interval */ , ~0); /* Prime offset */ initial_wheel_offset = 757; @@ -387,7 +387,7 @@ test1_single (tw_timer_test_main_t * tm) tw_timer_wheel_init_2t_1w_2048sl (&tm->single_wheel, expired_timer_single_callback, - 1.0 /* timer interval */ ); + 1.0 /* timer interval */ , ~0); /* * Prime offset, to make sure that the wheel starts in a @@ -454,7 +454,7 @@ test1_double (tw_timer_test_main_t * tm) tw_timer_wheel_init_16t_2w_512sl (&tm->double_wheel, expired_timer_double_callback, - 1.0 /* timer interval */ ); + 1.0 /* timer interval */ , ~0); /* * Prime offset, to make sure that the wheel starts in a -- cgit 1.2.3-korg From 5d73eecd63018db69b10bf56adeec9cc5cf92790 Mon Sep 17 00:00:00 2001 From: Pablo Camarillo Date: Mon, 24 Apr 2017 17:51:56 +0200 Subject: First commit SR MPLS Change-Id: I961685a2a0e4c314049444c64eb6ccf877c278dd Signed-off-by: Pablo Camarillo --- MAINTAINERS | 6 +- doxygen/user_doc.md | 1 + .../srv6-sample-localsid/srv6_localsid_sample.h | 4 +- .../srv6_sample_localsid_doc.md | 2 +- src/plugins/ioam/ip6/ioam_cache.h | 2 +- .../ioam/ip6/ioam_cache_tunnel_select_node.c | 2 +- src/plugins/ioam/udp-ping/udp_ping_node.c | 2 +- src/scripts/vnet/sr/left-linux-ping.sh | 3 - 
src/scripts/vnet/sr/leftpeer.conf | 27 - src/scripts/vnet/sr/mcast | 58 - src/scripts/vnet/sr/right-linux-ping.sh | 4 - src/scripts/vnet/sr/rightpeer.conf | 22 - src/scripts/vnet/sr/sr_mpls | 11 + src/scripts/vnet/sr/srlocal.sh | 4 - src/vnet.am | 32 +- src/vnet/dpo/mpls_label_dpo.c | 2 +- src/vnet/fib/fib_entry.h | 8 +- src/vnet/sr/dir.dox | 25 - src/vnet/sr/ietf_draft_05.txt | 1564 ---------- src/vnet/sr/sr.api | 168 - src/vnet/sr/sr.c | 57 - src/vnet/sr/sr.h | 323 -- src/vnet/sr/sr_api.c | 244 -- src/vnet/sr/sr_doc.md | 55 - src/vnet/sr/sr_localsid.c | 1492 --------- src/vnet/sr/sr_localsid.md | 58 - src/vnet/sr/sr_packet.h | 159 - src/vnet/sr/sr_policy.md | 56 - src/vnet/sr/sr_policy_rewrite.c | 3227 -------------------- src/vnet/sr/sr_steering.c | 573 ---- src/vnet/sr/sr_steering.md | 11 - src/vnet/srmpls/dir.dox | 22 + src/vnet/srmpls/sr.h | 152 + src/vnet/srmpls/sr_doc.md | 87 + src/vnet/srmpls/sr_mpls_policy.c | 569 ++++ src/vnet/srmpls/sr_mpls_steering.c | 453 +++ src/vnet/srv6/dir.dox | 25 + src/vnet/srv6/ietf_draft_05.txt | 1564 ++++++++++ src/vnet/srv6/sr.api | 168 + src/vnet/srv6/sr.c | 57 + src/vnet/srv6/sr.h | 325 ++ src/vnet/srv6/sr_api.c | 244 ++ src/vnet/srv6/sr_doc.md | 55 + src/vnet/srv6/sr_localsid.c | 1492 +++++++++ src/vnet/srv6/sr_localsid.md | 58 + src/vnet/srv6/sr_packet.h | 159 + src/vnet/srv6/sr_policy.md | 56 + src/vnet/srv6/sr_policy_rewrite.c | 3227 ++++++++++++++++++++ src/vnet/srv6/sr_steering.c | 573 ++++ src/vnet/srv6/sr_steering.md | 11 + src/vnet/vnet_all_api_h.h | 2 +- src/vpp/api/api.c | 2 +- src/vpp/api/custom_dump.c | 2 +- src/vpp/api/vpe.api | 2 +- 54 files changed, 9349 insertions(+), 8158 deletions(-) delete mode 100755 src/scripts/vnet/sr/left-linux-ping.sh delete mode 100644 src/scripts/vnet/sr/leftpeer.conf delete mode 100644 src/scripts/vnet/sr/mcast delete mode 100755 src/scripts/vnet/sr/right-linux-ping.sh delete mode 100644 src/scripts/vnet/sr/rightpeer.conf create mode 100644 src/scripts/vnet/sr/sr_mpls delete mode 100755 src/scripts/vnet/sr/srlocal.sh delete mode 100755 src/vnet/sr/dir.dox delete mode 100755 src/vnet/sr/ietf_draft_05.txt delete mode 100644 src/vnet/sr/sr.api delete mode 100755 src/vnet/sr/sr.c delete mode 100755 src/vnet/sr/sr.h delete mode 100644 src/vnet/sr/sr_api.c delete mode 100644 src/vnet/sr/sr_doc.md delete mode 100755 src/vnet/sr/sr_localsid.c delete mode 100644 src/vnet/sr/sr_localsid.md delete mode 100755 src/vnet/sr/sr_packet.h delete mode 100644 src/vnet/sr/sr_policy.md delete mode 100755 src/vnet/sr/sr_policy_rewrite.c delete mode 100755 src/vnet/sr/sr_steering.c delete mode 100644 src/vnet/sr/sr_steering.md create mode 100755 src/vnet/srmpls/dir.dox create mode 100755 src/vnet/srmpls/sr.h create mode 100644 src/vnet/srmpls/sr_doc.md create mode 100755 src/vnet/srmpls/sr_mpls_policy.c create mode 100755 src/vnet/srmpls/sr_mpls_steering.c create mode 100755 src/vnet/srv6/dir.dox create mode 100755 src/vnet/srv6/ietf_draft_05.txt create mode 100644 src/vnet/srv6/sr.api create mode 100755 src/vnet/srv6/sr.c create mode 100755 src/vnet/srv6/sr.h create mode 100644 src/vnet/srv6/sr_api.c create mode 100644 src/vnet/srv6/sr_doc.md create mode 100755 src/vnet/srv6/sr_localsid.c create mode 100644 src/vnet/srv6/sr_localsid.md create mode 100755 src/vnet/srv6/sr_packet.h create mode 100644 src/vnet/srv6/sr_policy.md create mode 100755 src/vnet/srv6/sr_policy_rewrite.c create mode 100755 src/vnet/srv6/sr_steering.c create mode 100644 src/vnet/srv6/sr_steering.md (limited to 'src/vpp/api') diff --git a/MAINTAINERS 
b/MAINTAINERS index bdc33abe..2f198319 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -76,9 +76,11 @@ VNET IPv4 and IPv6 LPM M: Dave Barach F: src/vnet/ip/ -VNET IPv6 Segment Routing +VNET Segment Routing (IPv6 and MPLS) M: Pablo Camarillo -F: src/vnet/sr/ +F: src/vnet/srv6/ +F: src/vnet/srmpls/ +F: src/examples/srv6-sample-localsid/ VNET IPSec M: Sergio Gonzalez Monroy diff --git a/doxygen/user_doc.md b/doxygen/user_doc.md index 29df6156..d052c53b 100644 --- a/doxygen/user_doc.md +++ b/doxygen/user_doc.md @@ -15,3 +15,4 @@ Several modules provide operational, dataplane-user focused documentation. - @subpage qos_doc - @subpage span_doc - @subpage srv6_doc +- @subpage srmpls_doc diff --git a/src/examples/srv6-sample-localsid/srv6_localsid_sample.h b/src/examples/srv6-sample-localsid/srv6_localsid_sample.h index 474b5de2..ef74ea3e 100644 --- a/src/examples/srv6-sample-localsid/srv6_localsid_sample.h +++ b/src/examples/srv6-sample-localsid/srv6_localsid_sample.h @@ -17,8 +17,8 @@ #include #include -#include -#include +#include +#include #include #include diff --git a/src/examples/srv6-sample-localsid/srv6_sample_localsid_doc.md b/src/examples/srv6-sample-localsid/srv6_sample_localsid_doc.md index 78e91ab3..cd717db8 100644 --- a/src/examples/srv6-sample-localsid/srv6_sample_localsid_doc.md +++ b/src/examples/srv6-sample-localsid/srv6_sample_localsid_doc.md @@ -26,5 +26,5 @@ Notice that the plugin only 'defines' a new SRv6 LocalSID behavior, but the exis ## Graph node The current graph node uses the function 'end_srh_processing' to do the Segment Routing Endpoint behavior. Notice that it does not allow the cleanup of a Segment Routing header (as per the SRv6 behavior specs). -This function is identical to the one found in /src/vnet/sr/sr_localsid.c +This function is identical to the one found in /src/vnet/srv6/sr_localsid.c In case that by some other reason you want to do decapsulation, or SRH clean_up you can use the functions 'end_decaps_srh_processing' or 'end_psp_srh_processing' respectively. 
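The hunk above updates the sample localsid plugin's includes for the vnet/sr to vnet/srv6 rename; any out-of-tree code that consumed the old SR headers needs the same one-line changes. A minimal sketch, assuming VPP's ip6_sr_header_t from sr_packet.h; the helper name below is illustrative and not part of this patch:

    #include <vnet/vnet.h>
    #include <vnet/srv6/sr.h>          /* was <vnet/sr/sr.h> before this series   */
    #include <vnet/srv6/sr_packet.h>   /* was <vnet/sr/sr_packet.h>               */

    /* Illustrative helper: a localsid behavior typically checks
     * segments_left in the received SRH before rewriting the DA. */
    static inline int
    my_behavior_srh_has_segments_left (ip6_sr_header_t * sr0)
    {
      return sr0 != 0 && sr0->segments_left > 0;
    }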
diff --git a/src/plugins/ioam/ip6/ioam_cache.h b/src/plugins/ioam/ip6/ioam_cache.h index 3f69fa72..e668ad7f 100644 --- a/src/plugins/ioam/ip6/ioam_cache.h +++ b/src/plugins/ioam/ip6/ioam_cache.h @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c b/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c index 0cf742c9..ca06607d 100644 --- a/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c +++ b/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c @@ -43,7 +43,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/plugins/ioam/udp-ping/udp_ping_node.c b/src/plugins/ioam/udp-ping/udp_ping_node.c index 84759b0f..e1a57955 100644 --- a/src/plugins/ioam/udp-ping/udp_ping_node.c +++ b/src/plugins/ioam/udp-ping/udp_ping_node.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include typedef enum { diff --git a/src/scripts/vnet/sr/left-linux-ping.sh b/src/scripts/vnet/sr/left-linux-ping.sh deleted file mode 100755 index 55b83506..00000000 --- a/src/scripts/vnet/sr/left-linux-ping.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash -ifconfig eth2 inet6 add db02::1/64 -route -A inet6 add db04::1/128 gw db02::2 diff --git a/src/scripts/vnet/sr/leftpeer.conf b/src/scripts/vnet/sr/leftpeer.conf deleted file mode 100644 index 9591d968..00000000 --- a/src/scripts/vnet/sr/leftpeer.conf +++ /dev/null @@ -1,27 +0,0 @@ -comment { test sr segment chunk-offset on } -test sr hmac validate on - -comment { trunk to rightpeer } -set int ip address GigabitEthernet2/3/0 db03::2/64 -enable ip6 interface GigabitEthernet2/3/0 -set int state GigabitEthernet2/3/0 up - -comment { subscriber left-linux-ping } -set int ip address GigabitEthernet2/2/0 db02::2/64 -enable ip6 interface GigabitEthernet2/2/0 -set int state GigabitEthernet2/2/0 up - -sr hmac id 2 key Gozzer -sr hmac id 3 key Hoser - -sr tunnel src db01::1 dst db04::1/128 next db03::1 next db04::1 tag db02::2 clean key Gozzer InPE 1 - -comment { sr unaware service chaining to db03::5 } -comment { sr tunnel src db01::1 dst db04::1/128 next db03::1 next db03::5 next db04::1 tag db02::2 clean key Gozzer InPE 1 } - -comment { tap connect srlocal hwaddr random } -comment { set int ip6 table tap-0 1 } -comment { set int ip address tap-0 db04::99/64 } -comment { enable ip6 interface tap-0 } -comment { set int state tap-0 up } -comment { ip route add table 1 db02::0/64 lookup in table 0 } diff --git a/src/scripts/vnet/sr/mcast b/src/scripts/vnet/sr/mcast deleted file mode 100644 index 50e73efa..00000000 --- a/src/scripts/vnet/sr/mcast +++ /dev/null @@ -1,58 +0,0 @@ - -loop create -loop create -loop create -loop create - -set int state loop0 up -set int state loop1 up -set int state loop2 up -set int state loop3 up - -set int ip address loop0 2001::1/64 -set int ip address loop1 2001:1::1/64 -set int ip address loop2 2001:2::1/64 -set int ip address loop3 2001:3::1/64 - -set ip6 neighbor loop1 2001:1::2 00:00:dd:ee:cc:d1 -set ip6 neighbor loop2 2001:2::2 00:00:dd:ee:cc:d2 -set ip6 neighbor loop3 2001:3::2 00:00:dd:ee:cc:d3 - -ip route 3001::1/128 via 2001:1::2 loop1 -ip route 3001::2/128 via 2001:2::2 loop2 -ip route 3001::3/128 via 2001:3::2 loop3 - -sr tunnel name SR1 src aaaa::2:1 dst ff19::1/128 next 3001::1 clean -sr tunnel name SR2 src aaaa::2:2 dst ff19::2/128 next 3001::2 clean -sr tunnel name SR3 src aaaa::2:3 dst ff19::3/128 next 3001::3 clean - -sr policy name MCAST1 tunnel SR1 tunnel SR2 tunnel SR3 - -sr multicast-map 
address ff18::1 sr-policy MCAST1 - -packet-generator new { - name x - limit 1 - node ethernet-input - size 64-64 - no-recycle - data { - IP6: 1.2.3 -> 4.5.6 - ICMP: 3002::2 -> ff18::1 - ICMP echo_request - incrementing 100 - } -} -trace add pg-input 100 - -sr multicast-map del address ff18::1 sr-policy MCAST1 -sr policy del name MCAST1 tunnel SR1 tunnel SR2 tunnel SR3 - -ip route del 3001::1/128 via 2001:1::2 loop1 -ip route del 3001::2/128 via 2001:2::2 loop2 -ip route del 3001::3/128 via 2001:3::2 loop3 - -sr tunnel del name SR1 src aaaa::2:1 dst ff19::1/128 next 3001::1 clean -sr tunnel del name SR2 src aaaa::2:2 dst ff19::2/128 next 3001::2 clean -sr tunnel del name SR3 src aaaa::2:3 dst ff19::3/128 next 3001::3 clean - diff --git a/src/scripts/vnet/sr/right-linux-ping.sh b/src/scripts/vnet/sr/right-linux-ping.sh deleted file mode 100755 index 029368db..00000000 --- a/src/scripts/vnet/sr/right-linux-ping.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash - -ifconfig eth1 inet6 add db04::1/64 -route -A inet6 add db02::1/128 gw db04::2 diff --git a/src/scripts/vnet/sr/rightpeer.conf b/src/scripts/vnet/sr/rightpeer.conf deleted file mode 100644 index 6da7a7af..00000000 --- a/src/scripts/vnet/sr/rightpeer.conf +++ /dev/null @@ -1,22 +0,0 @@ -comment { trunk to leftpeer } -set int ip address GigabitEthernet2/0/0 db03::1/64 -enable ip6 interface GigabitEthernet2/0/0 -set int state GigabitEthernet2/0/0 up - -comment { subscriber right-linux-ping } -set int ip address GigabitEthernet2/2/0 db04::2/64 - -comment { next address to fake out ND on shared LAN segment } -set int ip address GigabitEthernet2/2/0 db02::13/64 - -enable ip6 interface GigabitEthernet2/2/0 -set int state GigabitEthernet2/2/0 up - -sr tunnel src db04::1 dst db02::1/128 next db03::2 next db02::1 tag db04::2 clean - -tap connect srlocal hwaddr random -set int ip6 table tap-0 1 -set int ip address tap-0 db04::99/64 -enable ip6 interface tap-0 -set int state tap-0 up -ip route add table 1 db02::0/64 lookup in table 0 diff --git a/src/scripts/vnet/sr/sr_mpls b/src/scripts/vnet/sr/sr_mpls new file mode 100644 index 00000000..4646372a --- /dev/null +++ b/src/scripts/vnet/sr/sr_mpls @@ -0,0 +1,11 @@ +set interface mpls local0 enable +sr mpls policy add bsid 20001 next 16001 next 16002 next 16003 +sr mpls steer l3 a::/112 via sr policy bsid 20001 + +loop create +set int state loop0 up + +set int ip address loop0 11.0.0.1/24 +set ip arp loop0 11.0.0.2 00:00:11:aa:bb:cc + +mpls local-label 16001 via 11.0.0.2 loop0 out-label 16001 diff --git a/src/scripts/vnet/sr/srlocal.sh b/src/scripts/vnet/sr/srlocal.sh deleted file mode 100755 index 2f568408..00000000 --- a/src/scripts/vnet/sr/srlocal.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash - -ifconfig srlocal inet6 add db04::1/64 -route -6 add db02::0/64 gw db04::99 diff --git a/src/vnet.am b/src/vnet.am index 6e35df87..121d1a9c 100644 --- a/src/vnet.am +++ b/src/vnet.am @@ -697,21 +697,31 @@ nobase_include_HEADERS += \ # ipv6 segment routing ######################################## -if WITH_LIBSSL libvnet_la_SOURCES += \ - vnet/sr/sr.c \ - vnet/sr/sr_localsid.c \ - vnet/sr/sr_policy_rewrite.c \ - vnet/sr/sr_steering.c \ - vnet/sr/sr_api.c -endif + vnet/srv6/sr.c \ + vnet/srv6/sr_localsid.c \ + vnet/srv6/sr_policy_rewrite.c \ + vnet/srv6/sr_steering.c \ + vnet/srv6/sr_api.c nobase_include_HEADERS += \ - vnet/sr/sr_packet.h \ - vnet/sr/sr.h \ - vnet/sr/sr.api.h + vnet/srv6/sr_packet.h \ + vnet/srv6/sr.h \ + vnet/srv6/sr.api.h + +API_FILES += vnet/srv6/sr.api + 
+######################################## +# mpls segment routing +######################################## + +libvnet_la_SOURCES += \ + vnet/srmpls/sr_mpls_policy.c \ + vnet/srmpls/sr_mpls_steering.c -API_FILES += vnet/sr/sr.api + +nobase_include_HEADERS += \ + vnet/srmpls/sr.h ######################################## # IPFIX / netflow v10 diff --git a/src/vnet/dpo/mpls_label_dpo.c b/src/vnet/dpo/mpls_label_dpo.c index 4d84b900..18479531 100644 --- a/src/vnet/dpo/mpls_label_dpo.c +++ b/src/vnet/dpo/mpls_label_dpo.c @@ -171,7 +171,7 @@ mpls_label_paint (vlib_buffer_t * b0, hdr0 = vlib_buffer_get_current(b0); - if (PREDICT_TRUE(1 == mld0->mld_n_labels)) + if (1 == mld0->mld_n_labels) { /* optimise for the common case of one label */ *hdr0 = mld0->mld_hdr[0]; diff --git a/src/vnet/fib/fib_entry.h b/src/vnet/fib/fib_entry.h index 2196079b..aa1000e0 100644 --- a/src/vnet/fib/fib_entry.h +++ b/src/vnet/fib/fib_entry.h @@ -48,6 +48,10 @@ typedef enum fib_source_t_ { * that is from confiiguration on an interface, not a 'ip route' command */ FIB_SOURCE_INTERFACE, + /** + * SRv6 and SR-MPLS + */ + FIB_SOURCE_SR, /** * A high priority source a plugin can use */ @@ -64,10 +68,6 @@ typedef enum fib_source_t_ { * LISP */ FIB_SOURCE_LISP, - /** - * SRv6 - */ - FIB_SOURCE_SR, /** * IPv[46] Mapping */ diff --git a/src/vnet/sr/dir.dox b/src/vnet/sr/dir.dox deleted file mode 100755 index 3f539a58..00000000 --- a/src/vnet/sr/dir.dox +++ /dev/null @@ -1,25 +0,0 @@ -/* - * - * Copyright (c) 2013 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/** - @dir - @brief Segment Routing code - - An implementation of Segment Routing as per: - draft-ietf-6man-segment-routing-header-05 - - @see ietf_draft_05.txt - -*/ \ No newline at end of file diff --git a/src/vnet/sr/ietf_draft_05.txt b/src/vnet/sr/ietf_draft_05.txt deleted file mode 100755 index e9bff04f..00000000 --- a/src/vnet/sr/ietf_draft_05.txt +++ /dev/null @@ -1,1564 +0,0 @@ -Network Working Group S. Previdi, Ed. -Internet-Draft C. Filsfils -Intended status: Standards Track Cisco Systems, Inc. -Expires: August 5, 2017 B. Field - Comcast - I. Leung - Rogers Communications - J. Linkova - Google - E. Aries - Facebook - T. Kosugi - NTT - E. Vyncke - Cisco Systems, Inc. - D. Lebrun - Universite Catholique de Louvain - February 1, 2017 - - - IPv6 Segment Routing Header (SRH) - draft-ietf-6man-segment-routing-header-05 - -Abstract - - Segment Routing (SR) allows a node to steer a packet through a - controlled set of instructions, called segments, by prepending an SR - header to the packet. A segment can represent any instruction, - topological or service-based. SR allows to enforce a flow through - any path (topological, or application/service based) while - maintaining per-flow state only at the ingress node to the SR domain. - - Segment Routing can be applied to the IPv6 data plane with the - addition of a new type of Routing Extension Header. 
This draft - describes the Segment Routing Extension Header Type and how it is - used by SR capable nodes. - -Requirements Language - - The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", - "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this - document are to be interpreted as described in RFC 2119 [RFC2119]. - -Status of This Memo - - This Internet-Draft is submitted in full conformance with the - provisions of BCP 78 and BCP 79. - - - - -Previdi, et al. Expires August 5, 2017 [Page 1] - -Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 - - - Internet-Drafts are working documents of the Internet Engineering - Task Force (IETF). Note that other groups may also distribute - working documents as Internet-Drafts. The list of current Internet- - Drafts is at http://datatracker.ietf.org/drafts/current/. - - Internet-Drafts are draft documents valid for a maximum of six months - and may be updated, replaced, or obsoleted by other documents at any - time. It is inappropriate to use Internet-Drafts as reference - material or to cite them other than as "work in progress." - - This Internet-Draft will expire on August 5, 2017. - -Copyright Notice - - Copyright (c) 2017 IETF Trust and the persons identified as the - document authors. All rights reserved. - - This document is subject to BCP 78 and the IETF Trust's Legal - Provisions Relating to IETF Documents - (http://trustee.ietf.org/license-info) in effect on the date of - publication of this document. Please review these documents - carefully, as they describe your rights and restrictions with respect - to this document. Code Components extracted from this document must - include Simplified BSD License text as described in Section 4.e of - the Trust Legal Provisions and are provided without warranty as - described in the Simplified BSD License. - -Table of Contents - - 1. Segment Routing Documents . . . . . . . . . . . . . . . . . . 3 - 2. Introduction . . . . . . . . . . . . . . . . . . . . . . . . 3 - 2.1. Data Planes supporting Segment Routing . . . . . . . . . 4 - 2.2. Segment Routing (SR) Domain . . . . . . . . . . . . . . . 4 - 2.2.1. SR Domain in a Service Provider Network . . . . . . . 5 - 2.2.2. SR Domain in a Overlay Network . . . . . . . . . . . 6 - 3. Segment Routing Extension Header (SRH) . . . . . . . . . . . 7 - 3.1. SRH TLVs . . . . . . . . . . . . . . . . . . . . . . . . 9 - 3.1.1. Ingress Node TLV . . . . . . . . . . . . . . . . . . 10 - 3.1.2. Egress Node TLV . . . . . . . . . . . . . . . . . . . 11 - 3.1.3. Opaque Container TLV . . . . . . . . . . . . . . . . 11 - 3.1.4. Padding TLV . . . . . . . . . . . . . . . . . . . . . 12 - 3.1.5. HMAC TLV . . . . . . . . . . . . . . . . . . . . . . 13 - 3.2. SRH and RFC2460 behavior . . . . . . . . . . . . . . . . 14 - 4. SRH Procedures . . . . . . . . . . . . . . . . . . . . . . . 14 - 4.1. Source SR Node . . . . . . . . . . . . . . . . . . . . . 14 - 4.2. Transit Node . . . . . . . . . . . . . . . . . . . . . . 15 - 4.3. SR Segment Endpoint Node . . . . . . . . . . . . . . . . 16 - 5. Security Considerations . . . . . . . . . . . . . . . . . . . 16 - - - -Previdi, et al. Expires August 5, 2017 [Page 2] - -Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 - - - 5.1. Threat model . . . . . . . . . . . . . . . . . . . . . . 17 - 5.1.1. Source routing threats . . . . . . . . . . . . . . . 17 - 5.1.2. Applicability of RFC 5095 to SRH . . . . . . . . . . 17 - 5.1.3. Service stealing threat . . . . . . . . . . . . . . . 18 - 5.1.4. 
Topology disclosure . . . . . . . . . . . . . . . . . 18 - 5.1.5. ICMP Generation . . . . . . . . . . . . . . . . . . . 18 - 5.2. Security fields in SRH . . . . . . . . . . . . . . . . . 19 - 5.2.1. Selecting a hash algorithm . . . . . . . . . . . . . 20 - 5.2.2. Performance impact of HMAC . . . . . . . . . . . . . 21 - 5.2.3. Pre-shared key management . . . . . . . . . . . . . . 21 - 5.3. Deployment Models . . . . . . . . . . . . . . . . . . . . 22 - 5.3.1. Nodes within the SR domain . . . . . . . . . . . . . 22 - 5.3.2. Nodes outside of the SR domain . . . . . . . . . . . 22 - 5.3.3. SR path exposure . . . . . . . . . . . . . . . . . . 23 - 5.3.4. Impact of BCP-38 . . . . . . . . . . . . . . . . . . 23 - 6. IANA Considerations . . . . . . . . . . . . . . . . . . . . . 24 - 7. Manageability Considerations . . . . . . . . . . . . . . . . 24 - 8. Contributors . . . . . . . . . . . . . . . . . . . . . . . . 24 - 9. Acknowledgements . . . . . . . . . . . . . . . . . . . . . . 24 - 10. References . . . . . . . . . . . . . . . . . . . . . . . . . 25 - 10.1. Normative References . . . . . . . . . . . . . . . . . . 25 - 10.2. Informative References . . . . . . . . . . . . . . . . . 25 - Authors' Addresses . . . . . . . . . . . . . . . . . . . . . . . 27 - -1. Segment Routing Documents - - Segment Routing terminology is defined in - [I-D.ietf-spring-segment-routing]. - - Segment Routing use cases are described in [RFC7855] and - [I-D.ietf-spring-ipv6-use-cases]. - - Segment Routing protocol extensions are defined in - [I-D.ietf-isis-segment-routing-extensions], and - [I-D.ietf-ospf-ospfv3-segment-routing-extensions]. - -2. Introduction - - Segment Routing (SR), defined in [I-D.ietf-spring-segment-routing], - allows a node to steer a packet through a controlled set of - instructions, called segments, by prepending an SR header to the - packet. A segment can represent any instruction, topological or - service-based. SR allows to enforce a flow through any path - (topological or service/application based) while maintaining per-flow - state only at the ingress node to the SR domain. Segments can be - derived from different components: IGP, BGP, Services, Contexts, - Locators, etc. The list of segment forming the path is called the - Segment List and is encoded in the packet header. - - - -Previdi, et al. Expires August 5, 2017 [Page 3] - -Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 - - - SR allows the use of strict and loose source based routing paradigms - without requiring any additional signaling protocols in the - infrastructure hence delivering an excellent scalability property. - - The source based routing model described in - [I-D.ietf-spring-segment-routing] is inherited from the ones proposed - by [RFC1940] and [RFC2460]. The source based routing model offers - the support for explicit routing capability. - -2.1. Data Planes supporting Segment Routing - - Segment Routing (SR), can be instantiated over MPLS - ([I-D.ietf-spring-segment-routing-mpls]) and IPv6. This document - defines its instantiation over the IPv6 data-plane based on the use- - cases defined in [I-D.ietf-spring-ipv6-use-cases]. - - This document defines a new type of Routing Header (originally - defined in [RFC2460]) called the Segment Routing Header (SRH) in - order to convey the Segment List in the packet header as defined in - [I-D.ietf-spring-segment-routing]. Mechanisms through which segment - are known and advertised are outside the scope of this document. 
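For orientation, the fixed part of the SRH that carries this Segment List (as laid out in Section 3 of the draft below) can be sketched as a packed C struct. The struct and field names here are illustrative only, assuming VPP's u8/u16/ip6_address_t types; they are neither the draft's nor VPP's own definitions:

    #include <vnet/ip/ip6_packet.h>   /* ip6_address_t, u8/u16 base types */

    typedef struct
    {
      u8 next_header;     /* type of the header following the SRH           */
      u8 hdr_ext_len;     /* SRH length in 8-octet units, excluding first 8 */
      u8 routing_type;    /* to be assigned by IANA (suggested value 4)     */
      u8 segments_left;   /* index of the next segment to inspect           */
      u8 first_segment;   /* index of the first segment of the path         */
      u8 flags;           /* U|P|O|A|H flag bits                            */
      u16 reserved;
      ip6_address_t segment_list[0];  /* Segment List[0..n], then TLVs      */
    } __attribute__ ((packed)) srh_example_t;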
- - A segment is materialized by an IPv6 address. A segment identifies a - topological instruction or a service instruction. A segment can be - either: - - o global: a global segment represents an instruction supported by - all nodes in the SR domain and it is instantiated through an IPv6 - address globally known in the SR domain. - - o local: a local segment represents an instruction supported only by - the node who originates it and it is instantiated through an IPv6 - address that is known only by the local node. - -2.2. Segment Routing (SR) Domain - - We define the concept of the Segment Routing Domain (SR Domain) as - the set of nodes participating into the source based routing model. - These nodes may be connected to the same physical infrastructure - (e.g.: a Service Provider's network) as well as nodes remotely - connected to each other (e.g.: an enterprise VPN or an overlay). - - A non-exhaustive list of examples of SR Domains is: - - o The network of an operator, service provider, content provider, - enterprise including nodes, links and Autonomous Systems. - - - - - -Previdi, et al. Expires August 5, 2017 [Page 4] - -Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 - - - o A set of nodes connected as an overlay over one or more transit - providers. The overlay nodes exchange SR-enabled traffic with - segments belonging solely to the overlay routers (the SR domain). - None of the segments in the SR-enabled packets exchanged by the - overlay belong to the transit networks - - The source based routing model through its instantiation of the - Segment Routing Header (SRH) defined in this document equally applies - to all the above examples. - - It is assumed in this document that the SRH is added to the packet by - its source, consistently with the source routing model defined in - [RFC2460]. For example: - - o At the node originating the packet (host, server). - - o At the ingress node of an SR domain where the ingress node - receives an IPv6 packet and encapsulates it into an outer IPv6 - header followed by a Segment Routing header. - -2.2.1. SR Domain in a Service Provider Network - - The following figure illustrates an SR domain consisting of an - operator's network infrastructure. - - (-------------------------- Operator 1 -----------------------) - ( ) - ( (-----AS 1-----) (-------AS 2-------) (----AS 3-------) ) - ( ( ) ( ) ( ) ) - A1--(--(--11---13--14-)--(-21---22---23--24-)--(-31---32---34--)--)--Z1 - ( ( /|\ /|\ /| ) ( |\ /|\ /|\ /| ) ( |\ /|\ /| \ ) ) - A2--(--(/ | \/ | \/ | ) ( | \/ | \/ | \/ | ) ( | \/ | \/ | \)--)--Z2 - ( ( | /\ | /\ | ) ( | /\ | /\ | /\ | ) ( | /\ | /\ | ) ) - ( ( |/ \|/ \| ) ( |/ \|/ \|/ \| ) ( |/ \|/ \| ) ) - A3--(--(--15---17--18-)--(-25---26---27--28-)--(-35---36---38--)--)--Z3 - ( ( ) ( ) ( ) ) - ( (--------------) (------------------) (---------------) ) - ( ) - (-------------------------------------------------------------) - - Figure 1: Service Provider SR Domain - - Figure 1 describes an operator network including several ASes and - delivering connectivity between endpoints. In this scenario, Segment - Routing is used within the operator networks and across the ASes - boundaries (all being under the control of the same operator). In - this case segment routing can be used in order to address use cases - such as end-to-end traffic engineering, fast re-route, egress peer - - - -Previdi, et al. 
Expires August 5, 2017 [Page 5] - -Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 - - - engineering, data-center traffic engineering as described in - [RFC7855], [I-D.ietf-spring-ipv6-use-cases] and - [I-D.ietf-spring-resiliency-use-cases]. - - Typically, an IPv6 packet received at ingress (i.e.: from outside the - SR domain), is classified according to network operator policies and - such classification results into an outer header with an SRH applied - to the incoming packet. The SRH contains the list of segment - representing the path the packet must take inside the SR domain. - Thus, the SA of the packet is the ingress node, the DA (due to SRH - procedures described in Section 4) is set as the first segment of the - path and the last segment of the path is the egress node of the SR - domain. - - The path may include intra-AS as well as inter-AS segments. It has - to be noted that all nodes within the SR domain are under control of - the same administration. When the packet reaches the egress point of - the SR domain, the outer header and its SRH are removed so that the - destination of the packet is unaware of the SR domain the packet has - traversed. - - The outer header with the SRH is no different from any other - tunneling encapsulation mechanism and allows a network operator to - implement traffic engineering mechanisms so to efficiently steer - traffic across his infrastructure. - -2.2.2. SR Domain in a Overlay Network - - The following figure illustrates an SR domain consisting of an - overlay network over multiple operator's networks. - - (--Operator 1---) (-----Operator 2-----) (--Operator 3---) - ( ) ( ) ( ) - A1--(--11---13--14--)--(--21---22---23--24--)--(-31---32---34--)--C1 - ( /|\ /|\ /| ) ( |\ /|\ /|\ /| ) ( |\ /|\ /| \ ) - A2--(/ | \/ | \/ | ) ( | \/ | \/ | \/ | ) ( | \/ | \/ | \)--C2 - ( | /\ | /\ | ) ( | /\ | /\ | /\ | ) ( | /\ | /\ | ) - ( |/ \|/ \| ) ( |/ \|/ \|/ \| ) ( |/ \|/ \| ) - A3--(--15---17--18--)--(--25---26---27--28--)--(-35---36---38--)--C3 - ( ) ( | | | ) ( ) - (---------------) (--|----|---------|--) (---------------) - | | | - B1 B2 B3 - - Figure 2: Overlay SR Domain - - - - - - -Previdi, et al. Expires August 5, 2017 [Page 6] - -Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 - - - Figure 2 describes an overlay consisting of nodes connected to three - different network operators and forming a single overlay network - where Segment routing packets are exchanged. - - The overlay consists of nodes A1, A2, A3, B1, B2, B3, C1, C2 and C3. - These nodes are connected to their respective network operator and - form an overlay network. - - Each node may originate packets with an SRH which contains, in the - segment list of the SRH or in the DA, segments identifying other - overlay nodes. This implies that packets with an SRH may traverse - operator's networks but, obviously, these SRHs cannot contain an - address/segment of the transit operators 1, 2 and 3. The SRH - originated by the overlay can only contain address/segment under the - administration of the overlay (e.g. address/segments supported by A1, - A2, A3, B1, B2, B3, C1,C2 or C3). - - In this model, the operator network nodes are transit nodes and, - according to [RFC2460], MUST NOT inspect the routing extension header - since they are not the DA of the packet. 
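A minimal sketch of the ingress encoding described in Section 2.2.1 (and detailed in Section 4.1 below): the segment list is written in reverse path order and the packet DA is set to the first segment. The function name is illustrative and the types are assumed to come from VPP's ip6_packet.h:

    #include <vnet/ip/ip6_packet.h>   /* ip6_header_t, ip6_address_t */

    /* Encode a path of n segments (path[0] = first hop, path[n-1] = egress)
     * into an SRH segment list: Segment List[0] holds the last segment,
     * Segment List[n-1] the first; segments_left = first_segment = n - 1. */
    static void
    encode_srh_example (ip6_header_t * ip6, ip6_address_t * segment_list,
                        ip6_address_t * path, u8 n)
    {
      u8 i;
      for (i = 0; i < n; i++)
        segment_list[i] = path[n - 1 - i];
      ip6->dst_address = path[0];      /* DA = first segment of the path */
    }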
- - It is a common practice in operators networks to filter out, at - ingress, any packet whose DA is the address of an internal node and - it is also possible that an operator would filter out any packet - destined to an internal address and having an extension header in it. - - This common practice does not impact the SR-enabled traffic between - the overlay nodes as the intermediate transit networks never see a - destination address belonging to their infrastructure. These SR- - enabled overlay packets will thus never be filtered by the transit - operators. - - In all cases, transit packets (i.e.: packets whose DA is outside the - domain of the operator's network) will be forwarded accordingly - without introducing any security concern in the operator's network. - This is similar to tunneled packets. - -3. Segment Routing Extension Header (SRH) - - A new type of the Routing Header (originally defined in [RFC2460]) is - defined: the Segment Routing Header (SRH) which has a new Routing - Type, (suggested value 4) to be assigned by IANA. - - The Segment Routing Header (SRH) is defined as follows: - - - - - - - -Previdi, et al. Expires August 5, 2017 [Page 7] - -Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 - - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Next Header | Hdr Ext Len | Routing Type | Segments Left | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | First Segment | Flags | RESERVED | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | - | Segment List[0] (128 bits IPv6 address) | - | | - | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | - | | - ... - | | - | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | - | Segment List[n] (128 bits IPv6 address) | - | | - | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - // // - // Optional Type Length Value objects (variable) // - // // - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - where: - - o Next Header: 8-bit selector. Identifies the type of header - immediately following the SRH. - - o Hdr Ext Len: 8-bit unsigned integer, is the length of the SRH - header in 8-octet units, not including the first 8 octets. - - o Routing Type: TBD, to be assigned by IANA (suggested value: 4). - - o Segments Left. Defined in [RFC2460], it contains the index, in - the Segment List, of the next segment to inspect. Segments Left - is decremented at each segment. - - o First Segment: contains the index, in the Segment List, of the - first segment of the path which is in fact the last element of the - Segment List. - - o Flags: 8 bits of flags. Following flags are defined: - - - - -Previdi, et al. Expires August 5, 2017 [Page 8] - -Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 - - - 0 1 2 3 4 5 6 7 - +-+-+-+-+-+-+-+-+ - |U|P|O|A|H| U | - +-+-+-+-+-+-+-+-+ - - U: Unused and for future use. SHOULD be unset on transmission - and MUST be ignored on receipt. - - P-flag: Protected flag. Set when the packet has been rerouted - through FRR mechanism by an SR endpoint node. - - O-flag: OAM flag. When set, it indicates that this packet is - an operations and management (OAM) packet. - - A-flag: Alert flag. If present, it means important Type Length - Value (TLV) objects are present. See Section 3.1 for details - on TLVs objects. - - H-flag: HMAC flag. 
If set, the HMAC TLV is present and is - encoded as the last TLV of the SRH. In other words, the last - 36 octets of the SRH represent the HMAC information. See - Section 3.1.5 for details on the HMAC TLV. - - o RESERVED: SHOULD be unset on transmission and MUST be ignored on - receipt. - - o Segment List[n]: 128 bit IPv6 addresses representing the nth - segment in the Segment List. The Segment List is encoded starting - from the last segment of the path. I.e., the first element of the - segment list (Segment List [0]) contains the last segment of the - path while the last segment of the Segment List (Segment List[n]) - contains the first segment of the path. The index contained in - "Segments Left" identifies the current active segment. - - o Type Length Value (TLV) are described in Section 3.1. - -3.1. SRH TLVs - - This section defines TLVs of the Segment Routing Header. - - Type Length Value (TLV) contain optional information that may be used - by the node identified in the DA of the packet. It has to be noted - that the information carried in the TLVs is not intended to be used - by the routing layer. Typically, TLVs carry information that is - consumed by other components (e.g.: OAM) than the routing function. - - Each TLV has its own length, format and semantic. The code-point - allocated (by IANA) to each TLV defines both the format and the - - - -Previdi, et al. Expires August 5, 2017 [Page 9] - -Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 - - - semantic of the information carried in the TLV. Multiple TLVs may be - encoded in the same SRH. - - The "Length" field of the TLV is primarily used to skip the TLV while - inspecting the SRH in case the node doesn't support or recognize the - TLV codepoint. The "Length" defines the TLV length in octets and not - including the "Type" and "Length" fields. - - The primary scope of TLVs is to give the receiver of the packet - information related to the source routed path (e.g.: where the packet - entered in the SR domain and where it is expected to exit). - - Additional TLVs may be defined in the future. - -3.1.1. Ingress Node TLV - - The Ingress Node TLV is optional and identifies the node this packet - traversed when entered the SR domain. The Ingress Node TLV has - following format: - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Type | Length | RESERVED | Flags | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | - | Ingress Node (16 octets) | - | | - | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - where: - - o Type: to be assigned by IANA (suggested value 1). - - o Length: 18. - - o RESERVED: 8 bits. SHOULD be unset on transmission and MUST be - ignored on receipt. - - o Flags: 8 bits. No flags are defined in this document. - - o Ingress Node: 128 bits. Defines the node where the packet is - expected to enter the SR domain. In the encapsulation case - described in Section 2.2.1, this information corresponds to the SA - of the encapsulating header. - - - - - -Previdi, et al. Expires August 5, 2017 [Page 10] - -Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 - - -3.1.2. Egress Node TLV - - The Egress Node TLV is optional and identifies the node this packet - is expected to traverse when exiting the SR domain. 
The Egress Node - TLV has following format: - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Type | Length | RESERVED | Flags | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | - | Egress Node (16 octets) | - | | - | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - where: - - o Type: to be assigned by IANA (suggested value 2). - - o Length: 18. - - o RESERVED: 8 bits. SHOULD be unset on transmission and MUST be - ignored on receipt. - - o Flags: 8 bits. No flags are defined in this document. - - o Egress Node: 128 bits. Defines the node where the packet is - expected to exit the SR domain. In the encapsulation case - described in Section 2.2.1, this information corresponds to the - last segment of the SRH in the encapsulating header. - -3.1.3. Opaque Container TLV - - The Opaque Container TLV is optional and has the following format: - - - - - - - - - - - - - - - -Previdi, et al. Expires August 5, 2017 [Page 11] - -Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 - - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Type | Length | RESERVED | Flags | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | - | Opaque Container (16 octets) | - | | - | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - where: - - o Type: to be assigned by IANA (suggested value 3). - - o Length: 18. - - o RESERVED: 8 bits. SHOULD be unset on transmission and MUST be - ignored on receipt. - - o Flags: 8 bits. No flags are defined in this document. - - o Opaque Container: 128 bits of opaque data not relevant for the - routing layer. Typically, this information is consumed by a non- - routing component of the node receiving the packet (i.e.: the node - in the DA). - -3.1.4. Padding TLV - - The Padding TLV is optional and with the purpose of aligning the SRH - on a 8 octet boundary. The Padding TLV has the following format: - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Type | Length | Padding (variable) | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - // Padding (variable) // - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - where: - - o Type: to be assigned by IANA (suggested value 4). - - o Length: 1 to 7 - - - - - - -Previdi, et al. Expires August 5, 2017 [Page 12] - -Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 - - - o Padding: from 1 to 7 octets of padding. Padding bits have no - semantic. They SHOULD be set to 0 on transmission and MUST be - ignored on receipt. - - The following applies to the Padding TLV: - - o Padding TLV is optional and MAY only appear once in the SRH. If - present, it MUST have a length between 1 and 7 octets. - - o The Padding TLV is used in order to align the SRH total length on - the 8 octet boundary. - - o When present, the Padding TLV MUST appear as the last TLV before - the HMAC TLV (if HMAC TLV is present). - - o When present, the Padding TLV MUST have a length from 1 to 7 in - order to align the SRH total lenght on a 8-octet boundary. - - o When a router inspecting the SRH encounters the Padding TLV, it - MUST assume that no other TLV (other than the HMAC) follow the - Padding TLV. - -3.1.5. 
HMAC TLV - - HMAC TLV is optional and contains the HMAC information. The HMAC TLV - has the following format: - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Type | Length | RESERVED | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | HMAC Key ID (4 octets) | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | // - | HMAC (32 octets) // - | // - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - where: - - o Type: to be assigned by IANA (suggested value 5). - - o Length: 38. - - o RESERVED: 2 octets. SHOULD be unset on transmission and MUST be - ignored on receipt. - - - - -Previdi, et al. Expires August 5, 2017 [Page 13] - -Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 - - - o HMAC Key ID: 4 octets. - - o HMAC: 32 octets. - - o HMAC and HMAC Key ID usage is described in Section 5 - - The Following applies to the HMAC TLV: - - o When present, the HMAC TLV MUST be encoded as the last TLV of the - SRH. - - o If the HMAC TLV is present, the SRH H-Flag (Figure 4) MUST be set. - - o When the H-flag is set in the SRH, the router inspecting the SRH - MUST find the HMAC TLV in the last 38 octets of the SRH. - -3.2. SRH and RFC2460 behavior - - The SRH being a new type of the Routing Header, it also has the same - properties: - - SHOULD only appear once in the packet. - - Only the router whose address is in the DA field of the packet - header MUST inspect the SRH. - - Therefore, Segment Routing in IPv6 networks implies that the segment - identifier (i.e.: the IPv6 address of the segment) is moved into the - DA of the packet. - - The DA of the packet changes at each segment termination/completion - and therefore the final DA of the packet MUST be encoded as the last - segment of the path. - -4. SRH Procedures - - In this section we describe the different procedures on the SRH. - -4.1. Source SR Node - - A Source SR Node can be any node originating an IPv6 packet with its - IPv6 and Segment Routing Headers. This include either: - - A host originating an IPv6 packet. - - An SR domain ingress router encapsulating a received IPv6 packet - into an outer IPv6 header followed by an SRH. - - - - -Previdi, et al. Expires August 5, 2017 [Page 14] - -Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 - - - The mechanism through which a Segment List is derived is outside of - the scope of this document. As an example, the Segment List may be - obtained through: - - Local path computation. - - Local configuration. - - Interaction with a centralized controller delivering the path. - - Any other mechanism. - - The following are the steps of the creation of the SRH: - - Next Header and Hdr Ext Len fields are set according to [RFC2460]. - - Routing Type field is set as TBD (to be allocated by IANA, - suggested value 4). - - The Segment List is built with the FIRST segment of the path - encoded in the LAST element of the Segment List. Subsequent - segments are encoded on top of the first segment. Finally, the - LAST segment of the path is encoded in the FIRST element of the - Segment List. In other words, the Segment List is encoded in the - reverse order of the path. - - The final DA of the packet is encoded as the last segment of the - path (encoded in the first element of the Segment List). - - The DA of the packet is set with the value of the first segment - (found in the last element of the segment list). 
- - The Segments Left field is set to n-1 where n is the number of - elements in the Segment List. - - The First Segment field is set to n-1 where n is the number of - elements in the Segment List. - - The packet is sent out towards the first segment (i.e.: - represented in the packet DA). - - HMAC TLV may be set according to Section 5. - -4.2. Transit Node - - According to [RFC2460], the only node who is allowed to inspect the - Routing Extension Header (and therefore the SRH), is the node - corresponding to the DA of the packet. Any other transit node MUST - - - -Previdi, et al. Expires August 5, 2017 [Page 15] - -Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 - - - NOT inspect the underneath routing header and MUST forward the packet - towards the DA and according to the IPv6 routing table. - - In the example case described in Section 2.2.2, when SR capable nodes - are connected through an overlay spanning multiple third-party - infrastructure, it is safe to send SRH packets (i.e.: packet having a - Segment Routing Header) between each other overlay/SR-capable nodes - as long as the segment list does not include any of the transit - provider nodes. In addition, as a generic security measure, any - service provider will block any packet destined to one of its - internal routers, especially if these packets have an extended header - in it. - -4.3. SR Segment Endpoint Node - - The SR segment endpoint node is the node whose address is in the DA. - The segment endpoint node inspects the SRH and does: - - 1. IF DA = myself (segment endpoint) - 2. IF Segments Left > 0 THEN - decrement Segments Left - update DA with Segment List[Segments Left] - 3. ELSE continue IPv6 processing of the packet - End of processing. - 4. Forward the packet out - -5. Security Considerations - - This section analyzes the security threat model, the security issues - and proposed solutions related to the new Segment Routing Header. - - The Segment Routing Header (SRH) is simply another type of the - routing header as described in RFC 2460 [RFC2460] and is: - - o Added by an SR edge router when entering the segment routing - domain or by the originating host itself. The source host can - even be outside the SR domain; - - o inspected and acted upon when reaching the destination address of - the IP header per RFC 2460 [RFC2460]. - - Per RFC2460 [RFC2460], routers on the path that simply forward an - IPv6 packet (i.e. the IPv6 destination address is none of theirs) - will never inspect and process the content of the SRH. Routers whose - one interface IPv6 address equals the destination address field of - the IPv6 packet MUST parse the SRH and, if supported and if the local - configuration allows it, MUST act accordingly to the SRH content. - - - - -Previdi, et al. Expires August 5, 2017 [Page 16] - -Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 - - - According to RFC2460 [RFC2460], the default behavior of a non SR- - capable router upon receipt of an IPv6 packet with SRH destined to an - address of its, is to: - - o ignore the SRH completely if the Segment Left field is 0 and - proceed to process the next header in the IPv6 packet; - - o discard the IPv6 packet if Segment Left field is greater than 0, - it MAY send a Parameter Problem ICMP message back to the Source - Address. - -5.1. Threat model - -5.1.1. 
Source routing threats - - Using an SRH is similar to source routing, therefore it has some - well-known security issues as described in RFC4942 [RFC4942] section - 2.1.1 and RFC5095 [RFC5095]: - - o amplification attacks: where a packet could be forged in such a - way to cause looping among a set of SR-enabled routers causing - unnecessary traffic, hence a Denial of Service (DoS) against - bandwidth; - - o reflection attack: where a hacker could force an intermediate node - to appear as the immediate attacker, hence hiding the real - attacker from naive forensic; - - o bypass attack: where an intermediate node could be used as a - stepping stone (for example in a De-Militarized Zone) to attack - another host (for example in the datacenter or any back-end - server). - -5.1.2. Applicability of RFC 5095 to SRH - - First of all, the reader must remember this specific part of section - 1 of RFC5095 [RFC5095], "A side effect is that this also eliminates - benign RH0 use-cases; however, such applications may be facilitated - by future Routing Header specifications.". In short, it is not - forbidden to create new secure type of Routing Header; for example, - RFC 6554 (RPL) [RFC6554] also creates a new Routing Header type for a - specific application confined in a single network. - - In the segment routing architecture described in - [I-D.ietf-spring-segment-routing] there are basically two kinds of - nodes (routers and hosts): - - - - - -Previdi, et al. Expires August 5, 2017 [Page 17] - -Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 - - - o nodes within the SR domain, which is within one single - administrative domain, i.e., where all nodes are trusted anyway - else the damage caused by those nodes could be worse than - amplification attacks: traffic interception, man-in-the-middle - attacks, more server DoS by dropping packets, and so on. - - o nodes outside of the SR domain, which is outside of the - administrative segment routing domain hence they cannot be trusted - because there is no physical security for those nodes, i.e., they - can be replaced by hostile nodes or can be coerced in wrong - behaviors. - - The main use case for SR consists of the single administrative domain - where only trusted nodes with SR enabled and configured participate - in SR: this is the same model as in RFC6554 [RFC6554]. All non- - trusted nodes do not participate as either SR processing is not - enabled by default or because they only process SRH from nodes within - their domain. - - Moreover, all SR nodes ignore SRH created by outsiders based on - topology information (received on a peering or internal interface) or - on presence and validity of the HMAC field. Therefore, if - intermediate nodes ONLY act on valid and authorized SRH (such as - within a single administrative domain), then there is no security - threat similar to RH-0. Hence, the RFC 5095 [RFC5095] attacks are - not applicable. - -5.1.3. Service stealing threat - - Segment routing is used for added value services, there is also a - need to prevent non-participating nodes to use those services; this - is called 'service stealing prevention'. - -5.1.4. Topology disclosure - - The SRH may also contains IPv6 addresses of some intermediate SR- - nodes in the path towards the destination, this obviously reveals - those addresses to the potentially hostile attackers if those - attackers are able to intercept packets containing SRH. 
On the other - hand, if the attacker can do a traceroute whose probes will be - forwarded along the SR path, then there is little learned by - intercepting the SRH itself. - -5.1.5. ICMP Generation - - Per section 4.4 of RFC2460 [RFC2460], when destination nodes (i.e. - where the destination address is one of theirs) receive a Routing - Header with unsupported Routing Type, the required behavior is: - - - -Previdi, et al. Expires August 5, 2017 [Page 18] - -Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 - - - o If Segments Left is zero, the node must ignore the Routing header - and proceed to process the next header in the packet. - - o If Segments Left is non-zero, the node must discard the packet and - send an ICMP Parameter Problem, Code 0, message to the packet's - Source Address, pointing to the unrecognized Routing Type. - - This required behavior could be used by an attacker to force the - generation of ICMP message by any node. The attacker could send - packets with SRH (with Segment Left set to 0) destined to a node not - supporting SRH. Per RFC2460 [RFC2460], the destination node could - generate an ICMP message, causing a local CPU utilization and if the - source of the offending packet with SRH was spoofed could lead to a - reflection attack without any amplification. - - It must be noted that this is a required behavior for any unsupported - Routing Type and not limited to SRH packets. So, it is not specific - to SRH and the usual rate limiting for ICMP generation is required - anyway for any IPv6 implementation and has been implemented and - deployed for many years. - -5.2. Security fields in SRH - - This section summarizes the use of specific fields in the SRH. They - are based on a key-hashed message authentication code (HMAC). - - The security-related fields in the SRH are instantiated by the HMAC - TLV, containing: - - o HMAC Key-id, 32 bits wide; - - o HMAC, 256 bits wide (optional, exists only if HMAC Key-id is not - 0). - - The HMAC field is the output of the HMAC computation (per RFC 2104 - [RFC2104]) using a pre-shared key identified by HMAC Key-id and of - the text which consists of the concatenation of: - - o the source IPv6 address; - - o First Segment field; - - o an octet of bit flags; - - o HMAC Key-id; - - o all addresses in the Segment List. - - - - -Previdi, et al. Expires August 5, 2017 [Page 19] - -Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 - - - The purpose of the HMAC TLV is to verify the validity, the integrity - and the authorization of the SRH itself. If an outsider of the SR - domain does not have access to a current pre-shared secret, then it - cannot compute the right HMAC field and the first SR router on the - path processing the SRH and configured to check the validity of the - HMAC will simply reject the packet. - - The HMAC TLV is located at the end of the SRH simply because only the - router on the ingress of the SR domain needs to process it, then all - other SR nodes can ignore it (based on local policy) because they - trust the upstream router. This is to speed up forwarding operations - because SR routers which do not validate the SRH do not need to parse - the SRH until the end. - - The HMAC Key-id field allows for the simultaneous existence of - several hash algorithms (SHA-256, SHA3-256 ... or future ones) as - well as pre-shared keys. 
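
   As an illustration of the computation described above, the following
   non-normative C sketch builds the text to be authenticated (source
   address, First Segment, flags octet, HMAC Key-id, then the full
   Segment List) and derives the 32-octet HMAC with SHA-256.  It assumes
   an OpenSSL-style HMAC()/EVP_sha256() API; the function and parameter
   names are hypothetical and are not part of this specification or of
   the VPP sources in this patch.

   #include <stdint.h>
   #include <string.h>
   #include <openssl/evp.h>
   #include <openssl/hmac.h>   /* HMAC(), EVP_sha256() */

   /* 'segments' points to n_segments IPv6 addresses (16 octets each);
    * 'key'/'key_len' is the pre-shared secret selected by 'key_id',
    * which is assumed to already be in network byte order. */
   static int
   srh_hmac_sha256 (const uint8_t src_addr[16], uint8_t first_segment,
                    uint8_t flags, uint32_t key_id,
                    const uint8_t *segments, uint8_t n_segments,
                    const uint8_t *key, int key_len, uint8_t hmac_out[32])
   {
     /* source address + First Segment + flags + Key-id + Segment List */
     uint8_t text[16 + 1 + 1 + 4 + 255 * 16];
     size_t off = 0;
     unsigned int md_len = 0;

     memcpy (text + off, src_addr, 16);
     off += 16;
     text[off++] = first_segment;
     text[off++] = flags;
     memcpy (text + off, &key_id, 4);
     off += 4;
     memcpy (text + off, segments, (size_t) n_segments * 16);
     off += (size_t) n_segments * 16;

     if (!HMAC (EVP_sha256 (), key, key_len, text, off, hmac_out, &md_len))
       return -1;
     /* SHA-256 already yields exactly 256 bits, so no truncation is
      * needed; a wider hash would be cut to its 256 least-significant
      * bits before being placed in the HMAC field. */
     return 0;
   }
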
The HMAC Key-id field is opaque, i.e., it - has neither syntax nor semantic except as an index to the right - combination of pre-shared key and hash algorithm and except that a - value of 0 means that there is no HMAC field. Having an HMAC Key-id - field allows for pre-shared key roll-over when two pre-shared keys - are supported for a while when all SR nodes converged to a fresher - pre-shared key. It could also allow for interoperation among - different SR domains if allowed by local policy and assuming a - collision-free HMAC Key Id allocation. - - When a specific SRH is linked to a time-related service (such as - turbo-QoS for a 1-hour period) where the DA, Segment ID (SID) are - identical, then it is important to refresh the shared-secret - frequently as the HMAC validity period expires only when the HMAC - Key-id and its associated shared-secret expires. - -5.2.1. Selecting a hash algorithm - - The HMAC field in the HMAC TLV is 256 bit wide. Therefore, the HMAC - MUST be based on a hash function whose output is at least 256 bits. - If the output of the hash function is 256, then this output is simply - inserted in the HMAC field. If the output of the hash function is - larger than 256 bits, then the output value is truncated to 256 by - taking the least-significant 256 bits and inserting them in the HMAC - field. - - SRH implementations can support multiple hash functions but MUST - implement SHA-2 [FIPS180-4] in its SHA-256 variant. - - NOTE: SHA-1 is currently used by some early implementations used for - quick interoperations testing, the 160-bit hash value must then be - - - - -Previdi, et al. Expires August 5, 2017 [Page 20] - -Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 - - - right-hand padded with 96 bits set to 0. The authors understand that - this is not secure but is ok for limited tests. - -5.2.2. Performance impact of HMAC - - While adding an HMAC to each and every SR packet increases the - security, it has a performance impact. Nevertheless, it must be - noted that: - - o the HMAC field is used only when SRH is added by a device (such as - a home set-up box) which is outside of the segment routing domain. - If the SRH is added by a router in the trusted segment routing - domain, then, there is no need for an HMAC field, hence no - performance impact. - - o when present, the HMAC field MUST only be checked and validated by - the first router of the segment routing domain, this router is - named 'validating SR router'. Downstream routers may not inspect - the HMAC field. - - o this validating router can also have a cache of to improve the performance. It is not the - same use case as in IPsec where HMAC value was unique per packet, - in SRH, the HMAC value is unique per flow. - - o Last point, hash functions such as SHA-2 have been optimized for - security and performance and there are multiple implementations - with good performance. - - With the above points in mind, the performance impact of using HMAC - is minimized. - -5.2.3. Pre-shared key management - - The field HMAC Key-id allows for: - - o key roll-over: when there is a need to change the key (the hash - pre-shared secret), then multiple pre-shared keys can be used - simultaneously. The validating routing can have a table of for the currently active and future - keys. 
- - o different algorithms: by extending the previous table to , the validating router - can also support simultaneously several hash algorithms (see - section Section 5.2.1) - - The pre-shared secret distribution can be done: - - - -Previdi, et al. Expires August 5, 2017 [Page 21] - -Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 - - - o in the configuration of the validating routers, either by static - configuration or any SDN oriented approach; - - o dynamically using a trusted key distribution such as [RFC6407] - - The intent of this document is NOT to define yet-another-key- - distribution-protocol. - -5.3. Deployment Models - -5.3.1. Nodes within the SR domain - - An SR domain is defined as a set of interconnected routers where all - routers at the perimeter are configured to add and act on SRH. Some - routers inside the SR domain can also act on SRH or simply forward - IPv6 packets. - - The routers inside an SR domain can be trusted to generate SRH and to - process SRH received on interfaces that are part of the SR domain. - These nodes MUST drop all SRH packets received on an interface that - is not part of the SR domain and containing an SRH whose HMAC field - cannot be validated by local policies. This includes obviously - packet with an SRH generated by a non-cooperative SR domain. - - If the validation fails, then these packets MUST be dropped, ICMP - error messages (parameter problem) SHOULD be generated (but rate - limited) and SHOULD be logged. - -5.3.2. Nodes outside of the SR domain - - Nodes outside of the SR domain cannot be trusted for physical - security; hence, they need to request by some trusted means (outside - of the scope of this document) a complete SRH for each new connection - (i.e. new destination address). The received SRH MUST include an - HMAC TLV which is computed correctly (see Section 5.2). - - When an outside node sends a packet with an SRH and towards an SR - domain ingress node, the packet MUST contain the HMAC TLV (with a - Key-id and HMAC fields) and the the destination address MUST be an - address of an SR domain ingress node . - - The ingress SR router, i.e., the router with an interface address - equals to the destination address, MUST verify the HMAC TLV. - - If the validation is successful, then the packet is simply forwarded - as usual for an SR packet. As long as the packet travels within the - SR domain, no further HMAC check needs to be done. Subsequent - - - - -Previdi, et al. Expires August 5, 2017 [Page 22] - -Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 - - - routers in the SR domain MAY verify the HMAC TLV when they process - the SRH (i.e. when they are the destination). - - If the validation fails, then this packet MUST be dropped, an ICMP - error message (parameter problem) SHOULD be generated (but rate - limited) and SHOULD be logged. - -5.3.3. SR path exposure - - As the intermediate SR nodes addresses appears in the SRH, if this - SRH is visible to an outsider then he/she could reuse this knowledge - to launch an attack on the intermediate SR nodes or get some insider - knowledge on the topology. This is especially applicable when the - path between the source node and the first SR domain ingress router - is on the public Internet. - - The first remark is to state that 'security by obscurity' is never - enough; in other words, the security policy of the SR domain MUST - assume that the internal topology and addressing is known by the - attacker. 
A simple traceroute will also give the same information - (with even more information as all intermediate nodes between SID - will also be exposed). IPsec Encapsulating Security Payload - [RFC4303] cannot be use to protect the SRH as per RFC4303 the ESP - header must appear after any routing header (including SRH). - - To prevent a user to leverage the gained knowledge by intercepting - SRH, it it recommended to apply an infrastructure Access Control List - (iACL) at the edge of the SR domain. This iACL will drop all packets - from outside the SR-domain whose destination is any address of any - router inside the domain. This security policy should be tuned for - local operations. - -5.3.4. Impact of BCP-38 - - BCP-38 [RFC2827], also known as "Network Ingress Filtering", checks - whether the source address of packets received on an interface is - valid for this interface. The use of loose source routing such as - SRH forces packets to follow a path which differs from the expected - routing. Therefore, if BCP-38 was implemented in all routers inside - the SR domain, then SR packets could be received by an interface - which is not expected one and the packets could be dropped. - - As an SR domain is usually a subset of one administrative domain, and - as BCP-38 is only deployed at the ingress routers of this - administrative domain and as packets arriving at those ingress - routers have been normally forwarded using the normal routing - information, then there is no reason why this ingress router should - - - - -Previdi, et al. Expires August 5, 2017 [Page 23] - -Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 - - - drop the SRH packet based on BCP-38. Routers inside the domain - commonly do not apply BCP-38; so, this is not a problem. - -6. IANA Considerations - - This document makes the following registrations in the Internet - Protocol Version 6 (IPv6) Parameters "Routing Type" registry - maintained by IANA: - - Suggested Description Reference - Value - ---------------------------------------------------------- - 4 Segment Routing Header (SRH) This document - - In addition, this document request IANA to create and maintain a new - Registry: "Segment Routing Header Type-Value Objects". The following - code-points are requested from the registry: - - Registry: Segment Routing Header Type-Value Objects - - Suggested Description Reference - Value - ----------------------------------------------------- - 1 Ingress Node TLV This document - 2 Egress Node TLV This document - 3 Opaque Container TLV This document - 4 Padding TLV This document - 5 HMAC TLV This document - -7. Manageability Considerations - - TBD - -8. Contributors - - Dave Barach, John Leddy, John Brzozowski, Pierre Francois, Nagendra - Kumar, Mark Townsley, Christian Martin, Roberta Maglione, James - Connolly, Aloys Augustin contributed to the content of this document. - -9. Acknowledgements - - The authors would like to thank Ole Troan, Bob Hinden, Fred Baker, - Brian Carpenter, Alexandru Petrescu and Punit Kumar Jaiswal for their - comments to this document. - - - - - - - -Previdi, et al. Expires August 5, 2017 [Page 24] - -Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 - - -10. References - -10.1. Normative References - - [FIPS180-4] - National Institute of Standards and Technology, "FIPS - 180-4 Secure Hash Standard (SHS)", March 2012, - . - - [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate - Requirement Levels", BCP 14, RFC 2119, - DOI 10.17487/RFC2119, March 1997, - . 
- - [RFC2460] Deering, S. and R. Hinden, "Internet Protocol, Version 6 - (IPv6) Specification", RFC 2460, DOI 10.17487/RFC2460, - December 1998, . - - [RFC4303] Kent, S., "IP Encapsulating Security Payload (ESP)", - RFC 4303, DOI 10.17487/RFC4303, December 2005, - . - - [RFC5095] Abley, J., Savola, P., and G. Neville-Neil, "Deprecation - of Type 0 Routing Headers in IPv6", RFC 5095, - DOI 10.17487/RFC5095, December 2007, - . - - [RFC6407] Weis, B., Rowles, S., and T. Hardjono, "The Group Domain - of Interpretation", RFC 6407, DOI 10.17487/RFC6407, - October 2011, . - -10.2. Informative References - - [I-D.ietf-isis-segment-routing-extensions] - Previdi, S., Filsfils, C., Bashandy, A., Gredler, H., - Litkowski, S., Decraene, B., and j. jefftant@gmail.com, - "IS-IS Extensions for Segment Routing", draft-ietf-isis- - segment-routing-extensions-09 (work in progress), October - 2016. - - [I-D.ietf-ospf-ospfv3-segment-routing-extensions] - Psenak, P., Previdi, S., Filsfils, C., Gredler, H., - Shakir, R., Henderickx, W., and J. Tantsura, "OSPFv3 - Extensions for Segment Routing", draft-ietf-ospf-ospfv3- - segment-routing-extensions-07 (work in progress), October - 2016. - - - - -Previdi, et al. Expires August 5, 2017 [Page 25] - -Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 - - - [I-D.ietf-spring-ipv6-use-cases] - Brzozowski, J., Leddy, J., Townsley, W., Filsfils, C., and - R. Maglione, "IPv6 SPRING Use Cases", draft-ietf-spring- - ipv6-use-cases-08 (work in progress), January 2017. - - [I-D.ietf-spring-resiliency-use-cases] - Filsfils, C., Previdi, S., Decraene, B., and R. Shakir, - "Resiliency use cases in SPRING networks", draft-ietf- - spring-resiliency-use-cases-08 (work in progress), October - 2016. - - [I-D.ietf-spring-segment-routing] - Filsfils, C., Previdi, S., Decraene, B., Litkowski, S., - and R. Shakir, "Segment Routing Architecture", draft-ietf- - spring-segment-routing-10 (work in progress), November - 2016. - - [I-D.ietf-spring-segment-routing-mpls] - Filsfils, C., Previdi, S., Bashandy, A., Decraene, B., - Litkowski, S., Horneffer, M., Shakir, R., - jefftant@gmail.com, j., and E. Crabbe, "Segment Routing - with MPLS data plane", draft-ietf-spring-segment-routing- - mpls-06 (work in progress), January 2017. - - [RFC1940] Estrin, D., Li, T., Rekhter, Y., Varadhan, K., and D. - Zappala, "Source Demand Routing: Packet Format and - Forwarding Specification (Version 1)", RFC 1940, - DOI 10.17487/RFC1940, May 1996, - . - - [RFC2104] Krawczyk, H., Bellare, M., and R. Canetti, "HMAC: Keyed- - Hashing for Message Authentication", RFC 2104, - DOI 10.17487/RFC2104, February 1997, - . - - [RFC2827] Ferguson, P. and D. Senie, "Network Ingress Filtering: - Defeating Denial of Service Attacks which employ IP Source - Address Spoofing", BCP 38, RFC 2827, DOI 10.17487/RFC2827, - May 2000, . - - [RFC4942] Davies, E., Krishnan, S., and P. Savola, "IPv6 Transition/ - Co-existence Security Considerations", RFC 4942, - DOI 10.17487/RFC4942, September 2007, - . - - - - - - - -Previdi, et al. Expires August 5, 2017 [Page 26] - -Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 - - - [RFC6554] Hui, J., Vasseur, JP., Culler, D., and V. Manral, "An IPv6 - Routing Header for Source Routes with the Routing Protocol - for Low-Power and Lossy Networks (RPL)", RFC 6554, - DOI 10.17487/RFC6554, March 2012, - . - - [RFC7855] Previdi, S., Ed., Filsfils, C., Ed., Decraene, B., - Litkowski, S., Horneffer, M., and R. 
Shakir, "Source - Packet Routing in Networking (SPRING) Problem Statement - and Requirements", RFC 7855, DOI 10.17487/RFC7855, May - 2016, . - -Authors' Addresses - - Stefano Previdi (editor) - Cisco Systems, Inc. - Via Del Serafico, 200 - Rome 00142 - Italy - - Email: sprevidi@cisco.com - - - Clarence Filsfils - Cisco Systems, Inc. - Brussels - BE - - Email: cfilsfil@cisco.com - - - Brian Field - Comcast - 4100 East Dry Creek Road - Centennial, CO 80122 - US - - Email: Brian_Field@cable.comcast.com - - - Ida Leung - Rogers Communications - 8200 Dixie Road - Brampton, ON L6T 0C1 - CA - - Email: Ida.Leung@rci.rogers.com - - - - -Previdi, et al. Expires August 5, 2017 [Page 27] - -Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 - - - Jen Linkova - Google - 1600 Amphitheatre Parkway - Mountain View, CA 94043 - US - - Email: furry@google.com - - - Ebben Aries - Facebook - US - - Email: exa@fb.com - - - Tomoya Kosugi - NTT - 3-9-11, Midori-Cho Musashino-Shi, - Tokyo 180-8585 - JP - - Email: kosugi.tomoya@lab.ntt.co.jp - - - Eric Vyncke - Cisco Systems, Inc. - De Kleetlaann 6A - Diegem 1831 - Belgium - - Email: evyncke@cisco.com - - - David Lebrun - Universite Catholique de Louvain - Place Ste Barbe, 2 - Louvain-la-Neuve, 1348 - Belgium - - Email: david.lebrun@uclouvain.be - - - - - - - - - - -Previdi, et al. Expires August 5, 2017 [Page 28] \ No newline at end of file diff --git a/src/vnet/sr/sr.api b/src/vnet/sr/sr.api deleted file mode 100644 index 9e900741..00000000 --- a/src/vnet/sr/sr.api +++ /dev/null @@ -1,168 +0,0 @@ -/* - * Copyright (c) 2015-2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** \brief IPv6 SR LocalSID add/del request - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param is_del Boolean of whether its a delete instruction - @param localsid_addr IPv6 address of the localsid - @param end_psp Boolean of whether decapsulation is allowed in this function - @param behavior Type of behavior (function) for this localsid - @param sw_if_index Only for L2/L3 xconnect. OIF. In VRF variant the fib_table. - @param vlan_index Only for L2 xconnect. Outgoing VLAN tag. - @param fib_table FIB table in which we should install the localsid entry - @param nh_addr Next Hop IPv4/IPv6 address. Only for L2/L3 xconnect. -*/ -autoreply define sr_localsid_add_del -{ - u32 client_index; - u32 context; - u8 is_del; - u8 localsid_addr[16]; - u8 end_psp; - u8 behavior; - u32 sw_if_index; - u32 vlan_index; - u32 fib_table; - u8 nh_addr[16]; -}; - -/** \brief IPv6 SR policy add - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param bsid is the bindingSID of the SR Policy - @param weight is the weight of the sid list. optional. - @param is_encap is the behavior of the SR policy. (0.SRH insert // 1.Encapsulation) - @param type is the type of the SR policy. 
(0.Default // 1.Spray) - @param fib_table is the VRF where to install the FIB entry for the BSID - @param segments is a vector of IPv6 address composing the segment list -*/ -autoreply define sr_policy_add -{ - u32 client_index; - u32 context; - u8 bsid_addr[16]; - u32 weight; - u8 is_encap; - u8 type; - u32 fib_table; - u8 n_segments; - u8 segments[0]; -}; - -/** \brief IPv6 SR policy modification - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param bsid is the bindingSID of the SR Policy - @param sr_policy_index is the index of the SR policy - @param fib_table is the VRF where to install the FIB entry for the BSID - @param operation is the operation to perform (among the top ones) - @param segments is a vector of IPv6 address composing the segment list - @param sl_index is the index of the Segment List to modify/delete - @param weight is the weight of the sid list. optional. - @param is_encap Mode. Encapsulation or SRH insertion. -*/ -autoreply define sr_policy_mod -{ - u32 client_index; - u32 context; - u8 bsid_addr[16]; - u32 sr_policy_index; - u32 fib_table; - u8 operation; - u32 sl_index; - u32 weight; - u8 n_segments; - u8 segments[0]; -}; - -/** \brief IPv6 SR policy deletion - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param bsid is the bindingSID of the SR Policy - @param index is the index of the SR policy -*/ -autoreply define sr_policy_del -{ - u32 client_index; - u32 context; - u8 bsid_addr[16]; - u32 sr_policy_index; -}; - -/** \brief IPv6 SR steering add/del - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param is_del - @param bsid is the bindingSID of the SR Policy (alt to sr_policy_index) - @param sr_policy is the index of the SR Policy (alt to bsid) - @param table_id is the VRF where to install the FIB entry for the BSID - @param prefix is the IPv4/v6 address for L3 traffic type - @param mask_width is the mask for L3 traffic type - @param sw_if_index is the incoming interface for L2 traffic - @param traffic_type describes the type of traffic -*/ -autoreply define sr_steering_add_del -{ - u32 client_index; - u32 context; - u8 is_del; - u8 bsid_addr[16]; - u32 sr_policy_index; - u32 table_id; - u8 prefix_addr[16]; - u32 mask_width; - u32 sw_if_index; - u8 traffic_type; -}; - -/** \brief Dump the list of SR LocalSIDs - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request -*/ -/**define sr_localsids_dump -{ - u32 client_index; - u32 context; -};*/ - -/** \brief Details about a single SR LocalSID - @param context - returned sender context, to match reply w/ request - @param localsid_addr IPv6 address of the localsid - @param behavior Type of behavior (function) for this localsid - @param end_psp Boolean of whether decapsulation is allowed in this function - @param sw_if_index Only for L2/L3 xconnect. OIF. In VRF variant the fib_table. - @param vlan_index Only for L2 xconnect. Outgoing VLAN tag. - @param fib_table FIB table in which we should install the localsid entry - @param nh_addr Next Hop IPv4/IPv6 address. Only for L2/L3 xconnect. 
-*/ -/**manual_endian define sr_localsid_details -{ - u32 context; - u8 localsid_addr[16]; - u8 behavior; - u8 end_psp; - u32 sw_if_index; - u32 vlan_index; - u32 fib_table; - u8 nh_addr[16]; -};*/ - -/* - * fd.io coding-style-patch-verification: ON - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/sr/sr.c b/src/vnet/sr/sr.c deleted file mode 100755 index 34344fce..00000000 --- a/src/vnet/sr/sr.c +++ /dev/null @@ -1,57 +0,0 @@ -/* - * sr.c: ipv6 segment routing - * - * Copyright (c) 2013 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file - * @brief Segment Routing initialization - * - */ - -#include -#include -#include -#include -#include -#include - -ip6_sr_main_t sr_main; - -/** - * @brief no-op lock function. - * The lifetime of the SR entry is managed by the control plane - */ -void -sr_dpo_lock (dpo_id_t * dpo) -{ -} - -/** - * @brief no-op unlock function. - * The lifetime of the SR entry is managed by the control plane - */ -void -sr_dpo_unlock (dpo_id_t * dpo) -{ -} - -/* -* fd.io coding-style-patch-verification: ON -* -* Local Variables: -* eval: (c-set-style "gnu") -* End: -*/ diff --git a/src/vnet/sr/sr.h b/src/vnet/sr/sr.h deleted file mode 100755 index b832c0fc..00000000 --- a/src/vnet/sr/sr.h +++ /dev/null @@ -1,323 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file - * @brief Segment Routing data structures definitions - * - */ - -#ifndef included_vnet_sr_h -#define included_vnet_sr_h - -#include -#include -#include -#include - -#include -#include - -#define IPv6_DEFAULT_HEADER_LENGTH 40 -#define IPv6_DEFAULT_HOP_LIMIT 64 -#define IPv6_DEFAULT_MAX_MASK_WIDTH 128 - -#define SR_BEHAVIOR_END 1 -#define SR_BEHAVIOR_X 2 -#define SR_BEHAVIOR_D_FIRST 3 /* Unused. 
Separator in between regular and D */ -#define SR_BEHAVIOR_DX2 4 -#define SR_BEHAVIOR_DX6 5 -#define SR_BEHAVIOR_DX4 6 -#define SR_BEHAVIOR_DT6 7 -#define SR_BEHAVIOR_DT4 8 -#define SR_BEHAVIOR_LAST 9 /* Must always be the last one */ - -#define SR_STEER_L2 2 -#define SR_STEER_IPV4 4 -#define SR_STEER_IPV6 6 - -#define SR_FUNCTION_SIZE 4 -#define SR_ARGUMENT_SIZE 4 - -#define SR_SEGMENT_LIST_WEIGHT_DEFAULT 1 - -/** - * @brief SR Segment List (SID list) - */ -typedef struct -{ - ip6_address_t *segments; /**< SIDs (key) */ - - u32 weight; /**< SID list weight (wECMP / UCMP) */ - - u8 *rewrite; /**< Precomputed rewrite header */ - u8 *rewrite_bsid; /**< Precomputed rewrite header for bindingSID */ - - dpo_id_t bsid_dpo; /**< DPO for Encaps/Insert for BSID */ - dpo_id_t ip6_dpo; /**< DPO for Encaps/Insert IPv6 */ - dpo_id_t ip4_dpo; /**< DPO for Encaps IPv6 */ -} ip6_sr_sl_t; - -/* SR policy types */ -#define SR_POLICY_TYPE_DEFAULT 0 -#define SR_POLICY_TYPE_SPRAY 1 -/** - * @brief SR Policy - */ -typedef struct -{ - u32 *segments_lists; /**< SID lists indexes (vector) */ - - ip6_address_t bsid; /**< BindingSID (key) */ - - u8 type; /**< Type (default is 0) */ - /* SR Policy specific DPO */ - /* IF Type = DEFAULT Then Load Balancer DPO among SID lists */ - /* IF Type = SPRAY then Spray DPO with all SID lists */ - dpo_id_t bsid_dpo; /**< SR Policy specific DPO - BSID */ - dpo_id_t ip4_dpo; /**< SR Policy specific DPO - IPv6 */ - dpo_id_t ip6_dpo; /**< SR Policy specific DPO - IPv4 */ - - u32 fib_table; /**< FIB table */ - - u8 is_encap; /**< Mode (0 is SRH insert, 1 Encaps) */ -} ip6_sr_policy_t; - -/** - * @brief SR LocalSID - */ -typedef struct -{ - ip6_address_t localsid; /**< LocalSID IPv6 address */ - - char end_psp; /**< Combined with End.PSP? */ - - u16 behavior; /**< Behavior associated to this localsid */ - - union - { - u32 sw_if_index; /**< xconnect only */ - u32 vrf_index; /**< vrf only */ - }; - - u32 fib_table; /**< FIB table where localsid is registered */ - - u32 vlan_index; /**< VLAN tag (not an index) */ - - ip46_address_t next_hop; /**< Next_hop for xconnect usage only */ - - u32 nh_adj; /**< Next_adj for xconnect usage only */ - - void *plugin_mem; /**< Memory to be used by the plugin callback functions */ -} ip6_sr_localsid_t; - -typedef int (sr_plugin_callback_t) (ip6_sr_localsid_t * localsid); - -/** - * @brief SR LocalSID behavior registration - */ -typedef struct -{ - u16 sr_localsid_function_number; /**< SR LocalSID plugin function (>SR_BEHAVIOR_LAST) */ - - u8 *function_name; /**< Function name. (key). */ - - u8 *keyword_str; /**< Behavior keyword (i.e. End.X) */ - - u8 *def_str; /**< Behavior definition (i.e. Endpoint with cross-connect) */ - - u8 *params_str; /**< Behavior parameters (i.e. 
) */ - - dpo_type_t dpo; /**< DPO type registration */ - - format_function_t *ls_format; /**< LocalSID format function */ - - unformat_function_t *ls_unformat; /**< LocalSID unformat function */ - - sr_plugin_callback_t *creation; /**< Function within plugin that will be called after localsid creation*/ - - sr_plugin_callback_t *removal; /**< Function within plugin that will be called before localsid removal */ -} sr_localsid_fn_registration_t; - -/** - * @brief Steering db key - * - * L3 is IPv4/IPv6 + mask - * L2 is sf_if_index + vlan - */ -typedef struct -{ - union - { - struct - { - ip46_address_t prefix; /**< IP address of the prefix */ - u32 mask_width; /**< Mask width of the prefix */ - u32 fib_table; /**< VRF of the prefix */ - } l3; - struct - { - u32 sw_if_index; /**< Incoming software interface */ - } l2; - }; - u8 traffic_type; /**< Traffic type (IPv4, IPv6, L2) */ - u8 padding[3]; -} sr_steering_key_t; - -typedef struct -{ - sr_steering_key_t classify; /**< Traffic classification */ - u32 sr_policy; /**< SR Policy index */ -} ip6_sr_steering_policy_t; - -/** - * @brief Segment Routing main datastructure - */ -typedef struct -{ - /* L2-input -> SR rewrite next index */ - u32 l2_sr_policy_rewrite_index; - - /* SR SID lists */ - ip6_sr_sl_t *sid_lists; - - /* SR policies */ - ip6_sr_policy_t *sr_policies; - - /* Hash table mapping BindingSID to SR policy */ - mhash_t sr_policies_index_hash; - - /* Pool of SR localsid instances */ - ip6_sr_localsid_t *localsids; - - /* Hash table mapping LOC:FUNC to SR LocalSID instance */ - mhash_t sr_localsids_index_hash; - - /* Pool of SR steer policies instances */ - ip6_sr_steering_policy_t *steer_policies; - - /* Hash table mapping steering rules to SR steer instance */ - mhash_t sr_steer_policies_hash; - - /* L2 steering ifaces - sr_policies */ - u32 *sw_iface_sr_policies; - - /* Spray DPO */ - dpo_type_t sr_pr_spray_dpo_type; - - /* Plugin functions */ - sr_localsid_fn_registration_t *plugin_functions; - - /* Find plugin function by name */ - uword *plugin_functions_by_key; - - /* Counters */ - vlib_combined_counter_main_t sr_ls_valid_counters; - vlib_combined_counter_main_t sr_ls_invalid_counters; - - /* SR Policies FIBs */ - u32 fib_table_ip6; - u32 fib_table_ip4; - - /* convenience */ - vlib_main_t *vlib_main; - vnet_main_t *vnet_main; -} ip6_sr_main_t; - -ip6_sr_main_t sr_main; - -extern vlib_node_registration_t sr_policy_rewrite_encaps_node; -extern vlib_node_registration_t sr_policy_rewrite_insert_node; -extern vlib_node_registration_t sr_localsid_node; -extern vlib_node_registration_t sr_localsid_d_node; - -void sr_dpo_lock (dpo_id_t * dpo); -void sr_dpo_unlock (dpo_id_t * dpo); - -int sr_localsid_register_function (vlib_main_t * vm, u8 * fn_name, - u8 * keyword_str, u8 * def_str, - u8 * params_str, dpo_type_t * dpo, - format_function_t * ls_format, - unformat_function_t * ls_unformat, - sr_plugin_callback_t * creation_fn, - sr_plugin_callback_t * removal_fn); - -int -sr_policy_add (ip6_address_t * bsid, ip6_address_t * segments, - u32 weight, u8 behavior, u32 fib_table, u8 is_encap); -int -sr_policy_mod (ip6_address_t * bsid, u32 index, u32 fib_table, - u8 operation, ip6_address_t * segments, u32 sl_index, - u32 weight); -int sr_policy_del (ip6_address_t * bsid, u32 index); - -int sr_cli_localsid (char is_del, ip6_address_t * localsid_addr, - char end_psp, u8 behavior, u32 sw_if_index, - u32 vlan_index, u32 fib_table, ip46_address_t * nh_addr, - void *ls_plugin_mem); - -int -sr_steering_policy (int is_del, ip6_address_t * bsid, u32 
sr_policy_index, - u32 table_id, ip46_address_t * prefix, u32 mask_width, - u32 sw_if_index, u8 traffic_type); - -/** - * @brief SR rewrite string computation for SRH insertion (inline) - * - * @param sl is a vector of IPv6 addresses composing the Segment List - * - * @return precomputed rewrite string for SRH insertion - */ -static inline u8 * -ip6_sr_compute_rewrite_string_insert (ip6_address_t * sl) -{ - ip6_sr_header_t *srh; - ip6_address_t *addrp, *this_address; - u32 header_length = 0; - u8 *rs = NULL; - - header_length = 0; - header_length += sizeof (ip6_sr_header_t); - header_length += (vec_len (sl) + 1) * sizeof (ip6_address_t); - - vec_validate (rs, header_length - 1); - - srh = (ip6_sr_header_t *) rs; - srh->type = ROUTING_HEADER_TYPE_SR; - srh->segments_left = vec_len (sl); - srh->first_segment = vec_len (sl); - srh->length = ((sizeof (ip6_sr_header_t) + - ((vec_len (sl) + 1) * sizeof (ip6_address_t))) / 8) - 1; - srh->flags = 0x00; - srh->reserved = 0x0000; - addrp = srh->segments + vec_len (sl); - vec_foreach (this_address, sl) - { - clib_memcpy (addrp->as_u8, this_address->as_u8, sizeof (ip6_address_t)); - addrp--; - } - return rs; -} - - -#endif /* included_vnet_sr_h */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/sr/sr_api.c b/src/vnet/sr/sr_api.c deleted file mode 100644 index f4e1c346..00000000 --- a/src/vnet/sr/sr_api.c +++ /dev/null @@ -1,244 +0,0 @@ -/* - *------------------------------------------------------------------ - * sr_api.c - ipv6 segment routing api - * - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *------------------------------------------------------------------ - */ - -#include -#include -#include - -#include -#include -#include - -#include - -#define vl_typedefs /* define message structures */ -#include -#undef vl_typedefs - -#define vl_endianfun /* define message structures */ -#include -#undef vl_endianfun - -/* instantiate all the print functions we know about */ -#define vl_print(handle, ...) 
vlib_cli_output (handle, __VA_ARGS__) -#define vl_printfun -#include -#undef vl_printfun - -#include - -#define foreach_vpe_api_msg \ -_(SR_LOCALSID_ADD_DEL, sr_localsid_add_del) \ -_(SR_POLICY_DEL, sr_policy_del) \ -_(SR_STEERING_ADD_DEL, sr_steering_add_del) -//_(SR_LOCALSIDS, sr_localsids_dump) -//_(SR_LOCALSID_BEHAVIORS, sr_localsid_behaviors_dump) - -static void vl_api_sr_localsid_add_del_t_handler - (vl_api_sr_localsid_add_del_t * mp) -{ - vl_api_sr_localsid_add_del_reply_t *rmp; - int rv = 0; -/* - * int sr_cli_localsid (char is_del, ip6_address_t *localsid_addr, - * char end_psp, u8 behavior, u32 sw_if_index, u32 vlan_index, u32 fib_table, - * ip46_address_t *nh_addr, void *ls_plugin_mem) - */ - rv = sr_cli_localsid (mp->is_del, - (ip6_address_t *) & mp->localsid_addr, - mp->end_psp, - mp->behavior, - ntohl (mp->sw_if_index), - ntohl (mp->vlan_index), - ntohl (mp->fib_table), - (ip46_address_t *) & mp->nh_addr, NULL); - - REPLY_MACRO (VL_API_SR_LOCALSID_ADD_DEL_REPLY); -} - -static void -vl_api_sr_policy_add_t_handler (vl_api_sr_policy_add_t * mp) -{ - vl_api_sr_policy_add_reply_t *rmp; - ip6_address_t *segments = 0, *seg; - ip6_address_t *this_address = (ip6_address_t *) mp->segments; - - int i; - for (i = 0; i < mp->n_segments; i++) - { - vec_add2 (segments, seg, 1); - clib_memcpy (seg->as_u8, this_address->as_u8, sizeof (*this_address)); - this_address++; - } - -/* - * sr_policy_add (ip6_address_t *bsid, ip6_address_t *segments, - * u32 weight, u8 behavior, u32 fib_table, u8 is_encap) - */ - int rv = 0; - rv = sr_policy_add ((ip6_address_t *) & mp->bsid_addr, - segments, - ntohl (mp->weight), - mp->type, ntohl (mp->fib_table), mp->is_encap); - - REPLY_MACRO (VL_API_SR_POLICY_ADD_REPLY); -} - -static void -vl_api_sr_policy_mod_t_handler (vl_api_sr_policy_mod_t * mp) -{ - vl_api_sr_policy_mod_reply_t *rmp; - - ip6_address_t *segments = 0, *seg; - ip6_address_t *this_address = (ip6_address_t *) mp->segments; - - int i; - for (i = 0; i < mp->n_segments; i++) - { - vec_add2 (segments, seg, 1); - clib_memcpy (seg->as_u8, this_address->as_u8, sizeof (*this_address)); - this_address++; - } - - int rv = 0; -/* - * int - * sr_policy_mod(ip6_address_t *bsid, u32 index, u32 fib_table, - * u8 operation, ip6_address_t *segments, u32 sl_index, - * u32 weight, u8 is_encap) - */ - rv = sr_policy_mod ((ip6_address_t *) & mp->bsid_addr, - ntohl (mp->sr_policy_index), - ntohl (mp->fib_table), - mp->operation, - segments, ntohl (mp->sl_index), ntohl (mp->weight)); - - REPLY_MACRO (VL_API_SR_POLICY_MOD_REPLY); -} - -static void -vl_api_sr_policy_del_t_handler (vl_api_sr_policy_del_t * mp) -{ - vl_api_sr_policy_del_reply_t *rmp; - int rv = 0; -/* - * int - * sr_policy_del (ip6_address_t *bsid, u32 index) - */ - rv = sr_policy_del ((ip6_address_t *) & mp->bsid_addr, - ntohl (mp->sr_policy_index)); - - REPLY_MACRO (VL_API_SR_POLICY_DEL_REPLY); -} - -static void vl_api_sr_steering_add_del_t_handler - (vl_api_sr_steering_add_del_t * mp) -{ - vl_api_sr_steering_add_del_reply_t *rmp; - int rv = 0; -/* - * int - * sr_steering_policy(int is_del, ip6_address_t *bsid, u32 sr_policy_index, - * u32 table_id, ip46_address_t *prefix, u32 mask_width, u32 sw_if_index, - * u8 traffic_type) - */ - rv = sr_steering_policy (mp->is_del, - (ip6_address_t *) & mp->bsid_addr, - ntohl (mp->sr_policy_index), - ntohl (mp->table_id), - (ip46_address_t *) & mp->prefix_addr, - ntohl (mp->mask_width), - ntohl (mp->sw_if_index), mp->traffic_type); - - REPLY_MACRO (VL_API_SR_STEERING_ADD_DEL_REPLY); -} - -/* - * sr_api_hookup - * Add 
vpe's API message handlers to the table. - * vlib has alread mapped shared memory and - * added the client registration handlers. - * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process() - */ -#define vl_msg_name_crc_list -#include -#undef vl_msg_name_crc_list - -static void -setup_message_id_table (api_main_t * am) -{ -#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id); - foreach_vl_msg_name_crc_sr; -#undef _ -} - -static clib_error_t * -sr_api_hookup (vlib_main_t * vm) -{ - api_main_t *am = &api_main; - -#define _(N,n) \ - vl_msg_api_set_handlers(VL_API_##N, #n, \ - vl_api_##n##_t_handler, \ - vl_noop_handler, \ - vl_api_##n##_t_endian, \ - vl_api_##n##_t_print, \ - sizeof(vl_api_##n##_t), 1); - foreach_vpe_api_msg; -#undef _ - - /* - * Manually register the sr policy add msg, so we trace - * enough bytes to capture a typical segment list - */ - vl_msg_api_set_handlers (VL_API_SR_POLICY_ADD, - "sr_policy_add", - vl_api_sr_policy_add_t_handler, - vl_noop_handler, - vl_api_sr_policy_add_t_endian, - vl_api_sr_policy_add_t_print, 256, 1); - - /* - * Manually register the sr policy mod msg, so we trace - * enough bytes to capture a typical segment list - */ - vl_msg_api_set_handlers (VL_API_SR_POLICY_MOD, - "sr_policy_mod", - vl_api_sr_policy_mod_t_handler, - vl_noop_handler, - vl_api_sr_policy_mod_t_endian, - vl_api_sr_policy_mod_t_print, 256, 1); - - /* - * Set up the (msg_name, crc, message-id) table - */ - setup_message_id_table (am); - - return 0; -} - -VLIB_API_INIT_FUNCTION (sr_api_hookup); - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/sr/sr_doc.md b/src/vnet/sr/sr_doc.md deleted file mode 100644 index fd92bdf2..00000000 --- a/src/vnet/sr/sr_doc.md +++ /dev/null @@ -1,55 +0,0 @@ -# SRv6: Segment Routing for IPv6 {#srv6_doc} - -This is a memo intended to contain documentation of the VPP SRv6 implementation. -Everything that is not directly obvious should come here. -For any feedback on content that should be explained please mailto:pcamaril@cisco.com - -## Segment Routing - -Segment routing is a network technology focused on addressing the limitations of existing IP and Multiprotocol Label Switching (MPLS) networks in terms of simplicity, scale, and ease of operation. It is a foundation for application engineered routing as it prepares the networks for new business models where applications can control the network behavior. - -Segment routing seeks the right balance between distributed intelligence and centralized optimization and programming. It was built for the software-defined networking (SDN) era. - -Segment routing enhances packet forwarding behavior by enabling a network to transport unicast packets through a specific forwarding path, different from the normal path that a packet usually takes (IGP shortest path or BGP best path). This capability benefits many use cases, and one can build those specific paths based on application requirements. - -Segment routing uses the source routing paradigm. A node, usually a router but also a switch, a trusted server, or a virtual forwarder running on a hypervisor, steers a packet through an ordered list of instructions, called segments. A segment can represent any instruction, topological or service-based. A segment can have a local semantic to a segment-routing node or global within a segment-routing network. 
Segment routing allows an operator to enforce a flow through any topological path and service chain while maintaining per-flow state only at the ingress node to the segment-routing network. Segment routing also supports equal-cost multipath (ECMP) by design. - -Segment routing can operate with either an MPLS or an IPv6 data plane. All the currently available MPLS services, such as Layer 3 VPN (L3VPN), L2VPN (Virtual Private Wire Service [VPWS], Virtual Private LAN Services [VPLS], Ethernet VPN [E-VPN], and Provider Backbone Bridging Ethernet VPN [PBB-EVPN]), can run on top of a segment-routing transport network. - -**The implementation of Segment Routing in VPP only covers the IPv6 data plane (SRv6).** - -## Segment Routing terminology - -* Segment Routing Header (SRH): IPv6 routing extension header of type 'Segment Routing'. (draft-ietf-6man-segment-routing-header-05) -* SegmentID (SID): is an IPv6 address. -* Segment List (SL) (SID List): is the sequence of SIDs that the packet will traverse. -* SR Policy: defines the SRH that will be applied to a packet. A packet steered into an SR policy may either receive the SRH by IPv6 header encapsulation (as recommended in draft-ietf-6man-rfc2460bis) or it could be inserted within an existing IPv6 header. An SR policy is uniquely identified by its Binding SID and associated with a weighted set of Segment Lists. In case several SID lists are defined, traffic steered into the policy is unevenly load-balanced among them according to their respective weights. -* Local SID: is a SID associated with a processing function on the local node, which may go from advancing to the next SID in the SRH, to complex user-defined behaviors. When a FIB lookup, either in the main FIB or in a specific VRF, returns a match on a local SID, the associated function is performed. -* BindingSID: a BindingSID is a SID (only one) associated one-one with an SR Policy. If a packet arrives with an IPv6 DA corresponding to a BindingSID, then the SR policy will be applied to such packet. - -## SRv6 Features in VPP - -The SRv6 Network Programming (*draft-filsfils-spring-srv6-network-programming*) defines the SRv6 architecture. - -VPP supports the following SRv6 LocalSID functions: End, End.X, End.DX6, End.DT6, End.DX4, End.DT4, End.DX2, End.B6, End.B6.Encaps. - -For further information and how to configure each specific function: @subpage srv6_localsid_doc - - -The Segment Routing Policy (*draft-filsfils-spring-segment-routing-policy*) defines SR Policies. - -VPP supports SRv6 Policies with T.Insert and T.Encaps behaviors. - -For further information on how to create SR Policies: @subpage srv6_policy_doc - -For further information on how to steer traffic into SR Policies: @subpage srv6_steering_doc - -## SRv6 LocalSID development framework - -One of the *'key'* concepts about SRv6 is network programmability. This is why an SRv6 LocalSID is associated with an specific function. - -However, the trully way to enable network programmability is allowing any developer **easily** create his own SRv6 LocalSID function. That is the reason why we have added some API calls such that any developer can code his own SRv6 LocalSID behaviors as plugins an add them to the running SRv6 code. - -The principle is that the developer only codes the behavior -the graph node-. However all the FIB handling, SR LocalSID instantiation and so on are done by the VPP SRv6 code. 
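
As a rough illustration of that split, the sketch below shows how a hypothetical plugin could register a custom behavior. Only `sr_localsid_register_function()` and the `sr_plugin_callback_t` prototype come from `sr.h`; every `my_beh_*` name is made up, and the graph node and DPO plumbing that a real behavior needs are omitted.

```c
#include <vnet/sr/sr.h>

static dpo_type_t my_beh_dpo_type;   /* would be registered via the DPO API in a real plugin */

static int
my_beh_creation (ip6_sr_localsid_t * ls)
{
  /* allocate any per-localsid state; 0 means success */
  return 0;
}

static int
my_beh_removal (ip6_sr_localsid_t * ls)
{
  /* release whatever the creation callback allocated */
  return 0;
}

static u8 *
format_my_beh (u8 * s, va_list * args)
{
  return format (s, "End.MYBEH");
}

static uword
unformat_my_beh (unformat_input_t * input, va_list * args)
{
  /* the CLI passes &ls_plugin_mem, so the first vararg is a void ** */
  void **plugin_mem_p = va_arg (*args, void **);
  if (!unformat (input, "end.mybeh"))
    return 0;
  *plugin_mem_p = 0;            /* this sketch takes no per-localsid arguments */
  return 1;
}

static clib_error_t *
my_beh_init (vlib_main_t * vm)
{
  sr_localsid_register_function (vm, (u8 *) "my_beh",
                                 (u8 *) "end.mybeh",
                                 (u8 *) "Endpoint with custom processing",
                                 (u8 *) "", &my_beh_dpo_type,
                                 format_my_beh, unformat_my_beh,
                                 my_beh_creation, my_beh_removal);
  return 0;
}

VLIB_INIT_FUNCTION (my_beh_init);
```

Once registered, the behavior is picked up by the `sr localsid` CLI through its unformat function, in the same way as the built-in End.* behaviors.
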
- -For more information please refer to: @subpage srv6_plugin_doc diff --git a/src/vnet/sr/sr_localsid.c b/src/vnet/sr/sr_localsid.c deleted file mode 100755 index 32fc5f82..00000000 --- a/src/vnet/sr/sr_localsid.c +++ /dev/null @@ -1,1492 +0,0 @@ -/* - * sr_localsid.c: ipv6 segment routing Endpoint behaviors - * - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file - * @brief Processing of packets with a SRH - * - * CLI to define new Segment Routing End processing functions. - * Graph node to support such functions. - * - * Each function associates an SRv6 segment (IPv6 address) with an specific - * Segment Routing function. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -/** - * @brief Dynamically added SR localsid DPO type - */ -static dpo_type_t sr_localsid_dpo_type; -static dpo_type_t sr_localsid_d_dpo_type; - -/** - * @brief SR localsid add/del - * - * Function to add or delete SR LocalSIDs. - * - * @param is_del Boolean of whether its a delete instruction - * @param localsid_addr IPv6 address of the localsid - * @param is_decap Boolean of whether decapsulation is allowed in this function - * @param behavior Type of behavior (function) for this localsid - * @param sw_if_index Only for L2/L3 xconnect. OIF. In VRF variant the fib_table. - * @param vlan_index Only for L2 xconnect. Outgoing VLAN tag. - * @param fib_table FIB table in which we should install the localsid entry - * @param nh_addr Next Hop IPv4/IPv6 address. Only for L2/L3 xconnect. - * - * @return 0 on success, error otherwise. 
- */ -int -sr_cli_localsid (char is_del, ip6_address_t * localsid_addr, - char end_psp, u8 behavior, u32 sw_if_index, u32 vlan_index, - u32 fib_table, ip46_address_t * nh_addr, void *ls_plugin_mem) -{ - ip6_sr_main_t *sm = &sr_main; - uword *p; - int rv; - - ip6_sr_localsid_t *ls = 0; - - dpo_id_t dpo = DPO_INVALID; - - /* Search for the item */ - p = mhash_get (&sm->sr_localsids_index_hash, localsid_addr); - - if (p) - { - if (is_del) - { - /* Retrieve localsid */ - ls = pool_elt_at_index (sm->localsids, p[0]); - /* Delete FIB entry */ - fib_prefix_t pfx = { - .fp_proto = FIB_PROTOCOL_IP6, - .fp_len = 128, - .fp_addr = { - .ip6 = *localsid_addr, - } - }; - - fib_table_entry_delete (fib_table_find (FIB_PROTOCOL_IP6, - fib_table), - &pfx, FIB_SOURCE_SR); - - /* In case it is a Xconnect iface remove the (OIF, NHOP) adj */ - if (ls->behavior == SR_BEHAVIOR_X || ls->behavior == SR_BEHAVIOR_DX6 - || ls->behavior == SR_BEHAVIOR_DX4) - adj_unlock (ls->nh_adj); - - if (ls->behavior >= SR_BEHAVIOR_LAST) - { - sr_localsid_fn_registration_t *plugin = 0; - plugin = pool_elt_at_index (sm->plugin_functions, - ls->behavior - SR_BEHAVIOR_LAST); - - /* Callback plugin removal function */ - rv = plugin->removal (ls); - } - - /* Delete localsid registry */ - pool_put (sm->localsids, ls); - mhash_unset (&sm->sr_localsids_index_hash, localsid_addr, NULL); - return 1; - } - else /* create with function already existing; complain */ - return -1; - } - else - /* delete; localsid does not exist; complain */ - if (is_del) - return -2; - - /* Check whether there exists a FIB entry with such address */ - fib_prefix_t pfx = { - .fp_proto = FIB_PROTOCOL_IP6, - .fp_len = 128, - }; - - pfx.fp_addr.as_u64[0] = localsid_addr->as_u64[0]; - pfx.fp_addr.as_u64[1] = localsid_addr->as_u64[1]; - - /* Lookup the FIB index associated to the table id provided */ - u32 fib_index = fib_table_find (FIB_PROTOCOL_IP6, fib_table); - if (fib_index == ~0) - return -3; - - /* Lookup the localsid in such FIB table */ - fib_node_index_t fei = fib_table_lookup_exact_match (fib_index, &pfx); - if (FIB_NODE_INDEX_INVALID != fei) - return -4; //There is an entry for such address (the localsid addr) - - /* Create a new localsid registry */ - pool_get (sm->localsids, ls); - memset (ls, 0, sizeof (*ls)); - - clib_memcpy (&ls->localsid, localsid_addr, sizeof (ip6_address_t)); - ls->end_psp = end_psp; - ls->behavior = behavior; - ls->nh_adj = (u32) ~ 0; - ls->fib_table = fib_table; - switch (behavior) - { - case SR_BEHAVIOR_END: - break; - case SR_BEHAVIOR_X: - ls->sw_if_index = sw_if_index; - clib_memcpy (&ls->next_hop.ip6, &nh_addr->ip6, sizeof (ip6_address_t)); - break; - case SR_BEHAVIOR_DX4: - ls->sw_if_index = sw_if_index; - clib_memcpy (&ls->next_hop.ip4, &nh_addr->ip4, sizeof (ip4_address_t)); - break; - case SR_BEHAVIOR_DX6: - ls->sw_if_index = sw_if_index; - clib_memcpy (&ls->next_hop.ip6, &nh_addr->ip6, sizeof (ip6_address_t)); - break; - case SR_BEHAVIOR_DT6: - ls->vrf_index = sw_if_index; - break; - case SR_BEHAVIOR_DX2: - ls->sw_if_index = sw_if_index; - ls->vlan_index = vlan_index; - break; - } - - /* Figure out the adjacency magic for Xconnect variants */ - if (ls->behavior == SR_BEHAVIOR_X || ls->behavior == SR_BEHAVIOR_DX4 - || ls->behavior == SR_BEHAVIOR_DX6) - { - adj_index_t nh_adj_index = ADJ_INDEX_INVALID; - - /* Retrieve the adjacency corresponding to the (OIF, next_hop) */ - if (ls->behavior == SR_BEHAVIOR_DX6 || ls->behavior == SR_BEHAVIOR_X) - nh_adj_index = adj_nbr_add_or_lock (FIB_PROTOCOL_IP6, VNET_LINK_IP6, - nh_addr, 
sw_if_index); - - else if (ls->behavior == SR_BEHAVIOR_DX4) - nh_adj_index = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4, VNET_LINK_IP4, - nh_addr, sw_if_index); - - /* Check for ADJ creation error. If so panic */ - if (nh_adj_index == ADJ_INDEX_INVALID) - { - pool_put (sm->localsids, ls); - return -5; - } - - ls->nh_adj = nh_adj_index; - } - - /* Set DPO */ - if (ls->behavior == SR_BEHAVIOR_END || ls->behavior == SR_BEHAVIOR_X) - dpo_set (&dpo, sr_localsid_dpo_type, DPO_PROTO_IP6, ls - sm->localsids); - else if (ls->behavior > SR_BEHAVIOR_D_FIRST - && ls->behavior < SR_BEHAVIOR_LAST) - dpo_set (&dpo, sr_localsid_d_dpo_type, DPO_PROTO_IP6, ls - sm->localsids); - else if (ls->behavior >= SR_BEHAVIOR_LAST) - { - sr_localsid_fn_registration_t *plugin = 0; - plugin = pool_elt_at_index (sm->plugin_functions, - ls->behavior - SR_BEHAVIOR_LAST); - /* Copy the unformat memory result */ - ls->plugin_mem = ls_plugin_mem; - /* Callback plugin creation function */ - rv = plugin->creation (ls); - if (rv) - { - pool_put (sm->localsids, ls); - return -6; - } - dpo_set (&dpo, plugin->dpo, DPO_PROTO_IP6, ls - sm->localsids); - } - - /* Set hash key for searching localsid by address */ - mhash_set (&sm->sr_localsids_index_hash, localsid_addr, ls - sm->localsids, - NULL); - - fib_table_entry_special_dpo_add (fib_index, &pfx, FIB_SOURCE_SR, - FIB_ENTRY_FLAG_EXCLUSIVE, &dpo); - dpo_reset (&dpo); - - /* Set counter to zero */ - vlib_validate_combined_counter (&(sm->sr_ls_valid_counters), - ls - sm->localsids); - vlib_validate_combined_counter (&(sm->sr_ls_invalid_counters), - ls - sm->localsids); - - vlib_zero_combined_counter (&(sm->sr_ls_valid_counters), - ls - sm->localsids); - vlib_zero_combined_counter (&(sm->sr_ls_invalid_counters), - ls - sm->localsids); - - return 0; -} - -/** - * @brief SR LocalSID CLI function. 
- * - * @see sr_cli_localsid - */ -static clib_error_t * -sr_cli_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - vnet_main_t *vnm = vnet_get_main (); - ip6_sr_main_t *sm = &sr_main; - u32 sw_if_index = (u32) ~ 0, vlan_index = (u32) ~ 0, fib_index = 0; - int is_del = 0; - int end_psp = 0; - ip6_address_t resulting_address; - ip46_address_t next_hop; - char address_set = 0; - char behavior = 0; - void *ls_plugin_mem = 0; - - int rv; - - memset (&resulting_address, 0, sizeof (ip6_address_t)); - ip46_address_reset (&next_hop); - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "del")) - is_del = 1; - else if (!address_set - && unformat (input, "address %U", unformat_ip6_address, - &resulting_address)) - address_set = 1; - else if (!address_set - && unformat (input, "addr %U", unformat_ip6_address, - &resulting_address)) - address_set = 1; - else if (unformat (input, "fib-table %u", &fib_index)); - else if (vlan_index == (u32) ~ 0 - && unformat (input, "vlan %u", &vlan_index)); - else if (!behavior && unformat (input, "behavior")) - { - if (unformat (input, "end.x %U %U", - unformat_vnet_sw_interface, vnm, &sw_if_index, - unformat_ip6_address, &next_hop.ip6)) - behavior = SR_BEHAVIOR_X; - else if (unformat (input, "end.dx6 %U %U", - unformat_vnet_sw_interface, vnm, &sw_if_index, - unformat_ip6_address, &next_hop.ip6)) - behavior = SR_BEHAVIOR_DX6; - else if (unformat (input, "end.dx4 %U %U", - unformat_vnet_sw_interface, vnm, &sw_if_index, - unformat_ip4_address, &next_hop.ip4)) - behavior = SR_BEHAVIOR_DX4; - else if (unformat (input, "end.dx2 %U", - unformat_vnet_sw_interface, vnm, &sw_if_index)) - behavior = SR_BEHAVIOR_DX2; - else if (unformat (input, "end.dt6 %u", &sw_if_index)) - behavior = SR_BEHAVIOR_DT6; - else if (unformat (input, "end.dt4 %u", &sw_if_index)) - behavior = SR_BEHAVIOR_DT4; - else - { - /* Loop over all the plugin behavior format functions */ - sr_localsid_fn_registration_t *plugin = 0, **vec_plugins = 0; - sr_localsid_fn_registration_t **plugin_it = 0; - - /* Create a vector out of the plugin pool as recommended */ - /* *INDENT-OFF* */ - pool_foreach (plugin, sm->plugin_functions, - { - vec_add1 (vec_plugins, plugin); - }); - /* *INDENT-ON* */ - - vec_foreach (plugin_it, vec_plugins) - { - if (unformat - (input, "%U", (*plugin_it)->ls_unformat, &ls_plugin_mem)) - { - behavior = (*plugin_it)->sr_localsid_function_number; - break; - } - } - } - - if (!behavior) - { - if (unformat (input, "end")) - behavior = SR_BEHAVIOR_END; - else - break; - } - } - else if (!end_psp && unformat (input, "psp")) - end_psp = 1; - else - break; - } - - if (!behavior && end_psp) - behavior = SR_BEHAVIOR_END; - - if (!address_set) - return clib_error_return (0, - "Error: SRv6 LocalSID address is mandatory."); - if (!is_del && !behavior) - return clib_error_return (0, - "Error: SRv6 LocalSID behavior is mandatory."); - if (vlan_index != (u32) ~ 0) - return clib_error_return (0, - "Error: SRv6 End.DX2 with rewrite VLAN tag not supported by now."); - if (end_psp && !(behavior == SR_BEHAVIOR_END || behavior == SR_BEHAVIOR_X)) - return clib_error_return (0, - "Error: SRv6 PSP only compatible with End and End.X"); - - rv = sr_cli_localsid (is_del, &resulting_address, end_psp, behavior, - sw_if_index, vlan_index, fib_index, &next_hop, - ls_plugin_mem); - - switch (rv) - { - case 0: - break; - case 1: - return 0; - case -1: - return clib_error_return (0, - "Identical localsid already exists. 
Requested localsid not created."); - case -2: - return clib_error_return (0, - "The requested localsid could not be deleted. SR localsid not found"); - case -3: - return clib_error_return (0, "FIB table %u does not exist", fib_index); - case -4: - return clib_error_return (0, "There is already one FIB entry for the" - "requested localsid non segment routing related"); - case -5: - return clib_error_return (0, - "Could not create ARP/ND entry for such next_hop. Internal error."); - case -6: - return clib_error_return (0, - "Error on the plugin based localsid creation."); - default: - return clib_error_return (0, "BUG: sr localsid returns %d", rv); - } - return 0; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (sr_localsid_command, static) = { - .path = "sr localsid", - .short_help = "sr localsid (del) address XX:XX::YY:YY" - "(fib-table 8) behavior STRING", - .long_help = - "Create SR LocalSID and binds it to a particular behavior\n" - "Arguments:\n" - "\tlocalSID IPv6_addr(128b) LocalSID IPv6 address\n" - "\t(fib-table X) Optional. VRF where to install SRv6 localsid\n" - "\tbehavior STRING Specifies the behavior\n" - "\n\tBehaviors:\n" - "\tEnd\t-> Endpoint.\n" - "\tEnd.X\t-> Endpoint with decapsulation and Layer-3 cross-connect.\n" - "\t\tParameters: ' '\n" - "\tEnd.DX2\t-> Endpoint with decapsulation and Layer-2 cross-connect.\n" - "\t\tParameters: ''\n" - "\tEnd.DX6\t-> Endpoint with decapsulation and IPv6 cross-connect.\n" - "\t\tParameters: ' '\n" - "\tEnd.DX4\t-> Endpoint with decapsulation and IPv4 cross-connect.\n" - "\t\tParameters: ' '\n" - "\tEnd.DT6\t-> Endpoint with decapsulation and specific IPv6 table lookup.\n" - "\t\tParameters: ''\n" - "\tEnd.DT4\t-> Endpoint with decapsulation and specific IPv4 table lookup.\n" - "\t\tParameters: ''\n", - .function = sr_cli_localsid_command_fn, -}; -/* *INDENT-ON* */ - -/** - * @brief CLI function to 'show' all SR LocalSIDs on console. 
- */ -static clib_error_t * -show_sr_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - vnet_main_t *vnm = vnet_get_main (); - ip6_sr_main_t *sm = &sr_main; - ip6_sr_localsid_t **localsid_list = 0; - ip6_sr_localsid_t *ls; - int i; - - vlib_cli_output (vm, "SRv6 - My LocalSID Table:"); - vlib_cli_output (vm, "========================="); - /* *INDENT-OFF* */ - pool_foreach (ls, sm->localsids, ({ vec_add1 (localsid_list, ls); })); - /* *INDENT-ON* */ - for (i = 0; i < vec_len (localsid_list); i++) - { - ls = localsid_list[i]; - switch (ls->behavior) - { - case SR_BEHAVIOR_END: - vlib_cli_output (vm, "\tAddress: \t%U\n\tBehavior: \tEnd", - format_ip6_address, &ls->localsid); - break; - case SR_BEHAVIOR_X: - vlib_cli_output (vm, - "\tAddress: \t%U\n\tBehavior: \tX (Endpoint with Layer-3 cross-connect)" - "\n\tIface: \t%U\n\tNext hop: \t%U", - format_ip6_address, &ls->localsid, - format_vnet_sw_if_index_name, vnm, ls->sw_if_index, - format_ip6_address, &ls->next_hop.ip6); - break; - case SR_BEHAVIOR_DX4: - vlib_cli_output (vm, - "\tAddress: \t%U\n\tBehavior: \tDX4 (Endpoint with decapsulation and IPv4 cross-connect)" - "\n\tIface: \t%U\n\tNext hop: \t%U", - format_ip6_address, &ls->localsid, - format_vnet_sw_if_index_name, vnm, ls->sw_if_index, - format_ip4_address, &ls->next_hop.ip4); - break; - case SR_BEHAVIOR_DX6: - vlib_cli_output (vm, - "\tAddress: \t%U\n\tBehavior: \tDX6 (Endpoint with decapsulation and IPv6 cross-connect)" - "\n\tIface: \t%U\n\tNext hop: \t%U", - format_ip6_address, &ls->localsid, - format_vnet_sw_if_index_name, vnm, ls->sw_if_index, - format_ip6_address, &ls->next_hop.ip6); - break; - case SR_BEHAVIOR_DX2: - if (ls->vlan_index == (u32) ~ 0) - vlib_cli_output (vm, - "\tAddress: \t%U\n\tBehavior: \tDX2 (Endpoint with decapulation and Layer-2 cross-connect)" - "\n\tIface: \t%U", format_ip6_address, - &ls->localsid, format_vnet_sw_if_index_name, vnm, - ls->sw_if_index); - else - vlib_cli_output (vm, - "Unsupported yet. (DX2 with egress VLAN rewrite)"); - break; - case SR_BEHAVIOR_DT6: - vlib_cli_output (vm, - "\tAddress: \t%U\n\tBehavior: \tDT6 (Endpoint with decapsulation and specific IPv6 table lookup)" - "\n\tTable: %u", format_ip6_address, &ls->localsid, - ls->fib_table); - break; - case SR_BEHAVIOR_DT4: - vlib_cli_output (vm, - "\tAddress: \t%U\n\tBehavior: \tDT4 (Endpoint with decapsulation and specific IPv4 table lookup)" - "\n\tTable: \t%u", format_ip6_address, - &ls->localsid, ls->fib_table); - break; - default: - if (ls->behavior >= SR_BEHAVIOR_LAST) - { - sr_localsid_fn_registration_t *plugin = - pool_elt_at_index (sm->plugin_functions, - ls->behavior - SR_BEHAVIOR_LAST); - - vlib_cli_output (vm, "\tAddress: \t%U\n" - "\tBehavior: \t%s (%s)\n\t%U", - format_ip6_address, &ls->localsid, - plugin->keyword_str, plugin->def_str, - plugin->ls_format, ls->plugin_mem); - } - else - //Should never get here... 
- vlib_cli_output (vm, "Internal error"); - break; - } - if (ls->end_psp) - vlib_cli_output (vm, "\tPSP: \tTrue\n"); - - /* Print counters */ - vlib_counter_t valid, invalid; - vlib_get_combined_counter (&(sm->sr_ls_valid_counters), i, &valid); - vlib_get_combined_counter (&(sm->sr_ls_invalid_counters), i, &invalid); - vlib_cli_output (vm, "\tGood traffic: \t[%Ld packets : %Ld bytes]\n", - valid.packets, valid.bytes); - vlib_cli_output (vm, "\tBad traffic: \t[%Ld packets : %Ld bytes]\n", - invalid.packets, invalid.bytes); - vlib_cli_output (vm, "--------------------"); - } - return 0; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (show_sr_localsid_command, static) = { - .path = "show sr localsids", - .short_help = "show sr localsids", - .function = show_sr_localsid_command_fn, -}; -/* *INDENT-ON* */ - -/** - * @brief Function to 'clear' ALL SR localsid counters - */ -static clib_error_t * -clear_sr_localsid_counters_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - ip6_sr_main_t *sm = &sr_main; - - vlib_clear_combined_counters (&(sm->sr_ls_valid_counters)); - vlib_clear_combined_counters (&(sm->sr_ls_invalid_counters)); - - return 0; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (clear_sr_localsid_counters_command, static) = { - .path = "clear sr localsid counters", - .short_help = "clear sr localsid counters", - .function = clear_sr_localsid_counters_command_fn, -}; -/* *INDENT-ON* */ - -/************************ SR LocalSID graphs node ****************************/ -/** - * @brief SR localsid node trace - */ -typedef struct -{ - u32 localsid_index; - ip6_address_t src, out_dst; - u8 sr[256]; - u8 num_segments; - u8 segments_left; - //With SRv6 header update include flags here. -} sr_localsid_trace_t; - -#define foreach_sr_localsid_error \ -_(NO_INNER_HEADER, "(SR-Error) No inner IP header") \ -_(NO_MORE_SEGMENTS, "(SR-Error) No more segments") \ -_(NO_SRH, "(SR-Error) No SR header") \ -_(NO_PSP, "(SR-Error) PSP Not available (segments left > 0)") \ -_(NOT_LS, "(SR-Error) Decaps not available (segments left > 0)") \ -_(L2, "(SR-Error) SRv6 decapsulated a L2 frame without dest") - -typedef enum -{ -#define _(sym,str) SR_LOCALSID_ERROR_##sym, - foreach_sr_localsid_error -#undef _ - SR_LOCALSID_N_ERROR, -} sr_localsid_error_t; - -static char *sr_localsid_error_strings[] = { -#define _(sym,string) string, - foreach_sr_localsid_error -#undef _ -}; - -#define foreach_sr_localsid_next \ -_(ERROR, "error-drop") \ -_(IP6_LOOKUP, "ip6-lookup") \ -_(IP4_LOOKUP, "ip4-lookup") \ -_(IP6_REWRITE, "ip6-rewrite") \ -_(IP4_REWRITE, "ip4-rewrite") \ -_(INTERFACE_OUTPUT, "interface-output") - -typedef enum -{ -#define _(s,n) SR_LOCALSID_NEXT_##s, - foreach_sr_localsid_next -#undef _ - SR_LOCALSID_N_NEXT, -} sr_localsid_next_t; - -/** - * @brief SR LocalSID graph node trace function - * - * @see sr_localsid - */ -u8 * -format_sr_localsid_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - ip6_sr_main_t *sm = &sr_main; - sr_localsid_trace_t *t = va_arg (*args, sr_localsid_trace_t *); - - ip6_sr_localsid_t *ls = - pool_elt_at_index (sm->localsids, t->localsid_index); - - s = - format (s, "SR-LOCALSID:\n\tLocalsid: %U\n", format_ip6_address, - &ls->localsid); - switch (ls->behavior) - { - case SR_BEHAVIOR_END: - s = format (s, "\tBehavior: End\n"); - break; - case SR_BEHAVIOR_DX6: - s = format (s, "\tBehavior: Decapsulation with IPv6 L3 xconnect\n"); - break; - 
case SR_BEHAVIOR_DX4: - s = format (s, "\tBehavior: Decapsulation with IPv4 L3 xconnect\n"); - break; - case SR_BEHAVIOR_X: - s = format (s, "\tBehavior: IPv6 L3 xconnect\n"); - break; - case SR_BEHAVIOR_DT6: - s = format (s, "\tBehavior: Decapsulation with IPv6 Table lookup\n"); - break; - case SR_BEHAVIOR_DT4: - s = format (s, "\tBehavior: Decapsulation with IPv4 Table lookup\n"); - break; - case SR_BEHAVIOR_DX2: - s = format (s, "\tBehavior: Decapsulation with L2 xconnect\n"); - break; - default: - s = format (s, "\tBehavior: defined in plugin\n"); //TODO - break; - } - if (t->num_segments != 0xFF) - { - if (t->num_segments > 0) - { - s = format (s, "\tSegments left: %d\n", t->num_segments); - s = format (s, "\tSID list: [in ietf order]"); - int i = 0; - for (i = 0; i < t->num_segments; i++) - { - s = format (s, "\n\t-> %U", format_ip6_address, - (ip6_address_t *) & t->sr[i * - sizeof (ip6_address_t)]); - } - } - } - return s; -} - -/** - * @brief Function doing End processing. - */ -static_always_inline void -end_srh_processing (vlib_node_runtime_t * node, - vlib_buffer_t * b0, - ip6_header_t * ip0, - ip6_sr_header_t * sr0, - ip6_sr_localsid_t * ls0, u32 * next0) -{ - ip6_address_t *new_dst0; - - if (PREDICT_TRUE (sr0->type == ROUTING_HEADER_TYPE_SR)) - { - if (PREDICT_TRUE (sr0->segments_left != 0)) - { - sr0->segments_left -= 1; - new_dst0 = (ip6_address_t *) (sr0->segments); - new_dst0 += sr0->segments_left; - ip0->dst_address.as_u64[0] = new_dst0->as_u64[0]; - ip0->dst_address.as_u64[1] = new_dst0->as_u64[1]; - - if (ls0->behavior == SR_BEHAVIOR_X) - { - vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ls0->nh_adj; - *next0 = SR_LOCALSID_NEXT_IP6_REWRITE; - } - } - else - { - *next0 = SR_LOCALSID_NEXT_ERROR; - b0->error = node->errors[SR_LOCALSID_ERROR_NO_MORE_SEGMENTS]; - } - } - else - { - /* Error. Routing header of type != SR */ - *next0 = SR_LOCALSID_NEXT_ERROR; - b0->error = node->errors[SR_LOCALSID_ERROR_NO_SRH]; - } -} - -/* - * @brief Function doing SRH processing for D* variants - */ -//FixME. I must crosscheck that next_proto matches the localsid -static_always_inline void -end_decaps_srh_processing (vlib_node_runtime_t * node, - vlib_buffer_t * b0, - ip6_header_t * ip0, - ip6_sr_header_t * sr0, - ip6_sr_localsid_t * ls0, u32 * next0) -{ - /* Compute the size of the IPv6 header with all Ext. headers */ - u8 next_proto; - ip6_ext_header_t *next_ext_header; - u16 total_size = 0; - - next_proto = ip0->protocol; - next_ext_header = (void *) (ip0 + 1); - total_size = sizeof (ip6_header_t); - while (ip6_ext_hdr (next_proto)) - { - total_size += ip6_ext_header_len (next_ext_header); - next_proto = next_ext_header->next_hdr; - next_ext_header = ip6_ext_next_header (next_ext_header); - } - - /* Ensure this is the last segment. Otherwise drop. */ - if (sr0 && sr0->segments_left != 0) - { - *next0 = SR_LOCALSID_NEXT_ERROR; - b0->error = node->errors[SR_LOCALSID_ERROR_NOT_LS]; - return; - } - - switch (next_proto) - { - case IP_PROTOCOL_IPV6: - /* Encap-End IPv6. Pop outer IPv6 header. */ - if (ls0->behavior == SR_BEHAVIOR_DX6) - { - vlib_buffer_advance (b0, total_size); - vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ls0->nh_adj; - *next0 = SR_LOCALSID_NEXT_IP6_REWRITE; - return; - } - else if (ls0->behavior == SR_BEHAVIOR_DT6) - { - vlib_buffer_advance (b0, total_size); - vnet_buffer (b0)->sw_if_index[VLIB_TX] = ls0->fib_table; - return; - } - break; - case IP_PROTOCOL_IP_IN_IP: - /* Encap-End IPv4. 
Pop outer IPv6 header */ - if (ls0->behavior == SR_BEHAVIOR_DX4) - { - vlib_buffer_advance (b0, total_size); - vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ls0->nh_adj; - *next0 = SR_LOCALSID_NEXT_IP4_REWRITE; - return; - } - else if (ls0->behavior == SR_BEHAVIOR_DT4) - { - vlib_buffer_advance (b0, total_size); - vnet_buffer (b0)->sw_if_index[VLIB_TX] = ls0->fib_table; - *next0 = SR_LOCALSID_NEXT_IP4_LOOKUP; - return; - } - break; - case IP_PROTOCOL_IP6_NONXT: - /* L2 encaps */ - if (ls0->behavior == SR_BEHAVIOR_DX2) - { - vlib_buffer_advance (b0, total_size); - vnet_buffer (b0)->sw_if_index[VLIB_TX] = ls0->sw_if_index; - *next0 = SR_LOCALSID_NEXT_INTERFACE_OUTPUT; - return; - } - break; - } - *next0 = SR_LOCALSID_NEXT_ERROR; - b0->error = node->errors[SR_LOCALSID_ERROR_NO_INNER_HEADER]; - return; -} - -/** - * @brief Function doing End processing with PSP - */ -static_always_inline void -end_psp_srh_processing (vlib_node_runtime_t * node, - vlib_buffer_t * b0, - ip6_header_t * ip0, - ip6_ext_header_t * prev0, - ip6_sr_header_t * sr0, - ip6_sr_localsid_t * ls0, u32 * next0) -{ - u32 new_l0, sr_len; - u64 *copy_dst0, *copy_src0; - u32 copy_len_u64s0 = 0; - int i; - - if (PREDICT_TRUE (sr0->type == ROUTING_HEADER_TYPE_SR)) - { - if (PREDICT_TRUE (sr0->segments_left == 1)) - { - ip0->dst_address.as_u64[0] = sr0->segments->as_u64[0]; - ip0->dst_address.as_u64[1] = sr0->segments->as_u64[1]; - - /* Remove the SRH taking care of the rest of IPv6 ext header */ - if (prev0) - prev0->next_hdr = sr0->protocol; - else - ip0->protocol = sr0->protocol; - - sr_len = ip6_ext_header_len (sr0); - vlib_buffer_advance (b0, sr_len); - new_l0 = clib_net_to_host_u16 (ip0->payload_length) - sr_len; - ip0->payload_length = clib_host_to_net_u16 (new_l0); - copy_src0 = (u64 *) ip0; - copy_dst0 = copy_src0 + (sr0->length + 1); - /* number of 8 octet units to copy - * By default in absence of extension headers it is equal to length of ip6 header - * With extension headers it number of 8 octet units of ext headers preceding - * SR header - */ - copy_len_u64s0 = - (((u8 *) sr0 - (u8 *) ip0) - sizeof (ip6_header_t)) >> 3; - copy_dst0[4 + copy_len_u64s0] = copy_src0[4 + copy_len_u64s0]; - copy_dst0[3 + copy_len_u64s0] = copy_src0[3 + copy_len_u64s0]; - copy_dst0[2 + copy_len_u64s0] = copy_src0[2 + copy_len_u64s0]; - copy_dst0[1 + copy_len_u64s0] = copy_src0[1 + copy_len_u64s0]; - copy_dst0[0 + copy_len_u64s0] = copy_src0[0 + copy_len_u64s0]; - - for (i = copy_len_u64s0 - 1; i >= 0; i--) - { - copy_dst0[i] = copy_src0[i]; - } - - if (ls0->behavior == SR_BEHAVIOR_X) - { - vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ls0->nh_adj; - *next0 = SR_LOCALSID_NEXT_IP6_REWRITE; - } - return; - } - } - /* Error. Routing header of type != SR */ - *next0 = SR_LOCALSID_NEXT_ERROR; - b0->error = node->errors[SR_LOCALSID_ERROR_NO_PSP]; -} - -/** - * @brief SR LocalSID graph node. 
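(This is the decapsulation node, registered below as "sr-localsid-d"; it serves the End.DX2, End.DX4, End.DX6, End.DT4 and End.DT6 behaviors.)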
Supports all default SR Endpoint variants - */ -static uword -sr_localsid_d_fn (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * from_frame) -{ - u32 n_left_from, next_index, *from, *to_next; - ip6_sr_main_t *sm = &sr_main; - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - next_index = node->cached_next_index; - u32 thread_index = vlib_get_thread_index (); - - while (n_left_from > 0) - { - u32 n_left_to_next; - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - /* Quad - Loop */ - while (n_left_from >= 8 && n_left_to_next >= 4) - { - u32 bi0, bi1, bi2, bi3; - vlib_buffer_t *b0, *b1, *b2, *b3; - ip6_header_t *ip0, *ip1, *ip2, *ip3; - ip6_ext_header_t *prev0, *prev1, *prev2, *prev3; - ip6_sr_header_t *sr0, *sr1, *sr2, *sr3; - u32 next0, next1, next2, next3; - next0 = next1 = next2 = next3 = SR_LOCALSID_NEXT_IP6_LOOKUP; - ip6_sr_localsid_t *ls0, *ls1, *ls2, *ls3; - - /* Prefetch next iteration. */ - { - vlib_buffer_t *p4, *p5, *p6, *p7; - - p4 = vlib_get_buffer (vm, from[4]); - p5 = vlib_get_buffer (vm, from[5]); - p6 = vlib_get_buffer (vm, from[6]); - p7 = vlib_get_buffer (vm, from[7]); - - /* Prefetch the buffer header and packet for the N+4 loop iteration */ - vlib_prefetch_buffer_header (p4, LOAD); - vlib_prefetch_buffer_header (p5, LOAD); - vlib_prefetch_buffer_header (p6, LOAD); - vlib_prefetch_buffer_header (p7, LOAD); - - CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); - CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); - CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE); - CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE); - } - - to_next[0] = bi0 = from[0]; - to_next[1] = bi1 = from[1]; - to_next[2] = bi2 = from[2]; - to_next[3] = bi3 = from[3]; - from += 4; - to_next += 4; - n_left_from -= 4; - n_left_to_next -= 4; - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - b2 = vlib_get_buffer (vm, bi2); - b3 = vlib_get_buffer (vm, bi3); - - ls0 = - pool_elt_at_index (sm->localsids, - vnet_buffer (b0)->ip.adj_index[VLIB_TX]); - ls1 = - pool_elt_at_index (sm->localsids, - vnet_buffer (b0)->ip.adj_index[VLIB_TX]); - ls2 = - pool_elt_at_index (sm->localsids, - vnet_buffer (b0)->ip.adj_index[VLIB_TX]); - ls3 = - pool_elt_at_index (sm->localsids, - vnet_buffer (b0)->ip.adj_index[VLIB_TX]); - - ip0 = vlib_buffer_get_current (b0); - ip1 = vlib_buffer_get_current (b1); - ip2 = vlib_buffer_get_current (b2); - ip3 = vlib_buffer_get_current (b3); - - ip6_ext_header_find_t (ip0, prev0, sr0, IP_PROTOCOL_IPV6_ROUTE); - ip6_ext_header_find_t (ip1, prev1, sr1, IP_PROTOCOL_IPV6_ROUTE); - ip6_ext_header_find_t (ip2, prev2, sr2, IP_PROTOCOL_IPV6_ROUTE); - ip6_ext_header_find_t (ip3, prev3, sr3, IP_PROTOCOL_IPV6_ROUTE); - - end_decaps_srh_processing (node, b0, ip0, sr0, ls0, &next0); - end_decaps_srh_processing (node, b1, ip1, sr1, ls1, &next1); - end_decaps_srh_processing (node, b2, ip2, sr2, ls2, &next2); - end_decaps_srh_processing (node, b3, ip3, sr3, ls3, &next3); - - //TODO: trace. - - vlib_increment_combined_counter - (((next0 == - SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : - &(sm->sr_ls_valid_counters)), thread_index, ls0 - sm->localsids, - 1, vlib_buffer_length_in_chain (vm, b0)); - - vlib_increment_combined_counter - (((next1 == - SR_LOCALSID_NEXT_ERROR) ? 
&(sm->sr_ls_invalid_counters) : - &(sm->sr_ls_valid_counters)), thread_index, ls1 - sm->localsids, - 1, vlib_buffer_length_in_chain (vm, b1)); - - vlib_increment_combined_counter - (((next2 == - SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : - &(sm->sr_ls_valid_counters)), thread_index, ls2 - sm->localsids, - 1, vlib_buffer_length_in_chain (vm, b2)); - - vlib_increment_combined_counter - (((next3 == - SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : - &(sm->sr_ls_valid_counters)), thread_index, ls3 - sm->localsids, - 1, vlib_buffer_length_in_chain (vm, b3)); - - vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, - n_left_to_next, bi0, bi1, bi2, bi3, - next0, next1, next2, next3); - } - - /* Single loop for potentially the last three packets */ - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0; - vlib_buffer_t *b0; - ip6_header_t *ip0; - ip6_ext_header_t *prev0; - ip6_sr_header_t *sr0; - u32 next0 = SR_LOCALSID_NEXT_IP6_LOOKUP; - ip6_sr_localsid_t *ls0; - - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - ip0 = vlib_buffer_get_current (b0); - - /* Lookup the SR End behavior based on IP DA (adj) */ - ls0 = - pool_elt_at_index (sm->localsids, - vnet_buffer (b0)->ip.adj_index[VLIB_TX]); - - /* Find SRH as well as previous header */ - ip6_ext_header_find_t (ip0, prev0, sr0, IP_PROTOCOL_IPV6_ROUTE); - - /* SRH processing and End variants */ - end_decaps_srh_processing (node, b0, ip0, sr0, ls0, &next0); - - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_localsid_trace_t *tr = - vlib_add_trace (vm, node, b0, sizeof (*tr)); - tr->num_segments = 0; - tr->localsid_index = ls0 - sm->localsids; - - if (ip0 == vlib_buffer_get_current (b0)) - { - clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, - sizeof (tr->src.as_u8)); - clib_memcpy (tr->out_dst.as_u8, ip0->dst_address.as_u8, - sizeof (tr->out_dst.as_u8)); - if (ip0->protocol == IP_PROTOCOL_IPV6_ROUTE - && sr0->type == ROUTING_HEADER_TYPE_SR) - { - clib_memcpy (tr->sr, sr0->segments, sr0->length * 8); - tr->num_segments = - sr0->length * 8 / sizeof (ip6_address_t); - tr->segments_left = sr0->segments_left; - } - } - else - tr->num_segments = 0xFF; - } - - /* Increase the counters */ - vlib_increment_combined_counter - (((next0 == - SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : - &(sm->sr_ls_valid_counters)), thread_index, ls0 - sm->localsids, - 1, vlib_buffer_length_in_chain (vm, b0)); - - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, bi0, next0); - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - return from_frame->n_vectors; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (sr_localsid_d_node) = { - .function = sr_localsid_d_fn, - .name = "sr-localsid-d", - .vector_size = sizeof (u32), - .format_trace = format_sr_localsid_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - .n_errors = SR_LOCALSID_N_ERROR, - .error_strings = sr_localsid_error_strings, - .n_next_nodes = SR_LOCALSID_N_NEXT, - .next_nodes = { -#define _(s,n) [SR_LOCALSID_NEXT_##s] = n, - foreach_sr_localsid_next -#undef _ - }, -}; -/* *INDENT-ON* */ - -/** - * @brief SR LocalSID graph node. 
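(This is the "sr-localsid" node, registered below; it serves the End and End.X behaviors, including their PSP flavour.)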
Supports all default SR Endpoint variants - */ -static uword -sr_localsid_fn (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * from_frame) -{ - u32 n_left_from, next_index, *from, *to_next; - ip6_sr_main_t *sm = &sr_main; - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - next_index = node->cached_next_index; - u32 thread_index = vlib_get_thread_index (); - - while (n_left_from > 0) - { - u32 n_left_to_next; - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - /* Quad - Loop */ - while (n_left_from >= 8 && n_left_to_next >= 4) - { - u32 bi0, bi1, bi2, bi3; - vlib_buffer_t *b0, *b1, *b2, *b3; - ip6_header_t *ip0, *ip1, *ip2, *ip3; - ip6_sr_header_t *sr0, *sr1, *sr2, *sr3; - ip6_ext_header_t *prev0, *prev1, *prev2, *prev3; - u32 next0, next1, next2, next3; - next0 = next1 = next2 = next3 = SR_LOCALSID_NEXT_IP6_LOOKUP; - ip6_sr_localsid_t *ls0, *ls1, *ls2, *ls3; - - /* Prefetch next iteration. */ - { - vlib_buffer_t *p4, *p5, *p6, *p7; - - p4 = vlib_get_buffer (vm, from[4]); - p5 = vlib_get_buffer (vm, from[5]); - p6 = vlib_get_buffer (vm, from[6]); - p7 = vlib_get_buffer (vm, from[7]); - - /* Prefetch the buffer header and packet for the N+2 loop iteration */ - vlib_prefetch_buffer_header (p4, LOAD); - vlib_prefetch_buffer_header (p5, LOAD); - vlib_prefetch_buffer_header (p6, LOAD); - vlib_prefetch_buffer_header (p7, LOAD); - - CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); - CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); - CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE); - CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE); - } - - to_next[0] = bi0 = from[0]; - to_next[1] = bi1 = from[1]; - to_next[2] = bi2 = from[2]; - to_next[3] = bi3 = from[3]; - from += 4; - to_next += 4; - n_left_from -= 4; - n_left_to_next -= 4; - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - b2 = vlib_get_buffer (vm, bi2); - b3 = vlib_get_buffer (vm, bi3); - - ip0 = vlib_buffer_get_current (b0); - ip1 = vlib_buffer_get_current (b1); - ip2 = vlib_buffer_get_current (b2); - ip3 = vlib_buffer_get_current (b3); - - ip6_ext_header_find_t (ip0, prev0, sr0, IP_PROTOCOL_IPV6_ROUTE); - ip6_ext_header_find_t (ip1, prev1, sr1, IP_PROTOCOL_IPV6_ROUTE); - ip6_ext_header_find_t (ip2, prev2, sr2, IP_PROTOCOL_IPV6_ROUTE); - ip6_ext_header_find_t (ip3, prev3, sr3, IP_PROTOCOL_IPV6_ROUTE); - - ls0 = - pool_elt_at_index (sm->localsids, - vnet_buffer (b0)->ip.adj_index[VLIB_TX]); - ls1 = - pool_elt_at_index (sm->localsids, - vnet_buffer (b0)->ip.adj_index[VLIB_TX]); - ls2 = - pool_elt_at_index (sm->localsids, - vnet_buffer (b0)->ip.adj_index[VLIB_TX]); - ls3 = - pool_elt_at_index (sm->localsids, - vnet_buffer (b0)->ip.adj_index[VLIB_TX]); - - if (ls0->end_psp) - end_psp_srh_processing (node, b0, ip0, prev0, sr0, ls0, &next0); - else - end_srh_processing (node, b0, ip0, sr0, ls0, &next0); - - if (ls1->end_psp) - end_psp_srh_processing (node, b1, ip1, prev1, sr1, ls1, &next1); - else - end_srh_processing (node, b1, ip1, sr1, ls1, &next1); - - if (ls2->end_psp) - end_psp_srh_processing (node, b2, ip2, prev2, sr2, ls2, &next2); - else - end_srh_processing (node, b2, ip2, sr2, ls2, &next2); - - if (ls3->end_psp) - end_psp_srh_processing (node, b3, ip3, prev3, sr3, ls3, &next3); - else - end_srh_processing (node, b3, ip3, sr3, ls3, &next3); - - //TODO: proper trace. - - vlib_increment_combined_counter - (((next0 == - SR_LOCALSID_NEXT_ERROR) ? 
&(sm->sr_ls_invalid_counters) : - &(sm->sr_ls_valid_counters)), thread_index, ls0 - sm->localsids, - 1, vlib_buffer_length_in_chain (vm, b0)); - - vlib_increment_combined_counter - (((next1 == - SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : - &(sm->sr_ls_valid_counters)), thread_index, ls1 - sm->localsids, - 1, vlib_buffer_length_in_chain (vm, b1)); - - vlib_increment_combined_counter - (((next2 == - SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : - &(sm->sr_ls_valid_counters)), thread_index, ls2 - sm->localsids, - 1, vlib_buffer_length_in_chain (vm, b2)); - - vlib_increment_combined_counter - (((next3 == - SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : - &(sm->sr_ls_valid_counters)), thread_index, ls3 - sm->localsids, - 1, vlib_buffer_length_in_chain (vm, b3)); - - vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, - n_left_to_next, bi0, bi1, bi2, bi3, - next0, next1, next2, next3); - } - - /* Single loop for potentially the last three packets */ - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0; - vlib_buffer_t *b0; - ip6_header_t *ip0 = 0; - ip6_ext_header_t *prev0; - ip6_sr_header_t *sr0; - u32 next0 = SR_LOCALSID_NEXT_IP6_LOOKUP; - ip6_sr_localsid_t *ls0; - - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - ip0 = vlib_buffer_get_current (b0); - ip6_ext_header_find_t (ip0, prev0, sr0, IP_PROTOCOL_IPV6_ROUTE); - - /* Lookup the SR End behavior based on IP DA (adj) */ - ls0 = - pool_elt_at_index (sm->localsids, - vnet_buffer (b0)->ip.adj_index[VLIB_TX]); - - /* SRH processing */ - if (ls0->end_psp) - end_psp_srh_processing (node, b0, ip0, prev0, sr0, ls0, &next0); - else - end_srh_processing (node, b0, ip0, sr0, ls0, &next0); - - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_localsid_trace_t *tr = - vlib_add_trace (vm, node, b0, sizeof (*tr)); - tr->num_segments = 0; - tr->localsid_index = ls0 - sm->localsids; - - if (ip0 == vlib_buffer_get_current (b0)) - { - clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, - sizeof (tr->src.as_u8)); - clib_memcpy (tr->out_dst.as_u8, ip0->dst_address.as_u8, - sizeof (tr->out_dst.as_u8)); - if (ip0->protocol == IP_PROTOCOL_IPV6_ROUTE - && sr0->type == ROUTING_HEADER_TYPE_SR) - { - clib_memcpy (tr->sr, sr0->segments, sr0->length * 8); - tr->num_segments = - sr0->length * 8 / sizeof (ip6_address_t); - tr->segments_left = sr0->segments_left; - } - } - else - { - tr->num_segments = 0xFF; - } - } - - vlib_increment_combined_counter - (((next0 == - SR_LOCALSID_NEXT_ERROR) ? 
&(sm->sr_ls_invalid_counters) : - &(sm->sr_ls_valid_counters)), thread_index, ls0 - sm->localsids, - 1, vlib_buffer_length_in_chain (vm, b0)); - - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, bi0, next0); - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - return from_frame->n_vectors; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (sr_localsid_node) = { - .function = sr_localsid_fn, - .name = "sr-localsid", - .vector_size = sizeof (u32), - .format_trace = format_sr_localsid_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - .n_errors = SR_LOCALSID_N_ERROR, - .error_strings = sr_localsid_error_strings, - .n_next_nodes = SR_LOCALSID_N_NEXT, - .next_nodes = { -#define _(s,n) [SR_LOCALSID_NEXT_##s] = n, - foreach_sr_localsid_next -#undef _ - }, -}; -/* *INDENT-ON* */ - -static u8 * -format_sr_dpo (u8 * s, va_list * args) -{ - index_t index = va_arg (*args, index_t); - CLIB_UNUSED (u32 indent) = va_arg (*args, u32); - - return (format (s, "SR: localsid_index:[%d]", index)); -} - -const static dpo_vft_t sr_loc_vft = { - .dv_lock = sr_dpo_lock, - .dv_unlock = sr_dpo_unlock, - .dv_format = format_sr_dpo, -}; - -const static char *const sr_loc_ip6_nodes[] = { - "sr-localsid", - NULL, -}; - -const static char *const *const sr_loc_nodes[DPO_PROTO_NUM] = { - [DPO_PROTO_IP6] = sr_loc_ip6_nodes, -}; - -const static char *const sr_loc_d_ip6_nodes[] = { - "sr-localsid-d", - NULL, -}; - -const static char *const *const sr_loc_d_nodes[DPO_PROTO_NUM] = { - [DPO_PROTO_IP6] = sr_loc_d_ip6_nodes, -}; - - -/*************************** SR LocalSID plugins ******************************/ -/** - * @brief SR LocalSID plugin registry - */ -int -sr_localsid_register_function (vlib_main_t * vm, u8 * fn_name, - u8 * keyword_str, u8 * def_str, - u8 * params_str, dpo_type_t * dpo, - format_function_t * ls_format, - unformat_function_t * ls_unformat, - sr_plugin_callback_t * creation_fn, - sr_plugin_callback_t * removal_fn) -{ - ip6_sr_main_t *sm = &sr_main; - uword *p; - - sr_localsid_fn_registration_t *plugin; - - /* Did this function exist? 
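(i.e. is fn_name already present in the plugin_functions_by_key hash?)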
If so update it */ - p = hash_get_mem (sm->plugin_functions_by_key, fn_name); - if (p) - { - plugin = pool_elt_at_index (sm->plugin_functions, p[0]); - } - /* Else create a new one and set hash key */ - else - { - pool_get (sm->plugin_functions, plugin); - hash_set_mem (sm->plugin_functions_by_key, fn_name, - plugin - sm->plugin_functions); - } - - memset (plugin, 0, sizeof (*plugin)); - - plugin->sr_localsid_function_number = (plugin - sm->plugin_functions); - plugin->sr_localsid_function_number += SR_BEHAVIOR_LAST; - plugin->ls_format = ls_format; - plugin->ls_unformat = ls_unformat; - plugin->creation = creation_fn; - plugin->removal = removal_fn; - clib_memcpy (&plugin->dpo, dpo, sizeof (dpo_type_t)); - plugin->function_name = format (0, "%s%c", fn_name, 0); - plugin->keyword_str = format (0, "%s%c", keyword_str, 0); - plugin->def_str = format (0, "%s%c", def_str, 0); - plugin->params_str = format (0, "%s%c", params_str, 0); - - return plugin->sr_localsid_function_number; -} - -/** - * @brief CLI function to 'show' all available SR LocalSID behaviors - */ -static clib_error_t * -show_sr_localsid_behaviors_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - ip6_sr_main_t *sm = &sr_main; - sr_localsid_fn_registration_t *plugin; - sr_localsid_fn_registration_t **plugins_vec = 0; - int i; - - vlib_cli_output (vm, - "SR LocalSIDs behaviors:\n-----------------------\n\n"); - - /* *INDENT-OFF* */ - pool_foreach (plugin, sm->plugin_functions, - ({ vec_add1 (plugins_vec, plugin); })); - /* *INDENT-ON* */ - - /* Print static behaviors */ - vlib_cli_output (vm, "Default behaviors:\n" - "\tEnd\t-> Endpoint.\n" - "\tEnd.X\t-> Endpoint with decapsulation and Layer-3 cross-connect.\n" - "\t\tParameters: ' '\n" - "\tEnd.DX2\t-> Endpoint with decapsulation and Layer-2 cross-connect.\n" - "\t\tParameters: ''\n" - "\tEnd.DX6\t-> Endpoint with decapsulation and IPv6 cross-connect.\n" - "\t\tParameters: ' '\n" - "\tEnd.DX4\t-> Endpoint with decapsulation and IPv4 cross-connect.\n" - "\t\tParameters: ' '\n" - "\tEnd.DT6\t-> Endpoint with decapsulation and specific IPv6 table lookup.\n" - "\t\tParameters: ''\n" - "\tEnd.DT4\t-> Endpoint with decapsulation and specific IPv4 table lookup.\n" - "\t\tParameters: ''\n"); - vlib_cli_output (vm, "Plugin behaviors:\n"); - for (i = 0; i < vec_len (plugins_vec); i++) - { - plugin = plugins_vec[i]; - vlib_cli_output (vm, "\t%s\t-> %s.\n", plugin->keyword_str, - plugin->def_str); - vlib_cli_output (vm, "\t\tParameters: '%s'\n", plugin->params_str); - } - return 0; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (show_sr_localsid_behaviors_command, static) = { - .path = "show sr localsids behaviors", - .short_help = "show sr localsids behaviors", - .function = show_sr_localsid_behaviors_command_fn, -}; -/* *INDENT-ON* */ - -/** - * @brief SR LocalSID initialization - */ -clib_error_t * -sr_localsids_init (vlib_main_t * vm) -{ - /* Init memory for function keys */ - ip6_sr_main_t *sm = &sr_main; - mhash_init (&sm->sr_localsids_index_hash, sizeof (uword), - sizeof (ip6_address_t)); - /* Init SR behaviors DPO type */ - sr_localsid_dpo_type = dpo_register_new_type (&sr_loc_vft, sr_loc_nodes); - /* Init SR behaviors DPO type */ - sr_localsid_d_dpo_type = - dpo_register_new_type (&sr_loc_vft, sr_loc_d_nodes); - /* Init memory for localsid plugins */ - sm->plugin_functions_by_key = hash_create_string (0, sizeof (uword)); - return 0; -} - -VLIB_INIT_FUNCTION (sr_localsids_init); -/* -* fd.io coding-style-patch-verification: ON -* -* Local 
Variables: -* eval: (c-set-style "gnu") -* End: -*/ diff --git a/src/vnet/sr/sr_localsid.md b/src/vnet/sr/sr_localsid.md deleted file mode 100644 index 340af4a3..00000000 --- a/src/vnet/sr/sr_localsid.md +++ /dev/null @@ -1,58 +0,0 @@ -# SR LocalSIDs {#srv6_localsid_doc} - -A local SID is associated to a Segment Routing behavior -or function- on the current node. - -The most basic behavior is called END. It simply activates the next SID in the current packet, by decrementing the Segments Left value and updating the IPv6 DA. - -A local END SID is instantiated using the following CLI: - - sr localsid (del) address XX::YY behavior end - -This creates a new entry in the main FIB for IPv6 address XX::YY. All packets whose IPv6 DA matches this FIB entry are redirected to the sr-localsid node, where they are processed as described above. - -Other examples of local SIDs are the following: - - sr localsid (del) address XX::YY behavior end - sr localsid (del) address XX::YY behavior end.x GE0/1/0 2001::a - sr localsid (del) address XX::YY behavior end.dx6 GE0/1/0 2001::a - sr localsid (del) address XX::YY behavior end.dx4 GE0/1/0 10.0.0.1 - sr localsid (del) address XX::YY behavior end.dx2 GigabitE0/11/0 - sr localsid (del) address XX::YY behavior end.dt6 5 - sr localsid (del) address XX::YY behavior end.dt6 5 - -Note that all of these behaviors match the definitions of the SRv6 architecture (*draft-filsfils-spring-srv6-network-programming*). Please refer to this document for a detailed description of each behavior. - -Note also that you can configure the PSP flavor of the End and End.X behaviors by typing: - - sr localsid (del) address XX::YY behavior end psp - sr localsid (del) address XX::YY behavior end.x GE0/1/0 2001::a psp - -Help on the available local SID behaviors and their usage can be obtained with: - - help sr localsid - -Alternatively they can be obtained using. - - show sr localsids behavior - -The difference in between those two commands is that the first one will only display the SR LocalSID behaviors that are built-in VPP, while the latter will display those behaviors plus the ones added with the SR LocalSID Development Framework. - - -VPP keeps a 'My LocalSID Table' where it stores all the SR local SIDs instantiated as well as their parameters. Every time a new local SID is instantiated, a new entry is added to this table. In addition, counters for correctly and incorrectly processed traffic are maintained for each local SID. The counters store both the number of packets and bytes. - -The contents of the 'My LocalSID Table' is shown with: - - vpp# show sr localsid - SRv6 - My LocalSID Table: - ========================= - Address: c3::1 - Behavior: DX6 (Endpoint with decapsulation and IPv6 cross-connect) - Iface: GigabitEthernet0/5/0 - Next hop: b:c3::b - Good traffic: [51277 packets : 5332808 bytes] - Bad traffic: [0 packets : 0 bytes] - -------------------- - -The traffic counters can be reset with: - - vpp# clear sr localsid counters diff --git a/src/vnet/sr/sr_packet.h b/src/vnet/sr/sr_packet.h deleted file mode 100755 index 7af4ad4d..00000000 --- a/src/vnet/sr/sr_packet.h +++ /dev/null @@ -1,159 +0,0 @@ -#ifndef included_vnet_sr_packet_h -#define included_vnet_sr_packet_h - -#include - -/* - * ipv6 segment-routing header format - * - * Copyright (c) 2013 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * The Segment Routing Header (SRH) is defined as follows: - * - * 0 1 2 3 - * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * | Next Header | Hdr Ext Len | Routing Type | Segments Left | - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * | First Segment | Flags | RESERVED | - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * | | - * | Segment List[0] (128 bits IPv6 address) | - * | | - * | | - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * | | - * | | - * ... - * | | - * | | - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * | | - * | Segment List[n] (128 bits IPv6 address) | - * | | - * | | - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * // // - * // Optional Type Length Value objects (variable) // - * // // - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * - * where: - * - * o Next Header: 8-bit selector. Identifies the type of header - * immediately following the SRH. - * - * o Hdr Ext Len: 8-bit unsigned integer, is the length of the SRH - * header in 8-octet units, not including the first 8 octets. - * - * o Routing Type: TBD, to be assigned by IANA (suggested value: 4). - * - * o Segments Left. Defined in [RFC2460], it contains the index, in - * the Segment List, of the next segment to inspect. Segments Left - * is decremented at each segment. - * - * o First Segment: contains the index, in the Segment List, of the - * first segment of the path which is in fact the last element of the - * Segment List. - * - * o Flags: 8 bits of flags. Following flags are defined: - * - * 0 1 2 3 4 5 6 7 - * +-+-+-+-+-+-+-+-+ - * |U|P|O|A|H| U | - * +-+-+-+-+-+-+-+-+ - * - * U: Unused and for future use. SHOULD be unset on transmission - * and MUST be ignored on receipt. - * - * P-flag: Protected flag. Set when the packet has been rerouted - * through FRR mechanism by an SR endpoint node. - * - * O-flag: OAM flag. When set, it indicates that this packet is - * an operations and management (OAM) packet. - * - * A-flag: Alert flag. If present, it means important Type Length - * Value (TLV) objects are present. See Section 3.1 for details - * on TLVs objects. - * - * H-flag: HMAC flag. If set, the HMAC TLV is present and is - * encoded as the last TLV of the SRH. In other words, the last - * 36 octets of the SRH represent the HMAC information. See - * Section 3.1.5 for details on the HMAC TLV. - * - * o RESERVED: SHOULD be unset on transmission and MUST be ignored on - * receipt. - * - * o Segment List[n]: 128 bit IPv6 addresses representing the nth - * segment in the Segment List. The Segment List is encoded starting - * from the last segment of the path. I.e., the first element of the - * segment list (Segment List [0]) contains the last segment of the - * path while the last segment of the Segment List (Segment List[n]) - * contains the first segment of the path. 
The index contained in - * "Segments Left" identifies the current active segment. - * - * o Type Length Value (TLV) are described in Section 3.1. - * - */ - -#ifndef IPPROTO_IPV6_ROUTE -#define IPPROTO_IPV6_ROUTE 43 -#endif - -#define ROUTING_HEADER_TYPE_SR 4 - -typedef struct -{ - /* Protocol for next header. */ - u8 protocol; - /* - * Length of routing header in 8 octet units, - * not including the first 8 octets - */ - u8 length; - - /* Type of routing header; type 4 = segement routing */ - u8 type; - - /* Next segment in the segment list */ - u8 segments_left; - - /* Pointer to the first segment in the header */ - u8 first_segment; - - /* Flag bits */ -#define IP6_SR_HEADER_FLAG_PROTECTED (0x40) -#define IP6_SR_HEADER_FLAG_OAM (0x20) -#define IP6_SR_HEADER_FLAG_ALERT (0x10) -#define IP6_SR_HEADER_FLAG_HMAC (0x80) - - /* values 0x0, 0x4 - 0x7 are reserved */ - u8 flags; - u16 reserved; - - /* The segment elts */ - ip6_address_t segments[0]; -} __attribute__ ((packed)) ip6_sr_header_t; - -/* -* fd.io coding-style-patch-verification: ON -* -* Local Variables: -* eval: (c-set-style "gnu") -* End: -*/ - -#endif /* included_vnet_sr_packet_h */ diff --git a/src/vnet/sr/sr_policy.md b/src/vnet/sr/sr_policy.md deleted file mode 100644 index 521b8461..00000000 --- a/src/vnet/sr/sr_policy.md +++ /dev/null @@ -1,56 +0,0 @@ -# Creating a SR Policy {#srv6_policy_doc} - -An SR Policy is defined by a Binding SID and a weighted set of Segment Lists. - -A new SR policy is created with a first SID list using: - - sr policy add bsid 2001::1 next A1:: next B1:: next C1:: (weight 5) (fib-table 3) - -* The weight parameter is only used if more than one SID list is associated with the policy. -* The fib-table parameter specifies in which table (VRF) the Binding SID is to be installed. - -An SR policy is deleted with: - - sr policy del bsid 2001::1 - sr policy del index 1 - -The existing SR policies are listed with: - - show sr policies - -## Adding/Removing SID Lists from an SR policy - -An additional SID list is associated with an existing SR policy with: - - sr policy mod bsid 2001::1 add sl next A2:: next B2:: next C2:: (weight 3) - sr policy mod index 3 add sl next A2:: next B2:: next C2:: (weight 3) - -Conversely, a SID list can be removed from an SR policy with: - - sr policy mod bsid 2001::1 del sl index 1 - sr policy mod index 3 del sl index 1 - -Note that this cannot be used to remove the last SID list of a policy. - -The weight of a SID list can also be modified with: - - sr policy mod bsid 2001::1 mod sl index 1 weight 4 - sr policy mod index 3 mod sl index 1 weight 4 - -## SR Policies: Spray policies - -Spray policies are a specific type of SR policies where the packet is replicated on all the SID lists, rather than load-balanced among them. - -SID list weights are ignored with this type of policies. - -A Spray policy is instantiated by appending the keyword **spray** to a regular SR policy command, as in: - - sr policy add bsid 2001::1 next A1:: next B1:: next C1:: spray - -Spray policies are used for removing multicast state from a network core domain, and instead send a linear unicast copy to every access node. The last SID in each list accesses the multicast tree within the access node. - -## Encapsulation SR policies - -In case the user decides to create an SR policy an IPv6 Source Address must be specified for the encapsulated traffic. 
In order to do so the user might use the following command: - - set sr encaps source addr XXXX::YYYY diff --git a/src/vnet/sr/sr_policy_rewrite.c b/src/vnet/sr/sr_policy_rewrite.c deleted file mode 100755 index c4024070..00000000 --- a/src/vnet/sr/sr_policy_rewrite.c +++ /dev/null @@ -1,3227 +0,0 @@ -/* - * sr_policy_rewrite.c: ipv6 sr policy creation - * - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file - * @brief SR policy creation and application - * - * Create an SR policy. - * An SR policy can be either of 'default' type or 'spray' type - * An SR policy has attached a list of SID lists. - * In case the SR policy is a default one it will load balance among them. - * An SR policy has associated a BindingSID. - * In case any packet arrives with IPv6 DA == BindingSID then the SR policy - * associated to such bindingSID will be applied to such packet. - * - * SR policies can be applied either by using IPv6 encapsulation or - * SRH insertion. Both methods can be found on this file. - * - * Traffic input usually is IPv6 packets. However it is possible to have - * IPv4 packets or L2 frames. (that are encapsulated into IPv6 with SRH) - * - * This file provides the appropiates VPP graph nodes to do any of these - * methods. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -/** - * @brief SR policy rewrite trace - */ -typedef struct -{ - ip6_address_t src, dst; -} sr_policy_rewrite_trace_t; - -/* Graph arcs */ -#define foreach_sr_policy_rewrite_next \ -_(IP6_LOOKUP, "ip6-lookup") \ -_(ERROR, "error-drop") - -typedef enum -{ -#define _(s,n) SR_POLICY_REWRITE_NEXT_##s, - foreach_sr_policy_rewrite_next -#undef _ - SR_POLICY_REWRITE_N_NEXT, -} sr_policy_rewrite_next_t; - -/* SR rewrite errors */ -#define foreach_sr_policy_rewrite_error \ -_(INTERNAL_ERROR, "Segment Routing undefined error") \ -_(BSID_ZERO, "BSID with SL = 0") \ -_(COUNTER_TOTAL, "SR steered IPv6 packets") \ -_(COUNTER_ENCAP, "SR: Encaps packets") \ -_(COUNTER_INSERT, "SR: SRH inserted packets") \ -_(COUNTER_BSID, "SR: BindingSID steered packets") - -typedef enum -{ -#define _(sym,str) SR_POLICY_REWRITE_ERROR_##sym, - foreach_sr_policy_rewrite_error -#undef _ - SR_POLICY_REWRITE_N_ERROR, -} sr_policy_rewrite_error_t; - -static char *sr_policy_rewrite_error_strings[] = { -#define _(sym,string) string, - foreach_sr_policy_rewrite_error -#undef _ -}; - -/** - * @brief Dynamically added SR SL DPO type - */ -static dpo_type_t sr_pr_encaps_dpo_type; -static dpo_type_t sr_pr_insert_dpo_type; -static dpo_type_t sr_pr_bsid_encaps_dpo_type; -static dpo_type_t sr_pr_bsid_insert_dpo_type; - -/** - * @brief IPv6 SA for encapsulated packets - */ -static ip6_address_t sr_pr_encaps_src; - -/******************* SR rewrite set encaps IPv6 source addr *******************/ -/* Note: This is temporal. 
We don't know whether to follow this path or - take the ip address of a loopback interface or even the OIF */ - -static clib_error_t * -set_sr_src_command_fn (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat - (input, "addr %U", unformat_ip6_address, &sr_pr_encaps_src)) - return 0; - else - return clib_error_return (0, "No address specified"); - } - return clib_error_return (0, "No address specified"); -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (set_sr_src_command, static) = { - .path = "set sr encaps source", - .short_help = "set sr encaps source addr ", - .function = set_sr_src_command_fn, -}; -/* *INDENT-ON* */ - -/*********************** SR rewrite string computation ************************/ -/** - * @brief SR rewrite string computation for IPv6 encapsulation (inline) - * - * @param sl is a vector of IPv6 addresses composing the Segment List - * - * @return precomputed rewrite string for encapsulation - */ -static inline u8 * -compute_rewrite_encaps (ip6_address_t * sl) -{ - ip6_header_t *iph; - ip6_sr_header_t *srh; - ip6_address_t *addrp, *this_address; - u32 header_length = 0; - u8 *rs = NULL; - - header_length = 0; - header_length += IPv6_DEFAULT_HEADER_LENGTH; - if (vec_len (sl) > 1) - { - header_length += sizeof (ip6_sr_header_t); - header_length += vec_len (sl) * sizeof (ip6_address_t); - } - - vec_validate (rs, header_length - 1); - - iph = (ip6_header_t *) rs; - iph->ip_version_traffic_class_and_flow_label = - clib_host_to_net_u32 (0 | ((6 & 0xF) << 28)); - iph->src_address.as_u64[0] = sr_pr_encaps_src.as_u64[0]; - iph->src_address.as_u64[1] = sr_pr_encaps_src.as_u64[1]; - iph->payload_length = header_length - IPv6_DEFAULT_HEADER_LENGTH; - iph->protocol = IP_PROTOCOL_IPV6; - iph->hop_limit = IPv6_DEFAULT_HOP_LIMIT; - - srh = (ip6_sr_header_t *) (iph + 1); - iph->protocol = IP_PROTOCOL_IPV6_ROUTE; - srh->protocol = IP_PROTOCOL_IPV6; - srh->type = ROUTING_HEADER_TYPE_SR; - srh->segments_left = vec_len (sl) - 1; - srh->first_segment = vec_len (sl) - 1; - srh->length = ((sizeof (ip6_sr_header_t) + - (vec_len (sl) * sizeof (ip6_address_t))) / 8) - 1; - srh->flags = 0x00; - srh->reserved = 0x00; - addrp = srh->segments + vec_len (sl) - 1; - vec_foreach (this_address, sl) - { - clib_memcpy (addrp->as_u8, this_address->as_u8, sizeof (ip6_address_t)); - addrp--; - } - iph->dst_address.as_u64[0] = sl->as_u64[0]; - iph->dst_address.as_u64[1] = sl->as_u64[1]; - return rs; -} - -/** - * @brief SR rewrite string computation for SRH insertion (inline) - * - * @param sl is a vector of IPv6 addresses composing the Segment List - * - * @return precomputed rewrite string for SRH insertion - */ -static inline u8 * -compute_rewrite_insert (ip6_address_t * sl) -{ - ip6_sr_header_t *srh; - ip6_address_t *addrp, *this_address; - u32 header_length = 0; - u8 *rs = NULL; - - header_length = 0; - header_length += sizeof (ip6_sr_header_t); - header_length += (vec_len (sl) + 1) * sizeof (ip6_address_t); - - vec_validate (rs, header_length - 1); - - srh = (ip6_sr_header_t *) rs; - srh->type = ROUTING_HEADER_TYPE_SR; - srh->segments_left = vec_len (sl); - srh->first_segment = vec_len (sl); - srh->length = ((sizeof (ip6_sr_header_t) + - ((vec_len (sl) + 1) * sizeof (ip6_address_t))) / 8) - 1; - srh->flags = 0x00; - srh->reserved = 0x0000; - addrp = srh->segments + vec_len (sl); - vec_foreach (this_address, sl) - { - clib_memcpy (addrp->as_u8, this_address->as_u8, sizeof (ip6_address_t)); - addrp--; - 
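/* The SID list is written in reverse: the first segment of the path lands in the highest-index slot of the SRH. */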
} - return rs; -} - -/** - * @brief SR rewrite string computation for SRH insertion with BSID (inline) - * - * @param sl is a vector of IPv6 addresses composing the Segment List - * - * @return precomputed rewrite string for SRH insertion with BSID - */ -static inline u8 * -compute_rewrite_bsid (ip6_address_t * sl) -{ - ip6_sr_header_t *srh; - ip6_address_t *addrp, *this_address; - u32 header_length = 0; - u8 *rs = NULL; - - header_length = 0; - header_length += sizeof (ip6_sr_header_t); - header_length += vec_len (sl) * sizeof (ip6_address_t); - - vec_validate (rs, header_length - 1); - - srh = (ip6_sr_header_t *) rs; - srh->type = ROUTING_HEADER_TYPE_SR; - srh->segments_left = vec_len (sl) - 1; - srh->first_segment = vec_len (sl) - 1; - srh->length = ((sizeof (ip6_sr_header_t) + - (vec_len (sl) * sizeof (ip6_address_t))) / 8) - 1; - srh->flags = 0x00; - srh->reserved = 0x0000; - addrp = srh->segments + vec_len (sl) - 1; - vec_foreach (this_address, sl) - { - clib_memcpy (addrp->as_u8, this_address->as_u8, sizeof (ip6_address_t)); - addrp--; - } - return rs; -} - -/*************************** SR LB helper functions **************************/ -/** - * @brief Creates a Segment List and adds it to an SR policy - * - * Creates a Segment List and adds it to the SR policy. Notice that the SL are - * not necessarily unique. Hence there might be two Segment List within the - * same SR Policy with exactly the same segments and same weight. - * - * @param sr_policy is the SR policy where the SL will be added - * @param sl is a vector of IPv6 addresses composing the Segment List - * @param weight is the weight of the SegmentList (for load-balancing purposes) - * @param is_encap represents the mode (SRH insertion vs Encapsulation) - * - * @return pointer to the just created segment list - */ -static inline ip6_sr_sl_t * -create_sl (ip6_sr_policy_t * sr_policy, ip6_address_t * sl, u32 weight, - u8 is_encap) -{ - ip6_sr_main_t *sm = &sr_main; - ip6_sr_sl_t *segment_list; - - pool_get (sm->sid_lists, segment_list); - memset (segment_list, 0, sizeof (*segment_list)); - - vec_add1 (sr_policy->segments_lists, segment_list - sm->sid_lists); - - /* Fill in segment list */ - segment_list->weight = - (weight != (u32) ~ 0 ? 
weight : SR_SEGMENT_LIST_WEIGHT_DEFAULT); - segment_list->segments = vec_dup (sl); - - if (is_encap) - { - segment_list->rewrite = compute_rewrite_encaps (sl); - segment_list->rewrite_bsid = segment_list->rewrite; - } - else - { - segment_list->rewrite = compute_rewrite_insert (sl); - segment_list->rewrite_bsid = compute_rewrite_bsid (sl); - } - - /* Create DPO */ - dpo_reset (&segment_list->bsid_dpo); - dpo_reset (&segment_list->ip6_dpo); - dpo_reset (&segment_list->ip4_dpo); - - if (is_encap) - { - dpo_set (&segment_list->ip6_dpo, sr_pr_encaps_dpo_type, DPO_PROTO_IP6, - segment_list - sm->sid_lists); - dpo_set (&segment_list->ip4_dpo, sr_pr_encaps_dpo_type, DPO_PROTO_IP4, - segment_list - sm->sid_lists); - dpo_set (&segment_list->bsid_dpo, sr_pr_bsid_encaps_dpo_type, - DPO_PROTO_IP6, segment_list - sm->sid_lists); - } - else - { - dpo_set (&segment_list->ip6_dpo, sr_pr_insert_dpo_type, DPO_PROTO_IP6, - segment_list - sm->sid_lists); - dpo_set (&segment_list->bsid_dpo, sr_pr_bsid_insert_dpo_type, - DPO_PROTO_IP6, segment_list - sm->sid_lists); - } - - return segment_list; -} - -/** - * @brief Updates the Load Balancer after an SR Policy change - * - * @param sr_policy is the modified SR Policy - */ -static inline void -update_lb (ip6_sr_policy_t * sr_policy) -{ - flow_hash_config_t fhc; - u32 *sl_index; - ip6_sr_sl_t *segment_list; - ip6_sr_main_t *sm = &sr_main; - load_balance_path_t path; - path.path_index = FIB_NODE_INDEX_INVALID; - load_balance_path_t *ip4_path_vector = 0; - load_balance_path_t *ip6_path_vector = 0; - load_balance_path_t *b_path_vector = 0; - - /* In case LB does not exist, create it */ - if (!dpo_id_is_valid (&sr_policy->bsid_dpo)) - { - fib_prefix_t pfx = { - .fp_proto = FIB_PROTOCOL_IP6, - .fp_len = 128, - .fp_addr = { - .ip6 = sr_policy->bsid, - } - }; - - /* Add FIB entry for BSID */ - fhc = fib_table_get_flow_hash_config (sr_policy->fib_table, - dpo_proto_to_fib (DPO_PROTO_IP6)); - - dpo_set (&sr_policy->bsid_dpo, DPO_LOAD_BALANCE, DPO_PROTO_IP6, - load_balance_create (0, DPO_PROTO_IP6, fhc)); - - dpo_set (&sr_policy->ip6_dpo, DPO_LOAD_BALANCE, DPO_PROTO_IP6, - load_balance_create (0, DPO_PROTO_IP6, fhc)); - - /* Update FIB entry's to point to the LB DPO in the main FIB and hidden one */ - fib_table_entry_special_dpo_update (fib_table_find (FIB_PROTOCOL_IP6, - sr_policy->fib_table), - &pfx, FIB_SOURCE_SR, - FIB_ENTRY_FLAG_EXCLUSIVE, - &sr_policy->bsid_dpo); - - fib_table_entry_special_dpo_update (sm->fib_table_ip6, - &pfx, - FIB_SOURCE_SR, - FIB_ENTRY_FLAG_EXCLUSIVE, - &sr_policy->ip6_dpo); - - if (sr_policy->is_encap) - { - dpo_set (&sr_policy->ip4_dpo, DPO_LOAD_BALANCE, DPO_PROTO_IP4, - load_balance_create (0, DPO_PROTO_IP4, fhc)); - - fib_table_entry_special_dpo_update (sm->fib_table_ip4, - &pfx, - FIB_SOURCE_SR, - FIB_ENTRY_FLAG_EXCLUSIVE, - &sr_policy->ip4_dpo); - } - - } - - /* Create the LB path vector */ - //path_vector = vec_new(load_balance_path_t, vec_len(sr_policy->segments_lists)); - vec_foreach (sl_index, sr_policy->segments_lists) - { - segment_list = pool_elt_at_index (sm->sid_lists, *sl_index); - path.path_dpo = segment_list->bsid_dpo; - path.path_weight = segment_list->weight; - vec_add1 (b_path_vector, path); - path.path_dpo = segment_list->ip6_dpo; - vec_add1 (ip6_path_vector, path); - if (sr_policy->is_encap) - { - path.path_dpo = segment_list->ip4_dpo; - vec_add1 (ip4_path_vector, path); - } - } - - /* Update LB multipath */ - load_balance_multipath_update (&sr_policy->bsid_dpo, b_path_vector, - LOAD_BALANCE_FLAG_NONE); - 
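/* The BSID, IPv6 and (for encap policies) IPv4 load balancers are fed the same weighted per-SID-list paths; only the DPO each path points at differs. */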
load_balance_multipath_update (&sr_policy->ip6_dpo, ip6_path_vector, - LOAD_BALANCE_FLAG_NONE); - if (sr_policy->is_encap) - load_balance_multipath_update (&sr_policy->ip4_dpo, ip4_path_vector, - LOAD_BALANCE_FLAG_NONE); - - /* Cleanup */ - vec_free (b_path_vector); - vec_free (ip6_path_vector); - vec_free (ip4_path_vector); - -} - -/** - * @brief Updates the Replicate DPO after an SR Policy change - * - * @param sr_policy is the modified SR Policy (type spray) - */ -static inline void -update_replicate (ip6_sr_policy_t * sr_policy) -{ - u32 *sl_index; - ip6_sr_sl_t *segment_list; - ip6_sr_main_t *sm = &sr_main; - load_balance_path_t path; - path.path_index = FIB_NODE_INDEX_INVALID; - load_balance_path_t *b_path_vector = 0; - load_balance_path_t *ip6_path_vector = 0; - load_balance_path_t *ip4_path_vector = 0; - - /* In case LB does not exist, create it */ - if (!dpo_id_is_valid (&sr_policy->bsid_dpo)) - { - dpo_set (&sr_policy->bsid_dpo, DPO_REPLICATE, - DPO_PROTO_IP6, replicate_create (0, DPO_PROTO_IP6)); - - dpo_set (&sr_policy->ip6_dpo, DPO_REPLICATE, - DPO_PROTO_IP6, replicate_create (0, DPO_PROTO_IP6)); - - /* Update FIB entry's DPO to point to SR without LB */ - fib_prefix_t pfx = { - .fp_proto = FIB_PROTOCOL_IP6, - .fp_len = 128, - .fp_addr = { - .ip6 = sr_policy->bsid, - } - }; - fib_table_entry_special_dpo_update (fib_table_find (FIB_PROTOCOL_IP6, - sr_policy->fib_table), - &pfx, FIB_SOURCE_SR, - FIB_ENTRY_FLAG_EXCLUSIVE, - &sr_policy->bsid_dpo); - - fib_table_entry_special_dpo_update (sm->fib_table_ip6, - &pfx, - FIB_SOURCE_SR, - FIB_ENTRY_FLAG_EXCLUSIVE, - &sr_policy->ip6_dpo); - - if (sr_policy->is_encap) - { - dpo_set (&sr_policy->ip4_dpo, DPO_REPLICATE, DPO_PROTO_IP4, - replicate_create (0, DPO_PROTO_IP4)); - - fib_table_entry_special_dpo_update (sm->fib_table_ip4, - &pfx, - FIB_SOURCE_SR, - FIB_ENTRY_FLAG_EXCLUSIVE, - &sr_policy->ip4_dpo); - } - - } - - /* Create the replicate path vector */ - path.path_weight = 1; - vec_foreach (sl_index, sr_policy->segments_lists) - { - segment_list = pool_elt_at_index (sm->sid_lists, *sl_index); - path.path_dpo = segment_list->bsid_dpo; - vec_add1 (b_path_vector, path); - path.path_dpo = segment_list->ip6_dpo; - vec_add1 (ip6_path_vector, path); - if (sr_policy->is_encap) - { - path.path_dpo = segment_list->ip4_dpo; - vec_add1 (ip4_path_vector, path); - } - } - - /* Update replicate multipath */ - replicate_multipath_update (&sr_policy->bsid_dpo, b_path_vector); - replicate_multipath_update (&sr_policy->ip6_dpo, ip6_path_vector); - if (sr_policy->is_encap) - replicate_multipath_update (&sr_policy->ip4_dpo, ip4_path_vector); -} - -/******************************* SR rewrite API *******************************/ -/* Three functions for handling sr policies: - * -> sr_policy_add - * -> sr_policy_del - * -> sr_policy_mod - * All of them are API. CLI function on sr_policy_command_fn */ - -/** - * @brief Create a new SR policy - * - * @param bsid is the bindingSID of the SR Policy - * @param segments is a vector of IPv6 address composing the segment list - * @param weight is the weight of the sid list. optional. - * @param behavior is the behavior of the SR policy. 
(default//spray) - * @param fib_table is the VRF where to install the FIB entry for the BSID - * @param is_encap (bool) whether SR policy should behave as Encap/SRH Insertion - * - * @return 0 if correct, else error - */ -int -sr_policy_add (ip6_address_t * bsid, ip6_address_t * segments, - u32 weight, u8 behavior, u32 fib_table, u8 is_encap) -{ - ip6_sr_main_t *sm = &sr_main; - ip6_sr_policy_t *sr_policy = 0; - uword *p; - - /* Search for existing keys (BSID) */ - p = mhash_get (&sm->sr_policies_index_hash, bsid); - if (p) - { - /* Add SR policy that already exists; complain */ - return -12; - } - - /* Search collision in FIB entries */ - /* Explanation: It might be possible that some other entity has already - * created a route for the BSID. This in theory is impossible, but in - * practise we could see it. Assert it and scream if needed */ - fib_prefix_t pfx = { - .fp_proto = FIB_PROTOCOL_IP6, - .fp_len = 128, - .fp_addr = { - .ip6 = *bsid, - } - }; - - /* Lookup the FIB index associated to the table selected */ - u32 fib_index = fib_table_find (FIB_PROTOCOL_IP6, - (fib_table != (u32) ~ 0 ? fib_table : 0)); - if (fib_index == ~0) - return -13; - - /* Lookup whether there exists an entry for the BSID */ - fib_node_index_t fei = fib_table_lookup_exact_match (fib_index, &pfx); - if (FIB_NODE_INDEX_INVALID != fei) - return -12; //There is an entry for such lookup - - /* Add an SR policy object */ - pool_get (sm->sr_policies, sr_policy); - memset (sr_policy, 0, sizeof (*sr_policy)); - clib_memcpy (&sr_policy->bsid, bsid, sizeof (ip6_address_t)); - sr_policy->type = behavior; - sr_policy->fib_table = (fib_table != (u32) ~ 0 ? fib_table : 0); //Is default FIB 0 ? - sr_policy->is_encap = is_encap; - - /* Copy the key */ - mhash_set (&sm->sr_policies_index_hash, bsid, sr_policy - sm->sr_policies, - NULL); - - /* Create a segment list and add the index to the SR policy */ - create_sl (sr_policy, segments, weight, is_encap); - - /* If FIB doesnt exist, create them */ - if (sm->fib_table_ip6 == (u32) ~ 0) - { - sm->fib_table_ip6 = fib_table_create_and_lock (FIB_PROTOCOL_IP6, - "SRv6 steering of IP6 prefixes through BSIDs"); - sm->fib_table_ip4 = fib_table_create_and_lock (FIB_PROTOCOL_IP6, - "SRv6 steering of IP4 prefixes through BSIDs"); - } - - /* Create IPv6 FIB for the BindingSID attached to the DPO of the only SL */ - if (sr_policy->type == SR_POLICY_TYPE_DEFAULT) - update_lb (sr_policy); - else if (sr_policy->type == SR_POLICY_TYPE_SPRAY) - update_replicate (sr_policy); - return 0; -} - -/** - * @brief Delete a SR policy - * - * @param bsid is the bindingSID of the SR Policy - * @param index is the index of the SR policy - * - * @return 0 if correct, else error - */ -int -sr_policy_del (ip6_address_t * bsid, u32 index) -{ - ip6_sr_main_t *sm = &sr_main; - ip6_sr_policy_t *sr_policy = 0; - ip6_sr_sl_t *segment_list; - u32 *sl_index; - uword *p; - - if (bsid) - { - p = mhash_get (&sm->sr_policies_index_hash, bsid); - if (p) - sr_policy = pool_elt_at_index (sm->sr_policies, p[0]); - else - return -1; - } - else - { - sr_policy = pool_elt_at_index (sm->sr_policies, index); - if (!sr_policy) - return -1; - } - - /* Remove BindingSID FIB entry */ - fib_prefix_t pfx = { - .fp_proto = FIB_PROTOCOL_IP6, - .fp_len = 128, - .fp_addr = { - .ip6 = sr_policy->bsid, - } - , - }; - - fib_table_entry_special_remove (fib_table_find (FIB_PROTOCOL_IP6, - sr_policy->fib_table), - &pfx, FIB_SOURCE_SR); - - fib_table_entry_special_remove (sm->fib_table_ip6, &pfx, FIB_SOURCE_SR); - - if (sr_policy->is_encap) - 
fib_table_entry_special_remove (sm->fib_table_ip4, &pfx, FIB_SOURCE_SR); - - if (dpo_id_is_valid (&sr_policy->bsid_dpo)) - { - dpo_reset (&sr_policy->bsid_dpo); - dpo_reset (&sr_policy->ip4_dpo); - dpo_reset (&sr_policy->ip6_dpo); - } - - /* Clean SID Lists */ - vec_foreach (sl_index, sr_policy->segments_lists) - { - segment_list = pool_elt_at_index (sm->sid_lists, *sl_index); - vec_free (segment_list->segments); - vec_free (segment_list->rewrite); - vec_free (segment_list->rewrite_bsid); - pool_put_index (sm->sid_lists, *sl_index); - } - - /* Remove SR policy entry */ - mhash_unset (&sm->sr_policies_index_hash, &sr_policy->bsid, NULL); - pool_put (sm->sr_policies, sr_policy); - - /* If FIB empty unlock it */ - if (!pool_elts (sm->sr_policies) && !pool_elts (sm->steer_policies)) - { - fib_table_unlock (sm->fib_table_ip6, FIB_PROTOCOL_IP6); - fib_table_unlock (sm->fib_table_ip4, FIB_PROTOCOL_IP6); - sm->fib_table_ip6 = (u32) ~ 0; - sm->fib_table_ip4 = (u32) ~ 0; - } - - return 0; -} - -/** - * @brief Modify an existing SR policy - * - * The possible modifications are adding a new Segment List, modifying an - * existing Segment List (modify the weight only) and delete a given - * Segment List from the SR Policy. - * - * @param bsid is the bindingSID of the SR Policy - * @param index is the index of the SR policy - * @param fib_table is the VRF where to install the FIB entry for the BSID - * @param operation is the operation to perform (among the top ones) - * @param segments is a vector of IPv6 address composing the segment list - * @param sl_index is the index of the Segment List to modify/delete - * @param weight is the weight of the sid list. optional. - * @param is_encap Mode. Encapsulation or SRH insertion. - * - * @return 0 if correct, else error - */ -int -sr_policy_mod (ip6_address_t * bsid, u32 index, u32 fib_table, - u8 operation, ip6_address_t * segments, u32 sl_index, - u32 weight) -{ - ip6_sr_main_t *sm = &sr_main; - ip6_sr_policy_t *sr_policy = 0; - ip6_sr_sl_t *segment_list; - u32 *sl_index_iterate; - uword *p; - - if (bsid) - { - p = mhash_get (&sm->sr_policies_index_hash, bsid); - if (p) - sr_policy = pool_elt_at_index (sm->sr_policies, p[0]); - else - return -1; - } - else - { - sr_policy = pool_elt_at_index (sm->sr_policies, index); - if (!sr_policy) - return -1; - } - - if (operation == 1) /* Add SR List to an existing SR policy */ - { - /* Create the new SL */ - segment_list = - create_sl (sr_policy, segments, weight, sr_policy->is_encap); - - /* Create a new LB DPO */ - if (sr_policy->type == SR_POLICY_TYPE_DEFAULT) - update_lb (sr_policy); - else if (sr_policy->type == SR_POLICY_TYPE_SPRAY) - update_replicate (sr_policy); - } - else if (operation == 2) /* Delete SR List from an existing SR policy */ - { - /* Check that currently there are more than one SID list */ - if (vec_len (sr_policy->segments_lists) == 1) - return -21; - - /* Check that the SR list does exist and is assigned to the sr policy */ - vec_foreach (sl_index_iterate, sr_policy->segments_lists) - if (*sl_index_iterate == sl_index) - break; - - if (*sl_index_iterate != sl_index) - return -22; - - /* Remove the lucky SR list that is being kicked out */ - segment_list = pool_elt_at_index (sm->sid_lists, sl_index); - vec_free (segment_list->segments); - vec_free (segment_list->rewrite); - vec_free (segment_list->rewrite_bsid); - pool_put_index (sm->sid_lists, sl_index); - vec_del1 (sr_policy->segments_lists, - sl_index_iterate - sr_policy->segments_lists); - - /* Create a new LB DPO */ - if (sr_policy->type 
== SR_POLICY_TYPE_DEFAULT) - update_lb (sr_policy); - else if (sr_policy->type == SR_POLICY_TYPE_SPRAY) - update_replicate (sr_policy); - } - else if (operation == 3) /* Modify the weight of an existing SR List */ - { - /* Find the corresponding SL */ - vec_foreach (sl_index_iterate, sr_policy->segments_lists) - if (*sl_index_iterate == sl_index) - break; - - if (*sl_index_iterate != sl_index) - return -32; - - /* Change the weight */ - segment_list = pool_elt_at_index (sm->sid_lists, sl_index); - segment_list->weight = weight; - - /* Update LB */ - if (sr_policy->type == SR_POLICY_TYPE_DEFAULT) - update_lb (sr_policy); - } - else /* Incorrect op. */ - return -1; - - return 0; -} - -/** - * @brief CLI for 'sr policies' command family - */ -static clib_error_t * -sr_policy_command_fn (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - int rv = -1; - char is_del = 0, is_add = 0, is_mod = 0; - char policy_set = 0; - ip6_address_t bsid, next_address; - u32 sr_policy_index = (u32) ~ 0, sl_index = (u32) ~ 0; - u32 weight = (u32) ~ 0, fib_table = (u32) ~ 0; - ip6_address_t *segments = 0, *this_seg; - u8 operation = 0; - char is_encap = 1; - char is_spray = 0; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (!is_add && !is_mod && !is_del && unformat (input, "add")) - is_add = 1; - else if (!is_add && !is_mod && !is_del && unformat (input, "del")) - is_del = 1; - else if (!is_add && !is_mod && !is_del && unformat (input, "mod")) - is_mod = 1; - else if (!policy_set - && unformat (input, "bsid %U", unformat_ip6_address, &bsid)) - policy_set = 1; - else if (!is_add && !policy_set - && unformat (input, "index %d", &sr_policy_index)) - policy_set = 1; - else if (unformat (input, "weight %d", &weight)); - else - if (unformat (input, "next %U", unformat_ip6_address, &next_address)) - { - vec_add2 (segments, this_seg, 1); - clib_memcpy (this_seg->as_u8, next_address.as_u8, - sizeof (*this_seg)); - } - else if (unformat (input, "add sl")) - operation = 1; - else if (unformat (input, "del sl index %d", &sl_index)) - operation = 2; - else if (unformat (input, "mod sl index %d", &sl_index)) - operation = 3; - else if (fib_table == (u32) ~ 0 - && unformat (input, "fib-table %d", &fib_table)); - else if (unformat (input, "encap")) - is_encap = 1; - else if (unformat (input, "insert")) - is_encap = 0; - else if (unformat (input, "spray")) - is_spray = 1; - else - break; - } - - if (!is_add && !is_mod && !is_del) - return clib_error_return (0, "Incorrect CLI"); - - if (!policy_set) - return clib_error_return (0, "No SR policy BSID or index specified"); - - if (is_add) - { - if (vec_len (segments) == 0) - return clib_error_return (0, "No Segment List specified"); - rv = sr_policy_add (&bsid, segments, weight, - (is_spray ? SR_POLICY_TYPE_SPRAY : - SR_POLICY_TYPE_DEFAULT), fib_table, is_encap); - } - else if (is_del) - rv = sr_policy_del ((sr_policy_index != (u32) ~ 0 ? NULL : &bsid), - sr_policy_index); - else if (is_mod) - { - if (!operation) - return clib_error_return (0, "No SL modification specified"); - if (operation != 1 && sl_index == (u32) ~ 0) - return clib_error_return (0, "No Segment List index specified"); - if (operation == 1 && vec_len (segments) == 0) - return clib_error_return (0, "No Segment List specified"); - if (operation == 3 && weight == (u32) ~ 0) - return clib_error_return (0, "No new weight for the SL specified"); - rv = sr_policy_mod ((sr_policy_index != (u32) ~ 0 ? 
NULL : &bsid), - sr_policy_index, fib_table, operation, segments, - sl_index, weight); - } - - switch (rv) - { - case 0: - break; - case 1: - return 0; - case -12: - return clib_error_return (0, - "There is already a FIB entry for the BindingSID address.\n" - "The SR policy could not be created."); - case -13: - return clib_error_return (0, "The specified FIB table does not exist."); - case -21: - return clib_error_return (0, - "The selected SR policy only contains ONE segment list. " - "Please remove the SR policy instead"); - case -22: - return clib_error_return (0, - "Could not delete the segment list. " - "It is not associated with that SR policy."); - case -32: - return clib_error_return (0, - "Could not modify the segment list. " - "The given SL is not associated with such SR policy."); - default: - return clib_error_return (0, "BUG: sr policy returns %d", rv); - } - return 0; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (sr_policy_command, static) = { - .path = "sr policy", - .short_help = "sr policy [add||del||mod] [bsid 2001::1||index 5] " - "next A:: next B:: next C:: (weight 1) (fib-table 2) (encap|insert)", - .long_help = - "Manipulation of SR policies.\n" - "A Segment Routing policy may contain several SID lists. Each SID list has\n" - "an associated weight (default 1), which will result in wECMP (uECMP).\n" - "Segment Routing policies might be of type encapsulation or srh insertion\n" - "Each SR policy will be associated with a unique BindingSID.\n" - "A BindingSID is a locally allocated SegmentID. For every packet that arrives\n" - "with IPv6_DA:BSID such traffic will be steered into the SR policy.\n" - "The add command will create a SR policy with its first segment list (sl)\n" - "The mod command allows you to add, remove, or modify the existing segment lists\n" - "within an SR policy.\n" - "The del command allows you to delete a SR policy along with all its associated\n" - "SID lists.\n", - .function = sr_policy_command_fn, -}; -/* *INDENT-ON* */ - -/** - * @brief CLI to display onscreen all the SR policies - */ -static clib_error_t * -show_sr_policies_command_fn (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - ip6_sr_main_t *sm = &sr_main; - u32 *sl_index; - ip6_sr_sl_t *segment_list = 0; - ip6_sr_policy_t *sr_policy = 0; - ip6_sr_policy_t **vec_policies = 0; - ip6_address_t *addr; - u8 *s; - int i = 0; - - vlib_cli_output (vm, "SR policies:"); - - /* *INDENT-OFF* */ - pool_foreach (sr_policy, sm->sr_policies, - {vec_add1 (vec_policies, sr_policy); } ); - /* *INDENT-ON* */ - - vec_foreach_index (i, vec_policies) - { - sr_policy = vec_policies[i]; - vlib_cli_output (vm, "[%u].-\tBSID: %U", - (u32) (sr_policy - sm->sr_policies), - format_ip6_address, &sr_policy->bsid); - vlib_cli_output (vm, "\tBehavior: %s", - (sr_policy->is_encap ? "Encapsulation" : - "SRH insertion")); - vlib_cli_output (vm, "\tType: %s", - (sr_policy->type == - SR_POLICY_TYPE_DEFAULT ? "Default" : "Spray")); - vlib_cli_output (vm, "\tFIB table: %u", - (sr_policy->fib_table != - (u32) ~ 0 ? 
sr_policy->fib_table : 0)); - vlib_cli_output (vm, "\tSegment Lists:"); - vec_foreach (sl_index, sr_policy->segments_lists) - { - s = NULL; - s = format (s, "\t[%u].- ", *sl_index); - segment_list = pool_elt_at_index (sm->sid_lists, *sl_index); - s = format (s, "< "); - vec_foreach (addr, segment_list->segments) - { - s = format (s, "%U, ", format_ip6_address, addr); - } - s = format (s, "\b\b > "); - s = format (s, "weight: %u", segment_list->weight); - vlib_cli_output (vm, " %s", s); - } - vlib_cli_output (vm, "-----------"); - } - return 0; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (show_sr_policies_command, static) = { - .path = "show sr policies", - .short_help = "show sr policies", - .function = show_sr_policies_command_fn, -}; -/* *INDENT-ON* */ - -/*************************** SR rewrite graph node ****************************/ -/** - * @brief Trace for the SR Policy Rewrite graph node - */ -static u8 * -format_sr_policy_rewrite_trace (u8 * s, va_list * args) -{ - //TODO - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - sr_policy_rewrite_trace_t *t = va_arg (*args, sr_policy_rewrite_trace_t *); - - s = format - (s, "SR-policy-rewrite: src %U dst %U", - format_ip6_address, &t->src, format_ip6_address, &t->dst); - - return s; -} - -/** - * @brief IPv6 encapsulation processing as per RFC2473 - */ -static_always_inline void -encaps_processing_v6 (vlib_node_runtime_t * node, - vlib_buffer_t * b0, - ip6_header_t * ip0, ip6_header_t * ip0_encap) -{ - u32 new_l0; - - ip0_encap->hop_limit -= 1; - new_l0 = - ip0->payload_length + sizeof (ip6_header_t) + - clib_net_to_host_u16 (ip0_encap->payload_length); - ip0->payload_length = clib_host_to_net_u16 (new_l0); - ip0->ip_version_traffic_class_and_flow_label = - ip0_encap->ip_version_traffic_class_and_flow_label; -} - -/** - * @brief Graph node for applying a SR policy into an IPv6 packet. Encapsulation - */ -static uword -sr_policy_rewrite_encaps (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * from_frame) -{ - ip6_sr_main_t *sm = &sr_main; - u32 n_left_from, next_index, *from, *to_next; - - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - - next_index = node->cached_next_index; - - int encap_pkts = 0, bsid_pkts = 0; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - /* Quad - Loop */ - while (n_left_from >= 8 && n_left_to_next >= 4) - { - u32 bi0, bi1, bi2, bi3; - vlib_buffer_t *b0, *b1, *b2, *b3; - u32 next0, next1, next2, next3; - next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; - ip6_header_t *ip0, *ip1, *ip2, *ip3; - ip6_header_t *ip0_encap, *ip1_encap, *ip2_encap, *ip3_encap; - ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3; - - /* Prefetch next iteration. 
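	     Buffers 4-7 (the next quad) have their headers prefetched for
	     load and the first cache line of their packet data prefetched
	     with a store hint, since the rewrite is about to be copied
	     immediately in front of that data; this hides the memory latency
	     behind the work done on buffers 0-3.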
*/ - { - vlib_buffer_t *p4, *p5, *p6, *p7; - - p4 = vlib_get_buffer (vm, from[4]); - p5 = vlib_get_buffer (vm, from[5]); - p6 = vlib_get_buffer (vm, from[6]); - p7 = vlib_get_buffer (vm, from[7]); - - /* Prefetch the buffer header and packet for the N+2 loop iteration */ - vlib_prefetch_buffer_header (p4, LOAD); - vlib_prefetch_buffer_header (p5, LOAD); - vlib_prefetch_buffer_header (p6, LOAD); - vlib_prefetch_buffer_header (p7, LOAD); - - CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); - CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); - CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE); - CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE); - } - - to_next[0] = bi0 = from[0]; - to_next[1] = bi1 = from[1]; - to_next[2] = bi2 = from[2]; - to_next[3] = bi3 = from[3]; - from += 4; - to_next += 4; - n_left_from -= 4; - n_left_to_next -= 4; - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - b2 = vlib_get_buffer (vm, bi2); - b3 = vlib_get_buffer (vm, bi3); - - sl0 = - pool_elt_at_index (sm->sid_lists, - vnet_buffer (b0)->ip.adj_index[VLIB_TX]); - sl1 = - pool_elt_at_index (sm->sid_lists, - vnet_buffer (b1)->ip.adj_index[VLIB_TX]); - sl2 = - pool_elt_at_index (sm->sid_lists, - vnet_buffer (b2)->ip.adj_index[VLIB_TX]); - sl3 = - pool_elt_at_index (sm->sid_lists, - vnet_buffer (b3)->ip.adj_index[VLIB_TX]); - - ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= - vec_len (sl0->rewrite)); - ASSERT (b1->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= - vec_len (sl1->rewrite)); - ASSERT (b2->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= - vec_len (sl2->rewrite)); - ASSERT (b3->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= - vec_len (sl3->rewrite)); - - ip0_encap = vlib_buffer_get_current (b0); - ip1_encap = vlib_buffer_get_current (b1); - ip2_encap = vlib_buffer_get_current (b2); - ip3_encap = vlib_buffer_get_current (b3); - - clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite), - sl0->rewrite, vec_len (sl0->rewrite)); - clib_memcpy (((u8 *) ip1_encap) - vec_len (sl1->rewrite), - sl1->rewrite, vec_len (sl1->rewrite)); - clib_memcpy (((u8 *) ip2_encap) - vec_len (sl2->rewrite), - sl2->rewrite, vec_len (sl2->rewrite)); - clib_memcpy (((u8 *) ip3_encap) - vec_len (sl3->rewrite), - sl3->rewrite, vec_len (sl3->rewrite)); - - vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite)); - vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite)); - vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite)); - vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite)); - - ip0 = vlib_buffer_get_current (b0); - ip1 = vlib_buffer_get_current (b1); - ip2 = vlib_buffer_get_current (b2); - ip3 = vlib_buffer_get_current (b3); - - encaps_processing_v6 (node, b0, ip0, ip0_encap); - encaps_processing_v6 (node, b1, ip1, ip1_encap); - encaps_processing_v6 (node, b2, ip2, ip2_encap); - encaps_processing_v6 (node, b3, ip3, ip3_encap); - - if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) - { - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_policy_rewrite_trace_t *tr = - vlib_add_trace (vm, node, b0, sizeof (*tr)); - clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, - sizeof (tr->src.as_u8)); - clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - } - - if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_policy_rewrite_trace_t *tr = - vlib_add_trace (vm, node, b1, sizeof (*tr)); - clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8, - sizeof (tr->src.as_u8)); - clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8, - sizeof 
(tr->dst.as_u8)); - } - - if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_policy_rewrite_trace_t *tr = - vlib_add_trace (vm, node, b2, sizeof (*tr)); - clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8, - sizeof (tr->src.as_u8)); - clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - } - - if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_policy_rewrite_trace_t *tr = - vlib_add_trace (vm, node, b3, sizeof (*tr)); - clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8, - sizeof (tr->src.as_u8)); - clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - } - } - - encap_pkts += 4; - vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, - n_left_to_next, bi0, bi1, bi2, bi3, - next0, next1, next2, next3); - } - - /* Single loop for potentially the last three packets */ - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0; - vlib_buffer_t *b0; - ip6_header_t *ip0 = 0, *ip0_encap = 0; - ip6_sr_sl_t *sl0; - u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; - - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - b0 = vlib_get_buffer (vm, bi0); - - sl0 = - pool_elt_at_index (sm->sid_lists, - vnet_buffer (b0)->ip.adj_index[VLIB_TX]); - ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= - vec_len (sl0->rewrite)); - - ip0_encap = vlib_buffer_get_current (b0); - - clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite), - sl0->rewrite, vec_len (sl0->rewrite)); - vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite)); - - ip0 = vlib_buffer_get_current (b0); - - encaps_processing_v6 (node, b0, ip0, ip0_encap); - - if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) && - PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_policy_rewrite_trace_t *tr = - vlib_add_trace (vm, node, b0, sizeof (*tr)); - clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, - sizeof (tr->src.as_u8)); - clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - } - - encap_pkts++; - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, bi0, next0); - } - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - /* Update counters */ - vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index, - SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL, - encap_pkts); - vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index, - SR_POLICY_REWRITE_ERROR_COUNTER_BSID, - bsid_pkts); - - return from_frame->n_vectors; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_node) = { - .function = sr_policy_rewrite_encaps, - .name = "sr-pl-rewrite-encaps", - .vector_size = sizeof (u32), - .format_trace = format_sr_policy_rewrite_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - .n_errors = SR_POLICY_REWRITE_N_ERROR, - .error_strings = sr_policy_rewrite_error_strings, - .n_next_nodes = SR_POLICY_REWRITE_N_NEXT, - .next_nodes = { -#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n, - foreach_sr_policy_rewrite_next -#undef _ - }, -}; -/* *INDENT-ON* */ - -/** - * @brief IPv4 encapsulation processing as per RFC2473 - */ -static_always_inline void -encaps_processing_v4 (vlib_node_runtime_t * node, - vlib_buffer_t * b0, - ip6_header_t * ip0, ip4_header_t * ip0_encap) -{ - u32 new_l0; - ip6_sr_header_t *sr0; - - u32 checksum0; - - /* Inner IPv4: Decrement TTL & update checksum */ - ip0_encap->ttl -= 1; - checksum0 = ip0_encap->checksum + clib_host_to_net_u16 (0x0100); - checksum0 += checksum0 >= 0xffff; - 
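  /* Incremental checksum update in the spirit of RFC 1624: decrementing the
   * TTL lowers the 16-bit TTL/protocol word by 0x0100, so the (one's
   * complement) header checksum stored in the packet must rise by the same
   * constant; the "checksum0 >= 0xffff" add just above folds the end-around
   * carry back in, avoiding a full recomputation of the IPv4 header
   * checksum. */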
ip0_encap->checksum = checksum0; - - /* Outer IPv6: Update length, FL, proto */ - new_l0 = ip0->payload_length + clib_net_to_host_u16 (ip0_encap->length); - ip0->payload_length = clib_host_to_net_u16 (new_l0); - ip0->ip_version_traffic_class_and_flow_label = - clib_host_to_net_u32 (0 | ((6 & 0xF) << 28) | - ((ip0_encap->tos & 0xFF) << 20)); - sr0 = (void *) (ip0 + 1); - sr0->protocol = IP_PROTOCOL_IP_IN_IP; -} - -/** - * @brief Graph node for applying a SR policy into an IPv4 packet. Encapsulation - */ -static uword -sr_policy_rewrite_encaps_v4 (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * from_frame) -{ - ip6_sr_main_t *sm = &sr_main; - u32 n_left_from, next_index, *from, *to_next; - - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - - next_index = node->cached_next_index; - - int encap_pkts = 0, bsid_pkts = 0; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - /* Quad - Loop */ - while (n_left_from >= 8 && n_left_to_next >= 4) - { - u32 bi0, bi1, bi2, bi3; - vlib_buffer_t *b0, *b1, *b2, *b3; - u32 next0, next1, next2, next3; - next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; - ip6_header_t *ip0, *ip1, *ip2, *ip3; - ip4_header_t *ip0_encap, *ip1_encap, *ip2_encap, *ip3_encap; - ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3; - - /* Prefetch next iteration. */ - { - vlib_buffer_t *p4, *p5, *p6, *p7; - - p4 = vlib_get_buffer (vm, from[4]); - p5 = vlib_get_buffer (vm, from[5]); - p6 = vlib_get_buffer (vm, from[6]); - p7 = vlib_get_buffer (vm, from[7]); - - /* Prefetch the buffer header and packet for the N+2 loop iteration */ - vlib_prefetch_buffer_header (p4, LOAD); - vlib_prefetch_buffer_header (p5, LOAD); - vlib_prefetch_buffer_header (p6, LOAD); - vlib_prefetch_buffer_header (p7, LOAD); - - CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); - CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); - CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE); - CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE); - } - - to_next[0] = bi0 = from[0]; - to_next[1] = bi1 = from[1]; - to_next[2] = bi2 = from[2]; - to_next[3] = bi3 = from[3]; - from += 4; - to_next += 4; - n_left_from -= 4; - n_left_to_next -= 4; - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - b2 = vlib_get_buffer (vm, bi2); - b3 = vlib_get_buffer (vm, bi3); - - sl0 = - pool_elt_at_index (sm->sid_lists, - vnet_buffer (b0)->ip.adj_index[VLIB_TX]); - sl1 = - pool_elt_at_index (sm->sid_lists, - vnet_buffer (b1)->ip.adj_index[VLIB_TX]); - sl2 = - pool_elt_at_index (sm->sid_lists, - vnet_buffer (b2)->ip.adj_index[VLIB_TX]); - sl3 = - pool_elt_at_index (sm->sid_lists, - vnet_buffer (b3)->ip.adj_index[VLIB_TX]); - ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= - vec_len (sl0->rewrite)); - ASSERT (b1->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= - vec_len (sl1->rewrite)); - ASSERT (b2->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= - vec_len (sl2->rewrite)); - ASSERT (b3->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= - vec_len (sl3->rewrite)); - - ip0_encap = vlib_buffer_get_current (b0); - ip1_encap = vlib_buffer_get_current (b1); - ip2_encap = vlib_buffer_get_current (b2); - ip3_encap = vlib_buffer_get_current (b3); - - clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite), - sl0->rewrite, vec_len (sl0->rewrite)); - clib_memcpy (((u8 *) ip1_encap) - vec_len (sl1->rewrite), - sl1->rewrite, vec_len (sl1->rewrite)); - clib_memcpy (((u8 *) ip2_encap) - vec_len 
(sl2->rewrite), - sl2->rewrite, vec_len (sl2->rewrite)); - clib_memcpy (((u8 *) ip3_encap) - vec_len (sl3->rewrite), - sl3->rewrite, vec_len (sl3->rewrite)); - - vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite)); - vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite)); - vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite)); - vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite)); - - ip0 = vlib_buffer_get_current (b0); - ip1 = vlib_buffer_get_current (b1); - ip2 = vlib_buffer_get_current (b2); - ip3 = vlib_buffer_get_current (b3); - - encaps_processing_v4 (node, b0, ip0, ip0_encap); - encaps_processing_v4 (node, b1, ip1, ip1_encap); - encaps_processing_v4 (node, b2, ip2, ip2_encap); - encaps_processing_v4 (node, b3, ip3, ip3_encap); - - if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) - { - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_policy_rewrite_trace_t *tr = - vlib_add_trace (vm, node, b0, sizeof (*tr)); - clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, - sizeof (tr->src.as_u8)); - clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - } - - if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_policy_rewrite_trace_t *tr = - vlib_add_trace (vm, node, b1, sizeof (*tr)); - clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8, - sizeof (tr->src.as_u8)); - clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - } - - if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_policy_rewrite_trace_t *tr = - vlib_add_trace (vm, node, b2, sizeof (*tr)); - clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8, - sizeof (tr->src.as_u8)); - clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - } - - if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_policy_rewrite_trace_t *tr = - vlib_add_trace (vm, node, b3, sizeof (*tr)); - clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8, - sizeof (tr->src.as_u8)); - clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - } - } - - encap_pkts += 4; - vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, - n_left_to_next, bi0, bi1, bi2, bi3, - next0, next1, next2, next3); - } - - /* Single loop for potentially the last three packets */ - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0; - vlib_buffer_t *b0; - ip6_header_t *ip0 = 0; - ip4_header_t *ip0_encap = 0; - ip6_sr_sl_t *sl0; - u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; - - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - b0 = vlib_get_buffer (vm, bi0); - - sl0 = - pool_elt_at_index (sm->sid_lists, - vnet_buffer (b0)->ip.adj_index[VLIB_TX]); - ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= - vec_len (sl0->rewrite)); - - ip0_encap = vlib_buffer_get_current (b0); - - clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite), - sl0->rewrite, vec_len (sl0->rewrite)); - vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite)); - - ip0 = vlib_buffer_get_current (b0); - - encaps_processing_v4 (node, b0, ip0, ip0_encap); - - if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) && - PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_policy_rewrite_trace_t *tr = - vlib_add_trace (vm, node, b0, sizeof (*tr)); - clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, - sizeof (tr->src.as_u8)); - clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - } - - encap_pkts++; - vlib_validate_buffer_enqueue_x1 (vm, node, 
next_index, to_next, - n_left_to_next, bi0, next0); - } - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - /* Update counters */ - vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index, - SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL, - encap_pkts); - vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index, - SR_POLICY_REWRITE_ERROR_COUNTER_BSID, - bsid_pkts); - - return from_frame->n_vectors; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_v4_node) = { - .function = sr_policy_rewrite_encaps_v4, - .name = "sr-pl-rewrite-encaps-v4", - .vector_size = sizeof (u32), - .format_trace = format_sr_policy_rewrite_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - .n_errors = SR_POLICY_REWRITE_N_ERROR, - .error_strings = sr_policy_rewrite_error_strings, - .n_next_nodes = SR_POLICY_REWRITE_N_NEXT, - .next_nodes = { -#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n, - foreach_sr_policy_rewrite_next -#undef _ - }, -}; -/* *INDENT-ON* */ - -always_inline u32 -ip_flow_hash (void *data) -{ - ip4_header_t *iph = (ip4_header_t *) data; - - if ((iph->ip_version_and_header_length & 0xF0) == 0x40) - return ip4_compute_flow_hash (iph, IP_FLOW_HASH_DEFAULT); - else - return ip6_compute_flow_hash ((ip6_header_t *) iph, IP_FLOW_HASH_DEFAULT); -} - -always_inline u64 -mac_to_u64 (u8 * m) -{ - return (*((u64 *) m) & 0xffffffffffff); -} - -always_inline u32 -l2_flow_hash (vlib_buffer_t * b0) -{ - ethernet_header_t *eh; - u64 a, b, c; - uword is_ip, eh_size; - u16 eh_type; - - eh = vlib_buffer_get_current (b0); - eh_type = clib_net_to_host_u16 (eh->type); - eh_size = ethernet_buffer_header_size (b0); - - is_ip = (eh_type == ETHERNET_TYPE_IP4 || eh_type == ETHERNET_TYPE_IP6); - - /* since we have 2 cache lines, use them */ - if (is_ip) - a = ip_flow_hash ((u8 *) vlib_buffer_get_current (b0) + eh_size); - else - a = eh->type; - - b = mac_to_u64 ((u8 *) eh->dst_address); - c = mac_to_u64 ((u8 *) eh->src_address); - hash_mix64 (a, b, c); - - return (u32) c; -} - -/** - * @brief Graph node for applying a SR policy into a L2 frame - */ -static uword -sr_policy_rewrite_encaps_l2 (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * from_frame) -{ - ip6_sr_main_t *sm = &sr_main; - u32 n_left_from, next_index, *from, *to_next; - - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - - next_index = node->cached_next_index; - - int encap_pkts = 0, bsid_pkts = 0; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - /* Quad - Loop */ - while (n_left_from >= 8 && n_left_to_next >= 4) - { - u32 bi0, bi1, bi2, bi3; - vlib_buffer_t *b0, *b1, *b2, *b3; - u32 next0, next1, next2, next3; - next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; - ethernet_header_t *en0, *en1, *en2, *en3; - ip6_header_t *ip0, *ip1, *ip2, *ip3; - ip6_sr_header_t *sr0, *sr1, *sr2, *sr3; - ip6_sr_policy_t *sp0, *sp1, *sp2, *sp3; - ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3; - - /* Prefetch next iteration. 
*/ - { - vlib_buffer_t *p4, *p5, *p6, *p7; - - p4 = vlib_get_buffer (vm, from[4]); - p5 = vlib_get_buffer (vm, from[5]); - p6 = vlib_get_buffer (vm, from[6]); - p7 = vlib_get_buffer (vm, from[7]); - - /* Prefetch the buffer header and packet for the N+2 loop iteration */ - vlib_prefetch_buffer_header (p4, LOAD); - vlib_prefetch_buffer_header (p5, LOAD); - vlib_prefetch_buffer_header (p6, LOAD); - vlib_prefetch_buffer_header (p7, LOAD); - - CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); - CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); - CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE); - CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE); - } - - to_next[0] = bi0 = from[0]; - to_next[1] = bi1 = from[1]; - to_next[2] = bi2 = from[2]; - to_next[3] = bi3 = from[3]; - from += 4; - to_next += 4; - n_left_from -= 4; - n_left_to_next -= 4; - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - b2 = vlib_get_buffer (vm, bi2); - b3 = vlib_get_buffer (vm, bi3); - - sp0 = pool_elt_at_index (sm->sr_policies, - sm->sw_iface_sr_policies[vnet_buffer - (b0)->sw_if_index - [VLIB_RX]]); - - sp1 = pool_elt_at_index (sm->sr_policies, - sm->sw_iface_sr_policies[vnet_buffer - (b1)->sw_if_index - [VLIB_RX]]); - - sp2 = pool_elt_at_index (sm->sr_policies, - sm->sw_iface_sr_policies[vnet_buffer - (b2)->sw_if_index - [VLIB_RX]]); - - sp3 = pool_elt_at_index (sm->sr_policies, - sm->sw_iface_sr_policies[vnet_buffer - (b3)->sw_if_index - [VLIB_RX]]); - - if (vec_len (sp0->segments_lists) == 1) - vnet_buffer (b0)->ip.adj_index[VLIB_TX] = sp0->segments_lists[0]; - else - { - vnet_buffer (b0)->ip.flow_hash = l2_flow_hash (b0); - vnet_buffer (b0)->ip.adj_index[VLIB_TX] = - sp0->segments_lists[(vnet_buffer (b0)->ip.flow_hash & - (vec_len (sp0->segments_lists) - 1))]; - } - - if (vec_len (sp1->segments_lists) == 1) - vnet_buffer (b1)->ip.adj_index[VLIB_TX] = sp1->segments_lists[1]; - else - { - vnet_buffer (b1)->ip.flow_hash = l2_flow_hash (b1); - vnet_buffer (b1)->ip.adj_index[VLIB_TX] = - sp1->segments_lists[(vnet_buffer (b1)->ip.flow_hash & - (vec_len (sp1->segments_lists) - 1))]; - } - - if (vec_len (sp2->segments_lists) == 1) - vnet_buffer (b2)->ip.adj_index[VLIB_TX] = sp2->segments_lists[2]; - else - { - vnet_buffer (b2)->ip.flow_hash = l2_flow_hash (b2); - vnet_buffer (b2)->ip.adj_index[VLIB_TX] = - sp2->segments_lists[(vnet_buffer (b2)->ip.flow_hash & - (vec_len (sp2->segments_lists) - 1))]; - } - - if (vec_len (sp3->segments_lists) == 1) - vnet_buffer (b3)->ip.adj_index[VLIB_TX] = sp3->segments_lists[3]; - else - { - vnet_buffer (b3)->ip.flow_hash = l2_flow_hash (b3); - vnet_buffer (b3)->ip.adj_index[VLIB_TX] = - sp3->segments_lists[(vnet_buffer (b3)->ip.flow_hash & - (vec_len (sp3->segments_lists) - 1))]; - } - - sl0 = - pool_elt_at_index (sm->sid_lists, - vnet_buffer (b0)->ip.adj_index[VLIB_TX]); - sl1 = - pool_elt_at_index (sm->sid_lists, - vnet_buffer (b1)->ip.adj_index[VLIB_TX]); - sl2 = - pool_elt_at_index (sm->sid_lists, - vnet_buffer (b2)->ip.adj_index[VLIB_TX]); - sl3 = - pool_elt_at_index (sm->sid_lists, - vnet_buffer (b3)->ip.adj_index[VLIB_TX]); - - ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= - vec_len (sl0->rewrite)); - ASSERT (b1->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= - vec_len (sl1->rewrite)); - ASSERT (b2->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= - vec_len (sl2->rewrite)); - ASSERT (b3->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= - vec_len (sl3->rewrite)); - - en0 = vlib_buffer_get_current (b0); - en1 = vlib_buffer_get_current (b1); - 
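	  /* The precomputed rewrite (outer IPv6 header + SRH) is copied
	   * directly in front of each Ethernet header below: the whole L2
	   * frame becomes the SR payload, which is why payload_length is
	   * derived from the buffer length and the SRH next-header is set to
	   * IP6_NONXT rather than taken from an inner IP header. */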
en2 = vlib_buffer_get_current (b2); - en3 = vlib_buffer_get_current (b3); - - clib_memcpy (((u8 *) en0) - vec_len (sl0->rewrite), sl0->rewrite, - vec_len (sl0->rewrite)); - clib_memcpy (((u8 *) en1) - vec_len (sl1->rewrite), sl1->rewrite, - vec_len (sl1->rewrite)); - clib_memcpy (((u8 *) en2) - vec_len (sl2->rewrite), sl2->rewrite, - vec_len (sl2->rewrite)); - clib_memcpy (((u8 *) en3) - vec_len (sl3->rewrite), sl3->rewrite, - vec_len (sl3->rewrite)); - - vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite)); - vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite)); - vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite)); - vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite)); - - ip0 = vlib_buffer_get_current (b0); - ip1 = vlib_buffer_get_current (b1); - ip2 = vlib_buffer_get_current (b2); - ip3 = vlib_buffer_get_current (b3); - - ip0->payload_length = - clib_host_to_net_u16 (b0->current_length - sizeof (ip6_header_t)); - ip1->payload_length = - clib_host_to_net_u16 (b1->current_length - sizeof (ip6_header_t)); - ip2->payload_length = - clib_host_to_net_u16 (b2->current_length - sizeof (ip6_header_t)); - ip3->payload_length = - clib_host_to_net_u16 (b3->current_length - sizeof (ip6_header_t)); - - sr0 = (void *) (ip0 + 1); - sr1 = (void *) (ip1 + 1); - sr2 = (void *) (ip2 + 1); - sr3 = (void *) (ip3 + 1); - - sr0->protocol = sr1->protocol = sr2->protocol = sr3->protocol = - IP_PROTOCOL_IP6_NONXT; - - /* Which Traffic class and flow label do I set ? */ - //ip0->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32(0|((6&0xF)<<28)|((ip0_encap->tos&0xFF)<<20)); - - if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) - { - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_policy_rewrite_trace_t *tr = - vlib_add_trace (vm, node, b0, sizeof (*tr)); - clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, - sizeof (tr->src.as_u8)); - clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - } - - if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_policy_rewrite_trace_t *tr = - vlib_add_trace (vm, node, b1, sizeof (*tr)); - clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8, - sizeof (tr->src.as_u8)); - clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - } - - if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_policy_rewrite_trace_t *tr = - vlib_add_trace (vm, node, b2, sizeof (*tr)); - clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8, - sizeof (tr->src.as_u8)); - clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - } - - if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_policy_rewrite_trace_t *tr = - vlib_add_trace (vm, node, b3, sizeof (*tr)); - clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8, - sizeof (tr->src.as_u8)); - clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - } - } - - encap_pkts += 4; - vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, - n_left_to_next, bi0, bi1, bi2, bi3, - next0, next1, next2, next3); - } - - /* Single loop for potentially the last three packets */ - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0; - vlib_buffer_t *b0; - ip6_header_t *ip0 = 0; - ip6_sr_header_t *sr0; - ethernet_header_t *en0; - ip6_sr_policy_t *sp0; - ip6_sr_sl_t *sl0; - u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; - - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - b0 = vlib_get_buffer (vm, bi0); - - /* Find the SR 
policy */ - sp0 = pool_elt_at_index (sm->sr_policies, - sm->sw_iface_sr_policies[vnet_buffer - (b0)->sw_if_index - [VLIB_RX]]); - - /* In case there is more than one SL, LB among them */ - if (vec_len (sp0->segments_lists) == 1) - vnet_buffer (b0)->ip.adj_index[VLIB_TX] = sp0->segments_lists[0]; - else - { - vnet_buffer (b0)->ip.flow_hash = l2_flow_hash (b0); - vnet_buffer (b0)->ip.adj_index[VLIB_TX] = - sp0->segments_lists[(vnet_buffer (b0)->ip.flow_hash & - (vec_len (sp0->segments_lists) - 1))]; - } - sl0 = - pool_elt_at_index (sm->sid_lists, - vnet_buffer (b0)->ip.adj_index[VLIB_TX]); - ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= - vec_len (sl0->rewrite)); - - en0 = vlib_buffer_get_current (b0); - - clib_memcpy (((u8 *) en0) - vec_len (sl0->rewrite), sl0->rewrite, - vec_len (sl0->rewrite)); - - vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite)); - - ip0 = vlib_buffer_get_current (b0); - - ip0->payload_length = - clib_host_to_net_u16 (b0->current_length - sizeof (ip6_header_t)); - - sr0 = (void *) (ip0 + 1); - sr0->protocol = IP_PROTOCOL_IP6_NONXT; - - if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) && - PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_policy_rewrite_trace_t *tr = - vlib_add_trace (vm, node, b0, sizeof (*tr)); - clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, - sizeof (tr->src.as_u8)); - clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - } - - encap_pkts++; - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, bi0, next0); - } - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - /* Update counters */ - vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index, - SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL, - encap_pkts); - vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index, - SR_POLICY_REWRITE_ERROR_COUNTER_BSID, - bsid_pkts); - - return from_frame->n_vectors; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_l2_node) = { - .function = sr_policy_rewrite_encaps_l2, - .name = "sr-pl-rewrite-encaps-l2", - .vector_size = sizeof (u32), - .format_trace = format_sr_policy_rewrite_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - .n_errors = SR_POLICY_REWRITE_N_ERROR, - .error_strings = sr_policy_rewrite_error_strings, - .n_next_nodes = SR_POLICY_REWRITE_N_NEXT, - .next_nodes = { -#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n, - foreach_sr_policy_rewrite_next -#undef _ - }, -}; -/* *INDENT-ON* */ - -/** - * @brief Graph node for applying a SR policy into a packet. SRH insertion. - */ -static uword -sr_policy_rewrite_insert (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * from_frame) -{ - ip6_sr_main_t *sm = &sr_main; - u32 n_left_from, next_index, *from, *to_next; - - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - - next_index = node->cached_next_index; - - int insert_pkts = 0, bsid_pkts = 0; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - /* Quad - Loop */ - while (n_left_from >= 8 && n_left_to_next >= 4) - { - u32 bi0, bi1, bi2, bi3; - vlib_buffer_t *b0, *b1, *b2, *b3; - u32 next0, next1, next2, next3; - next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; - ip6_header_t *ip0, *ip1, *ip2, *ip3; - ip6_sr_header_t *sr0, *sr1, *sr2, *sr3; - ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3; - u16 new_l0, new_l1, new_l2, new_l3; - - /* Prefetch next iteration. 
*/ - { - vlib_buffer_t *p4, *p5, *p6, *p7; - - p4 = vlib_get_buffer (vm, from[4]); - p5 = vlib_get_buffer (vm, from[5]); - p6 = vlib_get_buffer (vm, from[6]); - p7 = vlib_get_buffer (vm, from[7]); - - /* Prefetch the buffer header and packet for the N+2 loop iteration */ - vlib_prefetch_buffer_header (p4, LOAD); - vlib_prefetch_buffer_header (p5, LOAD); - vlib_prefetch_buffer_header (p6, LOAD); - vlib_prefetch_buffer_header (p7, LOAD); - - CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); - CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); - CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE); - CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE); - } - - to_next[0] = bi0 = from[0]; - to_next[1] = bi1 = from[1]; - to_next[2] = bi2 = from[2]; - to_next[3] = bi3 = from[3]; - from += 4; - to_next += 4; - n_left_from -= 4; - n_left_to_next -= 4; - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - b2 = vlib_get_buffer (vm, bi2); - b3 = vlib_get_buffer (vm, bi3); - - sl0 = - pool_elt_at_index (sm->sid_lists, - vnet_buffer (b0)->ip.adj_index[VLIB_TX]); - sl1 = - pool_elt_at_index (sm->sid_lists, - vnet_buffer (b1)->ip.adj_index[VLIB_TX]); - sl2 = - pool_elt_at_index (sm->sid_lists, - vnet_buffer (b2)->ip.adj_index[VLIB_TX]); - sl3 = - pool_elt_at_index (sm->sid_lists, - vnet_buffer (b3)->ip.adj_index[VLIB_TX]); - ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= - vec_len (sl0->rewrite)); - ASSERT (b1->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= - vec_len (sl1->rewrite)); - ASSERT (b2->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= - vec_len (sl2->rewrite)); - ASSERT (b3->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= - vec_len (sl3->rewrite)); - - ip0 = vlib_buffer_get_current (b0); - ip1 = vlib_buffer_get_current (b1); - ip2 = vlib_buffer_get_current (b2); - ip3 = vlib_buffer_get_current (b3); - - if (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) - sr0 = - (ip6_sr_header_t *) (((void *) (ip0 + 1)) + - ip6_ext_header_len (ip0 + 1)); - else - sr0 = (ip6_sr_header_t *) (ip0 + 1); - - if (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) - sr1 = - (ip6_sr_header_t *) (((void *) (ip1 + 1)) + - ip6_ext_header_len (ip1 + 1)); - else - sr1 = (ip6_sr_header_t *) (ip1 + 1); - - if (ip2->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) - sr2 = - (ip6_sr_header_t *) (((void *) (ip2 + 1)) + - ip6_ext_header_len (ip2 + 1)); - else - sr2 = (ip6_sr_header_t *) (ip2 + 1); - - if (ip3->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) - sr3 = - (ip6_sr_header_t *) (((void *) (ip3 + 1)) + - ip6_ext_header_len (ip3 + 1)); - else - sr3 = (ip6_sr_header_t *) (ip3 + 1); - - clib_memcpy ((u8 *) ip0 - vec_len (sl0->rewrite), (u8 *) ip0, - (void *) sr0 - (void *) ip0); - clib_memcpy ((u8 *) ip1 - vec_len (sl1->rewrite), (u8 *) ip1, - (void *) sr1 - (void *) ip1); - clib_memcpy ((u8 *) ip2 - vec_len (sl2->rewrite), (u8 *) ip2, - (void *) sr2 - (void *) ip2); - clib_memcpy ((u8 *) ip3 - vec_len (sl3->rewrite), (u8 *) ip3, - (void *) sr3 - (void *) ip3); - - clib_memcpy (((u8 *) sr0 - vec_len (sl0->rewrite)), sl0->rewrite, - vec_len (sl0->rewrite)); - clib_memcpy (((u8 *) sr1 - vec_len (sl1->rewrite)), sl1->rewrite, - vec_len (sl1->rewrite)); - clib_memcpy (((u8 *) sr2 - vec_len (sl2->rewrite)), sl2->rewrite, - vec_len (sl2->rewrite)); - clib_memcpy (((u8 *) sr3 - vec_len (sl3->rewrite)), sl3->rewrite, - vec_len (sl3->rewrite)); - - vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite)); - vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite)); - vlib_buffer_advance (b2, 
-(word) vec_len (sl2->rewrite)); - vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite)); - - ip0 = ((void *) ip0) - vec_len (sl0->rewrite); - ip1 = ((void *) ip1) - vec_len (sl1->rewrite); - ip2 = ((void *) ip2) - vec_len (sl2->rewrite); - ip3 = ((void *) ip3) - vec_len (sl3->rewrite); - - ip0->hop_limit -= 1; - ip1->hop_limit -= 1; - ip2->hop_limit -= 1; - ip3->hop_limit -= 1; - - new_l0 = - clib_net_to_host_u16 (ip0->payload_length) + - vec_len (sl0->rewrite); - new_l1 = - clib_net_to_host_u16 (ip1->payload_length) + - vec_len (sl1->rewrite); - new_l2 = - clib_net_to_host_u16 (ip2->payload_length) + - vec_len (sl2->rewrite); - new_l3 = - clib_net_to_host_u16 (ip3->payload_length) + - vec_len (sl3->rewrite); - - ip0->payload_length = clib_host_to_net_u16 (new_l0); - ip1->payload_length = clib_host_to_net_u16 (new_l1); - ip2->payload_length = clib_host_to_net_u16 (new_l2); - ip3->payload_length = clib_host_to_net_u16 (new_l3); - - sr0 = ((void *) sr0) - vec_len (sl0->rewrite); - sr1 = ((void *) sr1) - vec_len (sl1->rewrite); - sr2 = ((void *) sr2) - vec_len (sl2->rewrite); - sr3 = ((void *) sr3) - vec_len (sl3->rewrite); - - sr0->segments->as_u64[0] = ip0->dst_address.as_u64[0]; - sr0->segments->as_u64[1] = ip0->dst_address.as_u64[1]; - sr1->segments->as_u64[0] = ip1->dst_address.as_u64[0]; - sr1->segments->as_u64[1] = ip1->dst_address.as_u64[1]; - sr2->segments->as_u64[0] = ip2->dst_address.as_u64[0]; - sr2->segments->as_u64[1] = ip2->dst_address.as_u64[1]; - sr3->segments->as_u64[0] = ip3->dst_address.as_u64[0]; - sr3->segments->as_u64[1] = ip3->dst_address.as_u64[1]; - - ip0->dst_address.as_u64[0] = - (sr0->segments + sr0->segments_left)->as_u64[0]; - ip0->dst_address.as_u64[1] = - (sr0->segments + sr0->segments_left)->as_u64[1]; - ip1->dst_address.as_u64[0] = - (sr1->segments + sr1->segments_left)->as_u64[0]; - ip1->dst_address.as_u64[1] = - (sr1->segments + sr1->segments_left)->as_u64[1]; - ip2->dst_address.as_u64[0] = - (sr2->segments + sr2->segments_left)->as_u64[0]; - ip2->dst_address.as_u64[1] = - (sr2->segments + sr2->segments_left)->as_u64[1]; - ip3->dst_address.as_u64[0] = - (sr3->segments + sr3->segments_left)->as_u64[0]; - ip3->dst_address.as_u64[1] = - (sr3->segments + sr3->segments_left)->as_u64[1]; - - ip6_ext_header_t *ip_ext; - if (ip0 + 1 == (void *) sr0) - { - sr0->protocol = ip0->protocol; - ip0->protocol = IP_PROTOCOL_IPV6_ROUTE; - } - else - { - ip_ext = (void *) (ip0 + 1); - sr0->protocol = ip_ext->next_hdr; - ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE; - } - - if (ip1 + 1 == (void *) sr1) - { - sr1->protocol = ip1->protocol; - ip1->protocol = IP_PROTOCOL_IPV6_ROUTE; - } - else - { - ip_ext = (void *) (ip2 + 1); - sr2->protocol = ip_ext->next_hdr; - ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE; - } - - if (ip2 + 1 == (void *) sr2) - { - sr2->protocol = ip2->protocol; - ip2->protocol = IP_PROTOCOL_IPV6_ROUTE; - } - else - { - ip_ext = (void *) (ip2 + 1); - sr2->protocol = ip_ext->next_hdr; - ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE; - } - - if (ip3 + 1 == (void *) sr3) - { - sr3->protocol = ip3->protocol; - ip3->protocol = IP_PROTOCOL_IPV6_ROUTE; - } - else - { - ip_ext = (void *) (ip3 + 1); - sr3->protocol = ip_ext->next_hdr; - ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE; - } - - insert_pkts += 4; - - if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) - { - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_policy_rewrite_trace_t *tr = - vlib_add_trace (vm, node, b0, sizeof (*tr)); - clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, - sizeof 
(tr->src.as_u8)); - clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - } - - if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_policy_rewrite_trace_t *tr = - vlib_add_trace (vm, node, b1, sizeof (*tr)); - clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8, - sizeof (tr->src.as_u8)); - clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - } - - if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_policy_rewrite_trace_t *tr = - vlib_add_trace (vm, node, b2, sizeof (*tr)); - clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8, - sizeof (tr->src.as_u8)); - clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - } - - if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_policy_rewrite_trace_t *tr = - vlib_add_trace (vm, node, b3, sizeof (*tr)); - clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8, - sizeof (tr->src.as_u8)); - clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - } - } - - vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, - n_left_to_next, bi0, bi1, bi2, bi3, - next0, next1, next2, next3); - } - - /* Single loop for potentially the last three packets */ - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0; - vlib_buffer_t *b0; - ip6_header_t *ip0 = 0; - ip6_sr_header_t *sr0 = 0; - ip6_sr_sl_t *sl0; - u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; - u16 new_l0 = 0; - - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - sl0 = - pool_elt_at_index (sm->sid_lists, - vnet_buffer (b0)->ip.adj_index[VLIB_TX]); - ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= - vec_len (sl0->rewrite)); - - ip0 = vlib_buffer_get_current (b0); - - if (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) - sr0 = - (ip6_sr_header_t *) (((void *) (ip0 + 1)) + - ip6_ext_header_len (ip0 + 1)); - else - sr0 = (ip6_sr_header_t *) (ip0 + 1); - - clib_memcpy ((u8 *) ip0 - vec_len (sl0->rewrite), (u8 *) ip0, - (void *) sr0 - (void *) ip0); - clib_memcpy (((u8 *) sr0 - vec_len (sl0->rewrite)), sl0->rewrite, - vec_len (sl0->rewrite)); - - vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite)); - - ip0 = ((void *) ip0) - vec_len (sl0->rewrite); - ip0->hop_limit -= 1; - new_l0 = - clib_net_to_host_u16 (ip0->payload_length) + - vec_len (sl0->rewrite); - ip0->payload_length = clib_host_to_net_u16 (new_l0); - - sr0 = ((void *) sr0) - vec_len (sl0->rewrite); - sr0->segments->as_u64[0] = ip0->dst_address.as_u64[0]; - sr0->segments->as_u64[1] = ip0->dst_address.as_u64[1]; - - ip0->dst_address.as_u64[0] = - (sr0->segments + sr0->segments_left)->as_u64[0]; - ip0->dst_address.as_u64[1] = - (sr0->segments + sr0->segments_left)->as_u64[1]; - - if (ip0 + 1 == (void *) sr0) - { - sr0->protocol = ip0->protocol; - ip0->protocol = IP_PROTOCOL_IPV6_ROUTE; - } - else - { - ip6_ext_header_t *ip_ext = (void *) (ip0 + 1); - sr0->protocol = ip_ext->next_hdr; - ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE; - } - - if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) && - PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_policy_rewrite_trace_t *tr = - vlib_add_trace (vm, node, b0, sizeof (*tr)); - clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, - sizeof (tr->src.as_u8)); - clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - } - - insert_pkts++; - - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, bi0, 
next0); - } - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - /* Update counters */ - vlib_node_increment_counter (vm, sr_policy_rewrite_insert_node.index, - SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL, - insert_pkts); - vlib_node_increment_counter (vm, sr_policy_rewrite_insert_node.index, - SR_POLICY_REWRITE_ERROR_COUNTER_BSID, - bsid_pkts); - return from_frame->n_vectors; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (sr_policy_rewrite_insert_node) = { - .function = sr_policy_rewrite_insert, - .name = "sr-pl-rewrite-insert", - .vector_size = sizeof (u32), - .format_trace = format_sr_policy_rewrite_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - .n_errors = SR_POLICY_REWRITE_N_ERROR, - .error_strings = sr_policy_rewrite_error_strings, - .n_next_nodes = SR_POLICY_REWRITE_N_NEXT, - .next_nodes = { -#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n, - foreach_sr_policy_rewrite_next -#undef _ - }, -}; -/* *INDENT-ON* */ - -/** - * @brief Graph node for applying a SR policy into a packet. BSID - SRH insertion. - */ -static uword -sr_policy_rewrite_b_insert (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * from_frame) -{ - ip6_sr_main_t *sm = &sr_main; - u32 n_left_from, next_index, *from, *to_next; - - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - - next_index = node->cached_next_index; - - int insert_pkts = 0, bsid_pkts = 0; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - /* Quad - Loop */ - while (n_left_from >= 8 && n_left_to_next >= 4) - { - u32 bi0, bi1, bi2, bi3; - vlib_buffer_t *b0, *b1, *b2, *b3; - u32 next0, next1, next2, next3; - next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; - ip6_header_t *ip0, *ip1, *ip2, *ip3; - ip6_sr_header_t *sr0, *sr1, *sr2, *sr3; - ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3; - u16 new_l0, new_l1, new_l2, new_l3; - - /* Prefetch next iteration. 
*/ - { - vlib_buffer_t *p4, *p5, *p6, *p7; - - p4 = vlib_get_buffer (vm, from[4]); - p5 = vlib_get_buffer (vm, from[5]); - p6 = vlib_get_buffer (vm, from[6]); - p7 = vlib_get_buffer (vm, from[7]); - - /* Prefetch the buffer header and packet for the N+2 loop iteration */ - vlib_prefetch_buffer_header (p4, LOAD); - vlib_prefetch_buffer_header (p5, LOAD); - vlib_prefetch_buffer_header (p6, LOAD); - vlib_prefetch_buffer_header (p7, LOAD); - - CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); - CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); - CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE); - CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE); - } - - to_next[0] = bi0 = from[0]; - to_next[1] = bi1 = from[1]; - to_next[2] = bi2 = from[2]; - to_next[3] = bi3 = from[3]; - from += 4; - to_next += 4; - n_left_from -= 4; - n_left_to_next -= 4; - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - b2 = vlib_get_buffer (vm, bi2); - b3 = vlib_get_buffer (vm, bi3); - - sl0 = - pool_elt_at_index (sm->sid_lists, - vnet_buffer (b0)->ip.adj_index[VLIB_TX]); - sl1 = - pool_elt_at_index (sm->sid_lists, - vnet_buffer (b1)->ip.adj_index[VLIB_TX]); - sl2 = - pool_elt_at_index (sm->sid_lists, - vnet_buffer (b2)->ip.adj_index[VLIB_TX]); - sl3 = - pool_elt_at_index (sm->sid_lists, - vnet_buffer (b3)->ip.adj_index[VLIB_TX]); - ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= - vec_len (sl0->rewrite_bsid)); - ASSERT (b1->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= - vec_len (sl1->rewrite_bsid)); - ASSERT (b2->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= - vec_len (sl2->rewrite_bsid)); - ASSERT (b3->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= - vec_len (sl3->rewrite_bsid)); - - ip0 = vlib_buffer_get_current (b0); - ip1 = vlib_buffer_get_current (b1); - ip2 = vlib_buffer_get_current (b2); - ip3 = vlib_buffer_get_current (b3); - - if (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) - sr0 = - (ip6_sr_header_t *) (((void *) (ip0 + 1)) + - ip6_ext_header_len (ip0 + 1)); - else - sr0 = (ip6_sr_header_t *) (ip0 + 1); - - if (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) - sr1 = - (ip6_sr_header_t *) (((void *) (ip1 + 1)) + - ip6_ext_header_len (ip1 + 1)); - else - sr1 = (ip6_sr_header_t *) (ip1 + 1); - - if (ip2->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) - sr2 = - (ip6_sr_header_t *) (((void *) (ip2 + 1)) + - ip6_ext_header_len (ip2 + 1)); - else - sr2 = (ip6_sr_header_t *) (ip2 + 1); - - if (ip3->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) - sr3 = - (ip6_sr_header_t *) (((void *) (ip3 + 1)) + - ip6_ext_header_len (ip3 + 1)); - else - sr3 = (ip6_sr_header_t *) (ip3 + 1); - - clib_memcpy ((u8 *) ip0 - vec_len (sl0->rewrite_bsid), (u8 *) ip0, - (void *) sr0 - (void *) ip0); - clib_memcpy ((u8 *) ip1 - vec_len (sl1->rewrite_bsid), (u8 *) ip1, - (void *) sr1 - (void *) ip1); - clib_memcpy ((u8 *) ip2 - vec_len (sl2->rewrite_bsid), (u8 *) ip2, - (void *) sr2 - (void *) ip2); - clib_memcpy ((u8 *) ip3 - vec_len (sl3->rewrite_bsid), (u8 *) ip3, - (void *) sr3 - (void *) ip3); - - clib_memcpy (((u8 *) sr0 - vec_len (sl0->rewrite_bsid)), - sl0->rewrite_bsid, vec_len (sl0->rewrite_bsid)); - clib_memcpy (((u8 *) sr1 - vec_len (sl1->rewrite_bsid)), - sl1->rewrite_bsid, vec_len (sl1->rewrite_bsid)); - clib_memcpy (((u8 *) sr2 - vec_len (sl2->rewrite_bsid)), - sl2->rewrite_bsid, vec_len (sl2->rewrite_bsid)); - clib_memcpy (((u8 *) sr3 - vec_len (sl3->rewrite_bsid)), - sl3->rewrite_bsid, vec_len (sl3->rewrite_bsid)); - - vlib_buffer_advance (b0, -(word) vec_len 
(sl0->rewrite_bsid)); - vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite_bsid)); - vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite_bsid)); - vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite_bsid)); - - ip0 = ((void *) ip0) - vec_len (sl0->rewrite_bsid); - ip1 = ((void *) ip1) - vec_len (sl1->rewrite_bsid); - ip2 = ((void *) ip2) - vec_len (sl2->rewrite_bsid); - ip3 = ((void *) ip3) - vec_len (sl3->rewrite_bsid); - - ip0->hop_limit -= 1; - ip1->hop_limit -= 1; - ip2->hop_limit -= 1; - ip3->hop_limit -= 1; - - new_l0 = - clib_net_to_host_u16 (ip0->payload_length) + - vec_len (sl0->rewrite_bsid); - new_l1 = - clib_net_to_host_u16 (ip1->payload_length) + - vec_len (sl1->rewrite_bsid); - new_l2 = - clib_net_to_host_u16 (ip2->payload_length) + - vec_len (sl2->rewrite_bsid); - new_l3 = - clib_net_to_host_u16 (ip3->payload_length) + - vec_len (sl3->rewrite_bsid); - - ip0->payload_length = clib_host_to_net_u16 (new_l0); - ip1->payload_length = clib_host_to_net_u16 (new_l1); - ip2->payload_length = clib_host_to_net_u16 (new_l2); - ip3->payload_length = clib_host_to_net_u16 (new_l3); - - sr0 = ((void *) sr0) - vec_len (sl0->rewrite_bsid); - sr1 = ((void *) sr1) - vec_len (sl1->rewrite_bsid); - sr2 = ((void *) sr2) - vec_len (sl2->rewrite_bsid); - sr3 = ((void *) sr3) - vec_len (sl3->rewrite_bsid); - - ip0->dst_address.as_u64[0] = - (sr0->segments + sr0->segments_left)->as_u64[0]; - ip0->dst_address.as_u64[1] = - (sr0->segments + sr0->segments_left)->as_u64[1]; - ip1->dst_address.as_u64[0] = - (sr1->segments + sr1->segments_left)->as_u64[0]; - ip1->dst_address.as_u64[1] = - (sr1->segments + sr1->segments_left)->as_u64[1]; - ip2->dst_address.as_u64[0] = - (sr2->segments + sr2->segments_left)->as_u64[0]; - ip2->dst_address.as_u64[1] = - (sr2->segments + sr2->segments_left)->as_u64[1]; - ip3->dst_address.as_u64[0] = - (sr3->segments + sr3->segments_left)->as_u64[0]; - ip3->dst_address.as_u64[1] = - (sr3->segments + sr3->segments_left)->as_u64[1]; - - ip6_ext_header_t *ip_ext; - if (ip0 + 1 == (void *) sr0) - { - sr0->protocol = ip0->protocol; - ip0->protocol = IP_PROTOCOL_IPV6_ROUTE; - } - else - { - ip_ext = (void *) (ip0 + 1); - sr0->protocol = ip_ext->next_hdr; - ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE; - } - - if (ip1 + 1 == (void *) sr1) - { - sr1->protocol = ip1->protocol; - ip1->protocol = IP_PROTOCOL_IPV6_ROUTE; - } - else - { - ip_ext = (void *) (ip2 + 1); - sr2->protocol = ip_ext->next_hdr; - ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE; - } - - if (ip2 + 1 == (void *) sr2) - { - sr2->protocol = ip2->protocol; - ip2->protocol = IP_PROTOCOL_IPV6_ROUTE; - } - else - { - ip_ext = (void *) (ip2 + 1); - sr2->protocol = ip_ext->next_hdr; - ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE; - } - - if (ip3 + 1 == (void *) sr3) - { - sr3->protocol = ip3->protocol; - ip3->protocol = IP_PROTOCOL_IPV6_ROUTE; - } - else - { - ip_ext = (void *) (ip3 + 1); - sr3->protocol = ip_ext->next_hdr; - ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE; - } - - insert_pkts += 4; - - if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) - { - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_policy_rewrite_trace_t *tr = - vlib_add_trace (vm, node, b0, sizeof (*tr)); - clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, - sizeof (tr->src.as_u8)); - clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - } - - if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_policy_rewrite_trace_t *tr = - vlib_add_trace (vm, node, b1, sizeof (*tr)); - clib_memcpy (tr->src.as_u8, 
ip1->src_address.as_u8, - sizeof (tr->src.as_u8)); - clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - } - - if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_policy_rewrite_trace_t *tr = - vlib_add_trace (vm, node, b2, sizeof (*tr)); - clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8, - sizeof (tr->src.as_u8)); - clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - } - - if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_policy_rewrite_trace_t *tr = - vlib_add_trace (vm, node, b3, sizeof (*tr)); - clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8, - sizeof (tr->src.as_u8)); - clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - } - } - - vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, - n_left_to_next, bi0, bi1, bi2, bi3, - next0, next1, next2, next3); - } - - /* Single loop for potentially the last three packets */ - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0; - vlib_buffer_t *b0; - ip6_header_t *ip0 = 0; - ip6_sr_header_t *sr0 = 0; - ip6_sr_sl_t *sl0; - u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; - u16 new_l0 = 0; - - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - sl0 = - pool_elt_at_index (sm->sid_lists, - vnet_buffer (b0)->ip.adj_index[VLIB_TX]); - ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= - vec_len (sl0->rewrite_bsid)); - - ip0 = vlib_buffer_get_current (b0); - - if (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) - sr0 = - (ip6_sr_header_t *) (((void *) (ip0 + 1)) + - ip6_ext_header_len (ip0 + 1)); - else - sr0 = (ip6_sr_header_t *) (ip0 + 1); - - clib_memcpy ((u8 *) ip0 - vec_len (sl0->rewrite_bsid), (u8 *) ip0, - (void *) sr0 - (void *) ip0); - clib_memcpy (((u8 *) sr0 - vec_len (sl0->rewrite_bsid)), - sl0->rewrite_bsid, vec_len (sl0->rewrite_bsid)); - - vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite_bsid)); - - ip0 = ((void *) ip0) - vec_len (sl0->rewrite_bsid); - ip0->hop_limit -= 1; - new_l0 = - clib_net_to_host_u16 (ip0->payload_length) + - vec_len (sl0->rewrite_bsid); - ip0->payload_length = clib_host_to_net_u16 (new_l0); - - sr0 = ((void *) sr0) - vec_len (sl0->rewrite_bsid); - - ip0->dst_address.as_u64[0] = - (sr0->segments + sr0->segments_left)->as_u64[0]; - ip0->dst_address.as_u64[1] = - (sr0->segments + sr0->segments_left)->as_u64[1]; - - if (ip0 + 1 == (void *) sr0) - { - sr0->protocol = ip0->protocol; - ip0->protocol = IP_PROTOCOL_IPV6_ROUTE; - } - else - { - ip6_ext_header_t *ip_ext = (void *) (ip0 + 1); - sr0->protocol = ip_ext->next_hdr; - ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE; - } - - if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) && - PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_policy_rewrite_trace_t *tr = - vlib_add_trace (vm, node, b0, sizeof (*tr)); - clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, - sizeof (tr->src.as_u8)); - clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - } - - insert_pkts++; - - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, bi0, next0); - } - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - /* Update counters */ - vlib_node_increment_counter (vm, sr_policy_rewrite_insert_node.index, - SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL, - insert_pkts); - vlib_node_increment_counter (vm, sr_policy_rewrite_insert_node.index, - SR_POLICY_REWRITE_ERROR_COUNTER_BSID, 
- bsid_pkts); - return from_frame->n_vectors; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (sr_policy_rewrite_b_insert_node) = { - .function = sr_policy_rewrite_b_insert, - .name = "sr-pl-rewrite-b-insert", - .vector_size = sizeof (u32), - .format_trace = format_sr_policy_rewrite_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - .n_errors = SR_POLICY_REWRITE_N_ERROR, - .error_strings = sr_policy_rewrite_error_strings, - .n_next_nodes = SR_POLICY_REWRITE_N_NEXT, - .next_nodes = { -#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n, - foreach_sr_policy_rewrite_next -#undef _ - }, -}; -/* *INDENT-ON* */ - -/** - * @brief Function BSID encapsulation - */ -static_always_inline void -end_bsid_encaps_srh_processing (vlib_node_runtime_t * node, - vlib_buffer_t * b0, - ip6_header_t * ip0, - ip6_sr_header_t * sr0, u32 * next0) -{ - ip6_address_t *new_dst0; - - if (PREDICT_FALSE (!sr0)) - goto error_bsid_encaps; - - if (PREDICT_TRUE (sr0->type == ROUTING_HEADER_TYPE_SR)) - { - if (PREDICT_TRUE (sr0->segments_left != 0)) - { - sr0->segments_left -= 1; - new_dst0 = (ip6_address_t *) (sr0->segments); - new_dst0 += sr0->segments_left; - ip0->dst_address.as_u64[0] = new_dst0->as_u64[0]; - ip0->dst_address.as_u64[1] = new_dst0->as_u64[1]; - return; - } - } - -error_bsid_encaps: - *next0 = SR_POLICY_REWRITE_NEXT_ERROR; - b0->error = node->errors[SR_POLICY_REWRITE_ERROR_BSID_ZERO]; -} - -/** - * @brief Graph node for applying a SR policy BSID - Encapsulation - */ -static uword -sr_policy_rewrite_b_encaps (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * from_frame) -{ - ip6_sr_main_t *sm = &sr_main; - u32 n_left_from, next_index, *from, *to_next; - - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - - next_index = node->cached_next_index; - - int encap_pkts = 0, bsid_pkts = 0; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - /* Quad - Loop */ - while (n_left_from >= 8 && n_left_to_next >= 4) - { - u32 bi0, bi1, bi2, bi3; - vlib_buffer_t *b0, *b1, *b2, *b3; - u32 next0, next1, next2, next3; - next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; - ip6_header_t *ip0, *ip1, *ip2, *ip3; - ip6_header_t *ip0_encap, *ip1_encap, *ip2_encap, *ip3_encap; - ip6_sr_header_t *sr0, *sr1, *sr2, *sr3; - ip6_ext_header_t *prev0, *prev1, *prev2, *prev3; - ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3; - - /* Prefetch next iteration. 
*/ - { - vlib_buffer_t *p4, *p5, *p6, *p7; - - p4 = vlib_get_buffer (vm, from[4]); - p5 = vlib_get_buffer (vm, from[5]); - p6 = vlib_get_buffer (vm, from[6]); - p7 = vlib_get_buffer (vm, from[7]); - - /* Prefetch the buffer header and packet for the N+2 loop iteration */ - vlib_prefetch_buffer_header (p4, LOAD); - vlib_prefetch_buffer_header (p5, LOAD); - vlib_prefetch_buffer_header (p6, LOAD); - vlib_prefetch_buffer_header (p7, LOAD); - - CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); - CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); - CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE); - CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE); - } - - to_next[0] = bi0 = from[0]; - to_next[1] = bi1 = from[1]; - to_next[2] = bi2 = from[2]; - to_next[3] = bi3 = from[3]; - from += 4; - to_next += 4; - n_left_from -= 4; - n_left_to_next -= 4; - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - b2 = vlib_get_buffer (vm, bi2); - b3 = vlib_get_buffer (vm, bi3); - - sl0 = - pool_elt_at_index (sm->sid_lists, - vnet_buffer (b0)->ip.adj_index[VLIB_TX]); - sl1 = - pool_elt_at_index (sm->sid_lists, - vnet_buffer (b1)->ip.adj_index[VLIB_TX]); - sl2 = - pool_elt_at_index (sm->sid_lists, - vnet_buffer (b2)->ip.adj_index[VLIB_TX]); - sl3 = - pool_elt_at_index (sm->sid_lists, - vnet_buffer (b3)->ip.adj_index[VLIB_TX]); - ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= - vec_len (sl0->rewrite)); - ASSERT (b1->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= - vec_len (sl1->rewrite)); - ASSERT (b2->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= - vec_len (sl2->rewrite)); - ASSERT (b3->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= - vec_len (sl3->rewrite)); - - ip0_encap = vlib_buffer_get_current (b0); - ip1_encap = vlib_buffer_get_current (b1); - ip2_encap = vlib_buffer_get_current (b2); - ip3_encap = vlib_buffer_get_current (b3); - - ip6_ext_header_find_t (ip0_encap, prev0, sr0, - IP_PROTOCOL_IPV6_ROUTE); - ip6_ext_header_find_t (ip1_encap, prev1, sr1, - IP_PROTOCOL_IPV6_ROUTE); - ip6_ext_header_find_t (ip2_encap, prev2, sr2, - IP_PROTOCOL_IPV6_ROUTE); - ip6_ext_header_find_t (ip3_encap, prev3, sr3, - IP_PROTOCOL_IPV6_ROUTE); - - end_bsid_encaps_srh_processing (node, b0, ip0_encap, sr0, &next0); - end_bsid_encaps_srh_processing (node, b1, ip1_encap, sr1, &next1); - end_bsid_encaps_srh_processing (node, b2, ip2_encap, sr2, &next2); - end_bsid_encaps_srh_processing (node, b3, ip3_encap, sr3, &next3); - - clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite), - sl0->rewrite, vec_len (sl0->rewrite)); - clib_memcpy (((u8 *) ip1_encap) - vec_len (sl1->rewrite), - sl1->rewrite, vec_len (sl1->rewrite)); - clib_memcpy (((u8 *) ip2_encap) - vec_len (sl2->rewrite), - sl2->rewrite, vec_len (sl2->rewrite)); - clib_memcpy (((u8 *) ip3_encap) - vec_len (sl3->rewrite), - sl3->rewrite, vec_len (sl3->rewrite)); - - vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite)); - vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite)); - vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite)); - vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite)); - - ip0 = vlib_buffer_get_current (b0); - ip1 = vlib_buffer_get_current (b1); - ip2 = vlib_buffer_get_current (b2); - ip3 = vlib_buffer_get_current (b3); - - encaps_processing_v6 (node, b0, ip0, ip0_encap); - encaps_processing_v6 (node, b1, ip1, ip1_encap); - encaps_processing_v6 (node, b2, ip2, ip2_encap); - encaps_processing_v6 (node, b3, ip3, ip3_encap); - - if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) - { - if 
(PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_policy_rewrite_trace_t *tr = - vlib_add_trace (vm, node, b0, sizeof (*tr)); - clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, - sizeof (tr->src.as_u8)); - clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - } - - if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_policy_rewrite_trace_t *tr = - vlib_add_trace (vm, node, b1, sizeof (*tr)); - clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8, - sizeof (tr->src.as_u8)); - clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - } - - if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_policy_rewrite_trace_t *tr = - vlib_add_trace (vm, node, b2, sizeof (*tr)); - clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8, - sizeof (tr->src.as_u8)); - clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - } - - if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_policy_rewrite_trace_t *tr = - vlib_add_trace (vm, node, b3, sizeof (*tr)); - clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8, - sizeof (tr->src.as_u8)); - clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - } - } - - encap_pkts += 4; - vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, - n_left_to_next, bi0, bi1, bi2, bi3, - next0, next1, next2, next3); - } - - /* Single loop for potentially the last three packets */ - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0; - vlib_buffer_t *b0; - ip6_header_t *ip0 = 0, *ip0_encap = 0; - ip6_ext_header_t *prev0; - ip6_sr_header_t *sr0; - ip6_sr_sl_t *sl0; - u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; - - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - b0 = vlib_get_buffer (vm, bi0); - - sl0 = - pool_elt_at_index (sm->sid_lists, - vnet_buffer (b0)->ip.adj_index[VLIB_TX]); - ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= - vec_len (sl0->rewrite)); - - ip0_encap = vlib_buffer_get_current (b0); - ip6_ext_header_find_t (ip0_encap, prev0, sr0, - IP_PROTOCOL_IPV6_ROUTE); - end_bsid_encaps_srh_processing (node, b0, ip0_encap, sr0, &next0); - - clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite), - sl0->rewrite, vec_len (sl0->rewrite)); - vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite)); - - ip0 = vlib_buffer_get_current (b0); - - encaps_processing_v6 (node, b0, ip0, ip0_encap); - - if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) && - PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_policy_rewrite_trace_t *tr = - vlib_add_trace (vm, node, b0, sizeof (*tr)); - clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, - sizeof (tr->src.as_u8)); - clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - } - - encap_pkts++; - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, bi0, next0); - } - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - /* Update counters */ - vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index, - SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL, - encap_pkts); - vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index, - SR_POLICY_REWRITE_ERROR_COUNTER_BSID, - bsid_pkts); - - return from_frame->n_vectors; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (sr_policy_rewrite_b_encaps_node) = { - .function = sr_policy_rewrite_b_encaps, - .name = "sr-pl-rewrite-b-encaps", - .vector_size = sizeof (u32), - .format_trace = 
format_sr_policy_rewrite_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - .n_errors = SR_POLICY_REWRITE_N_ERROR, - .error_strings = sr_policy_rewrite_error_strings, - .n_next_nodes = SR_POLICY_REWRITE_N_NEXT, - .next_nodes = { -#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n, - foreach_sr_policy_rewrite_next -#undef _ - }, -}; -/* *INDENT-ON* */ - -/*************************** SR Segment Lists DPOs ****************************/ -static u8 * -format_sr_segment_list_dpo (u8 * s, va_list * args) -{ - ip6_sr_main_t *sm = &sr_main; - ip6_address_t *addr; - ip6_sr_sl_t *sl; - - index_t index = va_arg (*args, index_t); - CLIB_UNUSED (u32 indent) = va_arg (*args, u32); - s = format (s, "SR: Segment List index:[%d]", index); - s = format (s, "\n\tSegments:"); - - sl = pool_elt_at_index (sm->sid_lists, index); - - s = format (s, "< "); - vec_foreach (addr, sl->segments) - { - s = format (s, "%U, ", format_ip6_address, addr); - } - s = format (s, "\b\b > - "); - s = format (s, "Weight: %u", sl->weight); - - return s; -} - -const static dpo_vft_t sr_policy_rewrite_vft = { - .dv_lock = sr_dpo_lock, - .dv_unlock = sr_dpo_unlock, - .dv_format = format_sr_segment_list_dpo, -}; - -const static char *const sr_pr_encaps_ip6_nodes[] = { - "sr-pl-rewrite-encaps", - NULL, -}; - -const static char *const sr_pr_encaps_ip4_nodes[] = { - "sr-pl-rewrite-encaps-v4", - NULL, -}; - -const static char *const *const sr_pr_encaps_nodes[DPO_PROTO_NUM] = { - [DPO_PROTO_IP6] = sr_pr_encaps_ip6_nodes, - [DPO_PROTO_IP4] = sr_pr_encaps_ip4_nodes, -}; - -const static char *const sr_pr_insert_ip6_nodes[] = { - "sr-pl-rewrite-insert", - NULL, -}; - -const static char *const *const sr_pr_insert_nodes[DPO_PROTO_NUM] = { - [DPO_PROTO_IP6] = sr_pr_insert_ip6_nodes, -}; - -const static char *const sr_pr_bsid_insert_ip6_nodes[] = { - "sr-pl-rewrite-b-insert", - NULL, -}; - -const static char *const *const sr_pr_bsid_insert_nodes[DPO_PROTO_NUM] = { - [DPO_PROTO_IP6] = sr_pr_bsid_insert_ip6_nodes, -}; - -const static char *const sr_pr_bsid_encaps_ip6_nodes[] = { - "sr-pl-rewrite-b-encaps", - NULL, -}; - -const static char *const *const sr_pr_bsid_encaps_nodes[DPO_PROTO_NUM] = { - [DPO_PROTO_IP6] = sr_pr_bsid_encaps_ip6_nodes, -}; - -/********************* SR Policy Rewrite initialization ***********************/ -/** - * @brief SR Policy Rewrite initialization - */ -clib_error_t * -sr_policy_rewrite_init (vlib_main_t * vm) -{ - ip6_sr_main_t *sm = &sr_main; - - /* Init memory for sr policy keys (bsid <-> ip6_address_t) */ - mhash_init (&sm->sr_policies_index_hash, sizeof (uword), - sizeof (ip6_address_t)); - - /* Init SR VPO DPOs type */ - sr_pr_encaps_dpo_type = - dpo_register_new_type (&sr_policy_rewrite_vft, sr_pr_encaps_nodes); - - sr_pr_insert_dpo_type = - dpo_register_new_type (&sr_policy_rewrite_vft, sr_pr_insert_nodes); - - sr_pr_bsid_encaps_dpo_type = - dpo_register_new_type (&sr_policy_rewrite_vft, sr_pr_bsid_encaps_nodes); - - sr_pr_bsid_insert_dpo_type = - dpo_register_new_type (&sr_policy_rewrite_vft, sr_pr_bsid_insert_nodes); - - /* Register the L2 encaps node used in HW redirect */ - sm->l2_sr_policy_rewrite_index = sr_policy_rewrite_encaps_node.index; - - sm->fib_table_ip6 = (u32) ~ 0; - sm->fib_table_ip4 = (u32) ~ 0; - - return 0; -} - -VLIB_INIT_FUNCTION (sr_policy_rewrite_init); - - -/* -* fd.io coding-style-patch-verification: ON -* -* Local Variables: -* eval: (c-set-style "gnu") -* End: -*/ diff --git a/src/vnet/sr/sr_steering.c b/src/vnet/sr/sr_steering.c deleted file mode 100755 index 04646198..00000000 --- 
a/src/vnet/sr/sr_steering.c +++ /dev/null @@ -1,573 +0,0 @@ -/* - * sr_steering.c: ipv6 segment routing steering into SR policy - * - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file - * @brief Packet steering into SR Policies - * - * This file is in charge of handling the FIB appropiatly to steer packets - * through SR Policies as defined in 'sr_policy_rewrite.c'. Notice that here - * we are only doing steering. SR policy application is done in - * sr_policy_rewrite.c - * - * Supports: - * - Steering of IPv6 traffic Destination Address based - * - Steering of IPv4 traffic Destination Address based - * - Steering of L2 frames, interface based (sw interface) - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -/** - * @brief Steer traffic L2 and L3 traffic through a given SR policy - * - * @param is_del - * @param bsid is the bindingSID of the SR Policy (alt to sr_policy_index) - * @param sr_policy is the index of the SR Policy (alt to bsid) - * @param table_id is the VRF where to install the FIB entry for the BSID - * @param prefix is the IPv4/v6 address for L3 traffic type - * @param mask_width is the mask for L3 traffic type - * @param sw_if_index is the incoming interface for L2 traffic - * @param traffic_type describes the type of traffic - * - * @return 0 if correct, else error - */ -int -sr_steering_policy (int is_del, ip6_address_t * bsid, u32 sr_policy_index, - u32 table_id, ip46_address_t * prefix, u32 mask_width, - u32 sw_if_index, u8 traffic_type) -{ - ip6_sr_main_t *sm = &sr_main; - sr_steering_key_t key; - ip6_sr_steering_policy_t *steer_pl; - fib_prefix_t pfx = { 0 }; - - ip6_sr_policy_t *sr_policy = 0; - uword *p = 0; - - memset (&key, 0, sizeof (sr_steering_key_t)); - - /* Compute the steer policy key */ - if (traffic_type == SR_STEER_IPV4 || traffic_type == SR_STEER_IPV6) - { - key.l3.prefix.as_u64[0] = prefix->as_u64[0]; - key.l3.prefix.as_u64[1] = prefix->as_u64[1]; - key.l3.mask_width = mask_width; - key.l3.fib_table = (table_id != (u32) ~ 0 ? 
table_id : 0); - } - else if (traffic_type == SR_STEER_L2) - { - key.l2.sw_if_index = sw_if_index; - - /* Sanitise the SW_IF_INDEX */ - if (pool_is_free_index (sm->vnet_main->interface_main.sw_interfaces, - sw_if_index)) - return -3; - - vnet_sw_interface_t *sw = - vnet_get_sw_interface (sm->vnet_main, sw_if_index); - if (sw->type != VNET_SW_INTERFACE_TYPE_HARDWARE) - return -3; - } - else - return -1; - - key.traffic_type = traffic_type; - - /* Search for the item */ - p = mhash_get (&sm->sr_steer_policies_hash, &key); - - if (p) - { - /* Retrieve Steer Policy function */ - steer_pl = pool_elt_at_index (sm->steer_policies, p[0]); - - if (is_del) - { - if (steer_pl->classify.traffic_type == SR_STEER_IPV6) - { - /* Remove FIB entry */ - pfx.fp_proto = FIB_PROTOCOL_IP6; - pfx.fp_len = steer_pl->classify.l3.mask_width; - pfx.fp_addr.ip6 = steer_pl->classify.l3.prefix.ip6; - - fib_table_entry_delete (fib_table_find - (FIB_PROTOCOL_IP6, - steer_pl->classify.l3.fib_table), - &pfx, FIB_SOURCE_SR); - } - else if (steer_pl->classify.traffic_type == SR_STEER_IPV4) - { - /* Remove FIB entry */ - pfx.fp_proto = FIB_PROTOCOL_IP4; - pfx.fp_len = steer_pl->classify.l3.mask_width; - pfx.fp_addr.ip4 = steer_pl->classify.l3.prefix.ip4; - - fib_table_entry_delete (fib_table_find - (FIB_PROTOCOL_IP4, - steer_pl->classify.l3.fib_table), &pfx, - FIB_SOURCE_SR); - } - else if (steer_pl->classify.traffic_type == SR_STEER_L2) - { - /* Remove HW redirection */ - vnet_feature_enable_disable ("device-input", - "sr-policy-rewrite-encaps-l2", - sw_if_index, 0, 0, 0); - sm->sw_iface_sr_policies[sw_if_index] = ~(u32) 0; - - /* Remove promiscous mode from interface */ - vnet_main_t *vnm = vnet_get_main (); - ethernet_main_t *em = ðernet_main; - ethernet_interface_t *eif = - ethernet_get_interface (em, sw_if_index); - - if (!eif) - goto cleanup_error_redirection; - - ethernet_set_flags (vnm, sw_if_index, 0); - } - - /* Delete SR steering policy entry */ - pool_put (sm->steer_policies, steer_pl); - mhash_unset (&sm->sr_steer_policies_hash, &key, NULL); - - /* If no more SR policies or steering policies */ - if (!pool_elts (sm->sr_policies) && !pool_elts (sm->steer_policies)) - { - fib_table_unlock (sm->fib_table_ip6, FIB_PROTOCOL_IP6); - fib_table_unlock (sm->fib_table_ip4, FIB_PROTOCOL_IP6); - sm->fib_table_ip6 = (u32) ~ 0; - sm->fib_table_ip4 = (u32) ~ 0; - } - - return 1; - } - else /* It means user requested to update an existing SR steering policy */ - { - /* Retrieve SR steering policy */ - if (bsid) - { - p = mhash_get (&sm->sr_policies_index_hash, bsid); - if (p) - sr_policy = pool_elt_at_index (sm->sr_policies, p[0]); - else - return -2; - } - else - sr_policy = pool_elt_at_index (sm->sr_policies, sr_policy_index); - - if (!sr_policy) - return -2; - - steer_pl->sr_policy = sr_policy - sm->sr_policies; - - /* Remove old FIB/hw redirection and create a new one */ - if (steer_pl->classify.traffic_type == SR_STEER_IPV6) - { - /* Remove FIB entry */ - pfx.fp_proto = FIB_PROTOCOL_IP6; - pfx.fp_len = steer_pl->classify.l3.mask_width; - pfx.fp_addr.ip6 = steer_pl->classify.l3.prefix.ip6; - - fib_table_entry_delete (fib_table_find - (FIB_PROTOCOL_IP6, - steer_pl->classify.l3.fib_table), - &pfx, FIB_SOURCE_SR); - - /* Create a new one */ - goto update_fib; - } - else if (steer_pl->classify.traffic_type == SR_STEER_IPV4) - { - /* Remove FIB entry */ - pfx.fp_proto = FIB_PROTOCOL_IP4; - pfx.fp_len = steer_pl->classify.l3.mask_width; - pfx.fp_addr.ip4 = steer_pl->classify.l3.prefix.ip4; - - fib_table_entry_delete 
(fib_table_find - (FIB_PROTOCOL_IP4, - steer_pl->classify.l3.fib_table), - &pfx, FIB_SOURCE_SR); - - /* Create a new one */ - goto update_fib; - } - else if (steer_pl->classify.traffic_type == SR_STEER_L2) - { - /* Update L2-HW redirection */ - goto update_fib; - } - } - } - else - /* delete; steering policy does not exist; complain */ - if (is_del) - return -4; - - /* Retrieve SR policy */ - if (bsid) - { - p = mhash_get (&sm->sr_policies_index_hash, bsid); - if (p) - sr_policy = pool_elt_at_index (sm->sr_policies, p[0]); - else - return -2; - } - else - sr_policy = pool_elt_at_index (sm->sr_policies, sr_policy_index); - - /* Create a new steering policy */ - pool_get (sm->steer_policies, steer_pl); - memset (steer_pl, 0, sizeof (*steer_pl)); - - if (traffic_type == SR_STEER_IPV4 || traffic_type == SR_STEER_IPV6) - { - clib_memcpy (&steer_pl->classify.l3.prefix, prefix, - sizeof (ip46_address_t)); - steer_pl->classify.l3.mask_width = mask_width; - steer_pl->classify.l3.fib_table = - (table_id != (u32) ~ 0 ? table_id : 0); - steer_pl->classify.traffic_type = traffic_type; - } - else if (traffic_type == SR_STEER_L2) - { - steer_pl->classify.l2.sw_if_index = sw_if_index; - steer_pl->classify.traffic_type = traffic_type; - } - else - { - /* Incorrect API usage. Should never get here */ - pool_put (sm->steer_policies, steer_pl); - mhash_unset (&sm->sr_steer_policies_hash, &key, NULL); - return -1; - } - steer_pl->sr_policy = sr_policy - sm->sr_policies; - - /* Create and store key */ - mhash_set (&sm->sr_steer_policies_hash, &key, steer_pl - sm->steer_policies, - NULL); - - if (traffic_type == SR_STEER_L2) - { - if (!sr_policy->is_encap) - goto cleanup_error_encap; - - if (vnet_feature_enable_disable - ("device-input", "sr-pl-rewrite-encaps-l2", sw_if_index, 1, 0, 0)) - goto cleanup_error_redirection; - - /* Set promiscous mode on interface */ - vnet_main_t *vnm = vnet_get_main (); - ethernet_main_t *em = ðernet_main; - ethernet_interface_t *eif = ethernet_get_interface (em, sw_if_index); - - if (!eif) - goto cleanup_error_redirection; - - ethernet_set_flags (vnm, sw_if_index, - ETHERNET_INTERFACE_FLAG_ACCEPT_ALL); - } - else if (traffic_type == SR_STEER_IPV4) - if (!sr_policy->is_encap) - goto cleanup_error_encap; - -update_fib: - /* FIB API calls - Recursive route through the BindingSID */ - if (traffic_type == SR_STEER_IPV6) - { - pfx.fp_proto = FIB_PROTOCOL_IP6; - pfx.fp_len = steer_pl->classify.l3.mask_width; - pfx.fp_addr.ip6 = steer_pl->classify.l3.prefix.ip6; - - fib_table_entry_path_add (fib_table_find (FIB_PROTOCOL_IP6, - (table_id != - (u32) ~ 0 ? - table_id : 0)), - &pfx, FIB_SOURCE_SR, - FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, - FIB_PROTOCOL_IP6, - (ip46_address_t *) & sr_policy->bsid, ~0, - sm->fib_table_ip6, 1, NULL, - FIB_ROUTE_PATH_FLAG_NONE); - } - else if (traffic_type == SR_STEER_IPV4) - { - pfx.fp_proto = FIB_PROTOCOL_IP4; - pfx.fp_len = steer_pl->classify.l3.mask_width; - pfx.fp_addr.ip4 = steer_pl->classify.l3.prefix.ip4; - - fib_table_entry_path_add (fib_table_find (FIB_PROTOCOL_IP4, - (table_id != - (u32) ~ 0 ? 
- table_id : 0)), - &pfx, FIB_SOURCE_SR, - FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, - FIB_PROTOCOL_IP6, - (ip46_address_t *) & sr_policy->bsid, ~0, - sm->fib_table_ip4, 1, NULL, - FIB_ROUTE_PATH_FLAG_NONE); - } - else if (traffic_type == SR_STEER_L2) - { - if (sw_if_index < vec_len (sm->sw_iface_sr_policies)) - sm->sw_iface_sr_policies[sw_if_index] = steer_pl->sr_policy; - else - { - vec_resize (sm->sw_iface_sr_policies, - (pool_len (sm->vnet_main->interface_main.sw_interfaces) - - vec_len (sm->sw_iface_sr_policies))); - sm->sw_iface_sr_policies[sw_if_index] = steer_pl->sr_policy; - } - } - - return 0; - -cleanup_error_encap: - pool_put (sm->steer_policies, steer_pl); - mhash_unset (&sm->sr_steer_policies_hash, &key, NULL); - return -5; - -cleanup_error_redirection: - pool_put (sm->steer_policies, steer_pl); - mhash_unset (&sm->sr_steer_policies_hash, &key, NULL); - return -3; -} - -static clib_error_t * -sr_steer_policy_command_fn (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - vnet_main_t *vnm = vnet_get_main (); - - int is_del = 0; - - ip46_address_t prefix; - u32 dst_mask_width = 0; - u32 sw_if_index = (u32) ~ 0; - u8 traffic_type = 0; - u32 fib_table = (u32) ~ 0; - - ip6_address_t bsid; - u32 sr_policy_index = (u32) ~ 0; - - u8 sr_policy_set = 0; - - memset (&prefix, 0, sizeof (ip46_address_t)); - - int rv; - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "del")) - is_del = 1; - else if (!traffic_type - && unformat (input, "l3 %U/%d", unformat_ip6_address, - &prefix.ip6, &dst_mask_width)) - traffic_type = SR_STEER_IPV6; - else if (!traffic_type - && unformat (input, "l3 %U/%d", unformat_ip4_address, - &prefix.ip4, &dst_mask_width)) - traffic_type = SR_STEER_IPV4; - else if (!traffic_type - && unformat (input, "l2 %U", unformat_vnet_sw_interface, vnm, - &sw_if_index)) - traffic_type = SR_STEER_L2; - else if (!sr_policy_set - && unformat (input, "via sr policy index %d", - &sr_policy_index)) - sr_policy_set = 1; - else if (!sr_policy_set - && unformat (input, "via sr policy bsid %U", - unformat_ip6_address, &bsid)) - sr_policy_set = 1; - else if (fib_table == (u32) ~ 0 - && unformat (input, "fib-table %d", &fib_table)); - else - break; - } - - if (!traffic_type) - return clib_error_return (0, "No L2/L3 traffic specified"); - if (!sr_policy_set) - return clib_error_return (0, "No SR policy specified"); - - /* Make sure that the prefixes are clean */ - if (traffic_type == SR_STEER_IPV4) - { - u32 mask = - (dst_mask_width ? (0xFFFFFFFFu >> (32 - dst_mask_width)) : 0); - prefix.ip4.as_u32 &= mask; - } - else if (traffic_type == SR_STEER_IPV6) - { - ip6_address_t mask; - ip6_address_mask_from_width (&mask, dst_mask_width); - ip6_address_mask (&prefix.ip6, &mask); - } - - rv = - sr_steering_policy (is_del, (sr_policy_index == ~(u32) 0 ? &bsid : NULL), - sr_policy_index, fib_table, &prefix, dst_mask_width, - sw_if_index, traffic_type); - - switch (rv) - { - case 0: - break; - case 1: - return 0; - case -1: - return clib_error_return (0, "Incorrect API usage."); - case -2: - return clib_error_return (0, - "The requested SR policy could not be located. Review the BSID/index."); - case -3: - return clib_error_return (0, - "Unable to do SW redirect. 
Incorrect interface."); - case -4: - return clib_error_return (0, - "The requested SR steering policy could not be deleted."); - case -5: - return clib_error_return (0, - "The SR policy is not an encapsulation one."); - default: - return clib_error_return (0, "BUG: sr steer policy returns %d", rv); - } - return 0; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (sr_steer_policy_command, static) = { - .path = "sr steer", - .short_help = "sr steer (del) [l3 |l2 ]" - "via sr policy [index |bsid ]" - "(fib-table )", - .long_help = - "\tSteer a L2 or L3 traffic through an existing SR policy.\n" - "\tExamples:\n" - "\t\tsr steer l3 2001::/64 via sr_policy index 5\n" - "\t\tsr steer l3 2001::/64 via sr_policy bsid 2010::9999:1\n" - "\t\tsr steer l2 GigabitEthernet0/5/0 via sr_policy index 5\n" - "\t\tsr steer del l3 2001::/64 via sr_policy index 5\n", - .function = sr_steer_policy_command_fn, -}; -/* *INDENT-ON* */ - -static clib_error_t * -show_sr_steering_policies_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - ip6_sr_main_t *sm = &sr_main; - ip6_sr_steering_policy_t **steer_policies = 0; - ip6_sr_steering_policy_t *steer_pl; - - vnet_main_t *vnm = vnet_get_main (); - - ip6_sr_policy_t *pl = 0; - int i; - - vlib_cli_output (vm, "SR steering policies:"); - /* *INDENT-OFF* */ - pool_foreach (steer_pl, sm->steer_policies, ({vec_add1(steer_policies, steer_pl);})); - /* *INDENT-ON* */ - vlib_cli_output (vm, "Traffic\t\tSR policy BSID"); - for (i = 0; i < vec_len (steer_policies); i++) - { - steer_pl = steer_policies[i]; - pl = pool_elt_at_index (sm->sr_policies, steer_pl->sr_policy); - if (steer_pl->classify.traffic_type == SR_STEER_L2) - { - vlib_cli_output (vm, "L2 %U\t%U", - format_vnet_sw_if_index_name, vnm, - steer_pl->classify.l2.sw_if_index, - format_ip6_address, &pl->bsid); - } - else if (steer_pl->classify.traffic_type == SR_STEER_IPV4) - { - vlib_cli_output (vm, "L3 %U/%d\t%U", - format_ip4_address, - &steer_pl->classify.l3.prefix.ip4, - steer_pl->classify.l3.mask_width, - format_ip6_address, &pl->bsid); - } - else if (steer_pl->classify.traffic_type == SR_STEER_IPV6) - { - vlib_cli_output (vm, "L3 %U/%d\t%U", - format_ip6_address, - &steer_pl->classify.l3.prefix.ip6, - steer_pl->classify.l3.mask_width, - format_ip6_address, &pl->bsid); - } - } - return 0; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (show_sr_steering_policies_command, static) = { - .path = "show sr steering policies", - .short_help = "show sr steering policies", - .function = show_sr_steering_policies_command_fn, -}; -/* *INDENT-ON* */ - -clib_error_t * -sr_steering_init (vlib_main_t * vm) -{ - ip6_sr_main_t *sm = &sr_main; - - /* Init memory for function keys */ - mhash_init (&sm->sr_steer_policies_hash, sizeof (uword), - sizeof (sr_steering_key_t)); - - sm->sw_iface_sr_policies = 0; - - sm->vnet_main = vnet_get_main (); - - return 0; -} - -/* *INDENT-OFF* */ -VLIB_INIT_FUNCTION (sr_steering_init); -/* *INDENT-ON* */ - -/* *INDENT-OFF* */ -VNET_FEATURE_INIT (sr_pl_rewrite_encaps_l2, static) = -{ - .arc_name = "device-input", - .node_name = "sr-pl-rewrite-encaps-l2", - .runs_before = VNET_FEATURES ("ethernet-input"), -}; -/* *INDENT-ON* */ - -/* -* fd.io coding-style-patch-verification: ON -* -* Local Variables: -* eval: (c-set-style "gnu") -* End: -*/ diff --git a/src/vnet/sr/sr_steering.md b/src/vnet/sr/sr_steering.md deleted file mode 100644 index cf446f81..00000000 --- a/src/vnet/sr/sr_steering.md +++ /dev/null @@ -1,11 +0,0 @@ -# Steering packets into a SR Policy 
{#srv6_steering_doc} - -To steer packets in Transit into an SR policy (T.Insert, T.Encaps and T.Encaps.L2 behaviors), the user needs to create an 'sr steering policy'. - - sr steer l3 2001::/64 via sr policy index 1 - sr steer l3 2001::/64 via sr policy bsid cafe::1 - sr steer l3 2001::/64 via sr policy bsid cafe::1 fib-table 3 - sr steer l3 10.0.0.0/16 via sr policy bsid cafe::1 - sr steer l2 TenGE0/1/0 via sr policy bsid cafe::1 - -Disclaimer: The T.Encaps.L2 will steer L2 frames into an SR Policy. Notice that creating an SR steering policy for L2 frames will actually automatically *put the interface into promiscous mode*. diff --git a/src/vnet/srmpls/dir.dox b/src/vnet/srmpls/dir.dox new file mode 100755 index 00000000..76ec1d6a --- /dev/null +++ b/src/vnet/srmpls/dir.dox @@ -0,0 +1,22 @@ +/* + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + @dir + @brief Segment Routing MPLS code + + An implementation of Segment Routing for the MPLS dataplane. + +*/ \ No newline at end of file diff --git a/src/vnet/srmpls/sr.h b/src/vnet/srmpls/sr.h new file mode 100755 index 00000000..0e106697 --- /dev/null +++ b/src/vnet/srmpls/sr.h @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * @file + * @brief Segment Routing MPLS data structures definitions + * + */ + +#ifndef included_vnet_srmpls_h +#define included_vnet_srmpls_h + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/* SR policy types */ +#define SR_POLICY_TYPE_DEFAULT 0 +#define SR_POLICY_TYPE_SPRAY 1 + +#define SR_SEGMENT_LIST_WEIGHT_DEFAULT 1 + +#define SR_STEER_IPV4 4 +#define SR_STEER_IPV6 6 + +/** + * @brief SR Segment List (SID list) + */ +typedef struct +{ + /** + * SIDs (key) + */ + mpls_label_t *segments; + + /** + * SID list weight (wECMP / UCMP) + */ + u32 weight; + +} mpls_sr_sl_t; + +typedef struct +{ + u32 *segments_lists; /**< Pool of SID lists indexes */ + + mpls_label_t bsid; /**< BindingSID (key) */ + + u8 type; /**< Type (default is 0) */ + /* SR Policy specific DPO */ + /* IF Type = DEFAULT Then Load Balancer DPO among SID lists */ + /* IF Type = SPRAY then Spray DPO with all SID lists */ + +} mpls_sr_policy_t; + +/** + * @brief Steering db key + * + * L3 is IPv4/IPv6 + mask + */ +typedef struct +{ + ip46_address_t prefix; /**< IP address of the prefix */ + u32 mask_width; /**< Mask width of the prefix */ + u32 fib_table; /**< VRF of the prefix */ + u8 traffic_type; /**< Traffic type (IPv4, IPv6, L2) */ + u8 padding[3]; +} sr_mpls_steering_key_t; + +typedef struct +{ + sr_mpls_steering_key_t classify; /**< Traffic classification */ + u32 sr_policy; /**< SR Policy index */ +} mpls_sr_steering_policy_t; + +/** + * @brief Segment Routing main datastructure + */ +typedef struct +{ + /** + * SR SID lists + */ + mpls_sr_sl_t *sid_lists; + + /** + * SR MPLS policies + */ + mpls_sr_policy_t *sr_policies; + + /** + * Hash table mapping BindingSID to SR MPLS policy + */ + uword *sr_policies_index_hash; + + /** + * Pool of SR steer policies instances + */ + mpls_sr_steering_policy_t *steer_policies; + + /** + * MHash table mapping steering rules to SR steer instance + */ + mhash_t sr_steer_policies_hash; + + /** + * convenience + */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; +} mpls_sr_main_t; + +extern mpls_sr_main_t sr_mpls_main; + +extern int +sr_mpls_policy_add (mpls_label_t bsid, mpls_label_t * segments, + u8 behavior, u32 weight); + +extern int +sr_mpls_policy_mod (mpls_label_t bsid, u32 index, u8 operation, + mpls_label_t * segments, u32 sl_index, u32 weight); + +extern int sr_mpls_policy_del (mpls_label_t bsid, u32 index); + +#endif /* included_vnet_sr_mpls_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/srmpls/sr_doc.md b/src/vnet/srmpls/sr_doc.md new file mode 100644 index 00000000..d60592bb --- /dev/null +++ b/src/vnet/srmpls/sr_doc.md @@ -0,0 +1,87 @@ +# SR-MPLS: Segment Routing for MPLS {#srmpls_doc} + +This is a memo intended to contain documentation of the VPP SR-MPLS implementation. +Everything that is not directly obvious should come here. +For any feedback on content that should be explained please mailto:pcamaril@cisco.com + +## Segment Routing + +Segment routing is a network technology focused on addressing the limitations of existing IP and Multiprotocol Label Switching (MPLS) networks in terms of simplicity, scale, and ease of operation. It is a foundation for application engineered routing as it prepares the networks for new business models where applications can control the network behavior. + +Segment routing seeks the right balance between distributed intelligence and centralized optimization and programming. 
It was built for the software-defined networking (SDN) era. + +Segment routing enhances packet forwarding behavior by enabling a network to transport unicast packets through a specific forwarding path, different from the normal path that a packet usually takes (IGP shortest path or BGP best path). This capability benefits many use cases, and one can build those specific paths based on application requirements. + +Segment routing uses the source routing paradigm. A node, usually a router but also a switch, a trusted server, or a virtual forwarder running on a hypervisor, steers a packet through an ordered list of instructions, called segments. A segment can represent any instruction, topological or service-based. A segment can have a local semantic to a segment-routing node or global within a segment-routing network. Segment routing allows an operator to enforce a flow through any topological path and service chain while maintaining per-flow state only at the ingress node to the segment-routing network. Segment routing also supports equal-cost multipath (ECMP) by design. + +Segment routing can operate with either an MPLS or an IPv6 data plane. All the currently available MPLS services, such as Layer 3 VPN (L3VPN), L2VPN (Virtual Private Wire Service [VPWS], Virtual Private LAN Services [VPLS], Ethernet VPN [E-VPN], and Provider Backbone Bridging Ethernet VPN [PBB-EVPN]), can run on top of a segment-routing transport network. + +**The implementation of Segment Routing in VPP covers both the IPv6 data plane (SRv6) as well as the MPLS data plane (SR-MPLS). This page contains the SR-MPLS documentation.** + +## Segment Routing terminology + +* SegmentID (SID): is an MPLS label. +* Segment List (SL) (SID List): is the sequence of SIDs that the packet will traverse. +* SR Policy: is a set of candidate paths (SID list+weight). An SR policy is uniquely identified by its Binding SID and associated with a weighted set of Segment Lists. In case several SID lists are defined, traffic steered into the policy is unevenly load-balanced among them according to their respective weights. +* BindingSID: a BindingSID is a SID (only one) associated one-one with an SR Policy. If a packet arrives with MPLS label corresponding to a BindingSID, then the SR policy will be applied to such packet. (BindingSID is popped first.) + +## SR-MPLS features in VPP + +The SR-MPLS implementation is focused on the SR policies, as well on its steering. Others SR-MPLS features, such as for example AdjSIDs, can be achieved using the regular VPP MPLS implementation. + +The Segment Routing Policy (*draft-filsfils-spring-segment-routing-policy*) defines SR Policies. + +## Creating a SR Policy + +An SR Policy is defined by a Binding SID and a weighted set of Segment Lists. + +A new SR policy is created with a first SID list using: + + sr mpls policy add bsid 40001 next 16001 next 16002 next 16003 (weight 5) + +* The weight parameter is only used if more than one SID list is associated with the policy. + +An SR policy is deleted with: + + sr mpls policy del bsid 40001 + +The existing SR policies are listed with: + + show sr mpls policies + +### Adding/Removing SID Lists from an SR policy + +An additional SID list is associated with an existing SR policy with: + + sr mpls policy mod bsid 40001 add sl next 16001 next 16002 next 16003 (weight 3) + +Conversely, a SID list can be removed from an SR policy with: + + sr mpls policy mod bsid 4001 del sl index 1 + +Note that this CLI cannot be used to remove the last SID list of a policy. 
Instead the SR policy delete CLI must be used. + +The weight of a SID list can also be modified with: + + sr mpls policy mod bsid 40001 mod sl index 1 weight 4 + sr mpls policy mod index 1 mod sl index 1 weight 4 + +### SR Policies: Spray policies + +Spray policies are a specific type of SR policies where the packet is replicated on all the SID lists, rather than load-balanced among them. + +SID list weights are ignored with this type of policies. + +A Spray policy is instantiated by appending the keyword **spray** to a regular SR-MPLS policy command, as in: + + sr mpls policy add bsid 40002 next 16001 next 16002 next 16003 spray + +Spray policies are used for removing multicast state from a network core domain, and instead send a linear unicast copy to every access node. The last SID in each list accesses the multicast tree within the access node. + +## Steering packets into a SR Policy + +To steer packets in Transit into an SR policy, the user needs to create an 'sr steering policy'. + + sr mpls steer l3 2001::/64 via sr policy bsid 40001 + sr mpls steer l3 2001::/64 via sr policy bsid 40001 fib-table 3 + sr mpls steer l3 10.0.0.0/16 via sr policy bsid 40001 diff --git a/src/vnet/srmpls/sr_mpls_policy.c b/src/vnet/srmpls/sr_mpls_policy.c new file mode 100755 index 00000000..5ebbc60d --- /dev/null +++ b/src/vnet/srmpls/sr_mpls_policy.c @@ -0,0 +1,569 @@ +/* + * sr_mpls_policy.c: SR-MPLS policies + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file + * @brief SR MPLS policy creation and application + * + * Create an SR policy. + * An SR policy can be either of 'default' type or 'spray' type + * An SR policy has attached a list of SID lists. + * In case the SR policy is a default one it will load balance among them. + * An SR policy has associated a BindingSID. + * In case any packet arrives with MPLS_label == BindingSID then the SR policy + * associated to such bindingSID will be applied to such packet. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +mpls_sr_main_t sr_mpls_main; + +/*************************** SR LB helper functions **************************/ +/** + * @brief Creates a Segment List and adds it to an SR policy + * + * Creates a Segment List and adds it to the SR policy. Notice that the SL are + * not necessarily unique. Hence there might be two Segment List within the + * same SR Policy with exactly the same segments and same weight. 
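 * @note Illustrative usage sketch (label and BSID values are assumed, not
 *       taken from this patch): this helper is reached through the public
 *       entry points declared in srmpls/sr.h, for instance
 *       sr_mpls_policy_add(), which mirrors the CLI example
 *       "sr mpls policy add bsid 40001 next 16001 next 16002":
 * @code
 *   mpls_label_t *segments = 0;          /* vppinfra vector of labels */
 *   vec_add1 (segments, 16001);
 *   vec_add1 (segments, 16002);
 *   sr_mpls_policy_add (40001, segments, SR_POLICY_TYPE_DEFAULT,
 *                       SR_SEGMENT_LIST_WEIGHT_DEFAULT);
 *   vec_free (segments);
 * @endcode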
+ * + * @param sr_policy is the SR policy where the SL will be added + * @param sl is a vector of IPv6 addresses composing the Segment List + * @param weight is the weight of the SegmentList (for load-balancing purposes) + * @param is_encap represents the mode (SRH insertion vs Encapsulation) + * + * @return pointer to the just created segment list + */ +static inline mpls_sr_sl_t * +create_sl (mpls_sr_policy_t * sr_policy, mpls_label_t * sl, u32 weight) +{ + mpls_sr_main_t *sm = &sr_mpls_main; + mpls_sr_sl_t *segment_list; + + pool_get (sm->sid_lists, segment_list); + memset (segment_list, 0, sizeof (*segment_list)); + + vec_add1 (sr_policy->segments_lists, segment_list - sm->sid_lists); + + /* Fill in segment list */ + segment_list->weight = + (weight != (u32) ~ 0 ? weight : SR_SEGMENT_LIST_WEIGHT_DEFAULT); + segment_list->segments = vec_dup (sl); + + fib_route_path_t path = { + .frp_proto = FIB_PROTOCOL_MPLS, + .frp_sw_if_index = ~0, + .frp_fib_index = 0, + .frp_weight = segment_list->weight, + .frp_flags = FIB_ROUTE_PATH_FLAG_NONE, + .frp_label_stack = NULL, + .frp_local_label = sl[0], + }; + + vec_add (path.frp_label_stack, sl + 1, vec_len (sl) - 1); + + fib_route_path_t *paths = NULL; + vec_add1 (paths, path); + + mpls_eos_bit_t eos; + FOR_EACH_MPLS_EOS_BIT (eos) + { + /* *INDENT-OFF* */ + fib_prefix_t pfx = { + .fp_len = 21, + .fp_proto = FIB_PROTOCOL_MPLS, + .fp_label = sr_policy->bsid, + .fp_eos = eos, + .fp_payload_proto = DPO_PROTO_MPLS, + }; + /* *INDENT-ON* */ + + fib_table_entry_path_add2 (0, + &pfx, + FIB_SOURCE_SR, + (sr_policy->type == SR_POLICY_TYPE_DEFAULT ? + FIB_ENTRY_FLAG_NONE : + FIB_ENTRY_FLAG_MULTICAST), paths); + } + + vec_free (paths); + + return segment_list; +} + +/******************************* SR rewrite API *******************************/ +/* Three functions for handling sr policies: + * -> sr_mpls_policy_add + * -> sr_mpls_policy_del + * -> sr_mpls_policy_mod + * All of them are API. CLI function on sr_policy_command_fn */ + +/** + * @brief Create a new SR policy + * + * @param bsid is the bindingSID of the SR Policy + * @param segments is a vector of MPLS labels composing the segment list + * @param behavior is the behavior of the SR policy. 
(default//spray) + * @param fib_table is the VRF where to install the FIB entry for the BSID + * @param weight is the weight of this specific SID list + * + * @return 0 if correct, else error + */ +int +sr_mpls_policy_add (mpls_label_t bsid, mpls_label_t * segments, + u8 behavior, u32 weight) +{ + mpls_sr_main_t *sm = &sr_mpls_main; + mpls_sr_policy_t *sr_policy = 0; + uword *p; + + /* Search for existing keys (BSID) */ + p = hash_get (sm->sr_policies_index_hash, bsid); + if (p) + { + /* Add SR policy that already exists; complain */ + return -12; + } + + /* Add an SR policy object */ + pool_get (sm->sr_policies, sr_policy); + memset (sr_policy, 0, sizeof (*sr_policy)); + sr_policy->bsid = bsid; + sr_policy->type = behavior; + + /* Copy the key */ + hash_set (sm->sr_policies_index_hash, bsid, sr_policy - sm->sr_policies); + + /* Create a segment list and add the index to the SR policy */ + create_sl (sr_policy, segments, weight); + + return 0; +} + +/** + * @brief Delete a SR policy + * + * @param bsid is the bindingSID of the SR Policy + * @param index is the index of the SR policy + * + * @return 0 if correct, else error + */ +int +sr_mpls_policy_del (mpls_label_t bsid, u32 index) +{ + mpls_sr_main_t *sm = &sr_mpls_main; + mpls_sr_policy_t *sr_policy = 0; + mpls_sr_sl_t *segment_list; + mpls_eos_bit_t eos; + u32 *sl_index; + uword *p; + + if (bsid) + { + p = hash_get (sm->sr_policies_index_hash, bsid); + if (p) + sr_policy = pool_elt_at_index (sm->sr_policies, p[0]); + else + return -1; + } + else + { + sr_policy = pool_elt_at_index (sm->sr_policies, index); + if (!sr_policy) + return -1; + } + + /* Clean SID Lists */ + vec_foreach (sl_index, sr_policy->segments_lists) + { + segment_list = pool_elt_at_index (sm->sid_lists, *sl_index); + + fib_route_path_t path = { + .frp_proto = FIB_PROTOCOL_MPLS, + .frp_sw_if_index = ~0, + .frp_fib_index = 0, + .frp_weight = segment_list->weight, + .frp_flags = FIB_ROUTE_PATH_FLAG_NONE, + .frp_local_label = segment_list->segments[0], + }; + + fib_route_path_t *paths = NULL; + vec_add1 (paths, path); + + /* remove each of the MPLS routes */ + FOR_EACH_MPLS_EOS_BIT (eos) + { + /* *INDENT-OFF* */ + fib_prefix_t pfx = { + .fp_len = 21, + .fp_proto = FIB_PROTOCOL_MPLS, + .fp_label = sr_policy->bsid, + .fp_eos = eos, + .fp_payload_proto = DPO_PROTO_MPLS, + }; + /* *INDENT-ON* */ + + fib_table_entry_path_remove2 (0, &pfx, FIB_SOURCE_SR, paths); + } + vec_free (paths); + vec_free (segment_list->segments); + pool_put_index (sm->sid_lists, *sl_index); + } + + /* Remove SR policy entry */ + hash_unset (sm->sr_policies_index_hash, sr_policy->bsid); + pool_put (sm->sr_policies, sr_policy); + + return 0; +} + +/** + * @brief Modify an existing SR policy + * + * The possible modifications are adding a new Segment List, modifying an + * existing Segment List (modify the weight only) and delete a given + * Segment List from the SR Policy. + * + * @param bsid is the bindingSID of the SR Policy + * @param index is the index of the SR policy + * @param fib_table is the VRF where to install the FIB entry for the BSID + * @param operation is the operation to perform (among the top ones) + * @param segments is a vector of IPv6 address composing the segment list + * @param sl_index is the index of the Segment List to modify/delete + * @param weight is the weight of the sid list. optional. 
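 * @note Illustrative usage sketch (values assumed, not taken from this
 *       patch): with the operation codes handled below (1 = add a segment
 *       list, 2 = delete a segment list, 3 = modify a weight), adding a
 *       second segment list to the policy with BSID 40001 and then setting
 *       the weight of segment list index 0 to 4 could look like:
 * @code
 *   mpls_label_t *sl = 0;
 *   vec_add1 (sl, 16004);
 *   vec_add1 (sl, 16005);
 *   sr_mpls_policy_mod (40001, (u32) ~0, 1, sl, (u32) ~0, 3); /* add SL  */
 *   sr_mpls_policy_mod (40001, (u32) ~0, 3, NULL, 0, 4);      /* weight  */
 *   vec_free (sl);
 * @endcode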
+ * + * @return 0 if correct, else error + */ +int +sr_mpls_policy_mod (mpls_label_t bsid, u32 index, u8 operation, + mpls_label_t * segments, u32 sl_index, u32 weight) +{ + mpls_sr_main_t *sm = &sr_mpls_main; + mpls_sr_policy_t *sr_policy = 0; + mpls_sr_sl_t *segment_list; + u32 *sl_index_iterate; + uword *p; + + if (bsid) + { + p = hash_get (sm->sr_policies_index_hash, bsid); + if (p) + sr_policy = pool_elt_at_index (sm->sr_policies, p[0]); + else + return -1; + } + else + { + sr_policy = pool_elt_at_index (sm->sr_policies, index); + if (!sr_policy) + return -1; + } + + if (operation == 1) /* Add SR List to an existing SR policy */ + { + /* Create the new SL */ + segment_list = create_sl (sr_policy, segments, weight); + + } + else if (operation == 2) /* Delete SR List from an existing SR policy */ + { + /* Check that currently there are more than one SID list */ + if (vec_len (sr_policy->segments_lists) == 1) + return -21; + + /* Check that the SR list does exist and is assigned to the sr policy */ + vec_foreach (sl_index_iterate, sr_policy->segments_lists) + if (*sl_index_iterate == sl_index) + break; + + if (*sl_index_iterate != sl_index) + return -22; + + /* Remove the lucky SR list that is being kicked out */ + segment_list = pool_elt_at_index (sm->sid_lists, sl_index); + + mpls_eos_bit_t eos; + fib_route_path_t path = { + .frp_proto = FIB_PROTOCOL_MPLS, + .frp_sw_if_index = ~0, + .frp_fib_index = 0, + .frp_weight = segment_list->weight, + .frp_flags = FIB_ROUTE_PATH_FLAG_NONE, + .frp_local_label = segment_list->segments[0], + }; + + fib_route_path_t *paths = NULL; + vec_add1 (paths, path); + + FOR_EACH_MPLS_EOS_BIT (eos) + { + /* *INDENT-OFF* */ + fib_prefix_t pfx = { + .fp_len = 21, + .fp_proto = FIB_PROTOCOL_MPLS, + .fp_label = sr_policy->bsid, + .fp_eos = eos, + .fp_payload_proto = DPO_PROTO_MPLS, + }; + /* *INDENT-ON* */ + + fib_table_entry_path_remove2 (0, &pfx, FIB_SOURCE_SR, paths); + } + + vec_free (paths); + vec_free (segment_list->segments); + pool_put_index (sm->sid_lists, sl_index); + vec_del1 (sr_policy->segments_lists, + sl_index_iterate - sr_policy->segments_lists); + } + else if (operation == 3) /* Modify the weight of an existing SR List */ + { + /* Find the corresponding SL */ + vec_foreach (sl_index_iterate, sr_policy->segments_lists) + if (*sl_index_iterate == sl_index) + break; + + if (*sl_index_iterate != sl_index) + return -32; + + /* Change the weight */ + segment_list = pool_elt_at_index (sm->sid_lists, sl_index); + segment_list->weight = weight; + + /* Update LB */ + //FIXME + } + return 0; +} + +/** + * @brief CLI for 'sr mpls policies' command family + */ +static clib_error_t * +sr_mpls_policy_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + int rv = -1; + char is_del = 0, is_add = 0, is_mod = 0; + char policy_set = 0; + mpls_label_t bsid, next_label; + u32 sr_policy_index = (u32) ~ 0, sl_index = (u32) ~ 0; + u32 weight = (u32) ~ 0; + mpls_label_t *segments = 0; + u8 operation = 0; + u8 is_spray = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (!is_add && !is_mod && !is_del && unformat (input, "add")) + is_add = 1; + else if (!is_add && !is_mod && !is_del && unformat (input, "del")) + is_del = 1; + else if (!is_add && !is_mod && !is_del && unformat (input, "mod")) + is_mod = 1; + else if (!policy_set + && unformat (input, "bsid %U", unformat_mpls_unicast_label, + &bsid)) + policy_set = 1; + else if (!is_add && !policy_set + && unformat (input, "index %d", &sr_policy_index)) + 
policy_set = 1; + else if (unformat (input, "weight %d", &weight)); + else + if (unformat + (input, "next %U", unformat_mpls_unicast_label, &next_label)) + { + vec_add (segments, &next_label, 1); + } + else if (unformat (input, "add sl")) + operation = 1; + else if (unformat (input, "del sl index %d", &sl_index)) + operation = 2; + else if (unformat (input, "mod sl index %d", &sl_index)) + operation = 3; + else if (unformat (input, "spray")) + is_spray = 1; + else + break; + } + + if (!is_add && !is_mod && !is_del) + return clib_error_return (0, "Incorrect CLI"); + + if (!policy_set) + return clib_error_return (0, "No SR policy BSID or index specified"); + + if (is_add) + { + if (vec_len (segments) == 0) + return clib_error_return (0, "No Segment List specified"); + + rv = sr_mpls_policy_add (bsid, segments, + (is_spray ? SR_POLICY_TYPE_SPRAY : + SR_POLICY_TYPE_DEFAULT), weight); + } + else if (is_del) + rv = + sr_mpls_policy_del ((sr_policy_index != (u32) ~ 0 ? (u32) ~ 0 : bsid), + sr_policy_index); + else if (is_mod) + { + if (!operation) + return clib_error_return (0, "No SL modification specified"); + if (operation != 1 && sl_index == (u32) ~ 0) + return clib_error_return (0, "No Segment List index specified"); + if (operation == 1 && vec_len (segments) == 0) + return clib_error_return (0, "No Segment List specified"); + if (operation == 3 && weight == (u32) ~ 0) + return clib_error_return (0, "No new weight for the SL specified"); + rv = + sr_mpls_policy_mod ((sr_policy_index != (u32) ~ 0 ? (u32) ~ 0 : bsid), + sr_policy_index, operation, segments, + sl_index, weight); + } + + switch (rv) + { + case 0: + break; + case 1: + return 0; + case -12: + return clib_error_return (0, + "There is already a FIB entry for the BindingSID address.\n" + "The SR policy could not be created."); + case -21: + return clib_error_return (0, + "The selected SR policy only contains ONE segment list. " + "Please remove the SR policy instead"); + case -22: + return clib_error_return (0, + "Could not delete the segment list. " + "It is not associated with that SR policy."); + case -32: + return clib_error_return (0, + "Could not modify the segment list. " + "The given SL is not associated with such SR policy."); + default: + return clib_error_return (0, "BUG: sr policy returns %d", rv); + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (sr_mpls_policy_command, static) = { + .path = "sr mpls policy", + .short_help = "sr mpls policy [add||del||mod] bsid 2999 " + "next 10 next 20 next 30 (weight 1) (spray)", + .long_help = "TBD.\n", + .function = sr_mpls_policy_command_fn, +}; +/* *INDENT-ON* */ + +/** + * @brief CLI to display onscreen all the SR MPLS policies + */ +static clib_error_t * +show_sr_mpls_policies_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + mpls_sr_main_t *sm = &sr_mpls_main; + mpls_sr_sl_t *segment_list = 0; + mpls_sr_policy_t *sr_policy = 0; + mpls_sr_policy_t **vec_policies = 0; + mpls_label_t *label; + u32 *sl_index; + u8 *s; + int i = 0; + + vlib_cli_output (vm, "SR MPLS policies:"); + + /* *INDENT-OFF* */ + pool_foreach (sr_policy, sm->sr_policies, {vec_add1 (vec_policies, sr_policy); } ); + /* *INDENT-ON* */ + + vec_foreach_index (i, vec_policies) + { + sr_policy = vec_policies[i]; + vlib_cli_output (vm, "[%u].-\tBSID: %U", + (u32) (sr_policy - sm->sr_policies), + format_mpls_unicast_label, sr_policy->bsid); + vlib_cli_output (vm, "\tType: %s", + (sr_policy->type == + SR_POLICY_TYPE_DEFAULT ? 
"Default" : "Spray")); + vlib_cli_output (vm, "\tSegment Lists:"); + vec_foreach (sl_index, sr_policy->segments_lists) + { + s = NULL; + segment_list = pool_elt_at_index (sm->sid_lists, *sl_index); + s = format (s, "\t[%u].- ", *sl_index); + s = format (s, "< "); + vec_foreach (label, segment_list->segments) + { + s = format (s, "%U, ", format_mpls_unicast_label, *label); + } + s = format (s, "\b\b > "); + vlib_cli_output (vm, " %s", s); + } + vlib_cli_output (vm, "-----------"); + } + vec_free (vec_policies); + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_sr_mpls_policies_command, static) = { + .path = "show sr mpls policies", + .short_help = "show sr mpls policies", + .function = show_sr_mpls_policies_command_fn, +}; +/* *INDENT-ON* */ + +/********************* SR MPLS Policy initialization ***********************/ +/** + * @brief SR MPLS Policy initialization + */ +clib_error_t * +sr_mpls_policy_rewrite_init (vlib_main_t * vm) +{ + mpls_sr_main_t *sm = &sr_mpls_main; + + /* Init memory for sr policy keys (bsid <-> ip6_address_t) */ + sm->sr_policies_index_hash = hash_create (0, sizeof (mpls_label_t)); + + return 0; +} + +VLIB_INIT_FUNCTION (sr_mpls_policy_rewrite_init); + +/* +* fd.io coding-style-patch-verification: ON +* +* Local Variables: +* eval: (c-set-style "gnu") +* End: +*/ diff --git a/src/vnet/srmpls/sr_mpls_steering.c b/src/vnet/srmpls/sr_mpls_steering.c new file mode 100755 index 00000000..37707049 --- /dev/null +++ b/src/vnet/srmpls/sr_mpls_steering.c @@ -0,0 +1,453 @@ +/* + * sr_steering.c: ipv6 segment routing steering into SR policy + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file + * @brief Packet steering into SR-MPLS Policies + * + * This file is in charge of handling the FIB appropiatly to steer packets + * through SR Policies as defined in 'sr_mpls_policy.c'. Notice that here + * we are only doing steering. 
SR policy application is done in + * sr_policy_rewrite.c + * + * Supports: + * - Steering of IPv6 traffic Destination Address based + * - Steering of IPv4 traffic Destination Address based + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +/** + * @brief Steer traffic L3 traffic through a given SR-MPLS policy + * + * @param is_del + * @param bsid is the bindingSID of the SR Policy (alt to sr_policy_index) + * @param sr_policy is the index of the SR Policy (alt to bsid) + * @param table_id is the VRF where to install the FIB entry for the BSID + * @param prefix is the IPv4/v6 address for L3 traffic type + * @param mask_width is the mask for L3 traffic type + * @param traffic_type describes the type of traffic + * + * @return 0 if correct, else error + */ +int +sr_mpls_steering_policy (int is_del, mpls_label_t bsid, u32 sr_policy_index, + u32 table_id, ip46_address_t * prefix, + u32 mask_width, u8 traffic_type) +{ + mpls_sr_main_t *sm = &sr_mpls_main; + sr_mpls_steering_key_t key; + mpls_sr_steering_policy_t *steer_pl; + fib_prefix_t pfx = { 0 }; + + mpls_sr_policy_t *sr_policy = 0; + uword *p = 0; + + memset (&key, 0, sizeof (sr_mpls_steering_key_t)); + + /* Compute the steer policy key */ + if (traffic_type == SR_STEER_IPV4 || traffic_type == SR_STEER_IPV6) + { + key.prefix.as_u64[0] = prefix->as_u64[0]; + key.prefix.as_u64[1] = prefix->as_u64[1]; + key.mask_width = mask_width; + key.fib_table = (table_id != (u32) ~ 0 ? table_id : 0); + } + else + return -1; + + key.traffic_type = traffic_type; + + /* Search for the item */ + p = mhash_get (&sm->sr_steer_policies_hash, &key); + + if (p) + { + /* Retrieve Steer Policy function */ + steer_pl = pool_elt_at_index (sm->steer_policies, p[0]); + + if (is_del) + { + if (steer_pl->classify.traffic_type == SR_STEER_IPV6) + { + /* Remove FIB entry */ + pfx.fp_proto = FIB_PROTOCOL_IP6; + pfx.fp_len = steer_pl->classify.mask_width; + pfx.fp_addr.ip6 = steer_pl->classify.prefix.ip6; + + fib_table_entry_delete (fib_table_find + (FIB_PROTOCOL_MPLS, + steer_pl->classify.fib_table), &pfx, + FIB_SOURCE_SR); + } + else if (steer_pl->classify.traffic_type == SR_STEER_IPV4) + { + /* Remove FIB entry */ + pfx.fp_proto = FIB_PROTOCOL_IP4; + pfx.fp_len = steer_pl->classify.mask_width; + pfx.fp_addr.ip4 = steer_pl->classify.prefix.ip4; + + fib_table_entry_delete (fib_table_find + (FIB_PROTOCOL_MPLS, + steer_pl->classify.fib_table), &pfx, + FIB_SOURCE_SR); + } + + /* Delete SR steering policy entry */ + pool_put (sm->steer_policies, steer_pl); + mhash_unset (&sm->sr_steer_policies_hash, &key, NULL); + + return 1; + } + else /* It means user requested to update an existing SR steering policy */ + { + /* Retrieve SR steering policy */ + if (bsid) //TODO FIXME + { + p = hash_get (sm->sr_policies_index_hash, bsid); + if (p) + sr_policy = pool_elt_at_index (sm->sr_policies, p[0]); + else + return -2; + } + else + sr_policy = pool_elt_at_index (sm->sr_policies, sr_policy_index); + + if (!sr_policy) + return -2; + + steer_pl->sr_policy = sr_policy - sm->sr_policies; + + /* Remove old FIB/hw redirection and create a new one */ + if (steer_pl->classify.traffic_type == SR_STEER_IPV6) + { + /* Remove FIB entry */ + pfx.fp_proto = FIB_PROTOCOL_IP6; + pfx.fp_len = steer_pl->classify.mask_width; + pfx.fp_addr.ip6 = steer_pl->classify.prefix.ip6; + + fib_table_entry_delete (fib_table_find + (FIB_PROTOCOL_IP6, + steer_pl->classify.fib_table), &pfx, + FIB_SOURCE_SR); + + /* Create a new one */ + goto update_fib; + } + else if 
(steer_pl->classify.traffic_type == SR_STEER_IPV4) + { + /* Remove FIB entry */ + pfx.fp_proto = FIB_PROTOCOL_IP4; + pfx.fp_len = steer_pl->classify.mask_width; + pfx.fp_addr.ip4 = steer_pl->classify.prefix.ip4; + + fib_table_entry_delete (fib_table_find + (FIB_PROTOCOL_IP4, + steer_pl->classify.fib_table), &pfx, + FIB_SOURCE_SR); + + /* Create a new one */ + goto update_fib; + } + } + } + else + /* delete; steering policy does not exist; complain */ + if (is_del) + return -4; + + /* Retrieve SR policy */ + if (bsid) //FIX + { + p = hash_get (sm->sr_policies_index_hash, bsid); + if (p) + sr_policy = pool_elt_at_index (sm->sr_policies, p[0]); + else + return -2; + } + else + sr_policy = pool_elt_at_index (sm->sr_policies, sr_policy_index); + + /* Create a new steering policy */ + pool_get (sm->steer_policies, steer_pl); + memset (steer_pl, 0, sizeof (*steer_pl)); + + if (traffic_type == SR_STEER_IPV4 || traffic_type == SR_STEER_IPV6) + { + clib_memcpy (&steer_pl->classify.prefix, prefix, + sizeof (ip46_address_t)); + steer_pl->classify.mask_width = mask_width; + steer_pl->classify.fib_table = (table_id != (u32) ~ 0 ? table_id : 0); + steer_pl->classify.traffic_type = traffic_type; + } + else + { + /* Incorrect API usage. Should never get here */ + pool_put (sm->steer_policies, steer_pl); + mhash_unset (&sm->sr_steer_policies_hash, &key, NULL); + return -1; + } + steer_pl->sr_policy = sr_policy - sm->sr_policies; + + /* Create and store key */ + mhash_set (&sm->sr_steer_policies_hash, &key, steer_pl - sm->steer_policies, + NULL); + +update_fib:; + + fib_route_path_t path = { + .frp_proto = FIB_PROTOCOL_MPLS, + .frp_local_label = sr_policy->bsid, + .frp_eos = MPLS_EOS, + .frp_sw_if_index = ~0, + .frp_fib_index = 0, + .frp_weight = 1, + .frp_flags = FIB_ROUTE_PATH_FLAG_NONE, + .frp_label_stack = NULL + }; + + fib_route_path_t *paths = NULL; + + /* FIB API calls - Recursive route through the BindingSID */ + if (traffic_type == SR_STEER_IPV6) + { + pfx.fp_proto = FIB_PROTOCOL_IP6; + pfx.fp_len = steer_pl->classify.mask_width; + pfx.fp_addr.ip6 = steer_pl->classify.prefix.ip6; + path.frp_fib_index = 0; + + vec_add1 (paths, path); + + fib_table_entry_path_add2 (fib_table_find + (FIB_PROTOCOL_IP6, + (table_id != (u32) ~ 0 ? table_id : 0)), + &pfx, FIB_SOURCE_SR, + FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths); + + vec_free (paths); + } + else if (traffic_type == SR_STEER_IPV4) + { + pfx.fp_proto = FIB_PROTOCOL_IP4; + pfx.fp_len = steer_pl->classify.mask_width; + pfx.fp_addr.ip4 = steer_pl->classify.prefix.ip4; + path.frp_fib_index = 0; + + vec_add1 (paths, path); + + fib_table_entry_path_add2 (fib_table_find + (FIB_PROTOCOL_IP4, + (table_id != (u32) ~ 0 ? 
table_id : 0)), + &pfx, FIB_SOURCE_SR, + FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths); + + vec_free (paths); + } + + return 0; +} + +static clib_error_t * +sr_mpls_steer_policy_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + int is_del = 0; + + ip46_address_t prefix; + u32 dst_mask_width = 0; + u8 traffic_type = 0; + u32 fib_table = (u32) ~ 0; + + mpls_label_t bsid; + u32 sr_policy_index = (u32) ~ 0; + + u8 sr_policy_set = 0; + + memset (&prefix, 0, sizeof (ip46_address_t)); + + int rv; + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "del")) + is_del = 1; + else if (!traffic_type + && unformat (input, "l3 %U/%d", unformat_ip6_address, + &prefix.ip6, &dst_mask_width)) + traffic_type = SR_STEER_IPV6; + else if (!traffic_type + && unformat (input, "l3 %U/%d", unformat_ip4_address, + &prefix.ip4, &dst_mask_width)) + traffic_type = SR_STEER_IPV4; + else if (!sr_policy_set + && unformat (input, "via sr policy index %d", + &sr_policy_index)) + sr_policy_set = 1; + else if (!sr_policy_set + && unformat (input, "via sr policy bsid %U", + unformat_mpls_unicast_label, &bsid)) + sr_policy_set = 1; + else if (fib_table == (u32) ~ 0 + && unformat (input, "fib-table %d", &fib_table)); + else + break; + } + + if (!traffic_type) + return clib_error_return (0, "No L3 traffic specified"); + if (!sr_policy_set) + return clib_error_return (0, "No SR policy specified"); + + /* Make sure that the prefixes are clean */ + if (traffic_type == SR_STEER_IPV4) + { + u32 mask = + (dst_mask_width ? (0xFFFFFFFFu >> (32 - dst_mask_width)) : 0); + prefix.ip4.as_u32 &= mask; + } + else if (traffic_type == SR_STEER_IPV6) + { + ip6_address_t mask; + ip6_address_mask_from_width (&mask, dst_mask_width); + ip6_address_mask (&prefix.ip6, &mask); + } + + rv = + sr_mpls_steering_policy (is_del, bsid, + sr_policy_index, fib_table, &prefix, + dst_mask_width, traffic_type); + + switch (rv) + { + case 0: + break; + case 1: + return 0; + case -1: + return clib_error_return (0, "Incorrect API usage."); + case -2: + return clib_error_return (0, + "The requested SR policy could not be located. Review the BSID/index."); + case -3: + return clib_error_return (0, + "Unable to do SW redirect. 
Incorrect interface."); + case -4: + return clib_error_return (0, + "The requested SR steering policy could not be deleted."); + case -5: + return clib_error_return (0, + "The SR policy is not an encapsulation one."); + default: + return clib_error_return (0, "BUG: sr steer policy returns %d", rv); + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (sr_mpls_steer_policy_command, static) = { + .path = "sr mpls steer", + .short_help = "sr mpls steer (del) l3 " + "via sr policy bsid (fib-table )", + .long_help = + "\tSteer L3 traffic through an existing SR policy.\n" + "\tExamples:\n" + "\t\tsr steer l3 2001::/64 via sr_policy index 5\n" + "\t\tsr steer l3 2001::/64 via sr_policy bsid 29999\n" + "\t\tsr steer del l3 2001::/64 via sr_policy index 5\n", + .function = sr_mpls_steer_policy_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +show_sr_mpls_steering_policies_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + mpls_sr_main_t *sm = &sr_mpls_main; + mpls_sr_steering_policy_t **steer_policies = 0; + mpls_sr_steering_policy_t *steer_pl; + + mpls_sr_policy_t *pl = 0; + int i; + + vlib_cli_output (vm, "SR MPLS steering policies:"); + /* *INDENT-OFF* */ + pool_foreach (steer_pl, sm->steer_policies, ({vec_add1(steer_policies, steer_pl);})); + /* *INDENT-ON* */ + vlib_cli_output (vm, "Traffic\t\tSR policy BSID"); + for (i = 0; i < vec_len (steer_policies); i++) + { + steer_pl = steer_policies[i]; + pl = pool_elt_at_index (sm->sr_policies, steer_pl->sr_policy); + if (steer_pl->classify.traffic_type == SR_STEER_IPV4) + { + vlib_cli_output (vm, "L3 %U/%d\t%U", + format_ip4_address, + &steer_pl->classify.prefix.ip4, + steer_pl->classify.mask_width, + format_mpls_unicast_label, pl->bsid); + } + else if (steer_pl->classify.traffic_type == SR_STEER_IPV6) + { + vlib_cli_output (vm, "L3 %U/%d\t%U", + format_ip6_address, + &steer_pl->classify.prefix.ip6, + steer_pl->classify.mask_width, + format_mpls_unicast_label, pl->bsid); + } + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_sr_mpls_steering_policies_command, static) = { + .path = "show sr mpls steering policies", + .short_help = "show sr mpls steering policies", + .function = show_sr_mpls_steering_policies_command_fn, +}; +/* *INDENT-ON* */ + +clib_error_t * +sr_mpls_steering_init (vlib_main_t * vm) +{ + mpls_sr_main_t *sm = &sr_mpls_main; + + /* Init memory for function keys */ + mhash_init (&sm->sr_steer_policies_hash, sizeof (uword), + sizeof (sr_mpls_steering_key_t)); + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_INIT_FUNCTION (sr_mpls_steering_init); +/* *INDENT-ON* */ + +/* +* fd.io coding-style-patch-verification: ON +* +* Local Variables: +* eval: (c-set-style "gnu") +* End: +*/ diff --git a/src/vnet/srv6/dir.dox b/src/vnet/srv6/dir.dox new file mode 100755 index 00000000..3f539a58 --- /dev/null +++ b/src/vnet/srv6/dir.dox @@ -0,0 +1,25 @@ +/* + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/** + @dir + @brief Segment Routing code + + An implementation of Segment Routing as per: + draft-ietf-6man-segment-routing-header-05 + + @see ietf_draft_05.txt + +*/ \ No newline at end of file diff --git a/src/vnet/srv6/ietf_draft_05.txt b/src/vnet/srv6/ietf_draft_05.txt new file mode 100755 index 00000000..e9bff04f --- /dev/null +++ b/src/vnet/srv6/ietf_draft_05.txt @@ -0,0 +1,1564 @@ +Network Working Group S. Previdi, Ed. +Internet-Draft C. Filsfils +Intended status: Standards Track Cisco Systems, Inc. +Expires: August 5, 2017 B. Field + Comcast + I. Leung + Rogers Communications + J. Linkova + Google + E. Aries + Facebook + T. Kosugi + NTT + E. Vyncke + Cisco Systems, Inc. + D. Lebrun + Universite Catholique de Louvain + February 1, 2017 + + + IPv6 Segment Routing Header (SRH) + draft-ietf-6man-segment-routing-header-05 + +Abstract + + Segment Routing (SR) allows a node to steer a packet through a + controlled set of instructions, called segments, by prepending an SR + header to the packet. A segment can represent any instruction, + topological or service-based. SR allows to enforce a flow through + any path (topological, or application/service based) while + maintaining per-flow state only at the ingress node to the SR domain. + + Segment Routing can be applied to the IPv6 data plane with the + addition of a new type of Routing Extension Header. This draft + describes the Segment Routing Extension Header Type and how it is + used by SR capable nodes. + +Requirements Language + + The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", + "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this + document are to be interpreted as described in RFC 2119 [RFC2119]. + +Status of This Memo + + This Internet-Draft is submitted in full conformance with the + provisions of BCP 78 and BCP 79. + + + + +Previdi, et al. Expires August 5, 2017 [Page 1] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + Internet-Drafts are working documents of the Internet Engineering + Task Force (IETF). Note that other groups may also distribute + working documents as Internet-Drafts. The list of current Internet- + Drafts is at http://datatracker.ietf.org/drafts/current/. + + Internet-Drafts are draft documents valid for a maximum of six months + and may be updated, replaced, or obsoleted by other documents at any + time. It is inappropriate to use Internet-Drafts as reference + material or to cite them other than as "work in progress." + + This Internet-Draft will expire on August 5, 2017. + +Copyright Notice + + Copyright (c) 2017 IETF Trust and the persons identified as the + document authors. All rights reserved. + + This document is subject to BCP 78 and the IETF Trust's Legal + Provisions Relating to IETF Documents + (http://trustee.ietf.org/license-info) in effect on the date of + publication of this document. Please review these documents + carefully, as they describe your rights and restrictions with respect + to this document. Code Components extracted from this document must + include Simplified BSD License text as described in Section 4.e of + the Trust Legal Provisions and are provided without warranty as + described in the Simplified BSD License. + +Table of Contents + + 1. Segment Routing Documents . . . . . . . . . . . . . . . . . . 3 + 2. Introduction . . . . . . . . . . . . . . . . . . . . . . . . 3 + 2.1. Data Planes supporting Segment Routing . . . . . . . . . 4 + 2.2. Segment Routing (SR) Domain . . . . . . . . . . . . . . . 4 + 2.2.1. 
SR Domain in a Service Provider Network . . . . . . . 5 + 2.2.2. SR Domain in a Overlay Network . . . . . . . . . . . 6 + 3. Segment Routing Extension Header (SRH) . . . . . . . . . . . 7 + 3.1. SRH TLVs . . . . . . . . . . . . . . . . . . . . . . . . 9 + 3.1.1. Ingress Node TLV . . . . . . . . . . . . . . . . . . 10 + 3.1.2. Egress Node TLV . . . . . . . . . . . . . . . . . . . 11 + 3.1.3. Opaque Container TLV . . . . . . . . . . . . . . . . 11 + 3.1.4. Padding TLV . . . . . . . . . . . . . . . . . . . . . 12 + 3.1.5. HMAC TLV . . . . . . . . . . . . . . . . . . . . . . 13 + 3.2. SRH and RFC2460 behavior . . . . . . . . . . . . . . . . 14 + 4. SRH Procedures . . . . . . . . . . . . . . . . . . . . . . . 14 + 4.1. Source SR Node . . . . . . . . . . . . . . . . . . . . . 14 + 4.2. Transit Node . . . . . . . . . . . . . . . . . . . . . . 15 + 4.3. SR Segment Endpoint Node . . . . . . . . . . . . . . . . 16 + 5. Security Considerations . . . . . . . . . . . . . . . . . . . 16 + + + +Previdi, et al. Expires August 5, 2017 [Page 2] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + 5.1. Threat model . . . . . . . . . . . . . . . . . . . . . . 17 + 5.1.1. Source routing threats . . . . . . . . . . . . . . . 17 + 5.1.2. Applicability of RFC 5095 to SRH . . . . . . . . . . 17 + 5.1.3. Service stealing threat . . . . . . . . . . . . . . . 18 + 5.1.4. Topology disclosure . . . . . . . . . . . . . . . . . 18 + 5.1.5. ICMP Generation . . . . . . . . . . . . . . . . . . . 18 + 5.2. Security fields in SRH . . . . . . . . . . . . . . . . . 19 + 5.2.1. Selecting a hash algorithm . . . . . . . . . . . . . 20 + 5.2.2. Performance impact of HMAC . . . . . . . . . . . . . 21 + 5.2.3. Pre-shared key management . . . . . . . . . . . . . . 21 + 5.3. Deployment Models . . . . . . . . . . . . . . . . . . . . 22 + 5.3.1. Nodes within the SR domain . . . . . . . . . . . . . 22 + 5.3.2. Nodes outside of the SR domain . . . . . . . . . . . 22 + 5.3.3. SR path exposure . . . . . . . . . . . . . . . . . . 23 + 5.3.4. Impact of BCP-38 . . . . . . . . . . . . . . . . . . 23 + 6. IANA Considerations . . . . . . . . . . . . . . . . . . . . . 24 + 7. Manageability Considerations . . . . . . . . . . . . . . . . 24 + 8. Contributors . . . . . . . . . . . . . . . . . . . . . . . . 24 + 9. Acknowledgements . . . . . . . . . . . . . . . . . . . . . . 24 + 10. References . . . . . . . . . . . . . . . . . . . . . . . . . 25 + 10.1. Normative References . . . . . . . . . . . . . . . . . . 25 + 10.2. Informative References . . . . . . . . . . . . . . . . . 25 + Authors' Addresses . . . . . . . . . . . . . . . . . . . . . . . 27 + +1. Segment Routing Documents + + Segment Routing terminology is defined in + [I-D.ietf-spring-segment-routing]. + + Segment Routing use cases are described in [RFC7855] and + [I-D.ietf-spring-ipv6-use-cases]. + + Segment Routing protocol extensions are defined in + [I-D.ietf-isis-segment-routing-extensions], and + [I-D.ietf-ospf-ospfv3-segment-routing-extensions]. + +2. Introduction + + Segment Routing (SR), defined in [I-D.ietf-spring-segment-routing], + allows a node to steer a packet through a controlled set of + instructions, called segments, by prepending an SR header to the + packet. A segment can represent any instruction, topological or + service-based. SR allows to enforce a flow through any path + (topological or service/application based) while maintaining per-flow + state only at the ingress node to the SR domain. 
Segments can be + derived from different components: IGP, BGP, Services, Contexts, + Locators, etc. The list of segment forming the path is called the + Segment List and is encoded in the packet header. + + + +Previdi, et al. Expires August 5, 2017 [Page 3] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + SR allows the use of strict and loose source based routing paradigms + without requiring any additional signaling protocols in the + infrastructure hence delivering an excellent scalability property. + + The source based routing model described in + [I-D.ietf-spring-segment-routing] is inherited from the ones proposed + by [RFC1940] and [RFC2460]. The source based routing model offers + the support for explicit routing capability. + +2.1. Data Planes supporting Segment Routing + + Segment Routing (SR), can be instantiated over MPLS + ([I-D.ietf-spring-segment-routing-mpls]) and IPv6. This document + defines its instantiation over the IPv6 data-plane based on the use- + cases defined in [I-D.ietf-spring-ipv6-use-cases]. + + This document defines a new type of Routing Header (originally + defined in [RFC2460]) called the Segment Routing Header (SRH) in + order to convey the Segment List in the packet header as defined in + [I-D.ietf-spring-segment-routing]. Mechanisms through which segment + are known and advertised are outside the scope of this document. + + A segment is materialized by an IPv6 address. A segment identifies a + topological instruction or a service instruction. A segment can be + either: + + o global: a global segment represents an instruction supported by + all nodes in the SR domain and it is instantiated through an IPv6 + address globally known in the SR domain. + + o local: a local segment represents an instruction supported only by + the node who originates it and it is instantiated through an IPv6 + address that is known only by the local node. + +2.2. Segment Routing (SR) Domain + + We define the concept of the Segment Routing Domain (SR Domain) as + the set of nodes participating into the source based routing model. + These nodes may be connected to the same physical infrastructure + (e.g.: a Service Provider's network) as well as nodes remotely + connected to each other (e.g.: an enterprise VPN or an overlay). + + A non-exhaustive list of examples of SR Domains is: + + o The network of an operator, service provider, content provider, + enterprise including nodes, links and Autonomous Systems. + + + + + +Previdi, et al. Expires August 5, 2017 [Page 4] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + o A set of nodes connected as an overlay over one or more transit + providers. The overlay nodes exchange SR-enabled traffic with + segments belonging solely to the overlay routers (the SR domain). + None of the segments in the SR-enabled packets exchanged by the + overlay belong to the transit networks + + The source based routing model through its instantiation of the + Segment Routing Header (SRH) defined in this document equally applies + to all the above examples. + + It is assumed in this document that the SRH is added to the packet by + its source, consistently with the source routing model defined in + [RFC2460]. For example: + + o At the node originating the packet (host, server). + + o At the ingress node of an SR domain where the ingress node + receives an IPv6 packet and encapsulates it into an outer IPv6 + header followed by a Segment Routing header. + +2.2.1. 
SR Domain in a Service Provider Network + + The following figure illustrates an SR domain consisting of an + operator's network infrastructure. + + (-------------------------- Operator 1 -----------------------) + ( ) + ( (-----AS 1-----) (-------AS 2-------) (----AS 3-------) ) + ( ( ) ( ) ( ) ) + A1--(--(--11---13--14-)--(-21---22---23--24-)--(-31---32---34--)--)--Z1 + ( ( /|\ /|\ /| ) ( |\ /|\ /|\ /| ) ( |\ /|\ /| \ ) ) + A2--(--(/ | \/ | \/ | ) ( | \/ | \/ | \/ | ) ( | \/ | \/ | \)--)--Z2 + ( ( | /\ | /\ | ) ( | /\ | /\ | /\ | ) ( | /\ | /\ | ) ) + ( ( |/ \|/ \| ) ( |/ \|/ \|/ \| ) ( |/ \|/ \| ) ) + A3--(--(--15---17--18-)--(-25---26---27--28-)--(-35---36---38--)--)--Z3 + ( ( ) ( ) ( ) ) + ( (--------------) (------------------) (---------------) ) + ( ) + (-------------------------------------------------------------) + + Figure 1: Service Provider SR Domain + + Figure 1 describes an operator network including several ASes and + delivering connectivity between endpoints. In this scenario, Segment + Routing is used within the operator networks and across the ASes + boundaries (all being under the control of the same operator). In + this case segment routing can be used in order to address use cases + such as end-to-end traffic engineering, fast re-route, egress peer + + + +Previdi, et al. Expires August 5, 2017 [Page 5] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + engineering, data-center traffic engineering as described in + [RFC7855], [I-D.ietf-spring-ipv6-use-cases] and + [I-D.ietf-spring-resiliency-use-cases]. + + Typically, an IPv6 packet received at ingress (i.e.: from outside the + SR domain), is classified according to network operator policies and + such classification results into an outer header with an SRH applied + to the incoming packet. The SRH contains the list of segment + representing the path the packet must take inside the SR domain. + Thus, the SA of the packet is the ingress node, the DA (due to SRH + procedures described in Section 4) is set as the first segment of the + path and the last segment of the path is the egress node of the SR + domain. + + The path may include intra-AS as well as inter-AS segments. It has + to be noted that all nodes within the SR domain are under control of + the same administration. When the packet reaches the egress point of + the SR domain, the outer header and its SRH are removed so that the + destination of the packet is unaware of the SR domain the packet has + traversed. + + The outer header with the SRH is no different from any other + tunneling encapsulation mechanism and allows a network operator to + implement traffic engineering mechanisms so to efficiently steer + traffic across his infrastructure. + +2.2.2. SR Domain in a Overlay Network + + The following figure illustrates an SR domain consisting of an + overlay network over multiple operator's networks. + + (--Operator 1---) (-----Operator 2-----) (--Operator 3---) + ( ) ( ) ( ) + A1--(--11---13--14--)--(--21---22---23--24--)--(-31---32---34--)--C1 + ( /|\ /|\ /| ) ( |\ /|\ /|\ /| ) ( |\ /|\ /| \ ) + A2--(/ | \/ | \/ | ) ( | \/ | \/ | \/ | ) ( | \/ | \/ | \)--C2 + ( | /\ | /\ | ) ( | /\ | /\ | /\ | ) ( | /\ | /\ | ) + ( |/ \|/ \| ) ( |/ \|/ \|/ \| ) ( |/ \|/ \| ) + A3--(--15---17--18--)--(--25---26---27--28--)--(-35---36---38--)--C3 + ( ) ( | | | ) ( ) + (---------------) (--|----|---------|--) (---------------) + | | | + B1 B2 B3 + + Figure 2: Overlay SR Domain + + + + + + +Previdi, et al. 
Expires August 5, 2017 [Page 6] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + Figure 2 describes an overlay consisting of nodes connected to three + different network operators and forming a single overlay network + where Segment routing packets are exchanged. + + The overlay consists of nodes A1, A2, A3, B1, B2, B3, C1, C2 and C3. + These nodes are connected to their respective network operator and + form an overlay network. + + Each node may originate packets with an SRH which contains, in the + segment list of the SRH or in the DA, segments identifying other + overlay nodes. This implies that packets with an SRH may traverse + operator's networks but, obviously, these SRHs cannot contain an + address/segment of the transit operators 1, 2 and 3. The SRH + originated by the overlay can only contain address/segment under the + administration of the overlay (e.g. address/segments supported by A1, + A2, A3, B1, B2, B3, C1,C2 or C3). + + In this model, the operator network nodes are transit nodes and, + according to [RFC2460], MUST NOT inspect the routing extension header + since they are not the DA of the packet. + + It is a common practice in operators networks to filter out, at + ingress, any packet whose DA is the address of an internal node and + it is also possible that an operator would filter out any packet + destined to an internal address and having an extension header in it. + + This common practice does not impact the SR-enabled traffic between + the overlay nodes as the intermediate transit networks never see a + destination address belonging to their infrastructure. These SR- + enabled overlay packets will thus never be filtered by the transit + operators. + + In all cases, transit packets (i.e.: packets whose DA is outside the + domain of the operator's network) will be forwarded accordingly + without introducing any security concern in the operator's network. + This is similar to tunneled packets. + +3. Segment Routing Extension Header (SRH) + + A new type of the Routing Header (originally defined in [RFC2460]) is + defined: the Segment Routing Header (SRH) which has a new Routing + Type, (suggested value 4) to be assigned by IANA. + + The Segment Routing Header (SRH) is defined as follows: + + + + + + + +Previdi, et al. Expires August 5, 2017 [Page 7] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Next Header | Hdr Ext Len | Routing Type | Segments Left | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | First Segment | Flags | RESERVED | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + | Segment List[0] (128 bits IPv6 address) | + | | + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + | | + ... + | | + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + | Segment List[n] (128 bits IPv6 address) | + | | + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + // // + // Optional Type Length Value objects (variable) // + // // + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + where: + + o Next Header: 8-bit selector. Identifies the type of header + immediately following the SRH. + + o Hdr Ext Len: 8-bit unsigned integer, is the length of the SRH + header in 8-octet units, not including the first 8 octets. 
+ + o Routing Type: TBD, to be assigned by IANA (suggested value: 4). + + o Segments Left. Defined in [RFC2460], it contains the index, in + the Segment List, of the next segment to inspect. Segments Left + is decremented at each segment. + + o First Segment: contains the index, in the Segment List, of the + first segment of the path which is in fact the last element of the + Segment List. + + o Flags: 8 bits of flags. Following flags are defined: + + + + +Previdi, et al. Expires August 5, 2017 [Page 8] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + 0 1 2 3 4 5 6 7 + +-+-+-+-+-+-+-+-+ + |U|P|O|A|H| U | + +-+-+-+-+-+-+-+-+ + + U: Unused and for future use. SHOULD be unset on transmission + and MUST be ignored on receipt. + + P-flag: Protected flag. Set when the packet has been rerouted + through FRR mechanism by an SR endpoint node. + + O-flag: OAM flag. When set, it indicates that this packet is + an operations and management (OAM) packet. + + A-flag: Alert flag. If present, it means important Type Length + Value (TLV) objects are present. See Section 3.1 for details + on TLVs objects. + + H-flag: HMAC flag. If set, the HMAC TLV is present and is + encoded as the last TLV of the SRH. In other words, the last + 36 octets of the SRH represent the HMAC information. See + Section 3.1.5 for details on the HMAC TLV. + + o RESERVED: SHOULD be unset on transmission and MUST be ignored on + receipt. + + o Segment List[n]: 128 bit IPv6 addresses representing the nth + segment in the Segment List. The Segment List is encoded starting + from the last segment of the path. I.e., the first element of the + segment list (Segment List [0]) contains the last segment of the + path while the last segment of the Segment List (Segment List[n]) + contains the first segment of the path. The index contained in + "Segments Left" identifies the current active segment. + + o Type Length Value (TLV) are described in Section 3.1. + +3.1. SRH TLVs + + This section defines TLVs of the Segment Routing Header. + + Type Length Value (TLV) contain optional information that may be used + by the node identified in the DA of the packet. It has to be noted + that the information carried in the TLVs is not intended to be used + by the routing layer. Typically, TLVs carry information that is + consumed by other components (e.g.: OAM) than the routing function. + + Each TLV has its own length, format and semantic. The code-point + allocated (by IANA) to each TLV defines both the format and the + + + +Previdi, et al. Expires August 5, 2017 [Page 9] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + semantic of the information carried in the TLV. Multiple TLVs may be + encoded in the same SRH. + + The "Length" field of the TLV is primarily used to skip the TLV while + inspecting the SRH in case the node doesn't support or recognize the + TLV codepoint. The "Length" defines the TLV length in octets and not + including the "Type" and "Length" fields. + + The primary scope of TLVs is to give the receiver of the packet + information related to the source routed path (e.g.: where the packet + entered in the SR domain and where it is expected to exit). + + Additional TLVs may be defined in the future. + +3.1.1. Ingress Node TLV + + The Ingress Node TLV is optional and identifies the node this packet + traversed when entered the SR domain. 
The Ingress Node TLV has + following format: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type | Length | RESERVED | Flags | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + | Ingress Node (16 octets) | + | | + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + where: + + o Type: to be assigned by IANA (suggested value 1). + + o Length: 18. + + o RESERVED: 8 bits. SHOULD be unset on transmission and MUST be + ignored on receipt. + + o Flags: 8 bits. No flags are defined in this document. + + o Ingress Node: 128 bits. Defines the node where the packet is + expected to enter the SR domain. In the encapsulation case + described in Section 2.2.1, this information corresponds to the SA + of the encapsulating header. + + + + + +Previdi, et al. Expires August 5, 2017 [Page 10] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + +3.1.2. Egress Node TLV + + The Egress Node TLV is optional and identifies the node this packet + is expected to traverse when exiting the SR domain. The Egress Node + TLV has following format: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type | Length | RESERVED | Flags | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + | Egress Node (16 octets) | + | | + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + where: + + o Type: to be assigned by IANA (suggested value 2). + + o Length: 18. + + o RESERVED: 8 bits. SHOULD be unset on transmission and MUST be + ignored on receipt. + + o Flags: 8 bits. No flags are defined in this document. + + o Egress Node: 128 bits. Defines the node where the packet is + expected to exit the SR domain. In the encapsulation case + described in Section 2.2.1, this information corresponds to the + last segment of the SRH in the encapsulating header. + +3.1.3. Opaque Container TLV + + The Opaque Container TLV is optional and has the following format: + + + + + + + + + + + + + + + +Previdi, et al. Expires August 5, 2017 [Page 11] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type | Length | RESERVED | Flags | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + | Opaque Container (16 octets) | + | | + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + where: + + o Type: to be assigned by IANA (suggested value 3). + + o Length: 18. + + o RESERVED: 8 bits. SHOULD be unset on transmission and MUST be + ignored on receipt. + + o Flags: 8 bits. No flags are defined in this document. + + o Opaque Container: 128 bits of opaque data not relevant for the + routing layer. Typically, this information is consumed by a non- + routing component of the node receiving the packet (i.e.: the node + in the DA). + +3.1.4. Padding TLV + + The Padding TLV is optional and with the purpose of aligning the SRH + on a 8 octet boundary. 
The Padding TLV has the following format: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type | Length | Padding (variable) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + // Padding (variable) // + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + where: + + o Type: to be assigned by IANA (suggested value 4). + + o Length: 1 to 7 + + + + + + +Previdi, et al. Expires August 5, 2017 [Page 12] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + o Padding: from 1 to 7 octets of padding. Padding bits have no + semantic. They SHOULD be set to 0 on transmission and MUST be + ignored on receipt. + + The following applies to the Padding TLV: + + o Padding TLV is optional and MAY only appear once in the SRH. If + present, it MUST have a length between 1 and 7 octets. + + o The Padding TLV is used in order to align the SRH total length on + the 8 octet boundary. + + o When present, the Padding TLV MUST appear as the last TLV before + the HMAC TLV (if HMAC TLV is present). + + o When present, the Padding TLV MUST have a length from 1 to 7 in + order to align the SRH total lenght on a 8-octet boundary. + + o When a router inspecting the SRH encounters the Padding TLV, it + MUST assume that no other TLV (other than the HMAC) follow the + Padding TLV. + +3.1.5. HMAC TLV + + HMAC TLV is optional and contains the HMAC information. The HMAC TLV + has the following format: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type | Length | RESERVED | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | HMAC Key ID (4 octets) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | // + | HMAC (32 octets) // + | // + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + where: + + o Type: to be assigned by IANA (suggested value 5). + + o Length: 38. + + o RESERVED: 2 octets. SHOULD be unset on transmission and MUST be + ignored on receipt. + + + + +Previdi, et al. Expires August 5, 2017 [Page 13] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + o HMAC Key ID: 4 octets. + + o HMAC: 32 octets. + + o HMAC and HMAC Key ID usage is described in Section 5 + + The Following applies to the HMAC TLV: + + o When present, the HMAC TLV MUST be encoded as the last TLV of the + SRH. + + o If the HMAC TLV is present, the SRH H-Flag (Figure 4) MUST be set. + + o When the H-flag is set in the SRH, the router inspecting the SRH + MUST find the HMAC TLV in the last 38 octets of the SRH. + +3.2. SRH and RFC2460 behavior + + The SRH being a new type of the Routing Header, it also has the same + properties: + + SHOULD only appear once in the packet. + + Only the router whose address is in the DA field of the packet + header MUST inspect the SRH. + + Therefore, Segment Routing in IPv6 networks implies that the segment + identifier (i.e.: the IPv6 address of the segment) is moved into the + DA of the packet. + + The DA of the packet changes at each segment termination/completion + and therefore the final DA of the packet MUST be encoded as the last + segment of the path. + +4. SRH Procedures + + In this section we describe the different procedures on the SRH. + +4.1. Source SR Node + + A Source SR Node can be any node originating an IPv6 packet with its + IPv6 and Segment Routing Headers. 
This include either: + + A host originating an IPv6 packet. + + An SR domain ingress router encapsulating a received IPv6 packet + into an outer IPv6 header followed by an SRH. + + + + +Previdi, et al. Expires August 5, 2017 [Page 14] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + The mechanism through which a Segment List is derived is outside of + the scope of this document. As an example, the Segment List may be + obtained through: + + Local path computation. + + Local configuration. + + Interaction with a centralized controller delivering the path. + + Any other mechanism. + + The following are the steps of the creation of the SRH: + + Next Header and Hdr Ext Len fields are set according to [RFC2460]. + + Routing Type field is set as TBD (to be allocated by IANA, + suggested value 4). + + The Segment List is built with the FIRST segment of the path + encoded in the LAST element of the Segment List. Subsequent + segments are encoded on top of the first segment. Finally, the + LAST segment of the path is encoded in the FIRST element of the + Segment List. In other words, the Segment List is encoded in the + reverse order of the path. + + The final DA of the packet is encoded as the last segment of the + path (encoded in the first element of the Segment List). + + The DA of the packet is set with the value of the first segment + (found in the last element of the segment list). + + The Segments Left field is set to n-1 where n is the number of + elements in the Segment List. + + The First Segment field is set to n-1 where n is the number of + elements in the Segment List. + + The packet is sent out towards the first segment (i.e.: + represented in the packet DA). + + HMAC TLV may be set according to Section 5. + +4.2. Transit Node + + According to [RFC2460], the only node who is allowed to inspect the + Routing Extension Header (and therefore the SRH), is the node + corresponding to the DA of the packet. Any other transit node MUST + + + +Previdi, et al. Expires August 5, 2017 [Page 15] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + NOT inspect the underneath routing header and MUST forward the packet + towards the DA and according to the IPv6 routing table. + + In the example case described in Section 2.2.2, when SR capable nodes + are connected through an overlay spanning multiple third-party + infrastructure, it is safe to send SRH packets (i.e.: packet having a + Segment Routing Header) between each other overlay/SR-capable nodes + as long as the segment list does not include any of the transit + provider nodes. In addition, as a generic security measure, any + service provider will block any packet destined to one of its + internal routers, especially if these packets have an extended header + in it. + +4.3. SR Segment Endpoint Node + + The SR segment endpoint node is the node whose address is in the DA. + The segment endpoint node inspects the SRH and does: + + 1. IF DA = myself (segment endpoint) + 2. IF Segments Left > 0 THEN + decrement Segments Left + update DA with Segment List[Segments Left] + 3. ELSE continue IPv6 processing of the packet + End of processing. + 4. Forward the packet out + +5. Security Considerations + + This section analyzes the security threat model, the security issues + and proposed solutions related to the new Segment Routing Header. 
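+
+   As a companion to the segment endpoint procedure of Section 4.3, the
+   following is a minimal C sketch of steps 2-4.  It assumes the caller
+   has already verified that the packet's DA is a local segment, and the
+   ip6_addr_t/srh_t types and field names are illustrative only; they
+   are not taken from VPP, from a kernel header, or from any
+   IANA-assigned values.
+
+   #include <stdint.h>
+   #include <string.h>
+
+   typedef struct { uint8_t u8[16]; } ip6_addr_t;
+
+   typedef struct {
+     uint8_t  next_header;      /* type of the header following the SRH */
+     uint8_t  hdr_ext_len;      /* 8-octet units, excluding first 8     */
+     uint8_t  routing_type;     /* TBD by IANA, suggested value 4       */
+     uint8_t  segments_left;    /* index of the next segment to inspect */
+     uint8_t  first_segment;    /* index of the last Segment List entry */
+     uint8_t  flags;
+     uint16_t reserved;
+     ip6_addr_t segment_list[]; /* encoded in reverse order of the path */
+   } srh_t;
+
+   /* Returns 1 when the packet must be forwarded to an updated DA,
+    * 0 when normal processing of the next header continues (step 3). */
+   static int
+   srh_endpoint_process (ip6_addr_t * da, srh_t * srh)
+   {
+     if (srh->segments_left == 0)
+       return 0;                     /* Segments Left == 0: step 3 */
+
+     srh->segments_left -= 1;        /* step 2: decrement Segments Left */
+     memcpy (da, &srh->segment_list[srh->segments_left],
+             sizeof (ip6_addr_t));   /* step 2: update the DA */
+     return 1;                       /* step 4: forward the packet out */
+   }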
+ + The Segment Routing Header (SRH) is simply another type of the + routing header as described in RFC 2460 [RFC2460] and is: + + o Added by an SR edge router when entering the segment routing + domain or by the originating host itself. The source host can + even be outside the SR domain; + + o inspected and acted upon when reaching the destination address of + the IP header per RFC 2460 [RFC2460]. + + Per RFC2460 [RFC2460], routers on the path that simply forward an + IPv6 packet (i.e. the IPv6 destination address is none of theirs) + will never inspect and process the content of the SRH. Routers whose + one interface IPv6 address equals the destination address field of + the IPv6 packet MUST parse the SRH and, if supported and if the local + configuration allows it, MUST act accordingly to the SRH content. + + + + +Previdi, et al. Expires August 5, 2017 [Page 16] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + According to RFC2460 [RFC2460], the default behavior of a non SR- + capable router upon receipt of an IPv6 packet with SRH destined to an + address of its, is to: + + o ignore the SRH completely if the Segment Left field is 0 and + proceed to process the next header in the IPv6 packet; + + o discard the IPv6 packet if Segment Left field is greater than 0, + it MAY send a Parameter Problem ICMP message back to the Source + Address. + +5.1. Threat model + +5.1.1. Source routing threats + + Using an SRH is similar to source routing, therefore it has some + well-known security issues as described in RFC4942 [RFC4942] section + 2.1.1 and RFC5095 [RFC5095]: + + o amplification attacks: where a packet could be forged in such a + way to cause looping among a set of SR-enabled routers causing + unnecessary traffic, hence a Denial of Service (DoS) against + bandwidth; + + o reflection attack: where a hacker could force an intermediate node + to appear as the immediate attacker, hence hiding the real + attacker from naive forensic; + + o bypass attack: where an intermediate node could be used as a + stepping stone (for example in a De-Militarized Zone) to attack + another host (for example in the datacenter or any back-end + server). + +5.1.2. Applicability of RFC 5095 to SRH + + First of all, the reader must remember this specific part of section + 1 of RFC5095 [RFC5095], "A side effect is that this also eliminates + benign RH0 use-cases; however, such applications may be facilitated + by future Routing Header specifications.". In short, it is not + forbidden to create new secure type of Routing Header; for example, + RFC 6554 (RPL) [RFC6554] also creates a new Routing Header type for a + specific application confined in a single network. + + In the segment routing architecture described in + [I-D.ietf-spring-segment-routing] there are basically two kinds of + nodes (routers and hosts): + + + + + +Previdi, et al. Expires August 5, 2017 [Page 17] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + o nodes within the SR domain, which is within one single + administrative domain, i.e., where all nodes are trusted anyway + else the damage caused by those nodes could be worse than + amplification attacks: traffic interception, man-in-the-middle + attacks, more server DoS by dropping packets, and so on. 
+ + o nodes outside of the SR domain, which is outside of the + administrative segment routing domain hence they cannot be trusted + because there is no physical security for those nodes, i.e., they + can be replaced by hostile nodes or can be coerced in wrong + behaviors. + + The main use case for SR consists of the single administrative domain + where only trusted nodes with SR enabled and configured participate + in SR: this is the same model as in RFC6554 [RFC6554]. All non- + trusted nodes do not participate as either SR processing is not + enabled by default or because they only process SRH from nodes within + their domain. + + Moreover, all SR nodes ignore SRH created by outsiders based on + topology information (received on a peering or internal interface) or + on presence and validity of the HMAC field. Therefore, if + intermediate nodes ONLY act on valid and authorized SRH (such as + within a single administrative domain), then there is no security + threat similar to RH-0. Hence, the RFC 5095 [RFC5095] attacks are + not applicable. + +5.1.3. Service stealing threat + + Segment routing is used for added value services, there is also a + need to prevent non-participating nodes to use those services; this + is called 'service stealing prevention'. + +5.1.4. Topology disclosure + + The SRH may also contains IPv6 addresses of some intermediate SR- + nodes in the path towards the destination, this obviously reveals + those addresses to the potentially hostile attackers if those + attackers are able to intercept packets containing SRH. On the other + hand, if the attacker can do a traceroute whose probes will be + forwarded along the SR path, then there is little learned by + intercepting the SRH itself. + +5.1.5. ICMP Generation + + Per section 4.4 of RFC2460 [RFC2460], when destination nodes (i.e. + where the destination address is one of theirs) receive a Routing + Header with unsupported Routing Type, the required behavior is: + + + +Previdi, et al. Expires August 5, 2017 [Page 18] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + o If Segments Left is zero, the node must ignore the Routing header + and proceed to process the next header in the packet. + + o If Segments Left is non-zero, the node must discard the packet and + send an ICMP Parameter Problem, Code 0, message to the packet's + Source Address, pointing to the unrecognized Routing Type. + + This required behavior could be used by an attacker to force the + generation of ICMP message by any node. The attacker could send + packets with SRH (with Segment Left set to 0) destined to a node not + supporting SRH. Per RFC2460 [RFC2460], the destination node could + generate an ICMP message, causing a local CPU utilization and if the + source of the offending packet with SRH was spoofed could lead to a + reflection attack without any amplification. + + It must be noted that this is a required behavior for any unsupported + Routing Type and not limited to SRH packets. So, it is not specific + to SRH and the usual rate limiting for ICMP generation is required + anyway for any IPv6 implementation and has been implemented and + deployed for many years. + +5.2. Security fields in SRH + + This section summarizes the use of specific fields in the SRH. They + are based on a key-hashed message authentication code (HMAC). 
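+
+   For reference, the HMAC TLV wire format of Section 3.1.5 can be
+   sketched as a packed C structure; the type and field names below are
+   illustrative only and are not taken from an existing implementation.
+
+   #include <stdint.h>
+
+   typedef struct __attribute__ ((packed)) {
+     uint8_t  type;        /* suggested value 5, to be assigned by IANA */
+     uint8_t  length;      /* 38 octets following the Type/Length pair  */
+     uint16_t reserved;    /* unset on transmission, ignored on receipt */
+     uint32_t hmac_key_id; /* 0 means no HMAC field is present          */
+     uint8_t  hmac[32];    /* 256-bit keyed hash, described below       */
+   } srh_hmac_tlv_t;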
+ + The security-related fields in the SRH are instantiated by the HMAC + TLV, containing: + + o HMAC Key-id, 32 bits wide; + + o HMAC, 256 bits wide (optional, exists only if HMAC Key-id is not + 0). + + The HMAC field is the output of the HMAC computation (per RFC 2104 + [RFC2104]) using a pre-shared key identified by HMAC Key-id and of + the text which consists of the concatenation of: + + o the source IPv6 address; + + o First Segment field; + + o an octet of bit flags; + + o HMAC Key-id; + + o all addresses in the Segment List. + + + + +Previdi, et al. Expires August 5, 2017 [Page 19] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + The purpose of the HMAC TLV is to verify the validity, the integrity + and the authorization of the SRH itself. If an outsider of the SR + domain does not have access to a current pre-shared secret, then it + cannot compute the right HMAC field and the first SR router on the + path processing the SRH and configured to check the validity of the + HMAC will simply reject the packet. + + The HMAC TLV is located at the end of the SRH simply because only the + router on the ingress of the SR domain needs to process it, then all + other SR nodes can ignore it (based on local policy) because they + trust the upstream router. This is to speed up forwarding operations + because SR routers which do not validate the SRH do not need to parse + the SRH until the end. + + The HMAC Key-id field allows for the simultaneous existence of + several hash algorithms (SHA-256, SHA3-256 ... or future ones) as + well as pre-shared keys. The HMAC Key-id field is opaque, i.e., it + has neither syntax nor semantic except as an index to the right + combination of pre-shared key and hash algorithm and except that a + value of 0 means that there is no HMAC field. Having an HMAC Key-id + field allows for pre-shared key roll-over when two pre-shared keys + are supported for a while when all SR nodes converged to a fresher + pre-shared key. It could also allow for interoperation among + different SR domains if allowed by local policy and assuming a + collision-free HMAC Key Id allocation. + + When a specific SRH is linked to a time-related service (such as + turbo-QoS for a 1-hour period) where the DA, Segment ID (SID) are + identical, then it is important to refresh the shared-secret + frequently as the HMAC validity period expires only when the HMAC + Key-id and its associated shared-secret expires. + +5.2.1. Selecting a hash algorithm + + The HMAC field in the HMAC TLV is 256 bit wide. Therefore, the HMAC + MUST be based on a hash function whose output is at least 256 bits. + If the output of the hash function is 256, then this output is simply + inserted in the HMAC field. If the output of the hash function is + larger than 256 bits, then the output value is truncated to 256 by + taking the least-significant 256 bits and inserting them in the HMAC + field. + + SRH implementations can support multiple hash functions but MUST + implement SHA-2 [FIPS180-4] in its SHA-256 variant. + + NOTE: SHA-1 is currently used by some early implementations used for + quick interoperations testing, the 160-bit hash value must then be + + + + +Previdi, et al. Expires August 5, 2017 [Page 20] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + right-hand padded with 96 bits set to 0. The authors understand that + this is not secure but is ok for limited tests. + +5.2.2. 
Performance impact of HMAC
+
+   While adding an HMAC to each and every SR packet increases the
+   security, it has a performance impact.  Nevertheless, it must be
+   noted that:
+
+   o  the HMAC field is used only when the SRH is added by a device
+      (such as a home set-up box) which is outside of the segment
+      routing domain.  If the SRH is added by a router in the trusted
+      segment routing domain, then there is no need for an HMAC field,
+      hence no performance impact.
+
+   o  when present, the HMAC field MUST only be checked and validated
+      by the first router of the segment routing domain; this router is
+      named the 'validating SR router'.  Downstream routers may not
+      inspect the HMAC field.
+
+   o  this validating router can also have a cache of <IPv6 header +
+      SRH, HMAC field value> to improve the performance.  It is not the
+      same use case as in IPsec, where the HMAC value is unique per
+      packet; in SRH, the HMAC value is unique per flow.
+
+   o  finally, hash functions such as SHA-2 have been optimized for
+      security and performance, and there are multiple implementations
+      with good performance.
+
+   With the above points in mind, the performance impact of using HMAC
+   is minimized.
+
+5.2.3.  Pre-shared key management
+
+   The HMAC Key-id field allows for:
+
+   o  key roll-over: when there is a need to change the key (the hash
+      pre-shared secret), then multiple pre-shared keys can be used
+      simultaneously.  The validating router can have a table of
+      <key-id, pre-shared secret> for the currently active and future
+      keys.
+
+   o  different algorithms: by extending the previous table to
+      <key-id, hash function, pre-shared secret>, the validating router
+      can also simultaneously support several hash algorithms (see
+      Section 5.2.1).
+
+   The pre-shared secret distribution can be done:
+
+
+
+Previdi, et al.          Expires August 5, 2017               [Page 21]
+
+Internet-Draft    IPv6 Segment Routing Header (SRH)        February 2017
+
+
+   o  in the configuration of the validating routers, either by static
+      configuration or by any SDN-oriented approach;
+
+   o  dynamically, using a trusted key distribution protocol such as
+      [RFC6407].
+
+   The intent of this document is NOT to define yet another key
+   distribution protocol.
+
+5.3.  Deployment Models
+
+5.3.1.  Nodes within the SR domain
+
+   An SR domain is defined as a set of interconnected routers where all
+   routers at the perimeter are configured to add and act on SRH.  Some
+   routers inside the SR domain can also act on SRH or simply forward
+   IPv6 packets.
+
+   The routers inside an SR domain can be trusted to generate SRH and to
+   process SRH received on interfaces that are part of the SR domain.
+   These nodes MUST drop all SRH packets received on an interface that
+   is not part of the SR domain and containing an SRH whose HMAC field
+   cannot be validated by local policies.  This obviously includes
+   packets with an SRH generated by a non-cooperative SR domain.
+
+   If the validation fails, then these packets MUST be dropped, ICMP
+   error messages (parameter problem) SHOULD be generated (but rate
+   limited) and the event SHOULD be logged.
+
+5.3.2.  Nodes outside of the SR domain
+
+   Nodes outside of the SR domain cannot be trusted for physical
+   security; hence, they need to request by some trusted means (outside
+   of the scope of this document) a complete SRH for each new connection
+   (i.e. new destination address).  The received SRH MUST include an
+   HMAC TLV which is computed correctly (see Section 5.2).
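
   As an illustration of the verification described here and in Section
   5.2, a minimal C sketch of what a validating SR router computes is
   shown below.  It only fixes the order of the fields fed into the
   keyed hash (source address, First Segment, flags octet, HMAC Key-id,
   segment list); the srh_t layout and the helpers
   lookup_preshared_key() and hmac_sha256() are assumptions made for
   this example, not part of this specification or of the VPP code.

   #include <stdint.h>
   #include <string.h>

   /* Hypothetical SRH view; field names follow the draft, not VPP. */
   typedef struct {
     uint8_t first_segment;      /* index of the last entry in the list */
     uint8_t flags;              /* octet of bit flags                  */
     uint8_t segments[255][16];  /* segment list, 16 bytes per SID      */
   } srh_t;

   /* Assumed helpers: key lookup by HMAC Key-id, and HMAC-SHA-256. */
   const uint8_t *lookup_preshared_key (uint32_t key_id, size_t *key_len);
   void hmac_sha256 (const uint8_t *key, size_t key_len,
                     const uint8_t *text, size_t text_len, uint8_t out[32]);

   int
   srh_hmac_verify (const uint8_t src_addr[16], const srh_t *srh,
                    uint32_t key_id, const uint8_t rx_hmac[32])
   {
     uint8_t text[16 + 1 + 1 + 4 + 255 * 16], out[32];
     size_t len = 0, key_len, i;
     const uint8_t *key;

     if (key_id == 0 || !(key = lookup_preshared_key (key_id, &key_len)))
       return -1;                        /* Key-id 0 means "no HMAC"    */

     memcpy (text + len, src_addr, 16); len += 16;  /* source address   */
     text[len++] = srh->first_segment;              /* First Segment    */
     text[len++] = srh->flags;                      /* octet of flags   */
     text[len++] = key_id >> 24;                    /* HMAC Key-id,     */
     text[len++] = key_id >> 16;                    /* 32 bits in       */
     text[len++] = key_id >> 8;                     /* network byte     */
     text[len++] = key_id;                          /* order            */
     for (i = 0; i <= srh->first_segment; i++)      /* all segments     */
       { memcpy (text + len, srh->segments[i], 16); len += 16; }

     hmac_sha256 (key, key_len, text, len, out);
     return memcmp (out, rx_hmac, 32) == 0 ? 0 : -1;
   }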
+
+   When an outside node sends a packet with an SRH towards an SR domain
+   ingress node, the packet MUST contain the HMAC TLV (with its Key-id
+   and HMAC fields) and the destination address MUST be an address of
+   an SR domain ingress node.
+
+   The ingress SR router, i.e., the router with an interface address
+   equal to the destination address, MUST verify the HMAC TLV.
+
+   If the validation is successful, then the packet is simply forwarded
+   as usual for an SR packet.  As long as the packet travels within the
+   SR domain, no further HMAC check needs to be done.  Subsequent
+
+
+
+
+Previdi, et al.          Expires August 5, 2017               [Page 22]
+
+Internet-Draft    IPv6 Segment Routing Header (SRH)        February 2017
+
+
+   routers in the SR domain MAY verify the HMAC TLV when they process
+   the SRH (i.e. when they are the destination).
+
+   If the validation fails, then this packet MUST be dropped, an ICMP
+   error message (parameter problem) SHOULD be generated (but rate
+   limited) and the event SHOULD be logged.
+
+5.3.3.  SR path exposure
+
+   As the addresses of the intermediate SR nodes appear in the SRH, an
+   outsider who is able to intercept packets containing the SRH could
+   reuse this knowledge to launch an attack on the intermediate SR
+   nodes or gain some insider knowledge of the topology.  This is
+   especially applicable when the path between the source node and the
+   first SR domain ingress router is on the public Internet.
+
+   The first remark is to state that 'security by obscurity' is never
+   enough; in other words, the security policy of the SR domain MUST
+   assume that the internal topology and addressing is known by the
+   attacker.  A simple traceroute will also give the same information
+   (with even more detail, as all intermediate nodes between SIDs will
+   also be exposed).  IPsec Encapsulating Security Payload [RFC4303]
+   cannot be used to protect the SRH since, per RFC4303, the ESP header
+   must appear after any routing header (including the SRH).
+
+   To prevent a user from leveraging the knowledge gained by
+   intercepting the SRH, it is recommended to apply an infrastructure
+   Access Control List (iACL) at the edge of the SR domain.  This iACL
+   will drop all packets from outside the SR domain whose destination
+   is any address of any router inside the domain.  This security
+   policy should be tuned for local operations.
+
+5.3.4.  Impact of BCP-38
+
+   BCP-38 [RFC2827], also known as "Network Ingress Filtering", checks
+   whether the source address of packets received on an interface is
+   valid for this interface.  The use of loose source routing such as
+   SRH forces packets to follow a path which differs from the expected
+   routing.  Therefore, if BCP-38 were implemented in all routers
+   inside the SR domain, then SR packets could be received by an
+   interface which is not the expected one, and the packets could be
+   dropped.
+
+   As an SR domain is usually a subset of one administrative domain, as
+   BCP-38 is only deployed at the ingress routers of this
+   administrative domain, and as packets arriving at those ingress
+   routers have been forwarded using the normal routing information,
+   there is no reason why this ingress router should
+
+
+
+Previdi, et al.          Expires August 5, 2017               [Page 23]
+
+Internet-Draft    IPv6 Segment Routing Header (SRH)        February 2017
+
+
+   drop the SRH packet based on BCP-38.  Routers inside the domain
+   commonly do not apply BCP-38; so, this is not a problem.
+
+6.
IANA Considerations + + This document makes the following registrations in the Internet + Protocol Version 6 (IPv6) Parameters "Routing Type" registry + maintained by IANA: + + Suggested Description Reference + Value + ---------------------------------------------------------- + 4 Segment Routing Header (SRH) This document + + In addition, this document request IANA to create and maintain a new + Registry: "Segment Routing Header Type-Value Objects". The following + code-points are requested from the registry: + + Registry: Segment Routing Header Type-Value Objects + + Suggested Description Reference + Value + ----------------------------------------------------- + 1 Ingress Node TLV This document + 2 Egress Node TLV This document + 3 Opaque Container TLV This document + 4 Padding TLV This document + 5 HMAC TLV This document + +7. Manageability Considerations + + TBD + +8. Contributors + + Dave Barach, John Leddy, John Brzozowski, Pierre Francois, Nagendra + Kumar, Mark Townsley, Christian Martin, Roberta Maglione, James + Connolly, Aloys Augustin contributed to the content of this document. + +9. Acknowledgements + + The authors would like to thank Ole Troan, Bob Hinden, Fred Baker, + Brian Carpenter, Alexandru Petrescu and Punit Kumar Jaiswal for their + comments to this document. + + + + + + + +Previdi, et al. Expires August 5, 2017 [Page 24] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + +10. References + +10.1. Normative References + + [FIPS180-4] + National Institute of Standards and Technology, "FIPS + 180-4 Secure Hash Standard (SHS)", March 2012, + . + + [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate + Requirement Levels", BCP 14, RFC 2119, + DOI 10.17487/RFC2119, March 1997, + . + + [RFC2460] Deering, S. and R. Hinden, "Internet Protocol, Version 6 + (IPv6) Specification", RFC 2460, DOI 10.17487/RFC2460, + December 1998, . + + [RFC4303] Kent, S., "IP Encapsulating Security Payload (ESP)", + RFC 4303, DOI 10.17487/RFC4303, December 2005, + . + + [RFC5095] Abley, J., Savola, P., and G. Neville-Neil, "Deprecation + of Type 0 Routing Headers in IPv6", RFC 5095, + DOI 10.17487/RFC5095, December 2007, + . + + [RFC6407] Weis, B., Rowles, S., and T. Hardjono, "The Group Domain + of Interpretation", RFC 6407, DOI 10.17487/RFC6407, + October 2011, . + +10.2. Informative References + + [I-D.ietf-isis-segment-routing-extensions] + Previdi, S., Filsfils, C., Bashandy, A., Gredler, H., + Litkowski, S., Decraene, B., and j. jefftant@gmail.com, + "IS-IS Extensions for Segment Routing", draft-ietf-isis- + segment-routing-extensions-09 (work in progress), October + 2016. + + [I-D.ietf-ospf-ospfv3-segment-routing-extensions] + Psenak, P., Previdi, S., Filsfils, C., Gredler, H., + Shakir, R., Henderickx, W., and J. Tantsura, "OSPFv3 + Extensions for Segment Routing", draft-ietf-ospf-ospfv3- + segment-routing-extensions-07 (work in progress), October + 2016. + + + + +Previdi, et al. Expires August 5, 2017 [Page 25] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + [I-D.ietf-spring-ipv6-use-cases] + Brzozowski, J., Leddy, J., Townsley, W., Filsfils, C., and + R. Maglione, "IPv6 SPRING Use Cases", draft-ietf-spring- + ipv6-use-cases-08 (work in progress), January 2017. + + [I-D.ietf-spring-resiliency-use-cases] + Filsfils, C., Previdi, S., Decraene, B., and R. Shakir, + "Resiliency use cases in SPRING networks", draft-ietf- + spring-resiliency-use-cases-08 (work in progress), October + 2016. 
+ + [I-D.ietf-spring-segment-routing] + Filsfils, C., Previdi, S., Decraene, B., Litkowski, S., + and R. Shakir, "Segment Routing Architecture", draft-ietf- + spring-segment-routing-10 (work in progress), November + 2016. + + [I-D.ietf-spring-segment-routing-mpls] + Filsfils, C., Previdi, S., Bashandy, A., Decraene, B., + Litkowski, S., Horneffer, M., Shakir, R., + jefftant@gmail.com, j., and E. Crabbe, "Segment Routing + with MPLS data plane", draft-ietf-spring-segment-routing- + mpls-06 (work in progress), January 2017. + + [RFC1940] Estrin, D., Li, T., Rekhter, Y., Varadhan, K., and D. + Zappala, "Source Demand Routing: Packet Format and + Forwarding Specification (Version 1)", RFC 1940, + DOI 10.17487/RFC1940, May 1996, + . + + [RFC2104] Krawczyk, H., Bellare, M., and R. Canetti, "HMAC: Keyed- + Hashing for Message Authentication", RFC 2104, + DOI 10.17487/RFC2104, February 1997, + . + + [RFC2827] Ferguson, P. and D. Senie, "Network Ingress Filtering: + Defeating Denial of Service Attacks which employ IP Source + Address Spoofing", BCP 38, RFC 2827, DOI 10.17487/RFC2827, + May 2000, . + + [RFC4942] Davies, E., Krishnan, S., and P. Savola, "IPv6 Transition/ + Co-existence Security Considerations", RFC 4942, + DOI 10.17487/RFC4942, September 2007, + . + + + + + + + +Previdi, et al. Expires August 5, 2017 [Page 26] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + [RFC6554] Hui, J., Vasseur, JP., Culler, D., and V. Manral, "An IPv6 + Routing Header for Source Routes with the Routing Protocol + for Low-Power and Lossy Networks (RPL)", RFC 6554, + DOI 10.17487/RFC6554, March 2012, + . + + [RFC7855] Previdi, S., Ed., Filsfils, C., Ed., Decraene, B., + Litkowski, S., Horneffer, M., and R. Shakir, "Source + Packet Routing in Networking (SPRING) Problem Statement + and Requirements", RFC 7855, DOI 10.17487/RFC7855, May + 2016, . + +Authors' Addresses + + Stefano Previdi (editor) + Cisco Systems, Inc. + Via Del Serafico, 200 + Rome 00142 + Italy + + Email: sprevidi@cisco.com + + + Clarence Filsfils + Cisco Systems, Inc. + Brussels + BE + + Email: cfilsfil@cisco.com + + + Brian Field + Comcast + 4100 East Dry Creek Road + Centennial, CO 80122 + US + + Email: Brian_Field@cable.comcast.com + + + Ida Leung + Rogers Communications + 8200 Dixie Road + Brampton, ON L6T 0C1 + CA + + Email: Ida.Leung@rci.rogers.com + + + + +Previdi, et al. Expires August 5, 2017 [Page 27] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + Jen Linkova + Google + 1600 Amphitheatre Parkway + Mountain View, CA 94043 + US + + Email: furry@google.com + + + Ebben Aries + Facebook + US + + Email: exa@fb.com + + + Tomoya Kosugi + NTT + 3-9-11, Midori-Cho Musashino-Shi, + Tokyo 180-8585 + JP + + Email: kosugi.tomoya@lab.ntt.co.jp + + + Eric Vyncke + Cisco Systems, Inc. + De Kleetlaann 6A + Diegem 1831 + Belgium + + Email: evyncke@cisco.com + + + David Lebrun + Universite Catholique de Louvain + Place Ste Barbe, 2 + Louvain-la-Neuve, 1348 + Belgium + + Email: david.lebrun@uclouvain.be + + + + + + + + + + +Previdi, et al. Expires August 5, 2017 [Page 28] \ No newline at end of file diff --git a/src/vnet/srv6/sr.api b/src/vnet/srv6/sr.api new file mode 100644 index 00000000..9e900741 --- /dev/null +++ b/src/vnet/srv6/sr.api @@ -0,0 +1,168 @@ +/* + * Copyright (c) 2015-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** \brief IPv6 SR LocalSID add/del request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_del Boolean of whether its a delete instruction + @param localsid_addr IPv6 address of the localsid + @param end_psp Boolean of whether decapsulation is allowed in this function + @param behavior Type of behavior (function) for this localsid + @param sw_if_index Only for L2/L3 xconnect. OIF. In VRF variant the fib_table. + @param vlan_index Only for L2 xconnect. Outgoing VLAN tag. + @param fib_table FIB table in which we should install the localsid entry + @param nh_addr Next Hop IPv4/IPv6 address. Only for L2/L3 xconnect. +*/ +autoreply define sr_localsid_add_del +{ + u32 client_index; + u32 context; + u8 is_del; + u8 localsid_addr[16]; + u8 end_psp; + u8 behavior; + u32 sw_if_index; + u32 vlan_index; + u32 fib_table; + u8 nh_addr[16]; +}; + +/** \brief IPv6 SR policy add + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param bsid is the bindingSID of the SR Policy + @param weight is the weight of the sid list. optional. + @param is_encap is the behavior of the SR policy. (0.SRH insert // 1.Encapsulation) + @param type is the type of the SR policy. (0.Default // 1.Spray) + @param fib_table is the VRF where to install the FIB entry for the BSID + @param segments is a vector of IPv6 address composing the segment list +*/ +autoreply define sr_policy_add +{ + u32 client_index; + u32 context; + u8 bsid_addr[16]; + u32 weight; + u8 is_encap; + u8 type; + u32 fib_table; + u8 n_segments; + u8 segments[0]; +}; + +/** \brief IPv6 SR policy modification + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param bsid is the bindingSID of the SR Policy + @param sr_policy_index is the index of the SR policy + @param fib_table is the VRF where to install the FIB entry for the BSID + @param operation is the operation to perform (among the top ones) + @param segments is a vector of IPv6 address composing the segment list + @param sl_index is the index of the Segment List to modify/delete + @param weight is the weight of the sid list. optional. + @param is_encap Mode. Encapsulation or SRH insertion. 
+*/ +autoreply define sr_policy_mod +{ + u32 client_index; + u32 context; + u8 bsid_addr[16]; + u32 sr_policy_index; + u32 fib_table; + u8 operation; + u32 sl_index; + u32 weight; + u8 n_segments; + u8 segments[0]; +}; + +/** \brief IPv6 SR policy deletion + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param bsid is the bindingSID of the SR Policy + @param index is the index of the SR policy +*/ +autoreply define sr_policy_del +{ + u32 client_index; + u32 context; + u8 bsid_addr[16]; + u32 sr_policy_index; +}; + +/** \brief IPv6 SR steering add/del + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_del + @param bsid is the bindingSID of the SR Policy (alt to sr_policy_index) + @param sr_policy is the index of the SR Policy (alt to bsid) + @param table_id is the VRF where to install the FIB entry for the BSID + @param prefix is the IPv4/v6 address for L3 traffic type + @param mask_width is the mask for L3 traffic type + @param sw_if_index is the incoming interface for L2 traffic + @param traffic_type describes the type of traffic +*/ +autoreply define sr_steering_add_del +{ + u32 client_index; + u32 context; + u8 is_del; + u8 bsid_addr[16]; + u32 sr_policy_index; + u32 table_id; + u8 prefix_addr[16]; + u32 mask_width; + u32 sw_if_index; + u8 traffic_type; +}; + +/** \brief Dump the list of SR LocalSIDs + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +/**define sr_localsids_dump +{ + u32 client_index; + u32 context; +};*/ + +/** \brief Details about a single SR LocalSID + @param context - returned sender context, to match reply w/ request + @param localsid_addr IPv6 address of the localsid + @param behavior Type of behavior (function) for this localsid + @param end_psp Boolean of whether decapsulation is allowed in this function + @param sw_if_index Only for L2/L3 xconnect. OIF. In VRF variant the fib_table. + @param vlan_index Only for L2 xconnect. Outgoing VLAN tag. + @param fib_table FIB table in which we should install the localsid entry + @param nh_addr Next Hop IPv4/IPv6 address. Only for L2/L3 xconnect. +*/ +/**manual_endian define sr_localsid_details +{ + u32 context; + u8 localsid_addr[16]; + u8 behavior; + u8 end_psp; + u32 sw_if_index; + u32 vlan_index; + u32 fib_table; + u8 nh_addr[16]; +};*/ + +/* + * fd.io coding-style-patch-verification: ON + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/srv6/sr.c b/src/vnet/srv6/sr.c new file mode 100755 index 00000000..eb4f09e7 --- /dev/null +++ b/src/vnet/srv6/sr.c @@ -0,0 +1,57 @@ +/* + * sr.c: ipv6 segment routing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * @file + * @brief Segment Routing initialization + * + */ + +#include +#include +#include +#include +#include +#include + +ip6_sr_main_t sr_main; + +/** + * @brief no-op lock function. + * The lifetime of the SR entry is managed by the control plane + */ +void +sr_dpo_lock (dpo_id_t * dpo) +{ +} + +/** + * @brief no-op unlock function. + * The lifetime of the SR entry is managed by the control plane + */ +void +sr_dpo_unlock (dpo_id_t * dpo) +{ +} + +/* +* fd.io coding-style-patch-verification: ON +* +* Local Variables: +* eval: (c-set-style "gnu") +* End: +*/ diff --git a/src/vnet/srv6/sr.h b/src/vnet/srv6/sr.h new file mode 100755 index 00000000..2014a23e --- /dev/null +++ b/src/vnet/srv6/sr.h @@ -0,0 +1,325 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file + * @brief Segment Routing data structures definitions + * + */ + +#ifndef included_vnet_srv6_h +#define included_vnet_srv6_h + +#include +#include +#include +#include + +#include +#include + +#define IPv6_DEFAULT_HEADER_LENGTH 40 +#define IPv6_DEFAULT_HOP_LIMIT 64 +#define IPv6_DEFAULT_MAX_MASK_WIDTH 128 + +#define SR_BEHAVIOR_END 1 +#define SR_BEHAVIOR_X 2 +#define SR_BEHAVIOR_D_FIRST 3 /* Unused. 
Separator in between regular and D */ +#define SR_BEHAVIOR_DX2 4 +#define SR_BEHAVIOR_DX6 5 +#define SR_BEHAVIOR_DX4 6 +#define SR_BEHAVIOR_DT6 7 +#define SR_BEHAVIOR_DT4 8 +#define SR_BEHAVIOR_LAST 9 /* Must always be the last one */ + +#define SR_STEER_L2 2 +#define SR_STEER_IPV4 4 +#define SR_STEER_IPV6 6 + +#define SR_FUNCTION_SIZE 4 +#define SR_ARGUMENT_SIZE 4 + +#define SR_SEGMENT_LIST_WEIGHT_DEFAULT 1 + +/** + * @brief SR Segment List (SID list) + */ +typedef struct +{ + ip6_address_t *segments; /**< SIDs (key) */ + + u32 weight; /**< SID list weight (wECMP / UCMP) */ + + u8 *rewrite; /**< Precomputed rewrite header */ + u8 *rewrite_bsid; /**< Precomputed rewrite header for bindingSID */ + + dpo_id_t bsid_dpo; /**< DPO for Encaps/Insert for BSID */ + dpo_id_t ip6_dpo; /**< DPO for Encaps/Insert IPv6 */ + dpo_id_t ip4_dpo; /**< DPO for Encaps IPv6 */ +} ip6_sr_sl_t; + +/* SR policy types */ +#define SR_POLICY_TYPE_DEFAULT 0 +#define SR_POLICY_TYPE_SPRAY 1 +/** + * @brief SR Policy + */ +typedef struct +{ + u32 *segments_lists; /**< SID lists indexes (vector) */ + + ip6_address_t bsid; /**< BindingSID (key) */ + + u8 type; /**< Type (default is 0) */ + /* SR Policy specific DPO */ + /* IF Type = DEFAULT Then Load Balancer DPO among SID lists */ + /* IF Type = SPRAY then Spray DPO with all SID lists */ + dpo_id_t bsid_dpo; /**< SR Policy specific DPO - BSID */ + dpo_id_t ip4_dpo; /**< SR Policy specific DPO - IPv6 */ + dpo_id_t ip6_dpo; /**< SR Policy specific DPO - IPv4 */ + + u32 fib_table; /**< FIB table */ + + u8 is_encap; /**< Mode (0 is SRH insert, 1 Encaps) */ +} ip6_sr_policy_t; + +/** + * @brief SR LocalSID + */ +typedef struct +{ + ip6_address_t localsid; /**< LocalSID IPv6 address */ + + char end_psp; /**< Combined with End.PSP? */ + + u16 behavior; /**< Behavior associated to this localsid */ + + union + { + u32 sw_if_index; /**< xconnect only */ + u32 vrf_index; /**< vrf only */ + }; + + u32 fib_table; /**< FIB table where localsid is registered */ + + u32 vlan_index; /**< VLAN tag (not an index) */ + + ip46_address_t next_hop; /**< Next_hop for xconnect usage only */ + + u32 nh_adj; /**< Next_adj for xconnect usage only */ + + void *plugin_mem; /**< Memory to be used by the plugin callback functions */ +} ip6_sr_localsid_t; + +typedef int (sr_plugin_callback_t) (ip6_sr_localsid_t * localsid); + +/** + * @brief SR LocalSID behavior registration + */ +typedef struct +{ + u16 sr_localsid_function_number; /**< SR LocalSID plugin function (>SR_BEHAVIOR_LAST) */ + + u8 *function_name; /**< Function name. (key). */ + + u8 *keyword_str; /**< Behavior keyword (i.e. End.X) */ + + u8 *def_str; /**< Behavior definition (i.e. Endpoint with cross-connect) */ + + u8 *params_str; /**< Behavior parameters (i.e. 
) */ + + dpo_type_t dpo; /**< DPO type registration */ + + format_function_t *ls_format; /**< LocalSID format function */ + + unformat_function_t *ls_unformat; /**< LocalSID unformat function */ + + sr_plugin_callback_t *creation; /**< Function within plugin that will be called after localsid creation*/ + + sr_plugin_callback_t *removal; /**< Function within plugin that will be called before localsid removal */ +} sr_localsid_fn_registration_t; + +/** + * @brief Steering db key + * + * L3 is IPv4/IPv6 + mask + * L2 is sf_if_index + vlan + */ +typedef struct +{ + union + { + struct + { + ip46_address_t prefix; /**< IP address of the prefix */ + u32 mask_width; /**< Mask width of the prefix */ + u32 fib_table; /**< VRF of the prefix */ + } l3; + struct + { + u32 sw_if_index; /**< Incoming software interface */ + } l2; + }; + u8 traffic_type; /**< Traffic type (IPv4, IPv6, L2) */ + u8 padding[3]; +} sr_steering_key_t; + +typedef struct +{ + sr_steering_key_t classify; /**< Traffic classification */ + u32 sr_policy; /**< SR Policy index */ +} ip6_sr_steering_policy_t; + +/** + * @brief Segment Routing main datastructure + */ +typedef struct +{ + /* L2-input -> SR rewrite next index */ + u32 l2_sr_policy_rewrite_index; + + /* SR SID lists */ + ip6_sr_sl_t *sid_lists; + + /* SRv6 policies */ + ip6_sr_policy_t *sr_policies; + + /* Hash table mapping BindingSID to SRv6 policy */ + mhash_t sr_policies_index_hash; + + /* Pool of SR localsid instances */ + ip6_sr_localsid_t *localsids; + + /* Hash table mapping LOC:FUNC to SR LocalSID instance */ + mhash_t sr_localsids_index_hash; + + /* Pool of SR steer policies instances */ + ip6_sr_steering_policy_t *steer_policies; + + /* Hash table mapping steering rules to SR steer instance */ + mhash_t sr_steer_policies_hash; + + /* L2 steering ifaces - sr_policies */ + u32 *sw_iface_sr_policies; + + /* Spray DPO */ + dpo_type_t sr_pr_spray_dpo_type; + + /* Plugin functions */ + sr_localsid_fn_registration_t *plugin_functions; + + /* Find plugin function by name */ + uword *plugin_functions_by_key; + + /* Counters */ + vlib_combined_counter_main_t sr_ls_valid_counters; + vlib_combined_counter_main_t sr_ls_invalid_counters; + + /* SR Policies FIBs */ + u32 fib_table_ip6; + u32 fib_table_ip4; + + /* convenience */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; +} ip6_sr_main_t; + +extern ip6_sr_main_t sr_main; + +extern vlib_node_registration_t sr_policy_rewrite_encaps_node; +extern vlib_node_registration_t sr_policy_rewrite_insert_node; +extern vlib_node_registration_t sr_localsid_node; +extern vlib_node_registration_t sr_localsid_d_node; + +extern void sr_dpo_lock (dpo_id_t * dpo); +extern void sr_dpo_unlock (dpo_id_t * dpo); + +extern int +sr_localsid_register_function (vlib_main_t * vm, u8 * fn_name, + u8 * keyword_str, u8 * def_str, + u8 * params_str, dpo_type_t * dpo, + format_function_t * ls_format, + unformat_function_t * ls_unformat, + sr_plugin_callback_t * creation_fn, + sr_plugin_callback_t * removal_fn); + +extern int +sr_policy_add (ip6_address_t * bsid, ip6_address_t * segments, + u32 weight, u8 behavior, u32 fib_table, u8 is_encap); +extern int +sr_policy_mod (ip6_address_t * bsid, u32 index, u32 fib_table, + u8 operation, ip6_address_t * segments, u32 sl_index, + u32 weight); +extern int sr_policy_del (ip6_address_t * bsid, u32 index); + +extern int +sr_cli_localsid (char is_del, ip6_address_t * localsid_addr, + char end_psp, u8 behavior, u32 sw_if_index, + u32 vlan_index, u32 fib_table, ip46_address_t * nh_addr, + void *ls_plugin_mem); + 
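/*
 * Illustrative sketch (editorial note, not part of the original file):
 * the control-plane entry points declared in this header are what the
 * CLI handlers and the binary API handlers end up calling.  Assuming
 * hypothetical addresses and the main FIB (table 0), creating an
 * encapsulation SR policy and steering an IPv6 prefix into it could
 * look like the fragment below; error handling is reduced to rv.
 *
 *   ip6_address_t bsid, seg, *segments = 0;
 *   ip46_address_t pfx;
 *   int rv;
 *
 *   memset (&bsid, 0, sizeof (bsid));
 *   memset (&seg, 0, sizeof (seg));
 *   memset (&pfx, 0, sizeof (pfx));
 *   bsid.as_u8[0] = 0xc1; bsid.as_u8[15] = 0x01;   // example BSID c100::1
 *   seg.as_u8[0] = 0xc2; seg.as_u8[15] = 0x01;     // example segment c200::1
 *   vec_add1 (segments, seg);
 *
 *   // SR policy: BSID, one-segment list, default weight and type,
 *   // main FIB, encapsulation mode (is_encap = 1)
 *   rv = sr_policy_add (&bsid, segments, SR_SEGMENT_LIST_WEIGHT_DEFAULT,
 *                       SR_POLICY_TYPE_DEFAULT, 0, 1);
 *
 *   // Steer IPv6 traffic for an example prefix a1::/64 into that policy,
 *   // selecting the policy by its BSID (policy index and sw_if_index unused)
 *   pfx.ip6.as_u8[0] = 0xa1;
 *   rv = sr_steering_policy (0, &bsid, (u32) ~0, 0, &pfx, 64,
 *                            (u32) ~0, SR_STEER_IPV6);
 */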
+extern int +sr_steering_policy (int is_del, ip6_address_t * bsid, u32 sr_policy_index, + u32 table_id, ip46_address_t * prefix, u32 mask_width, + u32 sw_if_index, u8 traffic_type); + +/** + * @brief SR rewrite string computation for SRH insertion (inline) + * + * @param sl is a vector of IPv6 addresses composing the Segment List + * + * @return precomputed rewrite string for SRH insertion + */ +static inline u8 * +ip6_sr_compute_rewrite_string_insert (ip6_address_t * sl) +{ + ip6_sr_header_t *srh; + ip6_address_t *addrp, *this_address; + u32 header_length = 0; + u8 *rs = NULL; + + header_length = 0; + header_length += sizeof (ip6_sr_header_t); + header_length += (vec_len (sl) + 1) * sizeof (ip6_address_t); + + vec_validate (rs, header_length - 1); + + srh = (ip6_sr_header_t *) rs; + srh->type = ROUTING_HEADER_TYPE_SR; + srh->segments_left = vec_len (sl); + srh->first_segment = vec_len (sl); + srh->length = ((sizeof (ip6_sr_header_t) + + ((vec_len (sl) + 1) * sizeof (ip6_address_t))) / 8) - 1; + srh->flags = 0x00; + srh->reserved = 0x0000; + addrp = srh->segments + vec_len (sl); + vec_foreach (this_address, sl) + { + clib_memcpy (addrp->as_u8, this_address->as_u8, sizeof (ip6_address_t)); + addrp--; + } + return rs; +} + + +#endif /* included_vnet_sr_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/srv6/sr_api.c b/src/vnet/srv6/sr_api.c new file mode 100644 index 00000000..925b50a1 --- /dev/null +++ b/src/vnet/srv6/sr_api.c @@ -0,0 +1,244 @@ +/* + *------------------------------------------------------------------ + * sr_api.c - ipv6 segment routing api + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include +#include +#include + +#include +#include +#include + +#include + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) 
vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include +#undef vl_printfun + +#include + +#define foreach_vpe_api_msg \ +_(SR_LOCALSID_ADD_DEL, sr_localsid_add_del) \ +_(SR_POLICY_DEL, sr_policy_del) \ +_(SR_STEERING_ADD_DEL, sr_steering_add_del) +//_(SR_LOCALSIDS, sr_localsids_dump) +//_(SR_LOCALSID_BEHAVIORS, sr_localsid_behaviors_dump) + +static void vl_api_sr_localsid_add_del_t_handler + (vl_api_sr_localsid_add_del_t * mp) +{ + vl_api_sr_localsid_add_del_reply_t *rmp; + int rv = 0; +/* + * int sr_cli_localsid (char is_del, ip6_address_t *localsid_addr, + * char end_psp, u8 behavior, u32 sw_if_index, u32 vlan_index, u32 fib_table, + * ip46_address_t *nh_addr, void *ls_plugin_mem) + */ + rv = sr_cli_localsid (mp->is_del, + (ip6_address_t *) & mp->localsid_addr, + mp->end_psp, + mp->behavior, + ntohl (mp->sw_if_index), + ntohl (mp->vlan_index), + ntohl (mp->fib_table), + (ip46_address_t *) & mp->nh_addr, NULL); + + REPLY_MACRO (VL_API_SR_LOCALSID_ADD_DEL_REPLY); +} + +static void +vl_api_sr_policy_add_t_handler (vl_api_sr_policy_add_t * mp) +{ + vl_api_sr_policy_add_reply_t *rmp; + ip6_address_t *segments = 0, *seg; + ip6_address_t *this_address = (ip6_address_t *) mp->segments; + + int i; + for (i = 0; i < mp->n_segments; i++) + { + vec_add2 (segments, seg, 1); + clib_memcpy (seg->as_u8, this_address->as_u8, sizeof (*this_address)); + this_address++; + } + +/* + * sr_policy_add (ip6_address_t *bsid, ip6_address_t *segments, + * u32 weight, u8 behavior, u32 fib_table, u8 is_encap) + */ + int rv = 0; + rv = sr_policy_add ((ip6_address_t *) & mp->bsid_addr, + segments, + ntohl (mp->weight), + mp->type, ntohl (mp->fib_table), mp->is_encap); + + REPLY_MACRO (VL_API_SR_POLICY_ADD_REPLY); +} + +static void +vl_api_sr_policy_mod_t_handler (vl_api_sr_policy_mod_t * mp) +{ + vl_api_sr_policy_mod_reply_t *rmp; + + ip6_address_t *segments = 0, *seg; + ip6_address_t *this_address = (ip6_address_t *) mp->segments; + + int i; + for (i = 0; i < mp->n_segments; i++) + { + vec_add2 (segments, seg, 1); + clib_memcpy (seg->as_u8, this_address->as_u8, sizeof (*this_address)); + this_address++; + } + + int rv = 0; +/* + * int + * sr_policy_mod(ip6_address_t *bsid, u32 index, u32 fib_table, + * u8 operation, ip6_address_t *segments, u32 sl_index, + * u32 weight, u8 is_encap) + */ + rv = sr_policy_mod ((ip6_address_t *) & mp->bsid_addr, + ntohl (mp->sr_policy_index), + ntohl (mp->fib_table), + mp->operation, + segments, ntohl (mp->sl_index), ntohl (mp->weight)); + + REPLY_MACRO (VL_API_SR_POLICY_MOD_REPLY); +} + +static void +vl_api_sr_policy_del_t_handler (vl_api_sr_policy_del_t * mp) +{ + vl_api_sr_policy_del_reply_t *rmp; + int rv = 0; +/* + * int + * sr_policy_del (ip6_address_t *bsid, u32 index) + */ + rv = sr_policy_del ((ip6_address_t *) & mp->bsid_addr, + ntohl (mp->sr_policy_index)); + + REPLY_MACRO (VL_API_SR_POLICY_DEL_REPLY); +} + +static void vl_api_sr_steering_add_del_t_handler + (vl_api_sr_steering_add_del_t * mp) +{ + vl_api_sr_steering_add_del_reply_t *rmp; + int rv = 0; +/* + * int + * sr_steering_policy(int is_del, ip6_address_t *bsid, u32 sr_policy_index, + * u32 table_id, ip46_address_t *prefix, u32 mask_width, u32 sw_if_index, + * u8 traffic_type) + */ + rv = sr_steering_policy (mp->is_del, + (ip6_address_t *) & mp->bsid_addr, + ntohl (mp->sr_policy_index), + ntohl (mp->table_id), + (ip46_address_t *) & mp->prefix_addr, + ntohl (mp->mask_width), + ntohl (mp->sw_if_index), mp->traffic_type); + + REPLY_MACRO (VL_API_SR_STEERING_ADD_DEL_REPLY); +} + +/* + * sr_api_hookup + * Add 
vpe's API message handlers to the table. + * vlib has alread mapped shared memory and + * added the client registration handlers. + * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process() + */ +#define vl_msg_name_crc_list +#include +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (api_main_t * am) +{ +#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id); + foreach_vl_msg_name_crc_sr; +#undef _ +} + +static clib_error_t * +sr_api_hookup (vlib_main_t * vm) +{ + api_main_t *am = &api_main; + +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_vpe_api_msg; +#undef _ + + /* + * Manually register the sr policy add msg, so we trace + * enough bytes to capture a typical segment list + */ + vl_msg_api_set_handlers (VL_API_SR_POLICY_ADD, + "sr_policy_add", + vl_api_sr_policy_add_t_handler, + vl_noop_handler, + vl_api_sr_policy_add_t_endian, + vl_api_sr_policy_add_t_print, 256, 1); + + /* + * Manually register the sr policy mod msg, so we trace + * enough bytes to capture a typical segment list + */ + vl_msg_api_set_handlers (VL_API_SR_POLICY_MOD, + "sr_policy_mod", + vl_api_sr_policy_mod_t_handler, + vl_noop_handler, + vl_api_sr_policy_mod_t_endian, + vl_api_sr_policy_mod_t_print, 256, 1); + + /* + * Set up the (msg_name, crc, message-id) table + */ + setup_message_id_table (am); + + return 0; +} + +VLIB_API_INIT_FUNCTION (sr_api_hookup); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/srv6/sr_doc.md b/src/vnet/srv6/sr_doc.md new file mode 100644 index 00000000..5cdfc906 --- /dev/null +++ b/src/vnet/srv6/sr_doc.md @@ -0,0 +1,55 @@ +# SRv6: Segment Routing for IPv6 {#srv6_doc} + +This is a memo intended to contain documentation of the VPP SRv6 implementation. +Everything that is not directly obvious should come here. +For any feedback on content that should be explained please mailto:pcamaril@cisco.com + +## Segment Routing + +Segment routing is a network technology focused on addressing the limitations of existing IP and Multiprotocol Label Switching (MPLS) networks in terms of simplicity, scale, and ease of operation. It is a foundation for application engineered routing as it prepares the networks for new business models where applications can control the network behavior. + +Segment routing seeks the right balance between distributed intelligence and centralized optimization and programming. It was built for the software-defined networking (SDN) era. + +Segment routing enhances packet forwarding behavior by enabling a network to transport unicast packets through a specific forwarding path, different from the normal path that a packet usually takes (IGP shortest path or BGP best path). This capability benefits many use cases, and one can build those specific paths based on application requirements. + +Segment routing uses the source routing paradigm. A node, usually a router but also a switch, a trusted server, or a virtual forwarder running on a hypervisor, steers a packet through an ordered list of instructions, called segments. A segment can represent any instruction, topological or service-based. A segment can have a local semantic to a segment-routing node or global within a segment-routing network. 
Segment routing allows an operator to enforce a flow through any topological path and service chain while maintaining per-flow state only at the ingress node to the segment-routing network. Segment routing also supports equal-cost multipath (ECMP) by design.
+
+Segment routing can operate with either an MPLS or an IPv6 data plane. All the currently available MPLS services, such as Layer 3 VPN (L3VPN), L2VPN (Virtual Private Wire Service [VPWS], Virtual Private LAN Services [VPLS], Ethernet VPN [E-VPN], and Provider Backbone Bridging Ethernet VPN [PBB-EVPN]), can run on top of a segment-routing transport network.
+
+**The implementation of Segment Routing in VPP covers both the IPv6 data plane (SRv6) as well as the MPLS data plane (SR-MPLS). This page contains the SRv6 documentation.**
+
+## Segment Routing terminology
+
+* Segment Routing Header (SRH): IPv6 routing extension header of type 'Segment Routing'. (draft-ietf-6man-segment-routing-header-05)
+* SegmentID (SID): an IPv6 address.
+* Segment List (SL) (SID List): the sequence of SIDs that the packet will traverse.
+* SR Policy: defines the SRH that will be applied to a packet. A packet steered into an SR policy may either receive the SRH by IPv6 header encapsulation (as recommended in draft-ietf-6man-rfc2460bis) or have the SRH inserted within its existing IPv6 header. An SR policy is uniquely identified by its Binding SID and associated with a weighted set of Segment Lists. If several SID lists are defined, traffic steered into the policy is load-balanced among them according to their respective weights.
+* Local SID: a SID associated with a processing function on the local node, which may range from advancing to the next SID in the SRH to complex user-defined behaviors. When a FIB lookup, either in the main FIB or in a specific VRF, returns a match on a local SID, the associated function is performed.
+* BindingSID: a BindingSID is a SID (only one) associated one-to-one with an SR Policy. If a packet arrives with an IPv6 DA corresponding to a BindingSID, then the SR policy is applied to that packet.
+
+## SRv6 Features in VPP
+
+The SRv6 Network Programming (*draft-filsfils-spring-srv6-network-programming*) defines the SRv6 architecture.
+
+VPP supports the following SRv6 LocalSID functions: End, End.X, End.DX6, End.DT6, End.DX4, End.DT4, End.DX2, End.B6, End.B6.Encaps.
+
+For further information and how to configure each specific function: @subpage srv6_localsid_doc
+
+
+The Segment Routing Policy (*draft-filsfils-spring-segment-routing-policy*) defines SR Policies.
+
+VPP supports SRv6 Policies with T.Insert and T.Encaps behaviors.
+
+For further information on how to create SR Policies: @subpage srv6_policy_doc
+
+For further information on how to steer traffic into SR Policies: @subpage srv6_steering_doc
+
+## SRv6 LocalSID development framework
+
+One of the *'key'* concepts about SRv6 is network programmability. This is why an SRv6 LocalSID is associated with a specific function.
+
+However, the true way to enable network programmability is to allow any developer to **easily** create their own SRv6 LocalSID functions. That is the reason why we have added API calls so that any developer can code their own SRv6 LocalSID behaviors as plugins and add them to the running SRv6 code.
+
+The principle is that the developer only codes the behavior (the graph node); all the FIB handling, SR LocalSID instantiation and so on are done by the VPP SRv6 code.
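
A minimal sketch of such a plugin-side registration is shown below. It uses the `sr_localsid_register_function()` prototype declared in `sr.h`; the behavior name, the DPO type and the format/unformat/creation/removal callbacks are hypothetical placeholders for whatever the plugin actually implements.

    /* Hypothetical plugin registering a custom SRv6 LocalSID behavior */
    static dpo_type_t my_localsid_dpo_type;   /* DPO type registered by the plugin */

    static u8 *format_my_localsid (u8 * s, va_list * args);        /* plugin-provided */
    static uword unformat_my_localsid (unformat_input_t * input,
                                       va_list * args);            /* plugin-provided */

    static int
    my_localsid_creation_fn (ip6_sr_localsid_t * ls)   /* called after localsid creation */
    {
      return 0;
    }

    static int
    my_localsid_removal_fn (ip6_sr_localsid_t * ls)    /* called before localsid removal */
    {
      return 0;
    }

    static clib_error_t *
    my_plugin_init (vlib_main_t * vm)
    {
      int rv = sr_localsid_register_function (vm,
                                              (u8 *) "end.my",      /* function name */
                                              (u8 *) "End.MY",      /* CLI keyword   */
                                              (u8 *) "Endpoint with custom processing",
                                              (u8 *) "<example params>",
                                              &my_localsid_dpo_type,
                                              format_my_localsid,
                                              unformat_my_localsid,
                                              my_localsid_creation_fn,
                                              my_localsid_removal_fn);
      (void) rv;   /* value assigned by the SR code for this registration */
      return 0;
    }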
+ +For more information please refer to: @subpage srv6_plugin_doc diff --git a/src/vnet/srv6/sr_localsid.c b/src/vnet/srv6/sr_localsid.c new file mode 100755 index 00000000..bdc66386 --- /dev/null +++ b/src/vnet/srv6/sr_localsid.c @@ -0,0 +1,1492 @@ +/* + * sr_localsid.c: ipv6 segment routing Endpoint behaviors + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file + * @brief Processing of packets with a SRH + * + * CLI to define new Segment Routing End processing functions. + * Graph node to support such functions. + * + * Each function associates an SRv6 segment (IPv6 address) with an specific + * Segment Routing function. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/** + * @brief Dynamically added SR localsid DPO type + */ +static dpo_type_t sr_localsid_dpo_type; +static dpo_type_t sr_localsid_d_dpo_type; + +/** + * @brief SR localsid add/del + * + * Function to add or delete SR LocalSIDs. + * + * @param is_del Boolean of whether its a delete instruction + * @param localsid_addr IPv6 address of the localsid + * @param is_decap Boolean of whether decapsulation is allowed in this function + * @param behavior Type of behavior (function) for this localsid + * @param sw_if_index Only for L2/L3 xconnect. OIF. In VRF variant the fib_table. + * @param vlan_index Only for L2 xconnect. Outgoing VLAN tag. + * @param fib_table FIB table in which we should install the localsid entry + * @param nh_addr Next Hop IPv4/IPv6 address. Only for L2/L3 xconnect. + * + * @return 0 on success, error otherwise. 
+ */ +int +sr_cli_localsid (char is_del, ip6_address_t * localsid_addr, + char end_psp, u8 behavior, u32 sw_if_index, u32 vlan_index, + u32 fib_table, ip46_address_t * nh_addr, void *ls_plugin_mem) +{ + ip6_sr_main_t *sm = &sr_main; + uword *p; + int rv; + + ip6_sr_localsid_t *ls = 0; + + dpo_id_t dpo = DPO_INVALID; + + /* Search for the item */ + p = mhash_get (&sm->sr_localsids_index_hash, localsid_addr); + + if (p) + { + if (is_del) + { + /* Retrieve localsid */ + ls = pool_elt_at_index (sm->localsids, p[0]); + /* Delete FIB entry */ + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP6, + .fp_len = 128, + .fp_addr = { + .ip6 = *localsid_addr, + } + }; + + fib_table_entry_delete (fib_table_find (FIB_PROTOCOL_IP6, + fib_table), + &pfx, FIB_SOURCE_SR); + + /* In case it is a Xconnect iface remove the (OIF, NHOP) adj */ + if (ls->behavior == SR_BEHAVIOR_X || ls->behavior == SR_BEHAVIOR_DX6 + || ls->behavior == SR_BEHAVIOR_DX4) + adj_unlock (ls->nh_adj); + + if (ls->behavior >= SR_BEHAVIOR_LAST) + { + sr_localsid_fn_registration_t *plugin = 0; + plugin = pool_elt_at_index (sm->plugin_functions, + ls->behavior - SR_BEHAVIOR_LAST); + + /* Callback plugin removal function */ + rv = plugin->removal (ls); + } + + /* Delete localsid registry */ + pool_put (sm->localsids, ls); + mhash_unset (&sm->sr_localsids_index_hash, localsid_addr, NULL); + return 1; + } + else /* create with function already existing; complain */ + return -1; + } + else + /* delete; localsid does not exist; complain */ + if (is_del) + return -2; + + /* Check whether there exists a FIB entry with such address */ + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP6, + .fp_len = 128, + }; + + pfx.fp_addr.as_u64[0] = localsid_addr->as_u64[0]; + pfx.fp_addr.as_u64[1] = localsid_addr->as_u64[1]; + + /* Lookup the FIB index associated to the table id provided */ + u32 fib_index = fib_table_find (FIB_PROTOCOL_IP6, fib_table); + if (fib_index == ~0) + return -3; + + /* Lookup the localsid in such FIB table */ + fib_node_index_t fei = fib_table_lookup_exact_match (fib_index, &pfx); + if (FIB_NODE_INDEX_INVALID != fei) + return -4; //There is an entry for such address (the localsid addr) + + /* Create a new localsid registry */ + pool_get (sm->localsids, ls); + memset (ls, 0, sizeof (*ls)); + + clib_memcpy (&ls->localsid, localsid_addr, sizeof (ip6_address_t)); + ls->end_psp = end_psp; + ls->behavior = behavior; + ls->nh_adj = (u32) ~ 0; + ls->fib_table = fib_table; + switch (behavior) + { + case SR_BEHAVIOR_END: + break; + case SR_BEHAVIOR_X: + ls->sw_if_index = sw_if_index; + clib_memcpy (&ls->next_hop.ip6, &nh_addr->ip6, sizeof (ip6_address_t)); + break; + case SR_BEHAVIOR_DX4: + ls->sw_if_index = sw_if_index; + clib_memcpy (&ls->next_hop.ip4, &nh_addr->ip4, sizeof (ip4_address_t)); + break; + case SR_BEHAVIOR_DX6: + ls->sw_if_index = sw_if_index; + clib_memcpy (&ls->next_hop.ip6, &nh_addr->ip6, sizeof (ip6_address_t)); + break; + case SR_BEHAVIOR_DT6: + ls->vrf_index = sw_if_index; + break; + case SR_BEHAVIOR_DX2: + ls->sw_if_index = sw_if_index; + ls->vlan_index = vlan_index; + break; + } + + /* Figure out the adjacency magic for Xconnect variants */ + if (ls->behavior == SR_BEHAVIOR_X || ls->behavior == SR_BEHAVIOR_DX4 + || ls->behavior == SR_BEHAVIOR_DX6) + { + adj_index_t nh_adj_index = ADJ_INDEX_INVALID; + + /* Retrieve the adjacency corresponding to the (OIF, next_hop) */ + if (ls->behavior == SR_BEHAVIOR_DX6 || ls->behavior == SR_BEHAVIOR_X) + nh_adj_index = adj_nbr_add_or_lock (FIB_PROTOCOL_IP6, VNET_LINK_IP6, + nh_addr, 
sw_if_index); + + else if (ls->behavior == SR_BEHAVIOR_DX4) + nh_adj_index = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4, VNET_LINK_IP4, + nh_addr, sw_if_index); + + /* Check for ADJ creation error. If so panic */ + if (nh_adj_index == ADJ_INDEX_INVALID) + { + pool_put (sm->localsids, ls); + return -5; + } + + ls->nh_adj = nh_adj_index; + } + + /* Set DPO */ + if (ls->behavior == SR_BEHAVIOR_END || ls->behavior == SR_BEHAVIOR_X) + dpo_set (&dpo, sr_localsid_dpo_type, DPO_PROTO_IP6, ls - sm->localsids); + else if (ls->behavior > SR_BEHAVIOR_D_FIRST + && ls->behavior < SR_BEHAVIOR_LAST) + dpo_set (&dpo, sr_localsid_d_dpo_type, DPO_PROTO_IP6, ls - sm->localsids); + else if (ls->behavior >= SR_BEHAVIOR_LAST) + { + sr_localsid_fn_registration_t *plugin = 0; + plugin = pool_elt_at_index (sm->plugin_functions, + ls->behavior - SR_BEHAVIOR_LAST); + /* Copy the unformat memory result */ + ls->plugin_mem = ls_plugin_mem; + /* Callback plugin creation function */ + rv = plugin->creation (ls); + if (rv) + { + pool_put (sm->localsids, ls); + return -6; + } + dpo_set (&dpo, plugin->dpo, DPO_PROTO_IP6, ls - sm->localsids); + } + + /* Set hash key for searching localsid by address */ + mhash_set (&sm->sr_localsids_index_hash, localsid_addr, ls - sm->localsids, + NULL); + + fib_table_entry_special_dpo_add (fib_index, &pfx, FIB_SOURCE_SR, + FIB_ENTRY_FLAG_EXCLUSIVE, &dpo); + dpo_reset (&dpo); + + /* Set counter to zero */ + vlib_validate_combined_counter (&(sm->sr_ls_valid_counters), + ls - sm->localsids); + vlib_validate_combined_counter (&(sm->sr_ls_invalid_counters), + ls - sm->localsids); + + vlib_zero_combined_counter (&(sm->sr_ls_valid_counters), + ls - sm->localsids); + vlib_zero_combined_counter (&(sm->sr_ls_invalid_counters), + ls - sm->localsids); + + return 0; +} + +/** + * @brief SR LocalSID CLI function. 
+ * + * @see sr_cli_localsid + */ +static clib_error_t * +sr_cli_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t *vnm = vnet_get_main (); + ip6_sr_main_t *sm = &sr_main; + u32 sw_if_index = (u32) ~ 0, vlan_index = (u32) ~ 0, fib_index = 0; + int is_del = 0; + int end_psp = 0; + ip6_address_t resulting_address; + ip46_address_t next_hop; + char address_set = 0; + char behavior = 0; + void *ls_plugin_mem = 0; + + int rv; + + memset (&resulting_address, 0, sizeof (ip6_address_t)); + ip46_address_reset (&next_hop); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "del")) + is_del = 1; + else if (!address_set + && unformat (input, "address %U", unformat_ip6_address, + &resulting_address)) + address_set = 1; + else if (!address_set + && unformat (input, "addr %U", unformat_ip6_address, + &resulting_address)) + address_set = 1; + else if (unformat (input, "fib-table %u", &fib_index)); + else if (vlan_index == (u32) ~ 0 + && unformat (input, "vlan %u", &vlan_index)); + else if (!behavior && unformat (input, "behavior")) + { + if (unformat (input, "end.x %U %U", + unformat_vnet_sw_interface, vnm, &sw_if_index, + unformat_ip6_address, &next_hop.ip6)) + behavior = SR_BEHAVIOR_X; + else if (unformat (input, "end.dx6 %U %U", + unformat_vnet_sw_interface, vnm, &sw_if_index, + unformat_ip6_address, &next_hop.ip6)) + behavior = SR_BEHAVIOR_DX6; + else if (unformat (input, "end.dx4 %U %U", + unformat_vnet_sw_interface, vnm, &sw_if_index, + unformat_ip4_address, &next_hop.ip4)) + behavior = SR_BEHAVIOR_DX4; + else if (unformat (input, "end.dx2 %U", + unformat_vnet_sw_interface, vnm, &sw_if_index)) + behavior = SR_BEHAVIOR_DX2; + else if (unformat (input, "end.dt6 %u", &sw_if_index)) + behavior = SR_BEHAVIOR_DT6; + else if (unformat (input, "end.dt4 %u", &sw_if_index)) + behavior = SR_BEHAVIOR_DT4; + else + { + /* Loop over all the plugin behavior format functions */ + sr_localsid_fn_registration_t *plugin = 0, **vec_plugins = 0; + sr_localsid_fn_registration_t **plugin_it = 0; + + /* Create a vector out of the plugin pool as recommended */ + /* *INDENT-OFF* */ + pool_foreach (plugin, sm->plugin_functions, + { + vec_add1 (vec_plugins, plugin); + }); + /* *INDENT-ON* */ + + vec_foreach (plugin_it, vec_plugins) + { + if (unformat + (input, "%U", (*plugin_it)->ls_unformat, &ls_plugin_mem)) + { + behavior = (*plugin_it)->sr_localsid_function_number; + break; + } + } + } + + if (!behavior) + { + if (unformat (input, "end")) + behavior = SR_BEHAVIOR_END; + else + break; + } + } + else if (!end_psp && unformat (input, "psp")) + end_psp = 1; + else + break; + } + + if (!behavior && end_psp) + behavior = SR_BEHAVIOR_END; + + if (!address_set) + return clib_error_return (0, + "Error: SRv6 LocalSID address is mandatory."); + if (!is_del && !behavior) + return clib_error_return (0, + "Error: SRv6 LocalSID behavior is mandatory."); + if (vlan_index != (u32) ~ 0) + return clib_error_return (0, + "Error: SRv6 End.DX2 with rewrite VLAN tag not supported by now."); + if (end_psp && !(behavior == SR_BEHAVIOR_END || behavior == SR_BEHAVIOR_X)) + return clib_error_return (0, + "Error: SRv6 PSP only compatible with End and End.X"); + + rv = sr_cli_localsid (is_del, &resulting_address, end_psp, behavior, + sw_if_index, vlan_index, fib_index, &next_hop, + ls_plugin_mem); + + switch (rv) + { + case 0: + break; + case 1: + return 0; + case -1: + return clib_error_return (0, + "Identical localsid already exists. 
Requested localsid not created."); + case -2: + return clib_error_return (0, + "The requested localsid could not be deleted. SR localsid not found"); + case -3: + return clib_error_return (0, "FIB table %u does not exist", fib_index); + case -4: + return clib_error_return (0, "There is already one FIB entry for the" + "requested localsid non segment routing related"); + case -5: + return clib_error_return (0, + "Could not create ARP/ND entry for such next_hop. Internal error."); + case -6: + return clib_error_return (0, + "Error on the plugin based localsid creation."); + default: + return clib_error_return (0, "BUG: sr localsid returns %d", rv); + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (sr_localsid_command, static) = { + .path = "sr localsid", + .short_help = "sr localsid (del) address XX:XX::YY:YY" + "(fib-table 8) behavior STRING", + .long_help = + "Create SR LocalSID and binds it to a particular behavior\n" + "Arguments:\n" + "\tlocalSID IPv6_addr(128b) LocalSID IPv6 address\n" + "\t(fib-table X) Optional. VRF where to install SRv6 localsid\n" + "\tbehavior STRING Specifies the behavior\n" + "\n\tBehaviors:\n" + "\tEnd\t-> Endpoint.\n" + "\tEnd.X\t-> Endpoint with decapsulation and Layer-3 cross-connect.\n" + "\t\tParameters: ' '\n" + "\tEnd.DX2\t-> Endpoint with decapsulation and Layer-2 cross-connect.\n" + "\t\tParameters: ''\n" + "\tEnd.DX6\t-> Endpoint with decapsulation and IPv6 cross-connect.\n" + "\t\tParameters: ' '\n" + "\tEnd.DX4\t-> Endpoint with decapsulation and IPv4 cross-connect.\n" + "\t\tParameters: ' '\n" + "\tEnd.DT6\t-> Endpoint with decapsulation and specific IPv6 table lookup.\n" + "\t\tParameters: ''\n" + "\tEnd.DT4\t-> Endpoint with decapsulation and specific IPv4 table lookup.\n" + "\t\tParameters: ''\n", + .function = sr_cli_localsid_command_fn, +}; +/* *INDENT-ON* */ + +/** + * @brief CLI function to 'show' all SR LocalSIDs on console. 
+ */ +static clib_error_t * +show_sr_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t *vnm = vnet_get_main (); + ip6_sr_main_t *sm = &sr_main; + ip6_sr_localsid_t **localsid_list = 0; + ip6_sr_localsid_t *ls; + int i; + + vlib_cli_output (vm, "SRv6 - My LocalSID Table:"); + vlib_cli_output (vm, "========================="); + /* *INDENT-OFF* */ + pool_foreach (ls, sm->localsids, ({ vec_add1 (localsid_list, ls); })); + /* *INDENT-ON* */ + for (i = 0; i < vec_len (localsid_list); i++) + { + ls = localsid_list[i]; + switch (ls->behavior) + { + case SR_BEHAVIOR_END: + vlib_cli_output (vm, "\tAddress: \t%U\n\tBehavior: \tEnd", + format_ip6_address, &ls->localsid); + break; + case SR_BEHAVIOR_X: + vlib_cli_output (vm, + "\tAddress: \t%U\n\tBehavior: \tX (Endpoint with Layer-3 cross-connect)" + "\n\tIface: \t%U\n\tNext hop: \t%U", + format_ip6_address, &ls->localsid, + format_vnet_sw_if_index_name, vnm, ls->sw_if_index, + format_ip6_address, &ls->next_hop.ip6); + break; + case SR_BEHAVIOR_DX4: + vlib_cli_output (vm, + "\tAddress: \t%U\n\tBehavior: \tDX4 (Endpoint with decapsulation and IPv4 cross-connect)" + "\n\tIface: \t%U\n\tNext hop: \t%U", + format_ip6_address, &ls->localsid, + format_vnet_sw_if_index_name, vnm, ls->sw_if_index, + format_ip4_address, &ls->next_hop.ip4); + break; + case SR_BEHAVIOR_DX6: + vlib_cli_output (vm, + "\tAddress: \t%U\n\tBehavior: \tDX6 (Endpoint with decapsulation and IPv6 cross-connect)" + "\n\tIface: \t%U\n\tNext hop: \t%U", + format_ip6_address, &ls->localsid, + format_vnet_sw_if_index_name, vnm, ls->sw_if_index, + format_ip6_address, &ls->next_hop.ip6); + break; + case SR_BEHAVIOR_DX2: + if (ls->vlan_index == (u32) ~ 0) + vlib_cli_output (vm, + "\tAddress: \t%U\n\tBehavior: \tDX2 (Endpoint with decapulation and Layer-2 cross-connect)" + "\n\tIface: \t%U", format_ip6_address, + &ls->localsid, format_vnet_sw_if_index_name, vnm, + ls->sw_if_index); + else + vlib_cli_output (vm, + "Unsupported yet. (DX2 with egress VLAN rewrite)"); + break; + case SR_BEHAVIOR_DT6: + vlib_cli_output (vm, + "\tAddress: \t%U\n\tBehavior: \tDT6 (Endpoint with decapsulation and specific IPv6 table lookup)" + "\n\tTable: %u", format_ip6_address, &ls->localsid, + ls->fib_table); + break; + case SR_BEHAVIOR_DT4: + vlib_cli_output (vm, + "\tAddress: \t%U\n\tBehavior: \tDT4 (Endpoint with decapsulation and specific IPv4 table lookup)" + "\n\tTable: \t%u", format_ip6_address, + &ls->localsid, ls->fib_table); + break; + default: + if (ls->behavior >= SR_BEHAVIOR_LAST) + { + sr_localsid_fn_registration_t *plugin = + pool_elt_at_index (sm->plugin_functions, + ls->behavior - SR_BEHAVIOR_LAST); + + vlib_cli_output (vm, "\tAddress: \t%U\n" + "\tBehavior: \t%s (%s)\n\t%U", + format_ip6_address, &ls->localsid, + plugin->keyword_str, plugin->def_str, + plugin->ls_format, ls->plugin_mem); + } + else + //Should never get here... 
+ vlib_cli_output (vm, "Internal error"); + break; + } + if (ls->end_psp) + vlib_cli_output (vm, "\tPSP: \tTrue\n"); + + /* Print counters */ + vlib_counter_t valid, invalid; + vlib_get_combined_counter (&(sm->sr_ls_valid_counters), i, &valid); + vlib_get_combined_counter (&(sm->sr_ls_invalid_counters), i, &invalid); + vlib_cli_output (vm, "\tGood traffic: \t[%Ld packets : %Ld bytes]\n", + valid.packets, valid.bytes); + vlib_cli_output (vm, "\tBad traffic: \t[%Ld packets : %Ld bytes]\n", + invalid.packets, invalid.bytes); + vlib_cli_output (vm, "--------------------"); + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_sr_localsid_command, static) = { + .path = "show sr localsids", + .short_help = "show sr localsids", + .function = show_sr_localsid_command_fn, +}; +/* *INDENT-ON* */ + +/** + * @brief Function to 'clear' ALL SR localsid counters + */ +static clib_error_t * +clear_sr_localsid_counters_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ip6_sr_main_t *sm = &sr_main; + + vlib_clear_combined_counters (&(sm->sr_ls_valid_counters)); + vlib_clear_combined_counters (&(sm->sr_ls_invalid_counters)); + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (clear_sr_localsid_counters_command, static) = { + .path = "clear sr localsid counters", + .short_help = "clear sr localsid counters", + .function = clear_sr_localsid_counters_command_fn, +}; +/* *INDENT-ON* */ + +/************************ SR LocalSID graphs node ****************************/ +/** + * @brief SR localsid node trace + */ +typedef struct +{ + u32 localsid_index; + ip6_address_t src, out_dst; + u8 sr[256]; + u8 num_segments; + u8 segments_left; + //With SRv6 header update include flags here. +} sr_localsid_trace_t; + +#define foreach_sr_localsid_error \ +_(NO_INNER_HEADER, "(SR-Error) No inner IP header") \ +_(NO_MORE_SEGMENTS, "(SR-Error) No more segments") \ +_(NO_SRH, "(SR-Error) No SR header") \ +_(NO_PSP, "(SR-Error) PSP Not available (segments left > 0)") \ +_(NOT_LS, "(SR-Error) Decaps not available (segments left > 0)") \ +_(L2, "(SR-Error) SRv6 decapsulated a L2 frame without dest") + +typedef enum +{ +#define _(sym,str) SR_LOCALSID_ERROR_##sym, + foreach_sr_localsid_error +#undef _ + SR_LOCALSID_N_ERROR, +} sr_localsid_error_t; + +static char *sr_localsid_error_strings[] = { +#define _(sym,string) string, + foreach_sr_localsid_error +#undef _ +}; + +#define foreach_sr_localsid_next \ +_(ERROR, "error-drop") \ +_(IP6_LOOKUP, "ip6-lookup") \ +_(IP4_LOOKUP, "ip4-lookup") \ +_(IP6_REWRITE, "ip6-rewrite") \ +_(IP4_REWRITE, "ip4-rewrite") \ +_(INTERFACE_OUTPUT, "interface-output") + +typedef enum +{ +#define _(s,n) SR_LOCALSID_NEXT_##s, + foreach_sr_localsid_next +#undef _ + SR_LOCALSID_N_NEXT, +} sr_localsid_next_t; + +/** + * @brief SR LocalSID graph node trace function + * + * @see sr_localsid + */ +u8 * +format_sr_localsid_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + ip6_sr_main_t *sm = &sr_main; + sr_localsid_trace_t *t = va_arg (*args, sr_localsid_trace_t *); + + ip6_sr_localsid_t *ls = + pool_elt_at_index (sm->localsids, t->localsid_index); + + s = + format (s, "SR-LOCALSID:\n\tLocalsid: %U\n", format_ip6_address, + &ls->localsid); + switch (ls->behavior) + { + case SR_BEHAVIOR_END: + s = format (s, "\tBehavior: End\n"); + break; + case SR_BEHAVIOR_DX6: + s = format (s, "\tBehavior: Decapsulation with IPv6 L3 xconnect\n"); + break; + 
case SR_BEHAVIOR_DX4: + s = format (s, "\tBehavior: Decapsulation with IPv4 L3 xconnect\n"); + break; + case SR_BEHAVIOR_X: + s = format (s, "\tBehavior: IPv6 L3 xconnect\n"); + break; + case SR_BEHAVIOR_DT6: + s = format (s, "\tBehavior: Decapsulation with IPv6 Table lookup\n"); + break; + case SR_BEHAVIOR_DT4: + s = format (s, "\tBehavior: Decapsulation with IPv4 Table lookup\n"); + break; + case SR_BEHAVIOR_DX2: + s = format (s, "\tBehavior: Decapsulation with L2 xconnect\n"); + break; + default: + s = format (s, "\tBehavior: defined in plugin\n"); //TODO + break; + } + if (t->num_segments != 0xFF) + { + if (t->num_segments > 0) + { + s = format (s, "\tSegments left: %d\n", t->num_segments); + s = format (s, "\tSID list: [in ietf order]"); + int i = 0; + for (i = 0; i < t->num_segments; i++) + { + s = format (s, "\n\t-> %U", format_ip6_address, + (ip6_address_t *) & t->sr[i * + sizeof (ip6_address_t)]); + } + } + } + return s; +} + +/** + * @brief Function doing End processing. + */ +static_always_inline void +end_srh_processing (vlib_node_runtime_t * node, + vlib_buffer_t * b0, + ip6_header_t * ip0, + ip6_sr_header_t * sr0, + ip6_sr_localsid_t * ls0, u32 * next0) +{ + ip6_address_t *new_dst0; + + if (PREDICT_TRUE (sr0->type == ROUTING_HEADER_TYPE_SR)) + { + if (PREDICT_TRUE (sr0->segments_left != 0)) + { + sr0->segments_left -= 1; + new_dst0 = (ip6_address_t *) (sr0->segments); + new_dst0 += sr0->segments_left; + ip0->dst_address.as_u64[0] = new_dst0->as_u64[0]; + ip0->dst_address.as_u64[1] = new_dst0->as_u64[1]; + + if (ls0->behavior == SR_BEHAVIOR_X) + { + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ls0->nh_adj; + *next0 = SR_LOCALSID_NEXT_IP6_REWRITE; + } + } + else + { + *next0 = SR_LOCALSID_NEXT_ERROR; + b0->error = node->errors[SR_LOCALSID_ERROR_NO_MORE_SEGMENTS]; + } + } + else + { + /* Error. Routing header of type != SR */ + *next0 = SR_LOCALSID_NEXT_ERROR; + b0->error = node->errors[SR_LOCALSID_ERROR_NO_SRH]; + } +} + +/* + * @brief Function doing SRH processing for D* variants + */ +//FixME. I must crosscheck that next_proto matches the localsid +static_always_inline void +end_decaps_srh_processing (vlib_node_runtime_t * node, + vlib_buffer_t * b0, + ip6_header_t * ip0, + ip6_sr_header_t * sr0, + ip6_sr_localsid_t * ls0, u32 * next0) +{ + /* Compute the size of the IPv6 header with all Ext. headers */ + u8 next_proto; + ip6_ext_header_t *next_ext_header; + u16 total_size = 0; + + next_proto = ip0->protocol; + next_ext_header = (void *) (ip0 + 1); + total_size = sizeof (ip6_header_t); + while (ip6_ext_hdr (next_proto)) + { + total_size += ip6_ext_header_len (next_ext_header); + next_proto = next_ext_header->next_hdr; + next_ext_header = ip6_ext_next_header (next_ext_header); + } + + /* Ensure this is the last segment. Otherwise drop. */ + if (sr0 && sr0->segments_left != 0) + { + *next0 = SR_LOCALSID_NEXT_ERROR; + b0->error = node->errors[SR_LOCALSID_ERROR_NOT_LS]; + return; + } + + switch (next_proto) + { + case IP_PROTOCOL_IPV6: + /* Encap-End IPv6. Pop outer IPv6 header. */ + if (ls0->behavior == SR_BEHAVIOR_DX6) + { + vlib_buffer_advance (b0, total_size); + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ls0->nh_adj; + *next0 = SR_LOCALSID_NEXT_IP6_REWRITE; + return; + } + else if (ls0->behavior == SR_BEHAVIOR_DT6) + { + vlib_buffer_advance (b0, total_size); + vnet_buffer (b0)->sw_if_index[VLIB_TX] = ls0->fib_table; + return; + } + break; + case IP_PROTOCOL_IP_IN_IP: + /* Encap-End IPv4. 
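+ (Of the two sub-cases handled below, DX4 forwards the inner IPv4 packet
+ through the preresolved adjacency ls0->nh_adj, while DT4 performs a lookup
+ in the FIB stored in ls0->fib_table, mirroring the IPv6 cases above.)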
Pop outer IPv6 header */ + if (ls0->behavior == SR_BEHAVIOR_DX4) + { + vlib_buffer_advance (b0, total_size); + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ls0->nh_adj; + *next0 = SR_LOCALSID_NEXT_IP4_REWRITE; + return; + } + else if (ls0->behavior == SR_BEHAVIOR_DT4) + { + vlib_buffer_advance (b0, total_size); + vnet_buffer (b0)->sw_if_index[VLIB_TX] = ls0->fib_table; + *next0 = SR_LOCALSID_NEXT_IP4_LOOKUP; + return; + } + break; + case IP_PROTOCOL_IP6_NONXT: + /* L2 encaps */ + if (ls0->behavior == SR_BEHAVIOR_DX2) + { + vlib_buffer_advance (b0, total_size); + vnet_buffer (b0)->sw_if_index[VLIB_TX] = ls0->sw_if_index; + *next0 = SR_LOCALSID_NEXT_INTERFACE_OUTPUT; + return; + } + break; + } + *next0 = SR_LOCALSID_NEXT_ERROR; + b0->error = node->errors[SR_LOCALSID_ERROR_NO_INNER_HEADER]; + return; +} + +/** + * @brief Function doing End processing with PSP + */ +static_always_inline void +end_psp_srh_processing (vlib_node_runtime_t * node, + vlib_buffer_t * b0, + ip6_header_t * ip0, + ip6_ext_header_t * prev0, + ip6_sr_header_t * sr0, + ip6_sr_localsid_t * ls0, u32 * next0) +{ + u32 new_l0, sr_len; + u64 *copy_dst0, *copy_src0; + u32 copy_len_u64s0 = 0; + int i; + + if (PREDICT_TRUE (sr0->type == ROUTING_HEADER_TYPE_SR)) + { + if (PREDICT_TRUE (sr0->segments_left == 1)) + { + ip0->dst_address.as_u64[0] = sr0->segments->as_u64[0]; + ip0->dst_address.as_u64[1] = sr0->segments->as_u64[1]; + + /* Remove the SRH taking care of the rest of IPv6 ext header */ + if (prev0) + prev0->next_hdr = sr0->protocol; + else + ip0->protocol = sr0->protocol; + + sr_len = ip6_ext_header_len (sr0); + vlib_buffer_advance (b0, sr_len); + new_l0 = clib_net_to_host_u16 (ip0->payload_length) - sr_len; + ip0->payload_length = clib_host_to_net_u16 (new_l0); + copy_src0 = (u64 *) ip0; + copy_dst0 = copy_src0 + (sr0->length + 1); + /* number of 8 octet units to copy + * By default in absence of extension headers it is equal to length of ip6 header + * With extension headers it number of 8 octet units of ext headers preceding + * SR header + */ + copy_len_u64s0 = + (((u8 *) sr0 - (u8 *) ip0) - sizeof (ip6_header_t)) >> 3; + copy_dst0[4 + copy_len_u64s0] = copy_src0[4 + copy_len_u64s0]; + copy_dst0[3 + copy_len_u64s0] = copy_src0[3 + copy_len_u64s0]; + copy_dst0[2 + copy_len_u64s0] = copy_src0[2 + copy_len_u64s0]; + copy_dst0[1 + copy_len_u64s0] = copy_src0[1 + copy_len_u64s0]; + copy_dst0[0 + copy_len_u64s0] = copy_src0[0 + copy_len_u64s0]; + + for (i = copy_len_u64s0 - 1; i >= 0; i--) + { + copy_dst0[i] = copy_src0[i]; + } + + if (ls0->behavior == SR_BEHAVIOR_X) + { + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ls0->nh_adj; + *next0 = SR_LOCALSID_NEXT_IP6_REWRITE; + } + return; + } + } + /* Error. Routing header of type != SR */ + *next0 = SR_LOCALSID_NEXT_ERROR; + b0->error = node->errors[SR_LOCALSID_ERROR_NO_PSP]; +} + +/** + * @brief SR LocalSID graph node. 
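+ *
+ * Packets are steered here by the LocalSID FIB entry: the pool index of the
+ * matching localsid travels in vnet_buffer(b)->ip.adj_index[VLIB_TX] and is
+ * used to fetch the ip6_sr_localsid_t before end_decaps_srh_processing().
+ *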
Supports all default SR Endpoint variants + */ +static uword +sr_localsid_d_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, *from, *to_next; + ip6_sr_main_t *sm = &sr_main; + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + next_index = node->cached_next_index; + u32 thread_index = vlib_get_thread_index (); + + while (n_left_from > 0) + { + u32 n_left_to_next; + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* Quad - Loop */ + while (n_left_from >= 8 && n_left_to_next >= 4) + { + u32 bi0, bi1, bi2, bi3; + vlib_buffer_t *b0, *b1, *b2, *b3; + ip6_header_t *ip0, *ip1, *ip2, *ip3; + ip6_ext_header_t *prev0, *prev1, *prev2, *prev3; + ip6_sr_header_t *sr0, *sr1, *sr2, *sr3; + u32 next0, next1, next2, next3; + next0 = next1 = next2 = next3 = SR_LOCALSID_NEXT_IP6_LOOKUP; + ip6_sr_localsid_t *ls0, *ls1, *ls2, *ls3; + + /* Prefetch next iteration. */ + { + vlib_buffer_t *p4, *p5, *p6, *p7; + + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + p6 = vlib_get_buffer (vm, from[6]); + p7 = vlib_get_buffer (vm, from[7]); + + /* Prefetch the buffer header and packet for the N+4 loop iteration */ + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + vlib_prefetch_buffer_header (p6, LOAD); + vlib_prefetch_buffer_header (p7, LOAD); + + CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + to_next[2] = bi2 = from[2]; + to_next[3] = bi3 = from[3]; + from += 4; + to_next += 4; + n_left_from -= 4; + n_left_to_next -= 4; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); + + ls0 = + pool_elt_at_index (sm->localsids, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + ls1 = + pool_elt_at_index (sm->localsids, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + ls2 = + pool_elt_at_index (sm->localsids, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + ls3 = + pool_elt_at_index (sm->localsids, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + ip2 = vlib_buffer_get_current (b2); + ip3 = vlib_buffer_get_current (b3); + + ip6_ext_header_find_t (ip0, prev0, sr0, IP_PROTOCOL_IPV6_ROUTE); + ip6_ext_header_find_t (ip1, prev1, sr1, IP_PROTOCOL_IPV6_ROUTE); + ip6_ext_header_find_t (ip2, prev2, sr2, IP_PROTOCOL_IPV6_ROUTE); + ip6_ext_header_find_t (ip3, prev3, sr3, IP_PROTOCOL_IPV6_ROUTE); + + end_decaps_srh_processing (node, b0, ip0, sr0, ls0, &next0); + end_decaps_srh_processing (node, b1, ip1, sr1, ls1, &next1); + end_decaps_srh_processing (node, b2, ip2, sr2, ls2, &next2); + end_decaps_srh_processing (node, b3, ip3, sr3, ls3, &next3); + + //TODO: trace. + + vlib_increment_combined_counter + (((next0 == + SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : + &(sm->sr_ls_valid_counters)), thread_index, ls0 - sm->localsids, + 1, vlib_buffer_length_in_chain (vm, b0)); + + vlib_increment_combined_counter + (((next1 == + SR_LOCALSID_NEXT_ERROR) ? 
&(sm->sr_ls_invalid_counters) : + &(sm->sr_ls_valid_counters)), thread_index, ls1 - sm->localsids, + 1, vlib_buffer_length_in_chain (vm, b1)); + + vlib_increment_combined_counter + (((next2 == + SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : + &(sm->sr_ls_valid_counters)), thread_index, ls2 - sm->localsids, + 1, vlib_buffer_length_in_chain (vm, b2)); + + vlib_increment_combined_counter + (((next3 == + SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : + &(sm->sr_ls_valid_counters)), thread_index, ls3 - sm->localsids, + 1, vlib_buffer_length_in_chain (vm, b3)); + + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, + n_left_to_next, bi0, bi1, bi2, bi3, + next0, next1, next2, next3); + } + + /* Single loop for potentially the last three packets */ + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + ip6_header_t *ip0; + ip6_ext_header_t *prev0; + ip6_sr_header_t *sr0; + u32 next0 = SR_LOCALSID_NEXT_IP6_LOOKUP; + ip6_sr_localsid_t *ls0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + ip0 = vlib_buffer_get_current (b0); + + /* Lookup the SR End behavior based on IP DA (adj) */ + ls0 = + pool_elt_at_index (sm->localsids, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + + /* Find SRH as well as previous header */ + ip6_ext_header_find_t (ip0, prev0, sr0, IP_PROTOCOL_IPV6_ROUTE); + + /* SRH processing and End variants */ + end_decaps_srh_processing (node, b0, ip0, sr0, ls0, &next0); + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_localsid_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->num_segments = 0; + tr->localsid_index = ls0 - sm->localsids; + + if (ip0 == vlib_buffer_get_current (b0)) + { + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->out_dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->out_dst.as_u8)); + if (ip0->protocol == IP_PROTOCOL_IPV6_ROUTE + && sr0->type == ROUTING_HEADER_TYPE_SR) + { + clib_memcpy (tr->sr, sr0->segments, sr0->length * 8); + tr->num_segments = + sr0->length * 8 / sizeof (ip6_address_t); + tr->segments_left = sr0->segments_left; + } + } + else + tr->num_segments = 0xFF; + } + + /* Increase the counters */ + vlib_increment_combined_counter + (((next0 == + SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : + &(sm->sr_ls_valid_counters)), thread_index, ls0 - sm->localsids, + 1, vlib_buffer_length_in_chain (vm, b0)); + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return from_frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sr_localsid_d_node) = { + .function = sr_localsid_d_fn, + .name = "sr-localsid-d", + .vector_size = sizeof (u32), + .format_trace = format_sr_localsid_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = SR_LOCALSID_N_ERROR, + .error_strings = sr_localsid_error_strings, + .n_next_nodes = SR_LOCALSID_N_NEXT, + .next_nodes = { +#define _(s,n) [SR_LOCALSID_NEXT_##s] = n, + foreach_sr_localsid_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +/** + * @brief SR LocalSID graph node. 
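+ *
+ * Per packet, ls->end_psp selects between end_psp_srh_processing() (End and
+ * End.X with penultimate segment popping) and plain end_srh_processing().
+ *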
Supports all default SR Endpoint variants + */ +static uword +sr_localsid_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, *from, *to_next; + ip6_sr_main_t *sm = &sr_main; + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + next_index = node->cached_next_index; + u32 thread_index = vlib_get_thread_index (); + + while (n_left_from > 0) + { + u32 n_left_to_next; + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* Quad - Loop */ + while (n_left_from >= 8 && n_left_to_next >= 4) + { + u32 bi0, bi1, bi2, bi3; + vlib_buffer_t *b0, *b1, *b2, *b3; + ip6_header_t *ip0, *ip1, *ip2, *ip3; + ip6_sr_header_t *sr0, *sr1, *sr2, *sr3; + ip6_ext_header_t *prev0, *prev1, *prev2, *prev3; + u32 next0, next1, next2, next3; + next0 = next1 = next2 = next3 = SR_LOCALSID_NEXT_IP6_LOOKUP; + ip6_sr_localsid_t *ls0, *ls1, *ls2, *ls3; + + /* Prefetch next iteration. */ + { + vlib_buffer_t *p4, *p5, *p6, *p7; + + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + p6 = vlib_get_buffer (vm, from[6]); + p7 = vlib_get_buffer (vm, from[7]); + + /* Prefetch the buffer header and packet for the N+2 loop iteration */ + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + vlib_prefetch_buffer_header (p6, LOAD); + vlib_prefetch_buffer_header (p7, LOAD); + + CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + to_next[2] = bi2 = from[2]; + to_next[3] = bi3 = from[3]; + from += 4; + to_next += 4; + n_left_from -= 4; + n_left_to_next -= 4; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + ip2 = vlib_buffer_get_current (b2); + ip3 = vlib_buffer_get_current (b3); + + ip6_ext_header_find_t (ip0, prev0, sr0, IP_PROTOCOL_IPV6_ROUTE); + ip6_ext_header_find_t (ip1, prev1, sr1, IP_PROTOCOL_IPV6_ROUTE); + ip6_ext_header_find_t (ip2, prev2, sr2, IP_PROTOCOL_IPV6_ROUTE); + ip6_ext_header_find_t (ip3, prev3, sr3, IP_PROTOCOL_IPV6_ROUTE); + + ls0 = + pool_elt_at_index (sm->localsids, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + ls1 = + pool_elt_at_index (sm->localsids, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + ls2 = + pool_elt_at_index (sm->localsids, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + ls3 = + pool_elt_at_index (sm->localsids, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + + if (ls0->end_psp) + end_psp_srh_processing (node, b0, ip0, prev0, sr0, ls0, &next0); + else + end_srh_processing (node, b0, ip0, sr0, ls0, &next0); + + if (ls1->end_psp) + end_psp_srh_processing (node, b1, ip1, prev1, sr1, ls1, &next1); + else + end_srh_processing (node, b1, ip1, sr1, ls1, &next1); + + if (ls2->end_psp) + end_psp_srh_processing (node, b2, ip2, prev2, sr2, ls2, &next2); + else + end_srh_processing (node, b2, ip2, sr2, ls2, &next2); + + if (ls3->end_psp) + end_psp_srh_processing (node, b3, ip3, prev3, sr3, ls3, &next3); + else + end_srh_processing (node, b3, ip3, sr3, ls3, &next3); + + //TODO: proper trace. + + vlib_increment_combined_counter + (((next0 == + SR_LOCALSID_NEXT_ERROR) ? 
&(sm->sr_ls_invalid_counters) : + &(sm->sr_ls_valid_counters)), thread_index, ls0 - sm->localsids, + 1, vlib_buffer_length_in_chain (vm, b0)); + + vlib_increment_combined_counter + (((next1 == + SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : + &(sm->sr_ls_valid_counters)), thread_index, ls1 - sm->localsids, + 1, vlib_buffer_length_in_chain (vm, b1)); + + vlib_increment_combined_counter + (((next2 == + SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : + &(sm->sr_ls_valid_counters)), thread_index, ls2 - sm->localsids, + 1, vlib_buffer_length_in_chain (vm, b2)); + + vlib_increment_combined_counter + (((next3 == + SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : + &(sm->sr_ls_valid_counters)), thread_index, ls3 - sm->localsids, + 1, vlib_buffer_length_in_chain (vm, b3)); + + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, + n_left_to_next, bi0, bi1, bi2, bi3, + next0, next1, next2, next3); + } + + /* Single loop for potentially the last three packets */ + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + ip6_header_t *ip0 = 0; + ip6_ext_header_t *prev0; + ip6_sr_header_t *sr0; + u32 next0 = SR_LOCALSID_NEXT_IP6_LOOKUP; + ip6_sr_localsid_t *ls0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + ip0 = vlib_buffer_get_current (b0); + ip6_ext_header_find_t (ip0, prev0, sr0, IP_PROTOCOL_IPV6_ROUTE); + + /* Lookup the SR End behavior based on IP DA (adj) */ + ls0 = + pool_elt_at_index (sm->localsids, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + + /* SRH processing */ + if (ls0->end_psp) + end_psp_srh_processing (node, b0, ip0, prev0, sr0, ls0, &next0); + else + end_srh_processing (node, b0, ip0, sr0, ls0, &next0); + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_localsid_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->num_segments = 0; + tr->localsid_index = ls0 - sm->localsids; + + if (ip0 == vlib_buffer_get_current (b0)) + { + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->out_dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->out_dst.as_u8)); + if (ip0->protocol == IP_PROTOCOL_IPV6_ROUTE + && sr0->type == ROUTING_HEADER_TYPE_SR) + { + clib_memcpy (tr->sr, sr0->segments, sr0->length * 8); + tr->num_segments = + sr0->length * 8 / sizeof (ip6_address_t); + tr->segments_left = sr0->segments_left; + } + } + else + { + tr->num_segments = 0xFF; + } + } + + vlib_increment_combined_counter + (((next0 == + SR_LOCALSID_NEXT_ERROR) ? 
&(sm->sr_ls_invalid_counters) : + &(sm->sr_ls_valid_counters)), thread_index, ls0 - sm->localsids, + 1, vlib_buffer_length_in_chain (vm, b0)); + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return from_frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sr_localsid_node) = { + .function = sr_localsid_fn, + .name = "sr-localsid", + .vector_size = sizeof (u32), + .format_trace = format_sr_localsid_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = SR_LOCALSID_N_ERROR, + .error_strings = sr_localsid_error_strings, + .n_next_nodes = SR_LOCALSID_N_NEXT, + .next_nodes = { +#define _(s,n) [SR_LOCALSID_NEXT_##s] = n, + foreach_sr_localsid_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +static u8 * +format_sr_dpo (u8 * s, va_list * args) +{ + index_t index = va_arg (*args, index_t); + CLIB_UNUSED (u32 indent) = va_arg (*args, u32); + + return (format (s, "SR: localsid_index:[%d]", index)); +} + +const static dpo_vft_t sr_loc_vft = { + .dv_lock = sr_dpo_lock, + .dv_unlock = sr_dpo_unlock, + .dv_format = format_sr_dpo, +}; + +const static char *const sr_loc_ip6_nodes[] = { + "sr-localsid", + NULL, +}; + +const static char *const *const sr_loc_nodes[DPO_PROTO_NUM] = { + [DPO_PROTO_IP6] = sr_loc_ip6_nodes, +}; + +const static char *const sr_loc_d_ip6_nodes[] = { + "sr-localsid-d", + NULL, +}; + +const static char *const *const sr_loc_d_nodes[DPO_PROTO_NUM] = { + [DPO_PROTO_IP6] = sr_loc_d_ip6_nodes, +}; + + +/*************************** SR LocalSID plugins ******************************/ +/** + * @brief SR LocalSID plugin registry + */ +int +sr_localsid_register_function (vlib_main_t * vm, u8 * fn_name, + u8 * keyword_str, u8 * def_str, + u8 * params_str, dpo_type_t * dpo, + format_function_t * ls_format, + unformat_function_t * ls_unformat, + sr_plugin_callback_t * creation_fn, + sr_plugin_callback_t * removal_fn) +{ + ip6_sr_main_t *sm = &sr_main; + uword *p; + + sr_localsid_fn_registration_t *plugin; + + /* Did this function exist? 
If so update it */ + p = hash_get_mem (sm->plugin_functions_by_key, fn_name); + if (p) + { + plugin = pool_elt_at_index (sm->plugin_functions, p[0]); + } + /* Else create a new one and set hash key */ + else + { + pool_get (sm->plugin_functions, plugin); + hash_set_mem (sm->plugin_functions_by_key, fn_name, + plugin - sm->plugin_functions); + } + + memset (plugin, 0, sizeof (*plugin)); + + plugin->sr_localsid_function_number = (plugin - sm->plugin_functions); + plugin->sr_localsid_function_number += SR_BEHAVIOR_LAST; + plugin->ls_format = ls_format; + plugin->ls_unformat = ls_unformat; + plugin->creation = creation_fn; + plugin->removal = removal_fn; + clib_memcpy (&plugin->dpo, dpo, sizeof (dpo_type_t)); + plugin->function_name = format (0, "%s%c", fn_name, 0); + plugin->keyword_str = format (0, "%s%c", keyword_str, 0); + plugin->def_str = format (0, "%s%c", def_str, 0); + plugin->params_str = format (0, "%s%c", params_str, 0); + + return plugin->sr_localsid_function_number; +} + +/** + * @brief CLI function to 'show' all available SR LocalSID behaviors + */ +static clib_error_t * +show_sr_localsid_behaviors_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ip6_sr_main_t *sm = &sr_main; + sr_localsid_fn_registration_t *plugin; + sr_localsid_fn_registration_t **plugins_vec = 0; + int i; + + vlib_cli_output (vm, + "SR LocalSIDs behaviors:\n-----------------------\n\n"); + + /* *INDENT-OFF* */ + pool_foreach (plugin, sm->plugin_functions, + ({ vec_add1 (plugins_vec, plugin); })); + /* *INDENT-ON* */ + + /* Print static behaviors */ + vlib_cli_output (vm, "Default behaviors:\n" + "\tEnd\t-> Endpoint.\n" + "\tEnd.X\t-> Endpoint with decapsulation and Layer-3 cross-connect.\n" + "\t\tParameters: ' '\n" + "\tEnd.DX2\t-> Endpoint with decapsulation and Layer-2 cross-connect.\n" + "\t\tParameters: ''\n" + "\tEnd.DX6\t-> Endpoint with decapsulation and IPv6 cross-connect.\n" + "\t\tParameters: ' '\n" + "\tEnd.DX4\t-> Endpoint with decapsulation and IPv4 cross-connect.\n" + "\t\tParameters: ' '\n" + "\tEnd.DT6\t-> Endpoint with decapsulation and specific IPv6 table lookup.\n" + "\t\tParameters: ''\n" + "\tEnd.DT4\t-> Endpoint with decapsulation and specific IPv4 table lookup.\n" + "\t\tParameters: ''\n"); + vlib_cli_output (vm, "Plugin behaviors:\n"); + for (i = 0; i < vec_len (plugins_vec); i++) + { + plugin = plugins_vec[i]; + vlib_cli_output (vm, "\t%s\t-> %s.\n", plugin->keyword_str, + plugin->def_str); + vlib_cli_output (vm, "\t\tParameters: '%s'\n", plugin->params_str); + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_sr_localsid_behaviors_command, static) = { + .path = "show sr localsids behaviors", + .short_help = "show sr localsids behaviors", + .function = show_sr_localsid_behaviors_command_fn, +}; +/* *INDENT-ON* */ + +/** + * @brief SR LocalSID initialization + */ +clib_error_t * +sr_localsids_init (vlib_main_t * vm) +{ + /* Init memory for function keys */ + ip6_sr_main_t *sm = &sr_main; + mhash_init (&sm->sr_localsids_index_hash, sizeof (uword), + sizeof (ip6_address_t)); + /* Init SR behaviors DPO type */ + sr_localsid_dpo_type = dpo_register_new_type (&sr_loc_vft, sr_loc_nodes); + /* Init SR behaviors DPO type */ + sr_localsid_d_dpo_type = + dpo_register_new_type (&sr_loc_vft, sr_loc_d_nodes); + /* Init memory for localsid plugins */ + sm->plugin_functions_by_key = hash_create_string (0, sizeof (uword)); + return 0; +} + +VLIB_INIT_FUNCTION (sr_localsids_init); +/* +* fd.io coding-style-patch-verification: ON +* +* Local 
Variables: +* eval: (c-set-style "gnu") +* End: +*/ diff --git a/src/vnet/srv6/sr_localsid.md b/src/vnet/srv6/sr_localsid.md new file mode 100644 index 00000000..340af4a3 --- /dev/null +++ b/src/vnet/srv6/sr_localsid.md @@ -0,0 +1,58 @@ +# SR LocalSIDs {#srv6_localsid_doc} + +A local SID is associated with a Segment Routing behavior -or function- on the current node. + +The most basic behavior is called END. It simply activates the next SID in the current packet by decrementing the Segments Left value and updating the IPv6 DA. + +A local END SID is instantiated using the following CLI: + + sr localsid (del) address XX::YY behavior end + +This creates a new entry in the main FIB for IPv6 address XX::YY. All packets whose IPv6 DA matches this FIB entry are redirected to the sr-localsid node, where they are processed as described above. + +Other examples of local SIDs are the following: + + sr localsid (del) address XX::YY behavior end + sr localsid (del) address XX::YY behavior end.x GE0/1/0 2001::a + sr localsid (del) address XX::YY behavior end.dx6 GE0/1/0 2001::a + sr localsid (del) address XX::YY behavior end.dx4 GE0/1/0 10.0.0.1 + sr localsid (del) address XX::YY behavior end.dx2 GigabitE0/11/0 + sr localsid (del) address XX::YY behavior end.dt6 5 + sr localsid (del) address XX::YY behavior end.dt4 5 + +Note that all of these behaviors match the definitions of the SRv6 architecture (*draft-filsfils-spring-srv6-network-programming*). Please refer to this document for a detailed description of each behavior. + +Note also that you can configure the PSP flavor of the End and End.X behaviors by typing: + + sr localsid (del) address XX::YY behavior end psp + sr localsid (del) address XX::YY behavior end.x GE0/1/0 2001::a psp + +Help on the available local SID behaviors and their usage can be obtained with: + + help sr localsid + +Alternatively, they can be obtained using: + + show sr localsids behaviors + +The difference between these two commands is that the first one only displays the SR LocalSID behaviors that are built into VPP, while the latter also displays the behaviors added with the SR LocalSID Development Framework. + + +VPP keeps a 'My LocalSID Table' where it stores all the SR local SIDs instantiated as well as their parameters. Every time a new local SID is instantiated, a new entry is added to this table. In addition, counters for correctly and incorrectly processed traffic are maintained for each local SID. The counters store both the number of packets and bytes. + +The contents of the 'My LocalSID Table' are shown with: + + vpp# show sr localsids + SRv6 - My LocalSID Table: + ========================= + Address: c3::1 + Behavior: DX6 (Endpoint with decapsulation and IPv6 cross-connect) + Iface: GigabitEthernet0/5/0 + Next hop: b:c3::b + Good traffic: [51277 packets : 5332808 bytes] + Bad traffic: [0 packets : 0 bytes] + -------------------- + +The traffic counters can be reset with: + + vpp# clear sr localsid counters diff --git a/src/vnet/srv6/sr_packet.h b/src/vnet/srv6/sr_packet.h new file mode 100755 index 00000000..7af4ad4d --- /dev/null +++ b/src/vnet/srv6/sr_packet.h @@ -0,0 +1,159 @@ +#ifndef included_vnet_sr_packet_h +#define included_vnet_sr_packet_h + +#include + +/* + * ipv6 segment-routing header format + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * The Segment Routing Header (SRH) is defined as follows: + * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Next Header | Hdr Ext Len | Routing Type | Segments Left | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | First Segment | Flags | RESERVED | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | + * | Segment List[0] (128 bits IPv6 address) | + * | | + * | | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | + * | | + * ... + * | | + * | | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | + * | Segment List[n] (128 bits IPv6 address) | + * | | + * | | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * // // + * // Optional Type Length Value objects (variable) // + * // // + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * where: + * + * o Next Header: 8-bit selector. Identifies the type of header + * immediately following the SRH. + * + * o Hdr Ext Len: 8-bit unsigned integer, is the length of the SRH + * header in 8-octet units, not including the first 8 octets. + * + * o Routing Type: TBD, to be assigned by IANA (suggested value: 4). + * + * o Segments Left. Defined in [RFC2460], it contains the index, in + * the Segment List, of the next segment to inspect. Segments Left + * is decremented at each segment. + * + * o First Segment: contains the index, in the Segment List, of the + * first segment of the path which is in fact the last element of the + * Segment List. + * + * o Flags: 8 bits of flags. Following flags are defined: + * + * 0 1 2 3 4 5 6 7 + * +-+-+-+-+-+-+-+-+ + * |U|P|O|A|H| U | + * +-+-+-+-+-+-+-+-+ + * + * U: Unused and for future use. SHOULD be unset on transmission + * and MUST be ignored on receipt. + * + * P-flag: Protected flag. Set when the packet has been rerouted + * through FRR mechanism by an SR endpoint node. + * + * O-flag: OAM flag. When set, it indicates that this packet is + * an operations and management (OAM) packet. + * + * A-flag: Alert flag. If present, it means important Type Length + * Value (TLV) objects are present. See Section 3.1 for details + * on TLVs objects. + * + * H-flag: HMAC flag. If set, the HMAC TLV is present and is + * encoded as the last TLV of the SRH. In other words, the last + * 36 octets of the SRH represent the HMAC information. See + * Section 3.1.5 for details on the HMAC TLV. + * + * o RESERVED: SHOULD be unset on transmission and MUST be ignored on + * receipt. + * + * o Segment List[n]: 128 bit IPv6 addresses representing the nth + * segment in the Segment List. The Segment List is encoded starting + * from the last segment of the path. I.e., the first element of the + * segment list (Segment List [0]) contains the last segment of the + * path while the last segment of the Segment List (Segment List[n]) + * contains the first segment of the path. 
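+ *
+ * (Illustrative example: for a three-segment path S1 -> S2 -> S3 the header
+ * carries Segment List[0] = S3, Segment List[1] = S2, Segment List[2] = S1,
+ * and Segments Left starts at 2, i.e. pointing at S1, the active segment.)
+ *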
The index contained in + * "Segments Left" identifies the current active segment. + * + * o Type Length Value (TLV) are described in Section 3.1. + * + */ + +#ifndef IPPROTO_IPV6_ROUTE +#define IPPROTO_IPV6_ROUTE 43 +#endif + +#define ROUTING_HEADER_TYPE_SR 4 + +typedef struct +{ + /* Protocol for next header. */ + u8 protocol; + /* + * Length of routing header in 8 octet units, + * not including the first 8 octets + */ + u8 length; + + /* Type of routing header; type 4 = segement routing */ + u8 type; + + /* Next segment in the segment list */ + u8 segments_left; + + /* Pointer to the first segment in the header */ + u8 first_segment; + + /* Flag bits */ +#define IP6_SR_HEADER_FLAG_PROTECTED (0x40) +#define IP6_SR_HEADER_FLAG_OAM (0x20) +#define IP6_SR_HEADER_FLAG_ALERT (0x10) +#define IP6_SR_HEADER_FLAG_HMAC (0x80) + + /* values 0x0, 0x4 - 0x7 are reserved */ + u8 flags; + u16 reserved; + + /* The segment elts */ + ip6_address_t segments[0]; +} __attribute__ ((packed)) ip6_sr_header_t; + +/* +* fd.io coding-style-patch-verification: ON +* +* Local Variables: +* eval: (c-set-style "gnu") +* End: +*/ + +#endif /* included_vnet_sr_packet_h */ diff --git a/src/vnet/srv6/sr_policy.md b/src/vnet/srv6/sr_policy.md new file mode 100644 index 00000000..521b8461 --- /dev/null +++ b/src/vnet/srv6/sr_policy.md @@ -0,0 +1,56 @@ +# Creating a SR Policy {#srv6_policy_doc} + +An SR Policy is defined by a Binding SID and a weighted set of Segment Lists. + +A new SR policy is created with a first SID list using: + + sr policy add bsid 2001::1 next A1:: next B1:: next C1:: (weight 5) (fib-table 3) + +* The weight parameter is only used if more than one SID list is associated with the policy. +* The fib-table parameter specifies in which table (VRF) the Binding SID is to be installed. + +An SR policy is deleted with: + + sr policy del bsid 2001::1 + sr policy del index 1 + +The existing SR policies are listed with: + + show sr policies + +## Adding/Removing SID Lists from an SR policy + +An additional SID list is associated with an existing SR policy with: + + sr policy mod bsid 2001::1 add sl next A2:: next B2:: next C2:: (weight 3) + sr policy mod index 3 add sl next A2:: next B2:: next C2:: (weight 3) + +Conversely, a SID list can be removed from an SR policy with: + + sr policy mod bsid 2001::1 del sl index 1 + sr policy mod index 3 del sl index 1 + +Note that this cannot be used to remove the last SID list of a policy. + +The weight of a SID list can also be modified with: + + sr policy mod bsid 2001::1 mod sl index 1 weight 4 + sr policy mod index 3 mod sl index 1 weight 4 + +## SR Policies: Spray policies + +Spray policies are a specific type of SR policies where the packet is replicated on all the SID lists, rather than load-balanced among them. + +SID list weights are ignored with this type of policies. + +A Spray policy is instantiated by appending the keyword **spray** to a regular SR policy command, as in: + + sr policy add bsid 2001::1 next A1:: next B1:: next C1:: spray + +Spray policies are used for removing multicast state from a network core domain, and instead send a linear unicast copy to every access node. The last SID in each list accesses the multicast tree within the access node. + +## Encapsulation SR policies + +In case the user decides to create an SR policy an IPv6 Source Address must be specified for the encapsulated traffic. 
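+(The configured address is used as the outer IPv6 source address of every packet steered into an encapsulation SR policy.)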
In order to do so the user might use the following command: + + set sr encaps source addr XXXX::YYYY diff --git a/src/vnet/srv6/sr_policy_rewrite.c b/src/vnet/srv6/sr_policy_rewrite.c new file mode 100755 index 00000000..7a37a66b --- /dev/null +++ b/src/vnet/srv6/sr_policy_rewrite.c @@ -0,0 +1,3227 @@ +/* + * sr_policy_rewrite.c: ipv6 sr policy creation + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file + * @brief SR policy creation and application + * + * Create an SR policy. + * An SR policy can be either of 'default' type or 'spray' type + * An SR policy has attached a list of SID lists. + * In case the SR policy is a default one it will load balance among them. + * An SR policy has associated a BindingSID. + * In case any packet arrives with IPv6 DA == BindingSID then the SR policy + * associated to such bindingSID will be applied to such packet. + * + * SR policies can be applied either by using IPv6 encapsulation or + * SRH insertion. Both methods can be found on this file. + * + * Traffic input usually is IPv6 packets. However it is possible to have + * IPv4 packets or L2 frames. (that are encapsulated into IPv6 with SRH) + * + * This file provides the appropiates VPP graph nodes to do any of these + * methods. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/** + * @brief SR policy rewrite trace + */ +typedef struct +{ + ip6_address_t src, dst; +} sr_policy_rewrite_trace_t; + +/* Graph arcs */ +#define foreach_sr_policy_rewrite_next \ +_(IP6_LOOKUP, "ip6-lookup") \ +_(ERROR, "error-drop") + +typedef enum +{ +#define _(s,n) SR_POLICY_REWRITE_NEXT_##s, + foreach_sr_policy_rewrite_next +#undef _ + SR_POLICY_REWRITE_N_NEXT, +} sr_policy_rewrite_next_t; + +/* SR rewrite errors */ +#define foreach_sr_policy_rewrite_error \ +_(INTERNAL_ERROR, "Segment Routing undefined error") \ +_(BSID_ZERO, "BSID with SL = 0") \ +_(COUNTER_TOTAL, "SR steered IPv6 packets") \ +_(COUNTER_ENCAP, "SR: Encaps packets") \ +_(COUNTER_INSERT, "SR: SRH inserted packets") \ +_(COUNTER_BSID, "SR: BindingSID steered packets") + +typedef enum +{ +#define _(sym,str) SR_POLICY_REWRITE_ERROR_##sym, + foreach_sr_policy_rewrite_error +#undef _ + SR_POLICY_REWRITE_N_ERROR, +} sr_policy_rewrite_error_t; + +static char *sr_policy_rewrite_error_strings[] = { +#define _(sym,string) string, + foreach_sr_policy_rewrite_error +#undef _ +}; + +/** + * @brief Dynamically added SR SL DPO type + */ +static dpo_type_t sr_pr_encaps_dpo_type; +static dpo_type_t sr_pr_insert_dpo_type; +static dpo_type_t sr_pr_bsid_encaps_dpo_type; +static dpo_type_t sr_pr_bsid_insert_dpo_type; + +/** + * @brief IPv6 SA for encapsulated packets + */ +static ip6_address_t sr_pr_encaps_src; + +/******************* SR rewrite set encaps IPv6 source addr *******************/ +/* Note: This is temporal. 
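+ (At the moment the encapsulation source is a single global, sr_pr_encaps_src,
+ set through the "set sr encaps source addr" CLI implemented just below.)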
We don't know whether to follow this path or + take the ip address of a loopback interface or even the OIF */ + +static clib_error_t * +set_sr_src_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (input, "addr %U", unformat_ip6_address, &sr_pr_encaps_src)) + return 0; + else + return clib_error_return (0, "No address specified"); + } + return clib_error_return (0, "No address specified"); +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (set_sr_src_command, static) = { + .path = "set sr encaps source", + .short_help = "set sr encaps source addr ", + .function = set_sr_src_command_fn, +}; +/* *INDENT-ON* */ + +/*********************** SR rewrite string computation ************************/ +/** + * @brief SR rewrite string computation for IPv6 encapsulation (inline) + * + * @param sl is a vector of IPv6 addresses composing the Segment List + * + * @return precomputed rewrite string for encapsulation + */ +static inline u8 * +compute_rewrite_encaps (ip6_address_t * sl) +{ + ip6_header_t *iph; + ip6_sr_header_t *srh; + ip6_address_t *addrp, *this_address; + u32 header_length = 0; + u8 *rs = NULL; + + header_length = 0; + header_length += IPv6_DEFAULT_HEADER_LENGTH; + if (vec_len (sl) > 1) + { + header_length += sizeof (ip6_sr_header_t); + header_length += vec_len (sl) * sizeof (ip6_address_t); + } + + vec_validate (rs, header_length - 1); + + iph = (ip6_header_t *) rs; + iph->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 (0 | ((6 & 0xF) << 28)); + iph->src_address.as_u64[0] = sr_pr_encaps_src.as_u64[0]; + iph->src_address.as_u64[1] = sr_pr_encaps_src.as_u64[1]; + iph->payload_length = header_length - IPv6_DEFAULT_HEADER_LENGTH; + iph->protocol = IP_PROTOCOL_IPV6; + iph->hop_limit = IPv6_DEFAULT_HOP_LIMIT; + + srh = (ip6_sr_header_t *) (iph + 1); + iph->protocol = IP_PROTOCOL_IPV6_ROUTE; + srh->protocol = IP_PROTOCOL_IPV6; + srh->type = ROUTING_HEADER_TYPE_SR; + srh->segments_left = vec_len (sl) - 1; + srh->first_segment = vec_len (sl) - 1; + srh->length = ((sizeof (ip6_sr_header_t) + + (vec_len (sl) * sizeof (ip6_address_t))) / 8) - 1; + srh->flags = 0x00; + srh->reserved = 0x00; + addrp = srh->segments + vec_len (sl) - 1; + vec_foreach (this_address, sl) + { + clib_memcpy (addrp->as_u8, this_address->as_u8, sizeof (ip6_address_t)); + addrp--; + } + iph->dst_address.as_u64[0] = sl->as_u64[0]; + iph->dst_address.as_u64[1] = sl->as_u64[1]; + return rs; +} + +/** + * @brief SR rewrite string computation for SRH insertion (inline) + * + * @param sl is a vector of IPv6 addresses composing the Segment List + * + * @return precomputed rewrite string for SRH insertion + */ +static inline u8 * +compute_rewrite_insert (ip6_address_t * sl) +{ + ip6_sr_header_t *srh; + ip6_address_t *addrp, *this_address; + u32 header_length = 0; + u8 *rs = NULL; + + header_length = 0; + header_length += sizeof (ip6_sr_header_t); + header_length += (vec_len (sl) + 1) * sizeof (ip6_address_t); + + vec_validate (rs, header_length - 1); + + srh = (ip6_sr_header_t *) rs; + srh->type = ROUTING_HEADER_TYPE_SR; + srh->segments_left = vec_len (sl); + srh->first_segment = vec_len (sl); + srh->length = ((sizeof (ip6_sr_header_t) + + ((vec_len (sl) + 1) * sizeof (ip6_address_t))) / 8) - 1; + srh->flags = 0x00; + srh->reserved = 0x0000; + addrp = srh->segments + vec_len (sl); + vec_foreach (this_address, sl) + { + clib_memcpy (addrp->as_u8, this_address->as_u8, sizeof (ip6_address_t)); + addrp--; + 
} + return rs; +} + +/** + * @brief SR rewrite string computation for SRH insertion with BSID (inline) + * + * @param sl is a vector of IPv6 addresses composing the Segment List + * + * @return precomputed rewrite string for SRH insertion with BSID + */ +static inline u8 * +compute_rewrite_bsid (ip6_address_t * sl) +{ + ip6_sr_header_t *srh; + ip6_address_t *addrp, *this_address; + u32 header_length = 0; + u8 *rs = NULL; + + header_length = 0; + header_length += sizeof (ip6_sr_header_t); + header_length += vec_len (sl) * sizeof (ip6_address_t); + + vec_validate (rs, header_length - 1); + + srh = (ip6_sr_header_t *) rs; + srh->type = ROUTING_HEADER_TYPE_SR; + srh->segments_left = vec_len (sl) - 1; + srh->first_segment = vec_len (sl) - 1; + srh->length = ((sizeof (ip6_sr_header_t) + + (vec_len (sl) * sizeof (ip6_address_t))) / 8) - 1; + srh->flags = 0x00; + srh->reserved = 0x0000; + addrp = srh->segments + vec_len (sl) - 1; + vec_foreach (this_address, sl) + { + clib_memcpy (addrp->as_u8, this_address->as_u8, sizeof (ip6_address_t)); + addrp--; + } + return rs; +} + +/*************************** SR LB helper functions **************************/ +/** + * @brief Creates a Segment List and adds it to an SR policy + * + * Creates a Segment List and adds it to the SR policy. Notice that the SL are + * not necessarily unique. Hence there might be two Segment List within the + * same SR Policy with exactly the same segments and same weight. + * + * @param sr_policy is the SR policy where the SL will be added + * @param sl is a vector of IPv6 addresses composing the Segment List + * @param weight is the weight of the SegmentList (for load-balancing purposes) + * @param is_encap represents the mode (SRH insertion vs Encapsulation) + * + * @return pointer to the just created segment list + */ +static inline ip6_sr_sl_t * +create_sl (ip6_sr_policy_t * sr_policy, ip6_address_t * sl, u32 weight, + u8 is_encap) +{ + ip6_sr_main_t *sm = &sr_main; + ip6_sr_sl_t *segment_list; + + pool_get (sm->sid_lists, segment_list); + memset (segment_list, 0, sizeof (*segment_list)); + + vec_add1 (sr_policy->segments_lists, segment_list - sm->sid_lists); + + /* Fill in segment list */ + segment_list->weight = + (weight != (u32) ~ 0 ? 
weight : SR_SEGMENT_LIST_WEIGHT_DEFAULT); + segment_list->segments = vec_dup (sl); + + if (is_encap) + { + segment_list->rewrite = compute_rewrite_encaps (sl); + segment_list->rewrite_bsid = segment_list->rewrite; + } + else + { + segment_list->rewrite = compute_rewrite_insert (sl); + segment_list->rewrite_bsid = compute_rewrite_bsid (sl); + } + + /* Create DPO */ + dpo_reset (&segment_list->bsid_dpo); + dpo_reset (&segment_list->ip6_dpo); + dpo_reset (&segment_list->ip4_dpo); + + if (is_encap) + { + dpo_set (&segment_list->ip6_dpo, sr_pr_encaps_dpo_type, DPO_PROTO_IP6, + segment_list - sm->sid_lists); + dpo_set (&segment_list->ip4_dpo, sr_pr_encaps_dpo_type, DPO_PROTO_IP4, + segment_list - sm->sid_lists); + dpo_set (&segment_list->bsid_dpo, sr_pr_bsid_encaps_dpo_type, + DPO_PROTO_IP6, segment_list - sm->sid_lists); + } + else + { + dpo_set (&segment_list->ip6_dpo, sr_pr_insert_dpo_type, DPO_PROTO_IP6, + segment_list - sm->sid_lists); + dpo_set (&segment_list->bsid_dpo, sr_pr_bsid_insert_dpo_type, + DPO_PROTO_IP6, segment_list - sm->sid_lists); + } + + return segment_list; +} + +/** + * @brief Updates the Load Balancer after an SR Policy change + * + * @param sr_policy is the modified SR Policy + */ +static inline void +update_lb (ip6_sr_policy_t * sr_policy) +{ + flow_hash_config_t fhc; + u32 *sl_index; + ip6_sr_sl_t *segment_list; + ip6_sr_main_t *sm = &sr_main; + load_balance_path_t path; + path.path_index = FIB_NODE_INDEX_INVALID; + load_balance_path_t *ip4_path_vector = 0; + load_balance_path_t *ip6_path_vector = 0; + load_balance_path_t *b_path_vector = 0; + + /* In case LB does not exist, create it */ + if (!dpo_id_is_valid (&sr_policy->bsid_dpo)) + { + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP6, + .fp_len = 128, + .fp_addr = { + .ip6 = sr_policy->bsid, + } + }; + + /* Add FIB entry for BSID */ + fhc = fib_table_get_flow_hash_config (sr_policy->fib_table, + dpo_proto_to_fib (DPO_PROTO_IP6)); + + dpo_set (&sr_policy->bsid_dpo, DPO_LOAD_BALANCE, DPO_PROTO_IP6, + load_balance_create (0, DPO_PROTO_IP6, fhc)); + + dpo_set (&sr_policy->ip6_dpo, DPO_LOAD_BALANCE, DPO_PROTO_IP6, + load_balance_create (0, DPO_PROTO_IP6, fhc)); + + /* Update FIB entry's to point to the LB DPO in the main FIB and hidden one */ + fib_table_entry_special_dpo_update (fib_table_find (FIB_PROTOCOL_IP6, + sr_policy->fib_table), + &pfx, FIB_SOURCE_SR, + FIB_ENTRY_FLAG_EXCLUSIVE, + &sr_policy->bsid_dpo); + + fib_table_entry_special_dpo_update (sm->fib_table_ip6, + &pfx, + FIB_SOURCE_SR, + FIB_ENTRY_FLAG_EXCLUSIVE, + &sr_policy->ip6_dpo); + + if (sr_policy->is_encap) + { + dpo_set (&sr_policy->ip4_dpo, DPO_LOAD_BALANCE, DPO_PROTO_IP4, + load_balance_create (0, DPO_PROTO_IP4, fhc)); + + fib_table_entry_special_dpo_update (sm->fib_table_ip4, + &pfx, + FIB_SOURCE_SR, + FIB_ENTRY_FLAG_EXCLUSIVE, + &sr_policy->ip4_dpo); + } + + } + + /* Create the LB path vector */ + //path_vector = vec_new(load_balance_path_t, vec_len(sr_policy->segments_lists)); + vec_foreach (sl_index, sr_policy->segments_lists) + { + segment_list = pool_elt_at_index (sm->sid_lists, *sl_index); + path.path_dpo = segment_list->bsid_dpo; + path.path_weight = segment_list->weight; + vec_add1 (b_path_vector, path); + path.path_dpo = segment_list->ip6_dpo; + vec_add1 (ip6_path_vector, path); + if (sr_policy->is_encap) + { + path.path_dpo = segment_list->ip4_dpo; + vec_add1 (ip4_path_vector, path); + } + } + + /* Update LB multipath */ + load_balance_multipath_update (&sr_policy->bsid_dpo, b_path_vector, + LOAD_BALANCE_FLAG_NONE); + 
load_balance_multipath_update (&sr_policy->ip6_dpo, ip6_path_vector, + LOAD_BALANCE_FLAG_NONE); + if (sr_policy->is_encap) + load_balance_multipath_update (&sr_policy->ip4_dpo, ip4_path_vector, + LOAD_BALANCE_FLAG_NONE); + + /* Cleanup */ + vec_free (b_path_vector); + vec_free (ip6_path_vector); + vec_free (ip4_path_vector); + +} + +/** + * @brief Updates the Replicate DPO after an SR Policy change + * + * @param sr_policy is the modified SR Policy (type spray) + */ +static inline void +update_replicate (ip6_sr_policy_t * sr_policy) +{ + u32 *sl_index; + ip6_sr_sl_t *segment_list; + ip6_sr_main_t *sm = &sr_main; + load_balance_path_t path; + path.path_index = FIB_NODE_INDEX_INVALID; + load_balance_path_t *b_path_vector = 0; + load_balance_path_t *ip6_path_vector = 0; + load_balance_path_t *ip4_path_vector = 0; + + /* In case LB does not exist, create it */ + if (!dpo_id_is_valid (&sr_policy->bsid_dpo)) + { + dpo_set (&sr_policy->bsid_dpo, DPO_REPLICATE, + DPO_PROTO_IP6, replicate_create (0, DPO_PROTO_IP6)); + + dpo_set (&sr_policy->ip6_dpo, DPO_REPLICATE, + DPO_PROTO_IP6, replicate_create (0, DPO_PROTO_IP6)); + + /* Update FIB entry's DPO to point to SR without LB */ + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP6, + .fp_len = 128, + .fp_addr = { + .ip6 = sr_policy->bsid, + } + }; + fib_table_entry_special_dpo_update (fib_table_find (FIB_PROTOCOL_IP6, + sr_policy->fib_table), + &pfx, FIB_SOURCE_SR, + FIB_ENTRY_FLAG_EXCLUSIVE, + &sr_policy->bsid_dpo); + + fib_table_entry_special_dpo_update (sm->fib_table_ip6, + &pfx, + FIB_SOURCE_SR, + FIB_ENTRY_FLAG_EXCLUSIVE, + &sr_policy->ip6_dpo); + + if (sr_policy->is_encap) + { + dpo_set (&sr_policy->ip4_dpo, DPO_REPLICATE, DPO_PROTO_IP4, + replicate_create (0, DPO_PROTO_IP4)); + + fib_table_entry_special_dpo_update (sm->fib_table_ip4, + &pfx, + FIB_SOURCE_SR, + FIB_ENTRY_FLAG_EXCLUSIVE, + &sr_policy->ip4_dpo); + } + + } + + /* Create the replicate path vector */ + path.path_weight = 1; + vec_foreach (sl_index, sr_policy->segments_lists) + { + segment_list = pool_elt_at_index (sm->sid_lists, *sl_index); + path.path_dpo = segment_list->bsid_dpo; + vec_add1 (b_path_vector, path); + path.path_dpo = segment_list->ip6_dpo; + vec_add1 (ip6_path_vector, path); + if (sr_policy->is_encap) + { + path.path_dpo = segment_list->ip4_dpo; + vec_add1 (ip4_path_vector, path); + } + } + + /* Update replicate multipath */ + replicate_multipath_update (&sr_policy->bsid_dpo, b_path_vector); + replicate_multipath_update (&sr_policy->ip6_dpo, ip6_path_vector); + if (sr_policy->is_encap) + replicate_multipath_update (&sr_policy->ip4_dpo, ip4_path_vector); +} + +/******************************* SR rewrite API *******************************/ +/* Three functions for handling sr policies: + * -> sr_policy_add + * -> sr_policy_del + * -> sr_policy_mod + * All of them are API. CLI function on sr_policy_command_fn */ + +/** + * @brief Create a new SR policy + * + * @param bsid is the bindingSID of the SR Policy + * @param segments is a vector of IPv6 address composing the segment list + * @param weight is the weight of the sid list. optional. + * @param behavior is the behavior of the SR policy. 
(default//spray) + * @param fib_table is the VRF where to install the FIB entry for the BSID + * @param is_encap (bool) whether SR policy should behave as Encap/SRH Insertion + * + * @return 0 if correct, else error + */ +int +sr_policy_add (ip6_address_t * bsid, ip6_address_t * segments, + u32 weight, u8 behavior, u32 fib_table, u8 is_encap) +{ + ip6_sr_main_t *sm = &sr_main; + ip6_sr_policy_t *sr_policy = 0; + uword *p; + + /* Search for existing keys (BSID) */ + p = mhash_get (&sm->sr_policies_index_hash, bsid); + if (p) + { + /* Add SR policy that already exists; complain */ + return -12; + } + + /* Search collision in FIB entries */ + /* Explanation: It might be possible that some other entity has already + * created a route for the BSID. This in theory is impossible, but in + * practise we could see it. Assert it and scream if needed */ + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP6, + .fp_len = 128, + .fp_addr = { + .ip6 = *bsid, + } + }; + + /* Lookup the FIB index associated to the table selected */ + u32 fib_index = fib_table_find (FIB_PROTOCOL_IP6, + (fib_table != (u32) ~ 0 ? fib_table : 0)); + if (fib_index == ~0) + return -13; + + /* Lookup whether there exists an entry for the BSID */ + fib_node_index_t fei = fib_table_lookup_exact_match (fib_index, &pfx); + if (FIB_NODE_INDEX_INVALID != fei) + return -12; //There is an entry for such lookup + + /* Add an SR policy object */ + pool_get (sm->sr_policies, sr_policy); + memset (sr_policy, 0, sizeof (*sr_policy)); + clib_memcpy (&sr_policy->bsid, bsid, sizeof (ip6_address_t)); + sr_policy->type = behavior; + sr_policy->fib_table = (fib_table != (u32) ~ 0 ? fib_table : 0); //Is default FIB 0 ? + sr_policy->is_encap = is_encap; + + /* Copy the key */ + mhash_set (&sm->sr_policies_index_hash, bsid, sr_policy - sm->sr_policies, + NULL); + + /* Create a segment list and add the index to the SR policy */ + create_sl (sr_policy, segments, weight, is_encap); + + /* If FIB doesnt exist, create them */ + if (sm->fib_table_ip6 == (u32) ~ 0) + { + sm->fib_table_ip6 = fib_table_create_and_lock (FIB_PROTOCOL_IP6, + "SRv6 steering of IP6 prefixes through BSIDs"); + sm->fib_table_ip4 = fib_table_create_and_lock (FIB_PROTOCOL_IP6, + "SRv6 steering of IP4 prefixes through BSIDs"); + } + + /* Create IPv6 FIB for the BindingSID attached to the DPO of the only SL */ + if (sr_policy->type == SR_POLICY_TYPE_DEFAULT) + update_lb (sr_policy); + else if (sr_policy->type == SR_POLICY_TYPE_SPRAY) + update_replicate (sr_policy); + return 0; +} + +/** + * @brief Delete a SR policy + * + * @param bsid is the bindingSID of the SR Policy + * @param index is the index of the SR policy + * + * @return 0 if correct, else error + */ +int +sr_policy_del (ip6_address_t * bsid, u32 index) +{ + ip6_sr_main_t *sm = &sr_main; + ip6_sr_policy_t *sr_policy = 0; + ip6_sr_sl_t *segment_list; + u32 *sl_index; + uword *p; + + if (bsid) + { + p = mhash_get (&sm->sr_policies_index_hash, bsid); + if (p) + sr_policy = pool_elt_at_index (sm->sr_policies, p[0]); + else + return -1; + } + else + { + sr_policy = pool_elt_at_index (sm->sr_policies, index); + if (!sr_policy) + return -1; + } + + /* Remove BindingSID FIB entry */ + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP6, + .fp_len = 128, + .fp_addr = { + .ip6 = sr_policy->bsid, + } + , + }; + + fib_table_entry_special_remove (fib_table_find (FIB_PROTOCOL_IP6, + sr_policy->fib_table), + &pfx, FIB_SOURCE_SR); + + fib_table_entry_special_remove (sm->fib_table_ip6, &pfx, FIB_SOURCE_SR); + + if (sr_policy->is_encap) + 
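/* Encap policies also installed the BSID in the hidden IPv4 steering FIB; remove that entry as well */ +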
fib_table_entry_special_remove (sm->fib_table_ip4, &pfx, FIB_SOURCE_SR); + + if (dpo_id_is_valid (&sr_policy->bsid_dpo)) + { + dpo_reset (&sr_policy->bsid_dpo); + dpo_reset (&sr_policy->ip4_dpo); + dpo_reset (&sr_policy->ip6_dpo); + } + + /* Clean SID Lists */ + vec_foreach (sl_index, sr_policy->segments_lists) + { + segment_list = pool_elt_at_index (sm->sid_lists, *sl_index); + vec_free (segment_list->segments); + vec_free (segment_list->rewrite); + vec_free (segment_list->rewrite_bsid); + pool_put_index (sm->sid_lists, *sl_index); + } + + /* Remove SR policy entry */ + mhash_unset (&sm->sr_policies_index_hash, &sr_policy->bsid, NULL); + pool_put (sm->sr_policies, sr_policy); + + /* If FIB empty unlock it */ + if (!pool_elts (sm->sr_policies) && !pool_elts (sm->steer_policies)) + { + fib_table_unlock (sm->fib_table_ip6, FIB_PROTOCOL_IP6); + fib_table_unlock (sm->fib_table_ip4, FIB_PROTOCOL_IP6); + sm->fib_table_ip6 = (u32) ~ 0; + sm->fib_table_ip4 = (u32) ~ 0; + } + + return 0; +} + +/** + * @brief Modify an existing SR policy + * + * The possible modifications are adding a new Segment List, modifying an + * existing Segment List (modify the weight only) and delete a given + * Segment List from the SR Policy. + * + * @param bsid is the bindingSID of the SR Policy + * @param index is the index of the SR policy + * @param fib_table is the VRF where to install the FIB entry for the BSID + * @param operation is the operation to perform (among the top ones) + * @param segments is a vector of IPv6 address composing the segment list + * @param sl_index is the index of the Segment List to modify/delete + * @param weight is the weight of the sid list. optional. + * @param is_encap Mode. Encapsulation or SRH insertion. + * + * @return 0 if correct, else error + */ +int +sr_policy_mod (ip6_address_t * bsid, u32 index, u32 fib_table, + u8 operation, ip6_address_t * segments, u32 sl_index, + u32 weight) +{ + ip6_sr_main_t *sm = &sr_main; + ip6_sr_policy_t *sr_policy = 0; + ip6_sr_sl_t *segment_list; + u32 *sl_index_iterate; + uword *p; + + if (bsid) + { + p = mhash_get (&sm->sr_policies_index_hash, bsid); + if (p) + sr_policy = pool_elt_at_index (sm->sr_policies, p[0]); + else + return -1; + } + else + { + sr_policy = pool_elt_at_index (sm->sr_policies, index); + if (!sr_policy) + return -1; + } + + if (operation == 1) /* Add SR List to an existing SR policy */ + { + /* Create the new SL */ + segment_list = + create_sl (sr_policy, segments, weight, sr_policy->is_encap); + + /* Create a new LB DPO */ + if (sr_policy->type == SR_POLICY_TYPE_DEFAULT) + update_lb (sr_policy); + else if (sr_policy->type == SR_POLICY_TYPE_SPRAY) + update_replicate (sr_policy); + } + else if (operation == 2) /* Delete SR List from an existing SR policy */ + { + /* Check that currently there are more than one SID list */ + if (vec_len (sr_policy->segments_lists) == 1) + return -21; + + /* Check that the SR list does exist and is assigned to the sr policy */ + vec_foreach (sl_index_iterate, sr_policy->segments_lists) + if (*sl_index_iterate == sl_index) + break; + + if (*sl_index_iterate != sl_index) + return -22; + + /* Remove the lucky SR list that is being kicked out */ + segment_list = pool_elt_at_index (sm->sid_lists, sl_index); + vec_free (segment_list->segments); + vec_free (segment_list->rewrite); + vec_free (segment_list->rewrite_bsid); + pool_put_index (sm->sid_lists, sl_index); + vec_del1 (sr_policy->segments_lists, + sl_index_iterate - sr_policy->segments_lists); + + /* Create a new LB DPO */ + if (sr_policy->type 
== SR_POLICY_TYPE_DEFAULT) + update_lb (sr_policy); + else if (sr_policy->type == SR_POLICY_TYPE_SPRAY) + update_replicate (sr_policy); + } + else if (operation == 3) /* Modify the weight of an existing SR List */ + { + /* Find the corresponding SL */ + vec_foreach (sl_index_iterate, sr_policy->segments_lists) + if (*sl_index_iterate == sl_index) + break; + + if (*sl_index_iterate != sl_index) + return -32; + + /* Change the weight */ + segment_list = pool_elt_at_index (sm->sid_lists, sl_index); + segment_list->weight = weight; + + /* Update LB */ + if (sr_policy->type == SR_POLICY_TYPE_DEFAULT) + update_lb (sr_policy); + } + else /* Incorrect op. */ + return -1; + + return 0; +} + +/** + * @brief CLI for 'sr policies' command family + */ +static clib_error_t * +sr_policy_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + int rv = -1; + char is_del = 0, is_add = 0, is_mod = 0; + char policy_set = 0; + ip6_address_t bsid, next_address; + u32 sr_policy_index = (u32) ~ 0, sl_index = (u32) ~ 0; + u32 weight = (u32) ~ 0, fib_table = (u32) ~ 0; + ip6_address_t *segments = 0, *this_seg; + u8 operation = 0; + char is_encap = 1; + char is_spray = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (!is_add && !is_mod && !is_del && unformat (input, "add")) + is_add = 1; + else if (!is_add && !is_mod && !is_del && unformat (input, "del")) + is_del = 1; + else if (!is_add && !is_mod && !is_del && unformat (input, "mod")) + is_mod = 1; + else if (!policy_set + && unformat (input, "bsid %U", unformat_ip6_address, &bsid)) + policy_set = 1; + else if (!is_add && !policy_set + && unformat (input, "index %d", &sr_policy_index)) + policy_set = 1; + else if (unformat (input, "weight %d", &weight)); + else + if (unformat (input, "next %U", unformat_ip6_address, &next_address)) + { + vec_add2 (segments, this_seg, 1); + clib_memcpy (this_seg->as_u8, next_address.as_u8, + sizeof (*this_seg)); + } + else if (unformat (input, "add sl")) + operation = 1; + else if (unformat (input, "del sl index %d", &sl_index)) + operation = 2; + else if (unformat (input, "mod sl index %d", &sl_index)) + operation = 3; + else if (fib_table == (u32) ~ 0 + && unformat (input, "fib-table %d", &fib_table)); + else if (unformat (input, "encap")) + is_encap = 1; + else if (unformat (input, "insert")) + is_encap = 0; + else if (unformat (input, "spray")) + is_spray = 1; + else + break; + } + + if (!is_add && !is_mod && !is_del) + return clib_error_return (0, "Incorrect CLI"); + + if (!policy_set) + return clib_error_return (0, "No SR policy BSID or index specified"); + + if (is_add) + { + if (vec_len (segments) == 0) + return clib_error_return (0, "No Segment List specified"); + rv = sr_policy_add (&bsid, segments, weight, + (is_spray ? SR_POLICY_TYPE_SPRAY : + SR_POLICY_TYPE_DEFAULT), fib_table, is_encap); + } + else if (is_del) + rv = sr_policy_del ((sr_policy_index != (u32) ~ 0 ? NULL : &bsid), + sr_policy_index); + else if (is_mod) + { + if (!operation) + return clib_error_return (0, "No SL modification specified"); + if (operation != 1 && sl_index == (u32) ~ 0) + return clib_error_return (0, "No Segment List index specified"); + if (operation == 1 && vec_len (segments) == 0) + return clib_error_return (0, "No Segment List specified"); + if (operation == 3 && weight == (u32) ~ 0) + return clib_error_return (0, "No new weight for the SL specified"); + rv = sr_policy_mod ((sr_policy_index != (u32) ~ 0 ? 
NULL : &bsid), + sr_policy_index, fib_table, operation, segments, + sl_index, weight); + } + + switch (rv) + { + case 0: + break; + case 1: + return 0; + case -12: + return clib_error_return (0, + "There is already a FIB entry for the BindingSID address.\n" + "The SR policy could not be created."); + case -13: + return clib_error_return (0, "The specified FIB table does not exist."); + case -21: + return clib_error_return (0, + "The selected SR policy only contains ONE segment list. " + "Please remove the SR policy instead"); + case -22: + return clib_error_return (0, + "Could not delete the segment list. " + "It is not associated with that SR policy."); + case -32: + return clib_error_return (0, + "Could not modify the segment list. " + "The given SL is not associated with such SR policy."); + default: + return clib_error_return (0, "BUG: sr policy returns %d", rv); + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (sr_policy_command, static) = { + .path = "sr policy", + .short_help = "sr policy [add||del||mod] [bsid 2001::1||index 5] " + "next A:: next B:: next C:: (weight 1) (fib-table 2) (encap|insert)", + .long_help = + "Manipulation of SR policies.\n" + "A Segment Routing policy may contain several SID lists. Each SID list has\n" + "an associated weight (default 1), which will result in wECMP (uECMP).\n" + "Segment Routing policies might be of type encapsulation or srh insertion\n" + "Each SR policy will be associated with a unique BindingSID.\n" + "A BindingSID is a locally allocated SegmentID. For every packet that arrives\n" + "with IPv6_DA:BSID such traffic will be steered into the SR policy.\n" + "The add command will create a SR policy with its first segment list (sl)\n" + "The mod command allows you to add, remove, or modify the existing segment lists\n" + "within an SR policy.\n" + "The del command allows you to delete a SR policy along with all its associated\n" + "SID lists.\n", + .function = sr_policy_command_fn, +}; +/* *INDENT-ON* */ + +/** + * @brief CLI to display onscreen all the SR policies + */ +static clib_error_t * +show_sr_policies_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ip6_sr_main_t *sm = &sr_main; + u32 *sl_index; + ip6_sr_sl_t *segment_list = 0; + ip6_sr_policy_t *sr_policy = 0; + ip6_sr_policy_t **vec_policies = 0; + ip6_address_t *addr; + u8 *s; + int i = 0; + + vlib_cli_output (vm, "SR policies:"); + + /* *INDENT-OFF* */ + pool_foreach (sr_policy, sm->sr_policies, + {vec_add1 (vec_policies, sr_policy); } ); + /* *INDENT-ON* */ + + vec_foreach_index (i, vec_policies) + { + sr_policy = vec_policies[i]; + vlib_cli_output (vm, "[%u].-\tBSID: %U", + (u32) (sr_policy - sm->sr_policies), + format_ip6_address, &sr_policy->bsid); + vlib_cli_output (vm, "\tBehavior: %s", + (sr_policy->is_encap ? "Encapsulation" : + "SRH insertion")); + vlib_cli_output (vm, "\tType: %s", + (sr_policy->type == + SR_POLICY_TYPE_DEFAULT ? "Default" : "Spray")); + vlib_cli_output (vm, "\tFIB table: %u", + (sr_policy->fib_table != + (u32) ~ 0 ? 
sr_policy->fib_table : 0)); + vlib_cli_output (vm, "\tSegment Lists:"); + vec_foreach (sl_index, sr_policy->segments_lists) + { + s = NULL; + s = format (s, "\t[%u].- ", *sl_index); + segment_list = pool_elt_at_index (sm->sid_lists, *sl_index); + s = format (s, "< "); + vec_foreach (addr, segment_list->segments) + { + s = format (s, "%U, ", format_ip6_address, addr); + } + s = format (s, "\b\b > "); + s = format (s, "weight: %u", segment_list->weight); + vlib_cli_output (vm, " %s", s); + } + vlib_cli_output (vm, "-----------"); + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_sr_policies_command, static) = { + .path = "show sr policies", + .short_help = "show sr policies", + .function = show_sr_policies_command_fn, +}; +/* *INDENT-ON* */ + +/*************************** SR rewrite graph node ****************************/ +/** + * @brief Trace for the SR Policy Rewrite graph node + */ +static u8 * +format_sr_policy_rewrite_trace (u8 * s, va_list * args) +{ + //TODO + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + sr_policy_rewrite_trace_t *t = va_arg (*args, sr_policy_rewrite_trace_t *); + + s = format + (s, "SR-policy-rewrite: src %U dst %U", + format_ip6_address, &t->src, format_ip6_address, &t->dst); + + return s; +} + +/** + * @brief IPv6 encapsulation processing as per RFC2473 + */ +static_always_inline void +encaps_processing_v6 (vlib_node_runtime_t * node, + vlib_buffer_t * b0, + ip6_header_t * ip0, ip6_header_t * ip0_encap) +{ + u32 new_l0; + + ip0_encap->hop_limit -= 1; + new_l0 = + ip0->payload_length + sizeof (ip6_header_t) + + clib_net_to_host_u16 (ip0_encap->payload_length); + ip0->payload_length = clib_host_to_net_u16 (new_l0); + ip0->ip_version_traffic_class_and_flow_label = + ip0_encap->ip_version_traffic_class_and_flow_label; +} + +/** + * @brief Graph node for applying a SR policy into an IPv6 packet. Encapsulation + */ +static uword +sr_policy_rewrite_encaps (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + ip6_sr_main_t *sm = &sr_main; + u32 n_left_from, next_index, *from, *to_next; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + int encap_pkts = 0, bsid_pkts = 0; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* Quad - Loop */ + while (n_left_from >= 8 && n_left_to_next >= 4) + { + u32 bi0, bi1, bi2, bi3; + vlib_buffer_t *b0, *b1, *b2, *b3; + u32 next0, next1, next2, next3; + next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; + ip6_header_t *ip0, *ip1, *ip2, *ip3; + ip6_header_t *ip0_encap, *ip1_encap, *ip2_encap, *ip3_encap; + ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3; + + /* Prefetch next iteration. 
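While buffers 0-3 are rewritten below, the buffer headers and the first cache line of packet data of buffers 4-7 are prefetched here to hide memory latency.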
*/ + { + vlib_buffer_t *p4, *p5, *p6, *p7; + + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + p6 = vlib_get_buffer (vm, from[6]); + p7 = vlib_get_buffer (vm, from[7]); + + /* Prefetch the buffer header and packet for the N+2 loop iteration */ + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + vlib_prefetch_buffer_header (p6, LOAD); + vlib_prefetch_buffer_header (p7, LOAD); + + CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + to_next[2] = bi2 = from[2]; + to_next[3] = bi3 = from[3]; + from += 4; + to_next += 4; + n_left_from -= 4; + n_left_to_next -= 4; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); + + sl0 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + sl1 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b1)->ip.adj_index[VLIB_TX]); + sl2 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b2)->ip.adj_index[VLIB_TX]); + sl3 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b3)->ip.adj_index[VLIB_TX]); + + ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= + vec_len (sl0->rewrite)); + ASSERT (b1->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= + vec_len (sl1->rewrite)); + ASSERT (b2->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= + vec_len (sl2->rewrite)); + ASSERT (b3->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= + vec_len (sl3->rewrite)); + + ip0_encap = vlib_buffer_get_current (b0); + ip1_encap = vlib_buffer_get_current (b1); + ip2_encap = vlib_buffer_get_current (b2); + ip3_encap = vlib_buffer_get_current (b3); + + clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite), + sl0->rewrite, vec_len (sl0->rewrite)); + clib_memcpy (((u8 *) ip1_encap) - vec_len (sl1->rewrite), + sl1->rewrite, vec_len (sl1->rewrite)); + clib_memcpy (((u8 *) ip2_encap) - vec_len (sl2->rewrite), + sl2->rewrite, vec_len (sl2->rewrite)); + clib_memcpy (((u8 *) ip3_encap) - vec_len (sl3->rewrite), + sl3->rewrite, vec_len (sl3->rewrite)); + + vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite)); + vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite)); + vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite)); + vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite)); + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + ip2 = vlib_buffer_get_current (b2); + ip3 = vlib_buffer_get_current (b3); + + encaps_processing_v6 (node, b0, ip0, ip0_encap); + encaps_processing_v6 (node, b1, ip1, ip1_encap); + encaps_processing_v6 (node, b2, ip2, ip2_encap); + encaps_processing_v6 (node, b3, ip3, ip3_encap); + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b1, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8, + sizeof 
(tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b2, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b3, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + } + + encap_pkts += 4; + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, + n_left_to_next, bi0, bi1, bi2, bi3, + next0, next1, next2, next3); + } + + /* Single loop for potentially the last three packets */ + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + ip6_header_t *ip0 = 0, *ip0_encap = 0; + ip6_sr_sl_t *sl0; + u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + b0 = vlib_get_buffer (vm, bi0); + + sl0 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= + vec_len (sl0->rewrite)); + + ip0_encap = vlib_buffer_get_current (b0); + + clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite), + sl0->rewrite, vec_len (sl0->rewrite)); + vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite)); + + ip0 = vlib_buffer_get_current (b0); + + encaps_processing_v6 (node, b0, ip0, ip0_encap); + + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) && + PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + encap_pkts++; + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + /* Update counters */ + vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index, + SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL, + encap_pkts); + vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index, + SR_POLICY_REWRITE_ERROR_COUNTER_BSID, + bsid_pkts); + + return from_frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_node) = { + .function = sr_policy_rewrite_encaps, + .name = "sr-pl-rewrite-encaps", + .vector_size = sizeof (u32), + .format_trace = format_sr_policy_rewrite_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = SR_POLICY_REWRITE_N_ERROR, + .error_strings = sr_policy_rewrite_error_strings, + .n_next_nodes = SR_POLICY_REWRITE_N_NEXT, + .next_nodes = { +#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n, + foreach_sr_policy_rewrite_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +/** + * @brief IPv4 encapsulation processing as per RFC2473 + */ +static_always_inline void +encaps_processing_v4 (vlib_node_runtime_t * node, + vlib_buffer_t * b0, + ip6_header_t * ip0, ip4_header_t * ip0_encap) +{ + u32 new_l0; + ip6_sr_header_t *sr0; + + u32 checksum0; + + /* Inner IPv4: Decrement TTL & update checksum */ + ip0_encap->ttl -= 1; + checksum0 = ip0_encap->checksum + clib_host_to_net_u16 (0x0100); + checksum0 += checksum0 >= 0xffff; + 
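/* Store the incrementally updated checksum (cf. RFC 1624): the 0x0100 added above, in network byte order, offsets the TTL decrement and the end-around carry has been folded back in */ +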
ip0_encap->checksum = checksum0; + + /* Outer IPv6: Update length, FL, proto */ + new_l0 = ip0->payload_length + clib_net_to_host_u16 (ip0_encap->length); + ip0->payload_length = clib_host_to_net_u16 (new_l0); + ip0->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 (0 | ((6 & 0xF) << 28) | + ((ip0_encap->tos & 0xFF) << 20)); + sr0 = (void *) (ip0 + 1); + sr0->protocol = IP_PROTOCOL_IP_IN_IP; +} + +/** + * @brief Graph node for applying a SR policy into an IPv4 packet. Encapsulation + */ +static uword +sr_policy_rewrite_encaps_v4 (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + ip6_sr_main_t *sm = &sr_main; + u32 n_left_from, next_index, *from, *to_next; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + int encap_pkts = 0, bsid_pkts = 0; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* Quad - Loop */ + while (n_left_from >= 8 && n_left_to_next >= 4) + { + u32 bi0, bi1, bi2, bi3; + vlib_buffer_t *b0, *b1, *b2, *b3; + u32 next0, next1, next2, next3; + next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; + ip6_header_t *ip0, *ip1, *ip2, *ip3; + ip4_header_t *ip0_encap, *ip1_encap, *ip2_encap, *ip3_encap; + ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3; + + /* Prefetch next iteration. */ + { + vlib_buffer_t *p4, *p5, *p6, *p7; + + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + p6 = vlib_get_buffer (vm, from[6]); + p7 = vlib_get_buffer (vm, from[7]); + + /* Prefetch the buffer header and packet for the N+2 loop iteration */ + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + vlib_prefetch_buffer_header (p6, LOAD); + vlib_prefetch_buffer_header (p7, LOAD); + + CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + to_next[2] = bi2 = from[2]; + to_next[3] = bi3 = from[3]; + from += 4; + to_next += 4; + n_left_from -= 4; + n_left_to_next -= 4; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); + + sl0 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + sl1 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b1)->ip.adj_index[VLIB_TX]); + sl2 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b2)->ip.adj_index[VLIB_TX]); + sl3 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b3)->ip.adj_index[VLIB_TX]); + ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= + vec_len (sl0->rewrite)); + ASSERT (b1->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= + vec_len (sl1->rewrite)); + ASSERT (b2->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= + vec_len (sl2->rewrite)); + ASSERT (b3->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= + vec_len (sl3->rewrite)); + + ip0_encap = vlib_buffer_get_current (b0); + ip1_encap = vlib_buffer_get_current (b1); + ip2_encap = vlib_buffer_get_current (b2); + ip3_encap = vlib_buffer_get_current (b3); + + clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite), + sl0->rewrite, vec_len (sl0->rewrite)); + clib_memcpy (((u8 *) ip1_encap) - vec_len (sl1->rewrite), + sl1->rewrite, vec_len (sl1->rewrite)); + clib_memcpy (((u8 *) ip2_encap) - vec_len 
(sl2->rewrite), + sl2->rewrite, vec_len (sl2->rewrite)); + clib_memcpy (((u8 *) ip3_encap) - vec_len (sl3->rewrite), + sl3->rewrite, vec_len (sl3->rewrite)); + + vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite)); + vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite)); + vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite)); + vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite)); + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + ip2 = vlib_buffer_get_current (b2); + ip3 = vlib_buffer_get_current (b3); + + encaps_processing_v4 (node, b0, ip0, ip0_encap); + encaps_processing_v4 (node, b1, ip1, ip1_encap); + encaps_processing_v4 (node, b2, ip2, ip2_encap); + encaps_processing_v4 (node, b3, ip3, ip3_encap); + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b1, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b2, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b3, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + } + + encap_pkts += 4; + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, + n_left_to_next, bi0, bi1, bi2, bi3, + next0, next1, next2, next3); + } + + /* Single loop for potentially the last three packets */ + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + ip6_header_t *ip0 = 0; + ip4_header_t *ip0_encap = 0; + ip6_sr_sl_t *sl0; + u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + b0 = vlib_get_buffer (vm, bi0); + + sl0 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= + vec_len (sl0->rewrite)); + + ip0_encap = vlib_buffer_get_current (b0); + + clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite), + sl0->rewrite, vec_len (sl0->rewrite)); + vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite)); + + ip0 = vlib_buffer_get_current (b0); + + encaps_processing_v4 (node, b0, ip0, ip0_encap); + + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) && + PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + encap_pkts++; + vlib_validate_buffer_enqueue_x1 (vm, node, 
next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + /* Update counters */ + vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index, + SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL, + encap_pkts); + vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index, + SR_POLICY_REWRITE_ERROR_COUNTER_BSID, + bsid_pkts); + + return from_frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_v4_node) = { + .function = sr_policy_rewrite_encaps_v4, + .name = "sr-pl-rewrite-encaps-v4", + .vector_size = sizeof (u32), + .format_trace = format_sr_policy_rewrite_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = SR_POLICY_REWRITE_N_ERROR, + .error_strings = sr_policy_rewrite_error_strings, + .n_next_nodes = SR_POLICY_REWRITE_N_NEXT, + .next_nodes = { +#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n, + foreach_sr_policy_rewrite_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +always_inline u32 +ip_flow_hash (void *data) +{ + ip4_header_t *iph = (ip4_header_t *) data; + + if ((iph->ip_version_and_header_length & 0xF0) == 0x40) + return ip4_compute_flow_hash (iph, IP_FLOW_HASH_DEFAULT); + else + return ip6_compute_flow_hash ((ip6_header_t *) iph, IP_FLOW_HASH_DEFAULT); +} + +always_inline u64 +mac_to_u64 (u8 * m) +{ + return (*((u64 *) m) & 0xffffffffffff); +} + +always_inline u32 +l2_flow_hash (vlib_buffer_t * b0) +{ + ethernet_header_t *eh; + u64 a, b, c; + uword is_ip, eh_size; + u16 eh_type; + + eh = vlib_buffer_get_current (b0); + eh_type = clib_net_to_host_u16 (eh->type); + eh_size = ethernet_buffer_header_size (b0); + + is_ip = (eh_type == ETHERNET_TYPE_IP4 || eh_type == ETHERNET_TYPE_IP6); + + /* since we have 2 cache lines, use them */ + if (is_ip) + a = ip_flow_hash ((u8 *) vlib_buffer_get_current (b0) + eh_size); + else + a = eh->type; + + b = mac_to_u64 ((u8 *) eh->dst_address); + c = mac_to_u64 ((u8 *) eh->src_address); + hash_mix64 (a, b, c); + + return (u32) c; +} + +/** + * @brief Graph node for applying a SR policy into a L2 frame + */ +static uword +sr_policy_rewrite_encaps_l2 (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + ip6_sr_main_t *sm = &sr_main; + u32 n_left_from, next_index, *from, *to_next; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + int encap_pkts = 0, bsid_pkts = 0; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* Quad - Loop */ + while (n_left_from >= 8 && n_left_to_next >= 4) + { + u32 bi0, bi1, bi2, bi3; + vlib_buffer_t *b0, *b1, *b2, *b3; + u32 next0, next1, next2, next3; + next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; + ethernet_header_t *en0, *en1, *en2, *en3; + ip6_header_t *ip0, *ip1, *ip2, *ip3; + ip6_sr_header_t *sr0, *sr1, *sr2, *sr3; + ip6_sr_policy_t *sp0, *sp1, *sp2, *sp3; + ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3; + + /* Prefetch next iteration. 
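Buffers 4-7 are warmed ahead of time; the L2 path reads the ethernet header for the flow hash as well as the buffer metadata.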
*/ + { + vlib_buffer_t *p4, *p5, *p6, *p7; + + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + p6 = vlib_get_buffer (vm, from[6]); + p7 = vlib_get_buffer (vm, from[7]); + + /* Prefetch the buffer header and packet for the N+2 loop iteration */ + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + vlib_prefetch_buffer_header (p6, LOAD); + vlib_prefetch_buffer_header (p7, LOAD); + + CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + to_next[2] = bi2 = from[2]; + to_next[3] = bi3 = from[3]; + from += 4; + to_next += 4; + n_left_from -= 4; + n_left_to_next -= 4; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); + + sp0 = pool_elt_at_index (sm->sr_policies, + sm->sw_iface_sr_policies[vnet_buffer + (b0)->sw_if_index + [VLIB_RX]]); + + sp1 = pool_elt_at_index (sm->sr_policies, + sm->sw_iface_sr_policies[vnet_buffer + (b1)->sw_if_index + [VLIB_RX]]); + + sp2 = pool_elt_at_index (sm->sr_policies, + sm->sw_iface_sr_policies[vnet_buffer + (b2)->sw_if_index + [VLIB_RX]]); + + sp3 = pool_elt_at_index (sm->sr_policies, + sm->sw_iface_sr_policies[vnet_buffer + (b3)->sw_if_index + [VLIB_RX]]); + + if (vec_len (sp0->segments_lists) == 1) + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = sp0->segments_lists[0]; + else + { + vnet_buffer (b0)->ip.flow_hash = l2_flow_hash (b0); + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = + sp0->segments_lists[(vnet_buffer (b0)->ip.flow_hash & + (vec_len (sp0->segments_lists) - 1))]; + } + + if (vec_len (sp1->segments_lists) == 1) + vnet_buffer (b1)->ip.adj_index[VLIB_TX] = sp1->segments_lists[1]; + else + { + vnet_buffer (b1)->ip.flow_hash = l2_flow_hash (b1); + vnet_buffer (b1)->ip.adj_index[VLIB_TX] = + sp1->segments_lists[(vnet_buffer (b1)->ip.flow_hash & + (vec_len (sp1->segments_lists) - 1))]; + } + + if (vec_len (sp2->segments_lists) == 1) + vnet_buffer (b2)->ip.adj_index[VLIB_TX] = sp2->segments_lists[2]; + else + { + vnet_buffer (b2)->ip.flow_hash = l2_flow_hash (b2); + vnet_buffer (b2)->ip.adj_index[VLIB_TX] = + sp2->segments_lists[(vnet_buffer (b2)->ip.flow_hash & + (vec_len (sp2->segments_lists) - 1))]; + } + + if (vec_len (sp3->segments_lists) == 1) + vnet_buffer (b3)->ip.adj_index[VLIB_TX] = sp3->segments_lists[3]; + else + { + vnet_buffer (b3)->ip.flow_hash = l2_flow_hash (b3); + vnet_buffer (b3)->ip.adj_index[VLIB_TX] = + sp3->segments_lists[(vnet_buffer (b3)->ip.flow_hash & + (vec_len (sp3->segments_lists) - 1))]; + } + + sl0 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + sl1 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b1)->ip.adj_index[VLIB_TX]); + sl2 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b2)->ip.adj_index[VLIB_TX]); + sl3 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b3)->ip.adj_index[VLIB_TX]); + + ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= + vec_len (sl0->rewrite)); + ASSERT (b1->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= + vec_len (sl1->rewrite)); + ASSERT (b2->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= + vec_len (sl2->rewrite)); + ASSERT (b3->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= + vec_len (sl3->rewrite)); + + en0 = vlib_buffer_get_current (b0); + en1 = vlib_buffer_get_current (b1); + 
en2 = vlib_buffer_get_current (b2); + en3 = vlib_buffer_get_current (b3); + + clib_memcpy (((u8 *) en0) - vec_len (sl0->rewrite), sl0->rewrite, + vec_len (sl0->rewrite)); + clib_memcpy (((u8 *) en1) - vec_len (sl1->rewrite), sl1->rewrite, + vec_len (sl1->rewrite)); + clib_memcpy (((u8 *) en2) - vec_len (sl2->rewrite), sl2->rewrite, + vec_len (sl2->rewrite)); + clib_memcpy (((u8 *) en3) - vec_len (sl3->rewrite), sl3->rewrite, + vec_len (sl3->rewrite)); + + vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite)); + vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite)); + vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite)); + vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite)); + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + ip2 = vlib_buffer_get_current (b2); + ip3 = vlib_buffer_get_current (b3); + + ip0->payload_length = + clib_host_to_net_u16 (b0->current_length - sizeof (ip6_header_t)); + ip1->payload_length = + clib_host_to_net_u16 (b1->current_length - sizeof (ip6_header_t)); + ip2->payload_length = + clib_host_to_net_u16 (b2->current_length - sizeof (ip6_header_t)); + ip3->payload_length = + clib_host_to_net_u16 (b3->current_length - sizeof (ip6_header_t)); + + sr0 = (void *) (ip0 + 1); + sr1 = (void *) (ip1 + 1); + sr2 = (void *) (ip2 + 1); + sr3 = (void *) (ip3 + 1); + + sr0->protocol = sr1->protocol = sr2->protocol = sr3->protocol = + IP_PROTOCOL_IP6_NONXT; + + /* Which Traffic class and flow label do I set ? */ + //ip0->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32(0|((6&0xF)<<28)|((ip0_encap->tos&0xFF)<<20)); + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b1, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b2, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b3, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + } + + encap_pkts += 4; + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, + n_left_to_next, bi0, bi1, bi2, bi3, + next0, next1, next2, next3); + } + + /* Single loop for potentially the last three packets */ + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + ip6_header_t *ip0 = 0; + ip6_sr_header_t *sr0; + ethernet_header_t *en0; + ip6_sr_policy_t *sp0; + ip6_sr_sl_t *sl0; + u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + b0 = vlib_get_buffer (vm, bi0); + + /* Find the SR 
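policy attached to the RX interface: sm->sw_iface_sr_policies is indexed by sw_if_index and yields the pool index of the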
policy */ + sp0 = pool_elt_at_index (sm->sr_policies, + sm->sw_iface_sr_policies[vnet_buffer + (b0)->sw_if_index + [VLIB_RX]]); + + /* In case there is more than one SL, LB among them */ + if (vec_len (sp0->segments_lists) == 1) + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = sp0->segments_lists[0]; + else + { + vnet_buffer (b0)->ip.flow_hash = l2_flow_hash (b0); + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = + sp0->segments_lists[(vnet_buffer (b0)->ip.flow_hash & + (vec_len (sp0->segments_lists) - 1))]; + } + sl0 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= + vec_len (sl0->rewrite)); + + en0 = vlib_buffer_get_current (b0); + + clib_memcpy (((u8 *) en0) - vec_len (sl0->rewrite), sl0->rewrite, + vec_len (sl0->rewrite)); + + vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite)); + + ip0 = vlib_buffer_get_current (b0); + + ip0->payload_length = + clib_host_to_net_u16 (b0->current_length - sizeof (ip6_header_t)); + + sr0 = (void *) (ip0 + 1); + sr0->protocol = IP_PROTOCOL_IP6_NONXT; + + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) && + PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + encap_pkts++; + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + /* Update counters */ + vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index, + SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL, + encap_pkts); + vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index, + SR_POLICY_REWRITE_ERROR_COUNTER_BSID, + bsid_pkts); + + return from_frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_l2_node) = { + .function = sr_policy_rewrite_encaps_l2, + .name = "sr-pl-rewrite-encaps-l2", + .vector_size = sizeof (u32), + .format_trace = format_sr_policy_rewrite_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = SR_POLICY_REWRITE_N_ERROR, + .error_strings = sr_policy_rewrite_error_strings, + .n_next_nodes = SR_POLICY_REWRITE_N_NEXT, + .next_nodes = { +#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n, + foreach_sr_policy_rewrite_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +/** + * @brief Graph node for applying a SR policy into a packet. SRH insertion. + */ +static uword +sr_policy_rewrite_insert (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + ip6_sr_main_t *sm = &sr_main; + u32 n_left_from, next_index, *from, *to_next; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + int insert_pkts = 0, bsid_pkts = 0; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* Quad - Loop */ + while (n_left_from >= 8 && n_left_to_next >= 4) + { + u32 bi0, bi1, bi2, bi3; + vlib_buffer_t *b0, *b1, *b2, *b3; + u32 next0, next1, next2, next3; + next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; + ip6_header_t *ip0, *ip1, *ip2, *ip3; + ip6_sr_header_t *sr0, *sr1, *sr2, *sr3; + ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3; + u16 new_l0, new_l1, new_l2, new_l3; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t *p4, *p5, *p6, *p7; + + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + p6 = vlib_get_buffer (vm, from[6]); + p7 = vlib_get_buffer (vm, from[7]); + + /* Prefetch the buffer header and packet for the N+2 loop iteration */ + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + vlib_prefetch_buffer_header (p6, LOAD); + vlib_prefetch_buffer_header (p7, LOAD); + + CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + to_next[2] = bi2 = from[2]; + to_next[3] = bi3 = from[3]; + from += 4; + to_next += 4; + n_left_from -= 4; + n_left_to_next -= 4; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); + + sl0 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + sl1 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b1)->ip.adj_index[VLIB_TX]); + sl2 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b2)->ip.adj_index[VLIB_TX]); + sl3 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b3)->ip.adj_index[VLIB_TX]); + ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= + vec_len (sl0->rewrite)); + ASSERT (b1->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= + vec_len (sl1->rewrite)); + ASSERT (b2->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= + vec_len (sl2->rewrite)); + ASSERT (b3->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= + vec_len (sl3->rewrite)); + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + ip2 = vlib_buffer_get_current (b2); + ip3 = vlib_buffer_get_current (b3); + + if (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) + sr0 = + (ip6_sr_header_t *) (((void *) (ip0 + 1)) + + ip6_ext_header_len (ip0 + 1)); + else + sr0 = (ip6_sr_header_t *) (ip0 + 1); + + if (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) + sr1 = + (ip6_sr_header_t *) (((void *) (ip1 + 1)) + + ip6_ext_header_len (ip1 + 1)); + else + sr1 = (ip6_sr_header_t *) (ip1 + 1); + + if (ip2->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) + sr2 = + (ip6_sr_header_t *) (((void *) (ip2 + 1)) + + ip6_ext_header_len (ip2 + 1)); + else + sr2 = (ip6_sr_header_t *) (ip2 + 1); + + if (ip3->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) + sr3 = + (ip6_sr_header_t *) (((void *) (ip3 + 1)) + + ip6_ext_header_len (ip3 + 1)); + else + sr3 = (ip6_sr_header_t *) (ip3 + 1); + + clib_memcpy ((u8 *) ip0 - vec_len (sl0->rewrite), (u8 *) ip0, + (void *) sr0 - (void *) ip0); + clib_memcpy ((u8 *) ip1 - vec_len (sl1->rewrite), (u8 *) ip1, + (void *) sr1 - (void *) ip1); + clib_memcpy ((u8 *) ip2 - vec_len (sl2->rewrite), (u8 *) ip2, + (void *) sr2 - (void *) ip2); + clib_memcpy ((u8 *) ip3 - vec_len (sl3->rewrite), (u8 *) ip3, + (void *) sr3 - (void *) ip3); + + clib_memcpy (((u8 *) sr0 - vec_len (sl0->rewrite)), sl0->rewrite, + vec_len (sl0->rewrite)); + clib_memcpy (((u8 *) sr1 - vec_len (sl1->rewrite)), sl1->rewrite, + vec_len (sl1->rewrite)); + clib_memcpy (((u8 *) sr2 - vec_len (sl2->rewrite)), sl2->rewrite, + vec_len (sl2->rewrite)); + clib_memcpy (((u8 *) sr3 - vec_len (sl3->rewrite)), sl3->rewrite, + vec_len (sl3->rewrite)); + + vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite)); + vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite)); + vlib_buffer_advance (b2, 
-(word) vec_len (sl2->rewrite)); + vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite)); + + ip0 = ((void *) ip0) - vec_len (sl0->rewrite); + ip1 = ((void *) ip1) - vec_len (sl1->rewrite); + ip2 = ((void *) ip2) - vec_len (sl2->rewrite); + ip3 = ((void *) ip3) - vec_len (sl3->rewrite); + + ip0->hop_limit -= 1; + ip1->hop_limit -= 1; + ip2->hop_limit -= 1; + ip3->hop_limit -= 1; + + new_l0 = + clib_net_to_host_u16 (ip0->payload_length) + + vec_len (sl0->rewrite); + new_l1 = + clib_net_to_host_u16 (ip1->payload_length) + + vec_len (sl1->rewrite); + new_l2 = + clib_net_to_host_u16 (ip2->payload_length) + + vec_len (sl2->rewrite); + new_l3 = + clib_net_to_host_u16 (ip3->payload_length) + + vec_len (sl3->rewrite); + + ip0->payload_length = clib_host_to_net_u16 (new_l0); + ip1->payload_length = clib_host_to_net_u16 (new_l1); + ip2->payload_length = clib_host_to_net_u16 (new_l2); + ip3->payload_length = clib_host_to_net_u16 (new_l3); + + sr0 = ((void *) sr0) - vec_len (sl0->rewrite); + sr1 = ((void *) sr1) - vec_len (sl1->rewrite); + sr2 = ((void *) sr2) - vec_len (sl2->rewrite); + sr3 = ((void *) sr3) - vec_len (sl3->rewrite); + + sr0->segments->as_u64[0] = ip0->dst_address.as_u64[0]; + sr0->segments->as_u64[1] = ip0->dst_address.as_u64[1]; + sr1->segments->as_u64[0] = ip1->dst_address.as_u64[0]; + sr1->segments->as_u64[1] = ip1->dst_address.as_u64[1]; + sr2->segments->as_u64[0] = ip2->dst_address.as_u64[0]; + sr2->segments->as_u64[1] = ip2->dst_address.as_u64[1]; + sr3->segments->as_u64[0] = ip3->dst_address.as_u64[0]; + sr3->segments->as_u64[1] = ip3->dst_address.as_u64[1]; + + ip0->dst_address.as_u64[0] = + (sr0->segments + sr0->segments_left)->as_u64[0]; + ip0->dst_address.as_u64[1] = + (sr0->segments + sr0->segments_left)->as_u64[1]; + ip1->dst_address.as_u64[0] = + (sr1->segments + sr1->segments_left)->as_u64[0]; + ip1->dst_address.as_u64[1] = + (sr1->segments + sr1->segments_left)->as_u64[1]; + ip2->dst_address.as_u64[0] = + (sr2->segments + sr2->segments_left)->as_u64[0]; + ip2->dst_address.as_u64[1] = + (sr2->segments + sr2->segments_left)->as_u64[1]; + ip3->dst_address.as_u64[0] = + (sr3->segments + sr3->segments_left)->as_u64[0]; + ip3->dst_address.as_u64[1] = + (sr3->segments + sr3->segments_left)->as_u64[1]; + + ip6_ext_header_t *ip_ext; + if (ip0 + 1 == (void *) sr0) + { + sr0->protocol = ip0->protocol; + ip0->protocol = IP_PROTOCOL_IPV6_ROUTE; + } + else + { + ip_ext = (void *) (ip0 + 1); + sr0->protocol = ip_ext->next_hdr; + ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE; + } + + if (ip1 + 1 == (void *) sr1) + { + sr1->protocol = ip1->protocol; + ip1->protocol = IP_PROTOCOL_IPV6_ROUTE; + } + else + { + ip_ext = (void *) (ip2 + 1); + sr2->protocol = ip_ext->next_hdr; + ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE; + } + + if (ip2 + 1 == (void *) sr2) + { + sr2->protocol = ip2->protocol; + ip2->protocol = IP_PROTOCOL_IPV6_ROUTE; + } + else + { + ip_ext = (void *) (ip2 + 1); + sr2->protocol = ip_ext->next_hdr; + ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE; + } + + if (ip3 + 1 == (void *) sr3) + { + sr3->protocol = ip3->protocol; + ip3->protocol = IP_PROTOCOL_IPV6_ROUTE; + } + else + { + ip_ext = (void *) (ip3 + 1); + sr3->protocol = ip_ext->next_hdr; + ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE; + } + + insert_pkts += 4; + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof 
(tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b1, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b2, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b3, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + } + + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, + n_left_to_next, bi0, bi1, bi2, bi3, + next0, next1, next2, next3); + } + + /* Single loop for potentially the last three packets */ + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + ip6_header_t *ip0 = 0; + ip6_sr_header_t *sr0 = 0; + ip6_sr_sl_t *sl0; + u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; + u16 new_l0 = 0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + sl0 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= + vec_len (sl0->rewrite)); + + ip0 = vlib_buffer_get_current (b0); + + if (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) + sr0 = + (ip6_sr_header_t *) (((void *) (ip0 + 1)) + + ip6_ext_header_len (ip0 + 1)); + else + sr0 = (ip6_sr_header_t *) (ip0 + 1); + + clib_memcpy ((u8 *) ip0 - vec_len (sl0->rewrite), (u8 *) ip0, + (void *) sr0 - (void *) ip0); + clib_memcpy (((u8 *) sr0 - vec_len (sl0->rewrite)), sl0->rewrite, + vec_len (sl0->rewrite)); + + vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite)); + + ip0 = ((void *) ip0) - vec_len (sl0->rewrite); + ip0->hop_limit -= 1; + new_l0 = + clib_net_to_host_u16 (ip0->payload_length) + + vec_len (sl0->rewrite); + ip0->payload_length = clib_host_to_net_u16 (new_l0); + + sr0 = ((void *) sr0) - vec_len (sl0->rewrite); + sr0->segments->as_u64[0] = ip0->dst_address.as_u64[0]; + sr0->segments->as_u64[1] = ip0->dst_address.as_u64[1]; + + ip0->dst_address.as_u64[0] = + (sr0->segments + sr0->segments_left)->as_u64[0]; + ip0->dst_address.as_u64[1] = + (sr0->segments + sr0->segments_left)->as_u64[1]; + + if (ip0 + 1 == (void *) sr0) + { + sr0->protocol = ip0->protocol; + ip0->protocol = IP_PROTOCOL_IPV6_ROUTE; + } + else + { + ip6_ext_header_t *ip_ext = (void *) (ip0 + 1); + sr0->protocol = ip_ext->next_hdr; + ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE; + } + + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) && + PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + insert_pkts++; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, 
next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + /* Update counters */ + vlib_node_increment_counter (vm, sr_policy_rewrite_insert_node.index, + SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL, + insert_pkts); + vlib_node_increment_counter (vm, sr_policy_rewrite_insert_node.index, + SR_POLICY_REWRITE_ERROR_COUNTER_BSID, + bsid_pkts); + return from_frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sr_policy_rewrite_insert_node) = { + .function = sr_policy_rewrite_insert, + .name = "sr-pl-rewrite-insert", + .vector_size = sizeof (u32), + .format_trace = format_sr_policy_rewrite_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = SR_POLICY_REWRITE_N_ERROR, + .error_strings = sr_policy_rewrite_error_strings, + .n_next_nodes = SR_POLICY_REWRITE_N_NEXT, + .next_nodes = { +#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n, + foreach_sr_policy_rewrite_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +/** + * @brief Graph node for applying a SR policy into a packet. BSID - SRH insertion. + */ +static uword +sr_policy_rewrite_b_insert (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + ip6_sr_main_t *sm = &sr_main; + u32 n_left_from, next_index, *from, *to_next; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + int insert_pkts = 0, bsid_pkts = 0; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* Quad - Loop */ + while (n_left_from >= 8 && n_left_to_next >= 4) + { + u32 bi0, bi1, bi2, bi3; + vlib_buffer_t *b0, *b1, *b2, *b3; + u32 next0, next1, next2, next3; + next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; + ip6_header_t *ip0, *ip1, *ip2, *ip3; + ip6_sr_header_t *sr0, *sr1, *sr2, *sr3; + ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3; + u16 new_l0, new_l1, new_l2, new_l3; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t *p4, *p5, *p6, *p7; + + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + p6 = vlib_get_buffer (vm, from[6]); + p7 = vlib_get_buffer (vm, from[7]); + + /* Prefetch the buffer header and packet for the N+2 loop iteration */ + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + vlib_prefetch_buffer_header (p6, LOAD); + vlib_prefetch_buffer_header (p7, LOAD); + + CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + to_next[2] = bi2 = from[2]; + to_next[3] = bi3 = from[3]; + from += 4; + to_next += 4; + n_left_from -= 4; + n_left_to_next -= 4; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); + + sl0 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + sl1 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b1)->ip.adj_index[VLIB_TX]); + sl2 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b2)->ip.adj_index[VLIB_TX]); + sl3 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b3)->ip.adj_index[VLIB_TX]); + ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= + vec_len (sl0->rewrite_bsid)); + ASSERT (b1->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= + vec_len (sl1->rewrite_bsid)); + ASSERT (b2->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= + vec_len (sl2->rewrite_bsid)); + ASSERT (b3->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= + vec_len (sl3->rewrite_bsid)); + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + ip2 = vlib_buffer_get_current (b2); + ip3 = vlib_buffer_get_current (b3); + + if (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) + sr0 = + (ip6_sr_header_t *) (((void *) (ip0 + 1)) + + ip6_ext_header_len (ip0 + 1)); + else + sr0 = (ip6_sr_header_t *) (ip0 + 1); + + if (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) + sr1 = + (ip6_sr_header_t *) (((void *) (ip1 + 1)) + + ip6_ext_header_len (ip1 + 1)); + else + sr1 = (ip6_sr_header_t *) (ip1 + 1); + + if (ip2->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) + sr2 = + (ip6_sr_header_t *) (((void *) (ip2 + 1)) + + ip6_ext_header_len (ip2 + 1)); + else + sr2 = (ip6_sr_header_t *) (ip2 + 1); + + if (ip3->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) + sr3 = + (ip6_sr_header_t *) (((void *) (ip3 + 1)) + + ip6_ext_header_len (ip3 + 1)); + else + sr3 = (ip6_sr_header_t *) (ip3 + 1); + + clib_memcpy ((u8 *) ip0 - vec_len (sl0->rewrite_bsid), (u8 *) ip0, + (void *) sr0 - (void *) ip0); + clib_memcpy ((u8 *) ip1 - vec_len (sl1->rewrite_bsid), (u8 *) ip1, + (void *) sr1 - (void *) ip1); + clib_memcpy ((u8 *) ip2 - vec_len (sl2->rewrite_bsid), (u8 *) ip2, + (void *) sr2 - (void *) ip2); + clib_memcpy ((u8 *) ip3 - vec_len (sl3->rewrite_bsid), (u8 *) ip3, + (void *) sr3 - (void *) ip3); + + clib_memcpy (((u8 *) sr0 - vec_len (sl0->rewrite_bsid)), + sl0->rewrite_bsid, vec_len (sl0->rewrite_bsid)); + clib_memcpy (((u8 *) sr1 - vec_len (sl1->rewrite_bsid)), + sl1->rewrite_bsid, vec_len (sl1->rewrite_bsid)); + clib_memcpy (((u8 *) sr2 - vec_len (sl2->rewrite_bsid)), + sl2->rewrite_bsid, vec_len (sl2->rewrite_bsid)); + clib_memcpy (((u8 *) sr3 - vec_len (sl3->rewrite_bsid)), + sl3->rewrite_bsid, vec_len (sl3->rewrite_bsid)); + + vlib_buffer_advance (b0, -(word) vec_len 
(sl0->rewrite_bsid)); + vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite_bsid)); + vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite_bsid)); + vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite_bsid)); + + ip0 = ((void *) ip0) - vec_len (sl0->rewrite_bsid); + ip1 = ((void *) ip1) - vec_len (sl1->rewrite_bsid); + ip2 = ((void *) ip2) - vec_len (sl2->rewrite_bsid); + ip3 = ((void *) ip3) - vec_len (sl3->rewrite_bsid); + + ip0->hop_limit -= 1; + ip1->hop_limit -= 1; + ip2->hop_limit -= 1; + ip3->hop_limit -= 1; + + new_l0 = + clib_net_to_host_u16 (ip0->payload_length) + + vec_len (sl0->rewrite_bsid); + new_l1 = + clib_net_to_host_u16 (ip1->payload_length) + + vec_len (sl1->rewrite_bsid); + new_l2 = + clib_net_to_host_u16 (ip2->payload_length) + + vec_len (sl2->rewrite_bsid); + new_l3 = + clib_net_to_host_u16 (ip3->payload_length) + + vec_len (sl3->rewrite_bsid); + + ip0->payload_length = clib_host_to_net_u16 (new_l0); + ip1->payload_length = clib_host_to_net_u16 (new_l1); + ip2->payload_length = clib_host_to_net_u16 (new_l2); + ip3->payload_length = clib_host_to_net_u16 (new_l3); + + sr0 = ((void *) sr0) - vec_len (sl0->rewrite_bsid); + sr1 = ((void *) sr1) - vec_len (sl1->rewrite_bsid); + sr2 = ((void *) sr2) - vec_len (sl2->rewrite_bsid); + sr3 = ((void *) sr3) - vec_len (sl3->rewrite_bsid); + + ip0->dst_address.as_u64[0] = + (sr0->segments + sr0->segments_left)->as_u64[0]; + ip0->dst_address.as_u64[1] = + (sr0->segments + sr0->segments_left)->as_u64[1]; + ip1->dst_address.as_u64[0] = + (sr1->segments + sr1->segments_left)->as_u64[0]; + ip1->dst_address.as_u64[1] = + (sr1->segments + sr1->segments_left)->as_u64[1]; + ip2->dst_address.as_u64[0] = + (sr2->segments + sr2->segments_left)->as_u64[0]; + ip2->dst_address.as_u64[1] = + (sr2->segments + sr2->segments_left)->as_u64[1]; + ip3->dst_address.as_u64[0] = + (sr3->segments + sr3->segments_left)->as_u64[0]; + ip3->dst_address.as_u64[1] = + (sr3->segments + sr3->segments_left)->as_u64[1]; + + ip6_ext_header_t *ip_ext; + if (ip0 + 1 == (void *) sr0) + { + sr0->protocol = ip0->protocol; + ip0->protocol = IP_PROTOCOL_IPV6_ROUTE; + } + else + { + ip_ext = (void *) (ip0 + 1); + sr0->protocol = ip_ext->next_hdr; + ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE; + } + + if (ip1 + 1 == (void *) sr1) + { + sr1->protocol = ip1->protocol; + ip1->protocol = IP_PROTOCOL_IPV6_ROUTE; + } + else + { + ip_ext = (void *) (ip2 + 1); + sr2->protocol = ip_ext->next_hdr; + ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE; + } + + if (ip2 + 1 == (void *) sr2) + { + sr2->protocol = ip2->protocol; + ip2->protocol = IP_PROTOCOL_IPV6_ROUTE; + } + else + { + ip_ext = (void *) (ip2 + 1); + sr2->protocol = ip_ext->next_hdr; + ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE; + } + + if (ip3 + 1 == (void *) sr3) + { + sr3->protocol = ip3->protocol; + ip3->protocol = IP_PROTOCOL_IPV6_ROUTE; + } + else + { + ip_ext = (void *) (ip3 + 1); + sr3->protocol = ip_ext->next_hdr; + ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE; + } + + insert_pkts += 4; + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b1, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, 
ip1->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b2, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b3, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + } + + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, + n_left_to_next, bi0, bi1, bi2, bi3, + next0, next1, next2, next3); + } + + /* Single loop for potentially the last three packets */ + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + ip6_header_t *ip0 = 0; + ip6_sr_header_t *sr0 = 0; + ip6_sr_sl_t *sl0; + u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; + u16 new_l0 = 0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + sl0 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= + vec_len (sl0->rewrite_bsid)); + + ip0 = vlib_buffer_get_current (b0); + + if (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) + sr0 = + (ip6_sr_header_t *) (((void *) (ip0 + 1)) + + ip6_ext_header_len (ip0 + 1)); + else + sr0 = (ip6_sr_header_t *) (ip0 + 1); + + clib_memcpy ((u8 *) ip0 - vec_len (sl0->rewrite_bsid), (u8 *) ip0, + (void *) sr0 - (void *) ip0); + clib_memcpy (((u8 *) sr0 - vec_len (sl0->rewrite_bsid)), + sl0->rewrite_bsid, vec_len (sl0->rewrite_bsid)); + + vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite_bsid)); + + ip0 = ((void *) ip0) - vec_len (sl0->rewrite_bsid); + ip0->hop_limit -= 1; + new_l0 = + clib_net_to_host_u16 (ip0->payload_length) + + vec_len (sl0->rewrite_bsid); + ip0->payload_length = clib_host_to_net_u16 (new_l0); + + sr0 = ((void *) sr0) - vec_len (sl0->rewrite_bsid); + + ip0->dst_address.as_u64[0] = + (sr0->segments + sr0->segments_left)->as_u64[0]; + ip0->dst_address.as_u64[1] = + (sr0->segments + sr0->segments_left)->as_u64[1]; + + if (ip0 + 1 == (void *) sr0) + { + sr0->protocol = ip0->protocol; + ip0->protocol = IP_PROTOCOL_IPV6_ROUTE; + } + else + { + ip6_ext_header_t *ip_ext = (void *) (ip0 + 1); + sr0->protocol = ip_ext->next_hdr; + ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE; + } + + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) && + PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + insert_pkts++; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + /* Update counters */ + vlib_node_increment_counter (vm, sr_policy_rewrite_insert_node.index, + SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL, + insert_pkts); + vlib_node_increment_counter (vm, sr_policy_rewrite_insert_node.index, + SR_POLICY_REWRITE_ERROR_COUNTER_BSID, 
+ bsid_pkts); + return from_frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sr_policy_rewrite_b_insert_node) = { + .function = sr_policy_rewrite_b_insert, + .name = "sr-pl-rewrite-b-insert", + .vector_size = sizeof (u32), + .format_trace = format_sr_policy_rewrite_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = SR_POLICY_REWRITE_N_ERROR, + .error_strings = sr_policy_rewrite_error_strings, + .n_next_nodes = SR_POLICY_REWRITE_N_NEXT, + .next_nodes = { +#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n, + foreach_sr_policy_rewrite_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +/** + * @brief Function BSID encapsulation + */ +static_always_inline void +end_bsid_encaps_srh_processing (vlib_node_runtime_t * node, + vlib_buffer_t * b0, + ip6_header_t * ip0, + ip6_sr_header_t * sr0, u32 * next0) +{ + ip6_address_t *new_dst0; + + if (PREDICT_FALSE (!sr0)) + goto error_bsid_encaps; + + if (PREDICT_TRUE (sr0->type == ROUTING_HEADER_TYPE_SR)) + { + if (PREDICT_TRUE (sr0->segments_left != 0)) + { + sr0->segments_left -= 1; + new_dst0 = (ip6_address_t *) (sr0->segments); + new_dst0 += sr0->segments_left; + ip0->dst_address.as_u64[0] = new_dst0->as_u64[0]; + ip0->dst_address.as_u64[1] = new_dst0->as_u64[1]; + return; + } + } + +error_bsid_encaps: + *next0 = SR_POLICY_REWRITE_NEXT_ERROR; + b0->error = node->errors[SR_POLICY_REWRITE_ERROR_BSID_ZERO]; +} + +/** + * @brief Graph node for applying a SR policy BSID - Encapsulation + */ +static uword +sr_policy_rewrite_b_encaps (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + ip6_sr_main_t *sm = &sr_main; + u32 n_left_from, next_index, *from, *to_next; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + int encap_pkts = 0, bsid_pkts = 0; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* Quad - Loop */ + while (n_left_from >= 8 && n_left_to_next >= 4) + { + u32 bi0, bi1, bi2, bi3; + vlib_buffer_t *b0, *b1, *b2, *b3; + u32 next0, next1, next2, next3; + next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; + ip6_header_t *ip0, *ip1, *ip2, *ip3; + ip6_header_t *ip0_encap, *ip1_encap, *ip2_encap, *ip3_encap; + ip6_sr_header_t *sr0, *sr1, *sr2, *sr3; + ip6_ext_header_t *prev0, *prev1, *prev2, *prev3; + ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t *p4, *p5, *p6, *p7; + + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + p6 = vlib_get_buffer (vm, from[6]); + p7 = vlib_get_buffer (vm, from[7]); + + /* Prefetch the buffer header and packet for the N+2 loop iteration */ + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + vlib_prefetch_buffer_header (p6, LOAD); + vlib_prefetch_buffer_header (p7, LOAD); + + CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + to_next[2] = bi2 = from[2]; + to_next[3] = bi3 = from[3]; + from += 4; + to_next += 4; + n_left_from -= 4; + n_left_to_next -= 4; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); + + sl0 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + sl1 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b1)->ip.adj_index[VLIB_TX]); + sl2 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b2)->ip.adj_index[VLIB_TX]); + sl3 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b3)->ip.adj_index[VLIB_TX]); + ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= + vec_len (sl0->rewrite)); + ASSERT (b1->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= + vec_len (sl1->rewrite)); + ASSERT (b2->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= + vec_len (sl2->rewrite)); + ASSERT (b3->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= + vec_len (sl3->rewrite)); + + ip0_encap = vlib_buffer_get_current (b0); + ip1_encap = vlib_buffer_get_current (b1); + ip2_encap = vlib_buffer_get_current (b2); + ip3_encap = vlib_buffer_get_current (b3); + + ip6_ext_header_find_t (ip0_encap, prev0, sr0, + IP_PROTOCOL_IPV6_ROUTE); + ip6_ext_header_find_t (ip1_encap, prev1, sr1, + IP_PROTOCOL_IPV6_ROUTE); + ip6_ext_header_find_t (ip2_encap, prev2, sr2, + IP_PROTOCOL_IPV6_ROUTE); + ip6_ext_header_find_t (ip3_encap, prev3, sr3, + IP_PROTOCOL_IPV6_ROUTE); + + end_bsid_encaps_srh_processing (node, b0, ip0_encap, sr0, &next0); + end_bsid_encaps_srh_processing (node, b1, ip1_encap, sr1, &next1); + end_bsid_encaps_srh_processing (node, b2, ip2_encap, sr2, &next2); + end_bsid_encaps_srh_processing (node, b3, ip3_encap, sr3, &next3); + + clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite), + sl0->rewrite, vec_len (sl0->rewrite)); + clib_memcpy (((u8 *) ip1_encap) - vec_len (sl1->rewrite), + sl1->rewrite, vec_len (sl1->rewrite)); + clib_memcpy (((u8 *) ip2_encap) - vec_len (sl2->rewrite), + sl2->rewrite, vec_len (sl2->rewrite)); + clib_memcpy (((u8 *) ip3_encap) - vec_len (sl3->rewrite), + sl3->rewrite, vec_len (sl3->rewrite)); + + vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite)); + vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite)); + vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite)); + vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite)); + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + ip2 = vlib_buffer_get_current (b2); + ip3 = vlib_buffer_get_current (b3); + + encaps_processing_v6 (node, b0, ip0, ip0_encap); + encaps_processing_v6 (node, b1, ip1, ip1_encap); + encaps_processing_v6 (node, b2, ip2, ip2_encap); + encaps_processing_v6 (node, b3, ip3, ip3_encap); + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if 
(PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b1, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b2, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b3, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + } + + encap_pkts += 4; + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, + n_left_to_next, bi0, bi1, bi2, bi3, + next0, next1, next2, next3); + } + + /* Single loop for potentially the last three packets */ + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + ip6_header_t *ip0 = 0, *ip0_encap = 0; + ip6_ext_header_t *prev0; + ip6_sr_header_t *sr0; + ip6_sr_sl_t *sl0; + u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + b0 = vlib_get_buffer (vm, bi0); + + sl0 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= + vec_len (sl0->rewrite)); + + ip0_encap = vlib_buffer_get_current (b0); + ip6_ext_header_find_t (ip0_encap, prev0, sr0, + IP_PROTOCOL_IPV6_ROUTE); + end_bsid_encaps_srh_processing (node, b0, ip0_encap, sr0, &next0); + + clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite), + sl0->rewrite, vec_len (sl0->rewrite)); + vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite)); + + ip0 = vlib_buffer_get_current (b0); + + encaps_processing_v6 (node, b0, ip0, ip0_encap); + + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) && + PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + encap_pkts++; + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + /* Update counters */ + vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index, + SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL, + encap_pkts); + vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index, + SR_POLICY_REWRITE_ERROR_COUNTER_BSID, + bsid_pkts); + + return from_frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sr_policy_rewrite_b_encaps_node) = { + .function = sr_policy_rewrite_b_encaps, + .name = "sr-pl-rewrite-b-encaps", + .vector_size = sizeof (u32), + .format_trace = 
format_sr_policy_rewrite_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = SR_POLICY_REWRITE_N_ERROR, + .error_strings = sr_policy_rewrite_error_strings, + .n_next_nodes = SR_POLICY_REWRITE_N_NEXT, + .next_nodes = { +#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n, + foreach_sr_policy_rewrite_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +/*************************** SR Segment Lists DPOs ****************************/ +static u8 * +format_sr_segment_list_dpo (u8 * s, va_list * args) +{ + ip6_sr_main_t *sm = &sr_main; + ip6_address_t *addr; + ip6_sr_sl_t *sl; + + index_t index = va_arg (*args, index_t); + CLIB_UNUSED (u32 indent) = va_arg (*args, u32); + s = format (s, "SR: Segment List index:[%d]", index); + s = format (s, "\n\tSegments:"); + + sl = pool_elt_at_index (sm->sid_lists, index); + + s = format (s, "< "); + vec_foreach (addr, sl->segments) + { + s = format (s, "%U, ", format_ip6_address, addr); + } + s = format (s, "\b\b > - "); + s = format (s, "Weight: %u", sl->weight); + + return s; +} + +const static dpo_vft_t sr_policy_rewrite_vft = { + .dv_lock = sr_dpo_lock, + .dv_unlock = sr_dpo_unlock, + .dv_format = format_sr_segment_list_dpo, +}; + +const static char *const sr_pr_encaps_ip6_nodes[] = { + "sr-pl-rewrite-encaps", + NULL, +}; + +const static char *const sr_pr_encaps_ip4_nodes[] = { + "sr-pl-rewrite-encaps-v4", + NULL, +}; + +const static char *const *const sr_pr_encaps_nodes[DPO_PROTO_NUM] = { + [DPO_PROTO_IP6] = sr_pr_encaps_ip6_nodes, + [DPO_PROTO_IP4] = sr_pr_encaps_ip4_nodes, +}; + +const static char *const sr_pr_insert_ip6_nodes[] = { + "sr-pl-rewrite-insert", + NULL, +}; + +const static char *const *const sr_pr_insert_nodes[DPO_PROTO_NUM] = { + [DPO_PROTO_IP6] = sr_pr_insert_ip6_nodes, +}; + +const static char *const sr_pr_bsid_insert_ip6_nodes[] = { + "sr-pl-rewrite-b-insert", + NULL, +}; + +const static char *const *const sr_pr_bsid_insert_nodes[DPO_PROTO_NUM] = { + [DPO_PROTO_IP6] = sr_pr_bsid_insert_ip6_nodes, +}; + +const static char *const sr_pr_bsid_encaps_ip6_nodes[] = { + "sr-pl-rewrite-b-encaps", + NULL, +}; + +const static char *const *const sr_pr_bsid_encaps_nodes[DPO_PROTO_NUM] = { + [DPO_PROTO_IP6] = sr_pr_bsid_encaps_ip6_nodes, +}; + +/********************* SR Policy Rewrite initialization ***********************/ +/** + * @brief SR Policy Rewrite initialization + */ +clib_error_t * +sr_policy_rewrite_init (vlib_main_t * vm) +{ + ip6_sr_main_t *sm = &sr_main; + + /* Init memory for sr policy keys (bsid <-> ip6_address_t) */ + mhash_init (&sm->sr_policies_index_hash, sizeof (uword), + sizeof (ip6_address_t)); + + /* Init SR VPO DPOs type */ + sr_pr_encaps_dpo_type = + dpo_register_new_type (&sr_policy_rewrite_vft, sr_pr_encaps_nodes); + + sr_pr_insert_dpo_type = + dpo_register_new_type (&sr_policy_rewrite_vft, sr_pr_insert_nodes); + + sr_pr_bsid_encaps_dpo_type = + dpo_register_new_type (&sr_policy_rewrite_vft, sr_pr_bsid_encaps_nodes); + + sr_pr_bsid_insert_dpo_type = + dpo_register_new_type (&sr_policy_rewrite_vft, sr_pr_bsid_insert_nodes); + + /* Register the L2 encaps node used in HW redirect */ + sm->l2_sr_policy_rewrite_index = sr_policy_rewrite_encaps_node.index; + + sm->fib_table_ip6 = (u32) ~ 0; + sm->fib_table_ip4 = (u32) ~ 0; + + return 0; +} + +VLIB_INIT_FUNCTION (sr_policy_rewrite_init); + + +/* +* fd.io coding-style-patch-verification: ON +* +* Local Variables: +* eval: (c-set-style "gnu") +* End: +*/ diff --git a/src/vnet/srv6/sr_steering.c b/src/vnet/srv6/sr_steering.c new file mode 100755 index 00000000..a7903751 --- /dev/null 
+++ b/src/vnet/srv6/sr_steering.c @@ -0,0 +1,573 @@ +/* + * sr_steering.c: ipv6 segment routing steering into SR policy + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file + * @brief Packet steering into SR Policies + * + * This file is in charge of handling the FIB appropiatly to steer packets + * through SR Policies as defined in 'sr_policy_rewrite.c'. Notice that here + * we are only doing steering. SR policy application is done in + * sr_policy_rewrite.c + * + * Supports: + * - Steering of IPv6 traffic Destination Address based + * - Steering of IPv4 traffic Destination Address based + * - Steering of L2 frames, interface based (sw interface) + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/** + * @brief Steer traffic L2 and L3 traffic through a given SR policy + * + * @param is_del + * @param bsid is the bindingSID of the SR Policy (alt to sr_policy_index) + * @param sr_policy is the index of the SR Policy (alt to bsid) + * @param table_id is the VRF where to install the FIB entry for the BSID + * @param prefix is the IPv4/v6 address for L3 traffic type + * @param mask_width is the mask for L3 traffic type + * @param sw_if_index is the incoming interface for L2 traffic + * @param traffic_type describes the type of traffic + * + * @return 0 if correct, else error + */ +int +sr_steering_policy (int is_del, ip6_address_t * bsid, u32 sr_policy_index, + u32 table_id, ip46_address_t * prefix, u32 mask_width, + u32 sw_if_index, u8 traffic_type) +{ + ip6_sr_main_t *sm = &sr_main; + sr_steering_key_t key; + ip6_sr_steering_policy_t *steer_pl; + fib_prefix_t pfx = { 0 }; + + ip6_sr_policy_t *sr_policy = 0; + uword *p = 0; + + memset (&key, 0, sizeof (sr_steering_key_t)); + + /* Compute the steer policy key */ + if (traffic_type == SR_STEER_IPV4 || traffic_type == SR_STEER_IPV6) + { + key.l3.prefix.as_u64[0] = prefix->as_u64[0]; + key.l3.prefix.as_u64[1] = prefix->as_u64[1]; + key.l3.mask_width = mask_width; + key.l3.fib_table = (table_id != (u32) ~ 0 ? 
table_id : 0); + } + else if (traffic_type == SR_STEER_L2) + { + key.l2.sw_if_index = sw_if_index; + + /* Sanitise the SW_IF_INDEX */ + if (pool_is_free_index (sm->vnet_main->interface_main.sw_interfaces, + sw_if_index)) + return -3; + + vnet_sw_interface_t *sw = + vnet_get_sw_interface (sm->vnet_main, sw_if_index); + if (sw->type != VNET_SW_INTERFACE_TYPE_HARDWARE) + return -3; + } + else + return -1; + + key.traffic_type = traffic_type; + + /* Search for the item */ + p = mhash_get (&sm->sr_steer_policies_hash, &key); + + if (p) + { + /* Retrieve Steer Policy function */ + steer_pl = pool_elt_at_index (sm->steer_policies, p[0]); + + if (is_del) + { + if (steer_pl->classify.traffic_type == SR_STEER_IPV6) + { + /* Remove FIB entry */ + pfx.fp_proto = FIB_PROTOCOL_IP6; + pfx.fp_len = steer_pl->classify.l3.mask_width; + pfx.fp_addr.ip6 = steer_pl->classify.l3.prefix.ip6; + + fib_table_entry_delete (fib_table_find + (FIB_PROTOCOL_IP6, + steer_pl->classify.l3.fib_table), + &pfx, FIB_SOURCE_SR); + } + else if (steer_pl->classify.traffic_type == SR_STEER_IPV4) + { + /* Remove FIB entry */ + pfx.fp_proto = FIB_PROTOCOL_IP4; + pfx.fp_len = steer_pl->classify.l3.mask_width; + pfx.fp_addr.ip4 = steer_pl->classify.l3.prefix.ip4; + + fib_table_entry_delete (fib_table_find + (FIB_PROTOCOL_IP4, + steer_pl->classify.l3.fib_table), &pfx, + FIB_SOURCE_SR); + } + else if (steer_pl->classify.traffic_type == SR_STEER_L2) + { + /* Remove HW redirection */ + vnet_feature_enable_disable ("device-input", + "sr-policy-rewrite-encaps-l2", + sw_if_index, 0, 0, 0); + sm->sw_iface_sr_policies[sw_if_index] = ~(u32) 0; + + /* Remove promiscous mode from interface */ + vnet_main_t *vnm = vnet_get_main (); + ethernet_main_t *em = ðernet_main; + ethernet_interface_t *eif = + ethernet_get_interface (em, sw_if_index); + + if (!eif) + goto cleanup_error_redirection; + + ethernet_set_flags (vnm, sw_if_index, 0); + } + + /* Delete SR steering policy entry */ + pool_put (sm->steer_policies, steer_pl); + mhash_unset (&sm->sr_steer_policies_hash, &key, NULL); + + /* If no more SR policies or steering policies */ + if (!pool_elts (sm->sr_policies) && !pool_elts (sm->steer_policies)) + { + fib_table_unlock (sm->fib_table_ip6, FIB_PROTOCOL_IP6); + fib_table_unlock (sm->fib_table_ip4, FIB_PROTOCOL_IP6); + sm->fib_table_ip6 = (u32) ~ 0; + sm->fib_table_ip4 = (u32) ~ 0; + } + + return 1; + } + else /* It means user requested to update an existing SR steering policy */ + { + /* Retrieve SR steering policy */ + if (bsid) + { + p = mhash_get (&sm->sr_policies_index_hash, bsid); + if (p) + sr_policy = pool_elt_at_index (sm->sr_policies, p[0]); + else + return -2; + } + else + sr_policy = pool_elt_at_index (sm->sr_policies, sr_policy_index); + + if (!sr_policy) + return -2; + + steer_pl->sr_policy = sr_policy - sm->sr_policies; + + /* Remove old FIB/hw redirection and create a new one */ + if (steer_pl->classify.traffic_type == SR_STEER_IPV6) + { + /* Remove FIB entry */ + pfx.fp_proto = FIB_PROTOCOL_IP6; + pfx.fp_len = steer_pl->classify.l3.mask_width; + pfx.fp_addr.ip6 = steer_pl->classify.l3.prefix.ip6; + + fib_table_entry_delete (fib_table_find + (FIB_PROTOCOL_IP6, + steer_pl->classify.l3.fib_table), + &pfx, FIB_SOURCE_SR); + + /* Create a new one */ + goto update_fib; + } + else if (steer_pl->classify.traffic_type == SR_STEER_IPV4) + { + /* Remove FIB entry */ + pfx.fp_proto = FIB_PROTOCOL_IP4; + pfx.fp_len = steer_pl->classify.l3.mask_width; + pfx.fp_addr.ip4 = steer_pl->classify.l3.prefix.ip4; + + fib_table_entry_delete 
(fib_table_find + (FIB_PROTOCOL_IP4, + steer_pl->classify.l3.fib_table), + &pfx, FIB_SOURCE_SR); + + /* Create a new one */ + goto update_fib; + } + else if (steer_pl->classify.traffic_type == SR_STEER_L2) + { + /* Update L2-HW redirection */ + goto update_fib; + } + } + } + else + /* delete; steering policy does not exist; complain */ + if (is_del) + return -4; + + /* Retrieve SR policy */ + if (bsid) + { + p = mhash_get (&sm->sr_policies_index_hash, bsid); + if (p) + sr_policy = pool_elt_at_index (sm->sr_policies, p[0]); + else + return -2; + } + else + sr_policy = pool_elt_at_index (sm->sr_policies, sr_policy_index); + + /* Create a new steering policy */ + pool_get (sm->steer_policies, steer_pl); + memset (steer_pl, 0, sizeof (*steer_pl)); + + if (traffic_type == SR_STEER_IPV4 || traffic_type == SR_STEER_IPV6) + { + clib_memcpy (&steer_pl->classify.l3.prefix, prefix, + sizeof (ip46_address_t)); + steer_pl->classify.l3.mask_width = mask_width; + steer_pl->classify.l3.fib_table = + (table_id != (u32) ~ 0 ? table_id : 0); + steer_pl->classify.traffic_type = traffic_type; + } + else if (traffic_type == SR_STEER_L2) + { + steer_pl->classify.l2.sw_if_index = sw_if_index; + steer_pl->classify.traffic_type = traffic_type; + } + else + { + /* Incorrect API usage. Should never get here */ + pool_put (sm->steer_policies, steer_pl); + mhash_unset (&sm->sr_steer_policies_hash, &key, NULL); + return -1; + } + steer_pl->sr_policy = sr_policy - sm->sr_policies; + + /* Create and store key */ + mhash_set (&sm->sr_steer_policies_hash, &key, steer_pl - sm->steer_policies, + NULL); + + if (traffic_type == SR_STEER_L2) + { + if (!sr_policy->is_encap) + goto cleanup_error_encap; + + if (vnet_feature_enable_disable + ("device-input", "sr-pl-rewrite-encaps-l2", sw_if_index, 1, 0, 0)) + goto cleanup_error_redirection; + + /* Set promiscous mode on interface */ + vnet_main_t *vnm = vnet_get_main (); + ethernet_main_t *em = ðernet_main; + ethernet_interface_t *eif = ethernet_get_interface (em, sw_if_index); + + if (!eif) + goto cleanup_error_redirection; + + ethernet_set_flags (vnm, sw_if_index, + ETHERNET_INTERFACE_FLAG_ACCEPT_ALL); + } + else if (traffic_type == SR_STEER_IPV4) + if (!sr_policy->is_encap) + goto cleanup_error_encap; + +update_fib: + /* FIB API calls - Recursive route through the BindingSID */ + if (traffic_type == SR_STEER_IPV6) + { + pfx.fp_proto = FIB_PROTOCOL_IP6; + pfx.fp_len = steer_pl->classify.l3.mask_width; + pfx.fp_addr.ip6 = steer_pl->classify.l3.prefix.ip6; + + fib_table_entry_path_add (fib_table_find (FIB_PROTOCOL_IP6, + (table_id != + (u32) ~ 0 ? + table_id : 0)), + &pfx, FIB_SOURCE_SR, + FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, + FIB_PROTOCOL_IP6, + (ip46_address_t *) & sr_policy->bsid, ~0, + sm->fib_table_ip6, 1, NULL, + FIB_ROUTE_PATH_FLAG_NONE); + } + else if (traffic_type == SR_STEER_IPV4) + { + pfx.fp_proto = FIB_PROTOCOL_IP4; + pfx.fp_len = steer_pl->classify.l3.mask_width; + pfx.fp_addr.ip4 = steer_pl->classify.l3.prefix.ip4; + + fib_table_entry_path_add (fib_table_find (FIB_PROTOCOL_IP4, + (table_id != + (u32) ~ 0 ? 
+ table_id : 0)), + &pfx, FIB_SOURCE_SR, + FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, + FIB_PROTOCOL_IP6, + (ip46_address_t *) & sr_policy->bsid, ~0, + sm->fib_table_ip4, 1, NULL, + FIB_ROUTE_PATH_FLAG_NONE); + } + else if (traffic_type == SR_STEER_L2) + { + if (sw_if_index < vec_len (sm->sw_iface_sr_policies)) + sm->sw_iface_sr_policies[sw_if_index] = steer_pl->sr_policy; + else + { + vec_resize (sm->sw_iface_sr_policies, + (pool_len (sm->vnet_main->interface_main.sw_interfaces) + - vec_len (sm->sw_iface_sr_policies))); + sm->sw_iface_sr_policies[sw_if_index] = steer_pl->sr_policy; + } + } + + return 0; + +cleanup_error_encap: + pool_put (sm->steer_policies, steer_pl); + mhash_unset (&sm->sr_steer_policies_hash, &key, NULL); + return -5; + +cleanup_error_redirection: + pool_put (sm->steer_policies, steer_pl); + mhash_unset (&sm->sr_steer_policies_hash, &key, NULL); + return -3; +} + +static clib_error_t * +sr_steer_policy_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t *vnm = vnet_get_main (); + + int is_del = 0; + + ip46_address_t prefix; + u32 dst_mask_width = 0; + u32 sw_if_index = (u32) ~ 0; + u8 traffic_type = 0; + u32 fib_table = (u32) ~ 0; + + ip6_address_t bsid; + u32 sr_policy_index = (u32) ~ 0; + + u8 sr_policy_set = 0; + + memset (&prefix, 0, sizeof (ip46_address_t)); + + int rv; + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "del")) + is_del = 1; + else if (!traffic_type + && unformat (input, "l3 %U/%d", unformat_ip6_address, + &prefix.ip6, &dst_mask_width)) + traffic_type = SR_STEER_IPV6; + else if (!traffic_type + && unformat (input, "l3 %U/%d", unformat_ip4_address, + &prefix.ip4, &dst_mask_width)) + traffic_type = SR_STEER_IPV4; + else if (!traffic_type + && unformat (input, "l2 %U", unformat_vnet_sw_interface, vnm, + &sw_if_index)) + traffic_type = SR_STEER_L2; + else if (!sr_policy_set + && unformat (input, "via sr policy index %d", + &sr_policy_index)) + sr_policy_set = 1; + else if (!sr_policy_set + && unformat (input, "via sr policy bsid %U", + unformat_ip6_address, &bsid)) + sr_policy_set = 1; + else if (fib_table == (u32) ~ 0 + && unformat (input, "fib-table %d", &fib_table)); + else + break; + } + + if (!traffic_type) + return clib_error_return (0, "No L2/L3 traffic specified"); + if (!sr_policy_set) + return clib_error_return (0, "No SR policy specified"); + + /* Make sure that the prefixes are clean */ + if (traffic_type == SR_STEER_IPV4) + { + u32 mask = + (dst_mask_width ? (0xFFFFFFFFu >> (32 - dst_mask_width)) : 0); + prefix.ip4.as_u32 &= mask; + } + else if (traffic_type == SR_STEER_IPV6) + { + ip6_address_t mask; + ip6_address_mask_from_width (&mask, dst_mask_width); + ip6_address_mask (&prefix.ip6, &mask); + } + + rv = + sr_steering_policy (is_del, (sr_policy_index == ~(u32) 0 ? &bsid : NULL), + sr_policy_index, fib_table, &prefix, dst_mask_width, + sw_if_index, traffic_type); + + switch (rv) + { + case 0: + break; + case 1: + return 0; + case -1: + return clib_error_return (0, "Incorrect API usage."); + case -2: + return clib_error_return (0, + "The requested SR policy could not be located. Review the BSID/index."); + case -3: + return clib_error_return (0, + "Unable to do SW redirect. 
Incorrect interface."); + case -4: + return clib_error_return (0, + "The requested SR steering policy could not be deleted."); + case -5: + return clib_error_return (0, + "The SR policy is not an encapsulation one."); + default: + return clib_error_return (0, "BUG: sr steer policy returns %d", rv); + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (sr_steer_policy_command, static) = { + .path = "sr steer", + .short_help = "sr steer (del) [l3 |l2 ]" + "via sr policy [index |bsid ]" + "(fib-table )", + .long_help = + "\tSteer a L2 or L3 traffic through an existing SR policy.\n" + "\tExamples:\n" + "\t\tsr steer l3 2001::/64 via sr_policy index 5\n" + "\t\tsr steer l3 2001::/64 via sr_policy bsid 2010::9999:1\n" + "\t\tsr steer l2 GigabitEthernet0/5/0 via sr_policy index 5\n" + "\t\tsr steer del l3 2001::/64 via sr_policy index 5\n", + .function = sr_steer_policy_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +show_sr_steering_policies_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ip6_sr_main_t *sm = &sr_main; + ip6_sr_steering_policy_t **steer_policies = 0; + ip6_sr_steering_policy_t *steer_pl; + + vnet_main_t *vnm = vnet_get_main (); + + ip6_sr_policy_t *pl = 0; + int i; + + vlib_cli_output (vm, "SR steering policies:"); + /* *INDENT-OFF* */ + pool_foreach (steer_pl, sm->steer_policies, ({vec_add1(steer_policies, steer_pl);})); + /* *INDENT-ON* */ + vlib_cli_output (vm, "Traffic\t\tSR policy BSID"); + for (i = 0; i < vec_len (steer_policies); i++) + { + steer_pl = steer_policies[i]; + pl = pool_elt_at_index (sm->sr_policies, steer_pl->sr_policy); + if (steer_pl->classify.traffic_type == SR_STEER_L2) + { + vlib_cli_output (vm, "L2 %U\t%U", + format_vnet_sw_if_index_name, vnm, + steer_pl->classify.l2.sw_if_index, + format_ip6_address, &pl->bsid); + } + else if (steer_pl->classify.traffic_type == SR_STEER_IPV4) + { + vlib_cli_output (vm, "L3 %U/%d\t%U", + format_ip4_address, + &steer_pl->classify.l3.prefix.ip4, + steer_pl->classify.l3.mask_width, + format_ip6_address, &pl->bsid); + } + else if (steer_pl->classify.traffic_type == SR_STEER_IPV6) + { + vlib_cli_output (vm, "L3 %U/%d\t%U", + format_ip6_address, + &steer_pl->classify.l3.prefix.ip6, + steer_pl->classify.l3.mask_width, + format_ip6_address, &pl->bsid); + } + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_sr_steering_policies_command, static) = { + .path = "show sr steering policies", + .short_help = "show sr steering policies", + .function = show_sr_steering_policies_command_fn, +}; +/* *INDENT-ON* */ + +clib_error_t * +sr_steering_init (vlib_main_t * vm) +{ + ip6_sr_main_t *sm = &sr_main; + + /* Init memory for function keys */ + mhash_init (&sm->sr_steer_policies_hash, sizeof (uword), + sizeof (sr_steering_key_t)); + + sm->sw_iface_sr_policies = 0; + + sm->vnet_main = vnet_get_main (); + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_INIT_FUNCTION (sr_steering_init); +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +VNET_FEATURE_INIT (sr_pl_rewrite_encaps_l2, static) = +{ + .arc_name = "device-input", + .node_name = "sr-pl-rewrite-encaps-l2", + .runs_before = VNET_FEATURES ("ethernet-input"), +}; +/* *INDENT-ON* */ + +/* +* fd.io coding-style-patch-verification: ON +* +* Local Variables: +* eval: (c-set-style "gnu") +* End: +*/ diff --git a/src/vnet/srv6/sr_steering.md b/src/vnet/srv6/sr_steering.md new file mode 100644 index 00000000..cf446f81 --- /dev/null +++ b/src/vnet/srv6/sr_steering.md @@ -0,0 +1,11 @@ +# Steering packets into a SR Policy 
{#srv6_steering_doc} + +To steer packets in Transit into an SR policy (T.Insert, T.Encaps and T.Encaps.L2 behaviors), the user needs to create an 'sr steering policy'. + + sr steer l3 2001::/64 via sr policy index 1 + sr steer l3 2001::/64 via sr policy bsid cafe::1 + sr steer l3 2001::/64 via sr policy bsid cafe::1 fib-table 3 + sr steer l3 10.0.0.0/16 via sr policy bsid cafe::1 + sr steer l2 TenGE0/1/0 via sr policy bsid cafe::1 + +Disclaimer: The T.Encaps.L2 will steer L2 frames into an SR Policy. Notice that creating an SR steering policy for L2 frames will actually automatically *put the interface into promiscous mode*. diff --git a/src/vnet/vnet_all_api_h.h b/src/vnet/vnet_all_api_h.h index 9d3abae5..566e22ec 100644 --- a/src/vnet/vnet_all_api_h.h +++ b/src/vnet/vnet_all_api_h.h @@ -50,7 +50,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index baf45d5c..16d51225 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -53,7 +53,7 @@ #include #include #if WITH_LIBSSL > 0 -#include +#include #endif #include #include diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c index 000fe0d4..107e83f3 100644 --- a/src/vpp/api/custom_dump.c +++ b/src/vpp/api/custom_dump.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/vpp/api/vpe.api b/src/vpp/api/vpe.api index 7c07c822..99ae4784 100644 --- a/src/vpp/api/vpe.api +++ b/src/vpp/api/vpe.api @@ -40,7 +40,7 @@ * LISP-GPE APIs: see .../src/vnet/lisp-gpe/{lisp_gpe.api, lisp_gpe_api.c} * SESSION APIs: .../vnet/session/{session.api session_api.c} * MPLS APIs: see .../src/vnet/mpls/{mpls.api, mpls_api.c} - * SR APIs: see .../src/vnet/sr/{sr.api, sr_api.c} + * SR APIs: see .../src/vnet/srv6/{sr.api, sr_api.c} * CLASSIFY APIs: see ... /src/vnet/classify/{classify.api, classify_api.c} * FLOW APIs: see ... /src/vnet/flow/{flow.api, flow_api.c} * DHCP APIs: see ... 
/src/vnet/dhcp/{dhcpk.api, dhcp_api.c} -- cgit 1.2.3-korg From 7537e717d1ca6de0e33478bc50b9f7125f04c808 Mon Sep 17 00:00:00 2001 From: Eyal Bari Date: Thu, 27 Apr 2017 14:07:55 +0300 Subject: L2FIB:CLI/API to flush all non-static entries added CLI l2fib flush-mac all added API l2fib_flush_all flushes all non static l2fib entries on all valid BDs Change-Id: Ic963c88f4bed56308c03ab43106033132a0e87be Signed-off-by: Eyal Bari --- src/vnet/buffer.h | 3 +- src/vnet/l2/l2.api | 10 +++ src/vnet/l2/l2_api.c | 18 +++-- src/vnet/l2/l2_fib.c | 168 ++++++++++++++++++++++++++++------------------ src/vnet/l2/l2_fib.h | 21 +++++- src/vnet/l2/l2_input.c | 13 ++-- src/vnet/l2/l2_learn.c | 12 ++-- src/vpp/api/custom_dump.c | 12 ++++ 8 files changed, 169 insertions(+), 88 deletions(-) (limited to 'src/vpp/api') diff --git a/src/vnet/buffer.h b/src/vnet/buffer.h index 5d1b1c4d..ec5e2f75 100644 --- a/src/vnet/buffer.h +++ b/src/vnet/buffer.h @@ -176,8 +176,7 @@ typedef struct u16 bd_index; /* bridge-domain index */ u8 l2_len; /* ethernet header length */ u8 shg; /* split-horizon group */ - u8 bd_sn; /* bridge domain seq# */ - u8 int_sn; /* interface seq# */ + u16 l2fib_sn; /* l2fib bd/int seq_num */ } l2; /* l2tpv3 softwire encap, only valid there */ diff --git a/src/vnet/l2/l2.api b/src/vnet/l2/l2.api index db42d635..e9a1f361 100644 --- a/src/vnet/l2/l2.api +++ b/src/vnet/l2/l2.api @@ -76,6 +76,16 @@ autoreply define l2_fib_clear_table u32 context; }; +/** \brief L2 FIB flush all entries + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +autoreply define l2fib_flush_all +{ + u32 client_index; + u32 context; +}; + /** \brief L2 FIB flush bridge domain entries @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/l2/l2_api.c b/src/vnet/l2/l2_api.c index 8cc7c794..5f371ccd 100644 --- a/src/vnet/l2/l2_api.c +++ b/src/vnet/l2/l2_api.c @@ -48,6 +48,7 @@ _(L2_XCONNECT_DUMP, l2_xconnect_dump) \ _(L2_FIB_CLEAR_TABLE, l2_fib_clear_table) \ _(L2_FIB_TABLE_DUMP, l2_fib_table_dump) \ +_(L2FIB_FLUSH_ALL, l2fib_flush_all) \ _(L2FIB_FLUSH_INT, l2fib_flush_int) \ _(L2FIB_FLUSH_BD, l2fib_flush_bd) \ _(L2FIB_ADD_DEL, l2fib_add_del) \ @@ -106,11 +107,8 @@ vl_api_l2_fib_clear_table_t_handler (vl_api_l2_fib_clear_table_t * mp) int rv = 0; vl_api_l2_fib_clear_table_reply_t *rmp; - /* DAW-FIXME: This API should only clear non-static l2fib entries, but - * that is not currently implemented. When that TODO is fixed - * this call should be changed to pass 1 instead of 0. - */ - l2fib_clear_table (0); + /* Clear all MACs including static MACs */ + l2fib_clear_table (); REPLY_MACRO (VL_API_L2_FIB_CLEAR_TABLE_REPLY); } @@ -258,6 +256,16 @@ vl_api_l2fib_flush_int_t_handler (vl_api_l2fib_flush_int_t * mp) REPLY_MACRO (VL_API_L2FIB_FLUSH_INT_REPLY); } +static void +vl_api_l2fib_flush_all_t_handler (vl_api_l2fib_flush_all_t * mp) +{ + int rv = 0; + vl_api_l2fib_flush_all_reply_t *rmp; + + l2fib_flush_all_mac (vlib_get_main ()); + REPLY_MACRO (VL_API_L2FIB_FLUSH_ALL_REPLY); +} + static void vl_api_l2fib_flush_bd_t_handler (vl_api_l2fib_flush_bd_t * mp) { diff --git a/src/vnet/l2/l2_fib.c b/src/vnet/l2/l2_fib.c index 028a7326..d4207e35 100644 --- a/src/vnet/l2/l2_fib.c +++ b/src/vnet/l2/l2_fib.c @@ -54,7 +54,6 @@ typedef struct l2fib_main_t l2fib_main; - /** Format sw_if_index. 
If the value is ~0, use the text "N/A" */ u8 * format_vnet_sw_if_index_name_with_NA (u8 * s, va_list * args) @@ -198,7 +197,7 @@ show_l2fib (vlib_main_t * vm, key.fields.bd_index, result.fields.sw_if_index == ~0 ? -1 : result.fields.sw_if_index, - result.fields.bd_sn, result.fields.int_sn, + result.fields.sn.bd, result.fields.sn.swif, s, result.fields.static_mac ? "*" : "-", result.fields.filter ? "*" : "-", result.fields.bvi ? "*" : "-", @@ -259,22 +258,14 @@ VLIB_CLI_COMMAND (show_l2fib_cli, static) = { /* Remove all entries from the l2fib */ void -l2fib_clear_table (uint keep_static) +l2fib_clear_table (void) { l2fib_main_t *mp = &l2fib_main; - if (keep_static) - { - /* TODO: remove only non-static entries */ - } - else - { - /* Remove all entries */ - BV (clib_bihash_free) (&mp->mac_table); - BV (clib_bihash_init) (&mp->mac_table, "l2fib mac table", - L2FIB_NUM_BUCKETS, L2FIB_MEMORY_SIZE); - } - + /* Remove all entries */ + BV (clib_bihash_free) (&mp->mac_table); + BV (clib_bihash_init) (&mp->mac_table, "l2fib mac table", + L2FIB_NUM_BUCKETS, L2FIB_MEMORY_SIZE); l2learn_main.global_learn_count = 0; } @@ -285,7 +276,7 @@ static clib_error_t * clear_l2fib (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { - l2fib_clear_table (0); + l2fib_clear_table (); return 0; } @@ -308,14 +299,25 @@ VLIB_CLI_COMMAND (clear_l2fib_cli, static) = { }; /* *INDENT-ON* */ +static inline l2fib_seq_num_t +l2fib_cur_seq_num (u32 bd_index, u32 sw_if_index) +{ + l2_input_config_t *int_config = l2input_intf_config (sw_if_index); + l2_bridge_domain_t *bd_config = l2input_bd_config (bd_index); + /* *INDENT-OFF* */ + return (l2fib_seq_num_t) { + .swif = int_config->seq_num, + .bd = bd_config->seq_num, + }; + /* *INDENT-ON* */ +} /** * Add an entry to the l2fib. * If the entry already exists then overwrite it */ void -l2fib_add_entry (u64 mac, - u32 bd_index, +l2fib_add_entry (u64 mac, u32 bd_index, u32 sw_if_index, u32 static_mac, u32 filter_mac, u32 bvi_mac) { l2fib_entry_key_t key; @@ -334,14 +336,7 @@ l2fib_add_entry (u64 mac, result.fields.filter = filter_mac; result.fields.bvi = bvi_mac; if (!static_mac) - { - l2_input_config_t *int_config = l2input_intf_config (sw_if_index); - l2_bridge_domain_t *bd_config = - vec_elt_at_index (l2input_main.bd_configs, - bd_index); - result.fields.int_sn = int_config->seq_num; - result.fields.bd_sn = bd_config->seq_num; - } + result.fields.sn = l2fib_cur_seq_num (bd_index, sw_if_index); kv.key = key.raw; kv.value = result.raw; @@ -620,8 +615,8 @@ VLIB_CLI_COMMAND (l2fib_test_command, static) = { * Delete an entry from the l2fib. * Return 0 if the entry was deleted, or 1 if it was not found */ -u32 -l2fib_del_entry (u64 mac, u32 bd_index) +static u32 +l2fib_del_entry_by_key (u64 raw_key) { l2fib_entry_result_t result; @@ -629,7 +624,7 @@ l2fib_del_entry (u64 mac, u32 bd_index) BVT (clib_bihash_kv) kv; /* set up key */ - kv.key = l2fib_make_key ((u8 *) & mac, bd_index); + kv.key = raw_key; if (BV (clib_bihash_search) (&mp->mac_table, &kv, &kv)) return 1; @@ -650,6 +645,16 @@ l2fib_del_entry (u64 mac, u32 bd_index) return 0; } +/** + * Delete an entry from the l2fib. + * Return 0 if the entry was deleted, or 1 if it was not found + */ +u32 +l2fib_del_entry (u64 mac, u32 bd_index) +{ + return l2fib_del_entry_by_key (l2fib_make_key ((u8 *) & mac, bd_index)); +} + /** * Delete an entry from the L2FIB. 
* The CLI format is: @@ -735,29 +740,42 @@ l2fib_start_ager_scan (vlib_main_t * vm) } /** - Flush all learned MACs from an interface + Flush all non static MACs from an interface */ void l2fib_flush_int_mac (vlib_main_t * vm, u32 sw_if_index) { - l2_input_config_t *int_config; - int_config = l2input_intf_config (sw_if_index); + l2_input_config_t *int_config = l2input_intf_config (sw_if_index); int_config->seq_num += 1; l2fib_start_ager_scan (vm); } /** - Flush all learned MACs in a bridge domain + Flush all non static MACs in a bridge domain */ void l2fib_flush_bd_mac (vlib_main_t * vm, u32 bd_index) { - l2_bridge_domain_t *bd_config; - bd_config = l2input_bd_config (bd_index); + l2_bridge_domain_t *bd_config = l2input_bd_config (bd_index); bd_config->seq_num += 1; l2fib_start_ager_scan (vm); } +/** + Flush all non static MACs - flushes all valid BDs +*/ +void +l2fib_flush_all_mac (vlib_main_t * vm) +{ + l2_bridge_domain_t *bd_config; + vec_foreach (bd_config, l2input_main.bd_configs) + if (bd_is_valid (bd_config)) + bd_config->seq_num += 1; + + l2fib_start_ager_scan (vm); +} + + /** Flush MACs, except static ones, associated with an interface The CLI format is: @@ -784,6 +802,35 @@ done: return error; } +/** + Flush all MACs, except static ones + The CLI format is: + l2fib flush-mac all +*/ +static clib_error_t * +l2fib_flush_mac_all (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + l2fib_flush_all_mac (vm); + return 0; +} + +/*? + * This command kick off ager to delete all existing MAC Address entries, + * except static ones, associated with an interface from the L2 FIB table. + * + * @cliexpar + * Example of how to flush MAC Address entries learned on an interface from the L2 FIB table: + * @cliexcmd{l2fib flush-mac interface GigabitEthernet2/1/0} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (l2fib_flush_mac_all_cli, static) = { + .path = "l2fib flush-mac all", + .short_help = "l2fib flush-mac all", + .function = l2fib_flush_mac_all, +}; +/* *INDENT-ON* */ + /*? * This command kick off ager to delete all existing MAC Address entries, * except static ones, associated with an interface from the L2 FIB table. 
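The flush commands introduced here never walk the MAC table directly: each interface and each bridge domain keeps a small sequence number, a learned entry records both values, and a flush (per interface, per bridge domain, or "all") merely bumps the relevant counter and kicks the ager, which then deletes every non-static entry whose stored number no longer matches. The fragment below is a rough standalone sketch of that staleness test only; the names seq_num_sketch_t and entry_is_stale are invented for illustration and are not part of the patch (the real type is l2fib_seq_num_t in l2_fib.h, and the comparison lives in the mac-age scanner).

#include <stdint.h>

/* Sketch of the combined bd/interface sequence number added by this patch. */
typedef union
{
  struct
  {
    uint8_t swif;  /* per-interface sequence number */
    uint8_t bd;    /* per-bridge-domain sequence number */
  };
  uint16_t as_u16;
} seq_num_sketch_t;

/* A flush only increments the relevant counter; stale entries are then
   removed lazily when the ager scan sees a mismatch. */
static inline int
entry_is_stale (uint16_t stored_sn, uint16_t current_sn)
{
  return stored_sn != current_sn;
}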
@@ -872,17 +919,8 @@ l2fib_mac_age_scanner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, { uword event_type, *event_data = 0; l2fib_main_t *msm = &l2fib_main; - l2_input_config_t *int_config; - l2_bridge_domain_t *bd_config; - BVT (clib_bihash) * h = &msm->mac_table; - clib_bihash_bucket_t *b; - BVT (clib_bihash_value) * v; - l2fib_entry_key_t key; - l2fib_entry_result_t result; - int i, j, k; bool enabled = 0; f64 start_time, last_run_duration = 0, t; - i16 delta; while (1) { @@ -911,6 +949,9 @@ l2fib_mac_age_scanner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, ASSERT (0); } last_run_duration = start_time = vlib_time_now (vm); + + BVT (clib_bihash) * h = &msm->mac_table; + int i, j, k; for (i = 0; i < h->nbuckets; i++) { /* Allow no more than 10us without a pause */ @@ -923,20 +964,22 @@ l2fib_mac_age_scanner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, if (i < (h->nbuckets - 3)) { - b = &h->buckets[i + 3]; + clib_bihash_bucket_t *b = &h->buckets[i + 3]; CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, LOAD); b = &h->buckets[i + 1]; if (b->offset) { - v = BV (clib_bihash_get_value) (h, b->offset); + BVT (clib_bihash_value) * v = + BV (clib_bihash_get_value) (h, b->offset); CLIB_PREFETCH (v, CLIB_CACHE_LINE_BYTES, LOAD); } } - b = &h->buckets[i]; + clib_bihash_bucket_t *b = &h->buckets[i]; if (b->offset == 0) continue; - v = BV (clib_bihash_get_value) (h, b->offset); + BVT (clib_bihash_value) * v = + BV (clib_bihash_get_value) (h, b->offset); for (j = 0; j < (1 << b->log2_pages); j++) { for (k = 0; k < BIHASH_KVP_PER_PAGE; k++) @@ -944,37 +987,32 @@ l2fib_mac_age_scanner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, if (v->kvp[k].key == ~0ULL && v->kvp[k].value == ~0ULL) continue; - key.raw = v->kvp[k].key; - result.raw = v->kvp[k].value; + l2fib_entry_key_t key = {.raw = v->kvp[k].key }; + l2fib_entry_result_t result = {.raw = v->kvp[k].value }; if (result.fields.static_mac) continue; - int_config = - l2input_intf_config (result.fields.sw_if_index); - bd_config = - vec_elt_at_index (l2input_main.bd_configs, - key.fields.bd_index); - - if ((result.fields.int_sn != int_config->seq_num) || - (result.fields.bd_sn != bd_config->seq_num)) + u32 bd_index = key.fields.bd_index; + u32 sw_if_index = result.fields.sw_if_index; + u16 sn = l2fib_cur_seq_num (bd_index, sw_if_index).as_u16; + if (result.fields.sn.as_u16 != sn) { - void *p = &key.fields.mac; - l2fib_del_entry (*(u64 *) p, key.fields.bd_index); + l2fib_del_entry_by_key (key.raw); continue; } + l2_bridge_domain_t *bd_config = + vec_elt_at_index (l2input_main.bd_configs, bd_index); if (bd_config->mac_age == 0) continue; - delta = (u8) (start_time / 60) - result.fields.timestamp; + i16 delta = + (u8) (start_time / 60) - result.fields.timestamp; delta += delta < 0 ? 
256 : 0; if (delta > bd_config->mac_age) - { - void *p = &key.fields.mac; - l2fib_del_entry (*(u64 *) p, key.fields.bd_index); - } + l2fib_del_entry_by_key (key.raw); } v++; } diff --git a/src/vnet/l2/l2_fib.h b/src/vnet/l2/l2_fib.h index 7e49d74b..e571a210 100644 --- a/src/vnet/l2/l2_fib.h +++ b/src/vnet/l2/l2_fib.h @@ -50,6 +50,20 @@ typedef struct STATIC_ASSERT_SIZEOF (l2fib_entry_key_t, 8); + +typedef struct +{ + union + { + struct + { + u8 swif; + u8 bd; + }; + u16 as_u16; + }; +} l2fib_seq_num_t; + /* * The l2fib entry results */ @@ -66,8 +80,7 @@ typedef struct u8 filter:1; /* drop packets to/from this mac */ u8 unused1:5; u8 timestamp; /* timestamp for aging */ - u8 int_sn; /* interface seq num */ - u8 bd_sn; /* bridge domain seq num */ + l2fib_seq_num_t sn; /* bd/int seq num */ } fields; u64 raw; }; @@ -314,7 +327,7 @@ l2fib_lookup_4 (BVT (clib_bihash) * mac_table, } } -void l2fib_clear_table (uint keep_static); +void l2fib_clear_table (void); void l2fib_add_entry (u64 mac, @@ -329,6 +342,8 @@ void l2fib_flush_int_mac (vlib_main_t * vm, u32 sw_if_index); void l2fib_flush_bd_mac (vlib_main_t * vm, u32 bd_index); +void l2fib_flush_all_mac (vlib_main_t * vm); + void l2fib_table_dump (u32 bd_index, l2fib_entry_key_t ** l2fe_key, l2fib_entry_result_t ** l2fe_res); diff --git a/src/vnet/l2/l2_input.c b/src/vnet/l2/l2_input.c index fe65e694..41a93f56 100644 --- a/src/vnet/l2/l2_input.c +++ b/src/vnet/l2/l2_input.c @@ -202,8 +202,14 @@ classify_and_dispatch (vlib_main_t * vm, /* Get config for the bridge domain interface */ bd_config = vec_elt_at_index (msm->bd_configs, bd_index0); - /* Save bridge domain seq_num */ - vnet_buffer (b0)->l2.bd_sn = bd_config->seq_num; + /* Save bridge domain and interface seq_num */ + /* *INDENT-OFF* */ + l2fib_seq_num_t sn = { + .swif = config->seq_num, + .bd = bd_config->seq_num, + }; + /* *INDENT-ON* */ + vnet_buffer (b0)->l2.l2fib_sn = sn.as_u16;; /* * Process bridge domain feature enables. 
@@ -218,9 +224,6 @@ classify_and_dispatch (vlib_main_t * vm, /* mask out features from bitmap using packet type and bd config */ feature_bitmap = config->feature_bitmap & feat_mask; - /* Save interface seq_num */ - vnet_buffer (b0)->l2.int_sn = config->seq_num; - /* save for next feature graph nodes */ vnet_buffer (b0)->l2.feature_bitmap = feature_bitmap; diff --git a/src/vnet/l2/l2_learn.c b/src/vnet/l2/l2_learn.c index faed0d66..adc5e70f 100644 --- a/src/vnet/l2/l2_learn.c +++ b/src/vnet/l2/l2_learn.c @@ -141,10 +141,8 @@ l2learn_process (vlib_node_runtime_t * node, if (PREDICT_FALSE (result0->fields.timestamp != timestamp)) result0->fields.timestamp = timestamp; if (PREDICT_FALSE - (result0->fields.int_sn != vnet_buffer (b0)->l2.int_sn)) - result0->fields.int_sn = vnet_buffer (b0)->l2.int_sn; - if (PREDICT_FALSE (result0->fields.bd_sn != vnet_buffer (b0)->l2.bd_sn)) - result0->fields.bd_sn = vnet_buffer (b0)->l2.bd_sn; + (result0->fields.sn.as_u16 != vnet_buffer (b0)->l2.l2fib_sn)) + result0->fields.sn.as_u16 = vnet_buffer (b0)->l2.l2fib_sn; } else if (result0->raw == ~0) { @@ -171,8 +169,7 @@ l2learn_process (vlib_node_runtime_t * node, result0->raw = 0; /* clear all fields */ result0->fields.sw_if_index = sw_if_index0; result0->fields.timestamp = timestamp; - result0->fields.bd_sn = vnet_buffer (b0)->l2.bd_sn; - result0->fields.int_sn = vnet_buffer (b0)->l2.int_sn; + result0->fields.sn.as_u16 = vnet_buffer (b0)->l2.l2fib_sn; kv.key = key0->raw; kv.value = result0->raw; @@ -210,8 +207,7 @@ l2learn_process (vlib_node_runtime_t * node, result0->raw = 0; /* clear all fields */ result0->fields.sw_if_index = sw_if_index0; result0->fields.timestamp = timestamp; - result0->fields.bd_sn = vnet_buffer (b0)->l2.bd_sn; - result0->fields.int_sn = vnet_buffer (b0)->l2.int_sn; + result0->fields.sn.as_u16 = vnet_buffer (b0)->l2.l2fib_sn; kv.key = key0->raw; kv.value = result0->raw; diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c index 107e83f3..c073c52d 100644 --- a/src/vpp/api/custom_dump.c +++ b/src/vpp/api/custom_dump.c @@ -298,6 +298,17 @@ static void *vl_api_bridge_domain_dump_t_print FINISH; } +static void *vl_api_l2fib_flush_all_t_print + (vl_api_l2fib_flush_all_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: l2fib_flush_all "); + + FINISH; +} + + static void *vl_api_l2fib_flush_bd_t_print (vl_api_l2fib_flush_bd_t * mp, void *handle) { @@ -2979,6 +2990,7 @@ _(SR_POLICY_MOD, sr_policy_mod) \ _(SR_POLICY_DEL, sr_policy_del) \ _(SW_INTERFACE_SET_L2_XCONNECT, sw_interface_set_l2_xconnect) \ _(L2FIB_ADD_DEL, l2fib_add_del) \ +_(L2FIB_FLUSH_ALL, l2fib_flush_all) \ _(L2FIB_FLUSH_BD, l2fib_flush_bd) \ _(L2FIB_FLUSH_INT, l2fib_flush_int) \ _(L2_FLAGS, l2_flags) \ -- cgit 1.2.3-korg From 9793477a28c45e4eb5bba3f2050fe415e57e8ad8 Mon Sep 17 00:00:00 2001 From: John Lo Date: Thu, 18 May 2017 22:26:47 -0400 Subject: Enforce Bridge Domain ID range to match 24-bit VNI range Enforce bridge domain ID range to allow a maximum value of 16M which matches the range of 24-bit VNI used for virtual overlay network ID. Fix "show bridge-domain" output to allow full 16M BD ID range to be displayed using 8-digit spaces. 
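The "16M" figure in this commit message is simply the 24-bit overlay VNI space: (1 << 24) - 1 = 16,777,215, so any bridge-domain ID above that value could not be mapped one-to-one onto a 24-bit VNI. Below is a minimal sketch of the range check this patch enforces; the helper name bd_id_in_range is invented for illustration, while the patch itself uses the L2_BD_ID_MAX define together with the VALIDATE_BD_ID macro and the VNET_API_ERROR_BD_ID_EXCEED_MAX error.

#include <stdint.h>

#define BD_ID_MAX_SKETCH ((1u << 24) - 1)   /* 16777215, largest 24-bit VNI */

static inline int
bd_id_in_range (uint32_t bd_id)
{
  /* IDs above the 24-bit limit are rejected by the API and CLI */
  return bd_id <= BD_ID_MAX_SKETCH;
}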
Change-Id: I80d9c76ea7c001bcccd3c19df1f3e55d2970f01c Signed-off-by: John Lo --- src/vlibapi/api_helper_macros.h | 14 ++++++++++++++ src/vnet/api_errno.h | 5 +++-- src/vnet/l2/l2_bd.c | 17 +++++++++++------ src/vnet/l2/l2_bd.h | 4 +++- src/vnet/l2/l2_input.c | 6 ++++++ src/vnet/lisp-gpe/interface.c | 6 ++++++ src/vpp/api/api.c | 2 ++ 7 files changed, 45 insertions(+), 9 deletions(-) (limited to 'src/vpp/api') diff --git a/src/vlibapi/api_helper_macros.h b/src/vlibapi/api_helper_macros.h index a492c3f4..2e29d4d7 100644 --- a/src/vlibapi/api_helper_macros.h +++ b/src/vlibapi/api_helper_macros.h @@ -155,6 +155,20 @@ bad_tx_sw_if_index: \ ; \ } while (0); +#define VALIDATE_BD_ID(mp) \ + do { u32 __rx_bd_id = ntohl(mp->bd_id); \ + if (__rx_bd_id > L2_BD_ID_MAX) { \ + rv = VNET_API_ERROR_BD_ID_EXCEED_MAX; \ + goto bad_bd_id; \ + } \ +} while(0); + +#define BAD_BD_ID_LABEL \ +do { \ +bad_bd_id: \ + ; \ +} while (0); + #define pub_sub_handler(lca,UCA) \ static void vl_api_want_##lca##_t_handler ( \ vl_api_want_##lca##_t *mp) \ diff --git a/src/vnet/api_errno.h b/src/vnet/api_errno.h index e694fbf1..b22bb3a8 100644 --- a/src/vnet/api_errno.h +++ b/src/vnet/api_errno.h @@ -110,8 +110,9 @@ _(SVM_SEGMENT_CREATE_FAIL, -117, "svm segment create fail") \ _(APPLICATION_NOT_ATTACHED, -118, "application not attached") \ _(BD_ALREADY_EXISTS, -119, "Bridge domain already exists") \ _(BD_IN_USE, -120, "Bridge domain has member interfaces") \ -_(BD_NOT_MODIFIABLE, -121, "Default bridge domain 0 can be neither deleted nor modified") \ -_(UNSUPPORTED, -122, "Unsupported") +_(BD_NOT_MODIFIABLE, -121, "Bridge domain 0 can't be deleted/modified") \ +_(BD_ID_EXCEED_MAX, -122, "Bridge domain ID exceed 16M limit") \ +_(UNSUPPORTED, -123, "Unsupported") typedef enum { diff --git a/src/vnet/l2/l2_bd.c b/src/vnet/l2/l2_bd.c index 351e6987..f68b6638 100644 --- a/src/vnet/l2/l2_bd.c +++ b/src/vnet/l2/l2_bd.c @@ -984,10 +984,10 @@ bd_show (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { printed = 1; vlib_cli_output (vm, - "%=5s %=7s %=4s %=9s %=9s %=9s %=9s %=9s %=9s %=9s", - "ID", "Index", "BSN", "Age(min)", "Learning", - "U-Forwrd", "UU-Flood", "Flooding", "ARP-Term", - "BVI-Intf"); + "%=8s %=7s %=4s %=9s %=9s %=9s %=9s %=9s %=9s %=9s", + "BD-ID", "Index", "BSN", "Age(min)", + "Learning", "U-Forwrd", "UU-Flood", "Flooding", + "ARP-Term", "BVI-Intf"); } if (bd_config->mac_age) @@ -995,7 +995,7 @@ bd_show (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) else as = format (as, "off"); vlib_cli_output (vm, - "%=5d %=7d %=4d %=9v %=9s %=9s %=9s %=9s %=9s %=9U", + "%=8d %=7d %=4d %=9v %=9s %=9s %=9s %=9s %=9s %=9U", bd_config->bd_id, bd_index, bd_config->seq_num, as, bd_config->feature_bitmap & L2INPUT_FEAT_LEARN ? 
"on" : "off", @@ -1123,6 +1123,8 @@ bd_add_del (l2_bridge_domain_add_del_args_t * a) { if (bd_index != ~0) return VNET_API_ERROR_BD_ALREADY_EXISTS; + if (a->bd_id > L2_BD_ID_MAX) + return VNET_API_ERROR_BD_ID_EXCEED_MAX; bd_index = bd_add_bd_index (bdm, a->bd_id); u32 enable_flags = 0, disable_flags = 0; @@ -1262,11 +1264,14 @@ bd_add_del_command_fn (vlib_main_t * vm, unformat_input_t * input, error = clib_error_return (0, "bridge domain in use - remove members"); goto done; case VNET_API_ERROR_NO_SUCH_ENTRY: - error = clib_error_return (0, "bridge domain id does not exist"); + error = clib_error_return (0, "bridge domain ID does not exist"); goto done; case VNET_API_ERROR_BD_NOT_MODIFIABLE: error = clib_error_return (0, "bridge domain 0 can not be modified"); goto done; + case VNET_API_ERROR_BD_ID_EXCEED_MAX: + error = clib_error_return (0, "bridge domain ID exceed 16M limit"); + goto done; default: error = clib_error_return (0, "bd_add_del returned %d", rv); goto done; diff --git a/src/vnet/l2/l2_bd.h b/src/vnet/l2/l2_bd.h index e502d497..93ed1a85 100644 --- a/src/vnet/l2/l2_bd.h +++ b/src/vnet/l2/l2_bd.h @@ -49,7 +49,6 @@ typedef struct u16 spare; } l2_flood_member_t; - /* Per-bridge domain configuration */ typedef struct @@ -91,6 +90,9 @@ typedef struct } l2_bridge_domain_t; +/* Limit Bridge Domain ID to 24 bits to match 24-bit VNI range */ +#define L2_BD_ID_MAX ((1<<24)-1) + typedef struct { u32 bd_id; diff --git a/src/vnet/l2/l2_input.c b/src/vnet/l2/l2_input.c index 41a93f56..aca23fe0 100644 --- a/src/vnet/l2/l2_input.c +++ b/src/vnet/l2/l2_input.c @@ -791,6 +791,12 @@ int_l2_bridge (vlib_main_t * vm, goto done; } + if (bd_id > L2_BD_ID_MAX) + { + error = clib_error_return (0, "bridge domain ID exceed 16M limit", + format_unformat_error, input); + goto done; + } bd_index = bd_find_or_add_bd_index (&bd_main, bd_id); /* optional bvi */ diff --git a/src/vnet/lisp-gpe/interface.c b/src/vnet/lisp-gpe/interface.c index ff750563..94703abc 100644 --- a/src/vnet/lisp-gpe/interface.c +++ b/src/vnet/lisp-gpe/interface.c @@ -653,6 +653,12 @@ lisp_gpe_add_l2_iface (lisp_gpe_main_t * lgm, u32 vni, u32 bd_id) uword *hip, *si; u16 bd_index; + if (bd_id > L2_BD_ID_MAX) + { + clib_warning ("bridge domain ID %d exceed 16M limit", bd_id); + return ~0; + } + bd_index = bd_find_or_add_bd_index (&bd_main, bd_id); hip = hash_get (l2_ifaces->hw_if_index_by_dp_table, bd_index); diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index 16d51225..7e4c341e 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -419,6 +419,7 @@ static void if (mp->enable) { + VALIDATE_BD_ID (mp); u32 bd_id = ntohl (mp->bd_id); u32 bd_index = bd_find_or_add_bd_index (bdm, bd_id); u32 bvi = mp->bvi; @@ -432,6 +433,7 @@ static void } BAD_RX_SW_IF_INDEX_LABEL; + BAD_BD_ID_LABEL; REPLY_MACRO (VL_API_SW_INTERFACE_SET_L2_BRIDGE_REPLY); } -- cgit 1.2.3-korg From 01384fe3d4c8f9d5c082cd602087a8eb71facd15 Mon Sep 17 00:00:00 2001 From: Ole Troan Date: Fri, 12 May 2017 11:55:35 +0200 Subject: API: Cleaning up message naming that does not follow the conventions is_address_reachable - Disabled so deleted cli_request - Renamed to cli vnet_summary_stats_reply - Renamed to vnet_get_summary_stats_reply bridge_domain_sw_if_details - Deleted, incorporated in main message l2_fib_table_entry - Renamed to l2_fib_table_details Change-Id: I93b7e8769a3ba7b4989b3c270270f575f386464f Signed-off-by: Ole Troan Signed-off-by: Marek Gradzki Signed-off-by: Ole Troan --- src/vat/api_format.c | 114 +++++++++++---------- src/vnet/l2/l2.api | 32 +++--- 
src/vnet/l2/l2_api.c | 61 +++++------ .../io/fd/vpp/jvpp/core/test/FutureApiTest.java | 4 +- src/vpp-api/java/jvpp/gen/jvppgen/util.py | 9 +- src/vpp/api/api.c | 85 +-------------- src/vpp/api/custom_dump.c | 8 +- src/vpp/api/summary_stats_client.c | 6 +- src/vpp/api/vpe.api | 23 +---- 9 files changed, 116 insertions(+), 226 deletions(-) (limited to 'src/vpp/api') diff --git a/src/vat/api_format.c b/src/vat/api_format.c index efb71ef6..22a91666 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -1323,6 +1323,9 @@ vl_api_ip6_nd_event_t_handler_json (vl_api_ip6_nd_event_t * mp) /* JSON output not supported */ } +#define vl_api_bridge_domain_details_t_endian vl_noop_handler +#define vl_api_bridge_domain_details_t_print vl_noop_handler + /* * Special-case: build the bridge domain table, maintain * the next bd id vbl. @@ -1332,6 +1335,7 @@ static void vl_api_bridge_domain_details_t_handler { vat_main_t *vam = &vat_main; u32 n_sw_ifs = ntohl (mp->n_sw_ifs); + int i; print (vam->ofp, "\n%-3s %-3s %-3s %-3s %-3s %-3s", " ID", "LRN", "FWD", "FLD", "BVI", "#IF"); @@ -1341,7 +1345,37 @@ static void vl_api_bridge_domain_details_t_handler mp->flood, ntohl (mp->bvi_sw_if_index), n_sw_ifs); if (n_sw_ifs) - print (vam->ofp, "\n\n%s %s %s", "sw_if_index", "SHG", "Interface Name"); + { + vl_api_bridge_domain_sw_if_t *sw_ifs; + print (vam->ofp, "\n\n%s %s %s", "sw_if_index", "SHG", + "Interface Name"); + + sw_ifs = mp->sw_if_details; + for (i = 0; i < n_sw_ifs; i++) + { + u8 *sw_if_name = 0; + u32 sw_if_index; + hash_pair_t *p; + + sw_if_index = ntohl (sw_ifs->sw_if_index); + + /* *INDENT-OFF* */ + hash_foreach_pair (p, vam->sw_if_index_by_interface_name, + ({ + if ((u32) p->value[0] == sw_if_index) + { + sw_if_name = (u8 *)(p->key); + break; + } + })); + /* *INDENT-ON* */ + print (vam->ofp, "%7d %3d %s", sw_if_index, + sw_ifs->shg, sw_if_name ? (char *) sw_if_name : + "sw_if_index not found!"); + + sw_ifs++; + } + } } static void vl_api_bridge_domain_details_t_handler_json @@ -1349,6 +1383,7 @@ static void vl_api_bridge_domain_details_t_handler_json { vat_main_t *vam = &vat_main; vat_json_node_t *node, *array = NULL; + u32 n_sw_ifs = ntohl (mp->n_sw_ifs); if (VAT_JSON_ARRAY != vam->json_tree.type) { @@ -1364,58 +1399,28 @@ static void vl_api_bridge_domain_details_t_handler_json vat_json_object_add_uint (node, "learn", mp->learn); vat_json_object_add_uint (node, "bvi_sw_if_index", ntohl (mp->bvi_sw_if_index)); - vat_json_object_add_uint (node, "n_sw_ifs", ntohl (mp->n_sw_ifs)); + vat_json_object_add_uint (node, "n_sw_ifs", n_sw_ifs); array = vat_json_object_add (node, "sw_if"); vat_json_init_array (array); -} - -/* - * Special-case: build the bridge domain sw if table. - */ -static void vl_api_bridge_domain_sw_if_details_t_handler - (vl_api_bridge_domain_sw_if_details_t * mp) -{ - vat_main_t *vam = &vat_main; - hash_pair_t *p; - u8 *sw_if_name = 0; - u32 sw_if_index; - - sw_if_index = ntohl (mp->sw_if_index); - /* *INDENT-OFF* */ - hash_foreach_pair (p, vam->sw_if_index_by_interface_name, - ({ - if ((u32) p->value[0] == sw_if_index) - { - sw_if_name = (u8 *)(p->key); - break; - } - })); - /* *INDENT-ON* */ - print (vam->ofp, "%7d %3d %s", sw_if_index, - mp->shg, sw_if_name ? 
(char *) sw_if_name : - "sw_if_index not found!"); -} -static void vl_api_bridge_domain_sw_if_details_t_handler_json - (vl_api_bridge_domain_sw_if_details_t * mp) -{ - vat_main_t *vam = &vat_main; - vat_json_node_t *node = NULL; - uword last_index = 0; - ASSERT (VAT_JSON_ARRAY == vam->json_tree.type); - ASSERT (vec_len (vam->json_tree.array) >= 1); - last_index = vec_len (vam->json_tree.array) - 1; - node = &vam->json_tree.array[last_index]; - node = vat_json_object_get_element (node, "sw_if"); - ASSERT (NULL != node); - node = vat_json_array_add (node); + if (n_sw_ifs) + { + vl_api_bridge_domain_sw_if_t *sw_ifs; + int i; - vat_json_init_object (node); - vat_json_object_add_uint (node, "bd_id", ntohl (mp->bd_id)); - vat_json_object_add_uint (node, "sw_if_index", ntohl (mp->sw_if_index)); - vat_json_object_add_uint (node, "shg", mp->shg); + sw_ifs = mp->sw_if_details; + for (i = 0; i < n_sw_ifs; i++) + { + node = vat_json_array_add (array); + vat_json_init_object (node); + vat_json_object_add_uint (node, "sw_if_index", + ntohl (sw_ifs->sw_if_index)); + vat_json_object_add_uint (node, "shg", sw_ifs->shg); + sw_ifs++; + } + } } static void vl_api_control_ping_reply_t_handler @@ -4334,7 +4339,6 @@ _(SW_INTERFACE_SET_L2_BRIDGE_REPLY, \ sw_interface_set_l2_bridge_reply) \ _(BRIDGE_DOMAIN_ADD_DEL_REPLY, bridge_domain_add_del_reply) \ _(BRIDGE_DOMAIN_DETAILS, bridge_domain_details) \ -_(BRIDGE_DOMAIN_SW_IF_DETAILS, bridge_domain_sw_if_details) \ _(BRIDGE_DOMAIN_SET_MAC_AGE_REPLY, bridge_domain_set_mac_age_reply) \ _(L2FIB_ADD_DEL_REPLY, l2fib_add_del_reply) \ _(L2FIB_FLUSH_INT_REPLY, l2fib_flush_int_reply) \ @@ -4409,7 +4413,7 @@ _(CREATE_VHOST_USER_IF_REPLY, create_vhost_user_if_reply) \ _(MODIFY_VHOST_USER_IF_REPLY, modify_vhost_user_if_reply) \ _(DELETE_VHOST_USER_IF_REPLY, delete_vhost_user_if_reply) \ _(SHOW_VERSION_REPLY, show_version_reply) \ -_(L2_FIB_TABLE_ENTRY, l2_fib_table_entry) \ +_(L2_FIB_TABLE_DETAILS, l2_fib_table_details) \ _(VXLAN_GPE_ADD_DEL_TUNNEL_REPLY, vxlan_gpe_add_del_tunnel_reply) \ _(VXLAN_GPE_TUNNEL_DETAILS, vxlan_gpe_tunnel_details) \ _(INTERFACE_NAME_RENUMBER_REPLY, interface_name_renumber_reply) \ @@ -4935,7 +4939,7 @@ int exec (vat_main_t * vam) { api_main_t *am = &api_main; - vl_api_cli_request_t *mp; + vl_api_cli_t *mp; f64 timeout; void *oldheap; u8 *cmd = 0; @@ -4956,7 +4960,7 @@ exec (vat_main_t * vam) } - M (CLI_REQUEST, mp); + M (CLI, mp); /* * Copy cmd into shared memory. 
@@ -11896,8 +11900,8 @@ format_l2_fib_mac_address (u8 * s, va_list * args) a[2], a[3], a[4], a[5], a[6], a[7]); } -static void vl_api_l2_fib_table_entry_t_handler - (vl_api_l2_fib_table_entry_t * mp) +static void vl_api_l2_fib_table_details_t_handler + (vl_api_l2_fib_table_details_t * mp) { vat_main_t *vam = &vat_main; @@ -11908,8 +11912,8 @@ static void vl_api_l2_fib_table_entry_t_handler mp->bvi_mac); } -static void vl_api_l2_fib_table_entry_t_handler_json - (vl_api_l2_fib_table_entry_t * mp) +static void vl_api_l2_fib_table_details_t_handler_json + (vl_api_l2_fib_table_details_t * mp) { vat_main_t *vam = &vat_main; vat_json_node_t *node = NULL; diff --git a/src/vnet/l2/l2.api b/src/vnet/l2/l2.api index e9a1f361..bb3990c6 100644 --- a/src/vnet/l2/l2.api +++ b/src/vnet/l2/l2.api @@ -36,7 +36,7 @@ define l2_xconnect_dump u32 context; }; -/** \brief l2 fib table entry structure +/** \brief l2 fib table details structure @param bd_id - the l2 fib / bridge domain table id @param mac - the entry's mac address @param sw_if_index - index of the interface @@ -44,7 +44,7 @@ define l2_xconnect_dump @param filter_mac - the entry is a mac filter entry. @param bvi_mac - the mac address is a bridge virtual interface */ -define l2_fib_table_entry +define l2_fib_table_details { u32 context; u32 bd_id; @@ -212,6 +212,18 @@ define bridge_domain_dump u32 bd_id; }; +/** \brief L2 bridge domain sw interface operational state response + @param bd_id - the bridge domain id + @param sw_if_index - sw_if_index in the domain + @param shg - split horizon group for the interface +*/ +typeonly manual_print manual_endian define bridge_domain_sw_if +{ + u32 context; + u32 sw_if_index; + u8 shg; +}; + /** \brief L2 bridge domain operational state response @param bd_id - the bridge domain id @param flood - bcast/mcast flooding state on all interfaces in the bd @@ -222,7 +234,7 @@ define bridge_domain_dump @param mac_age - mac aging time in min, 0 for disabled @param n_sw_ifs - number of sw_if_index's in the domain */ -define bridge_domain_details +manual_print manual_endian define bridge_domain_details { u32 context; u32 bd_id; @@ -234,19 +246,7 @@ define bridge_domain_details u8 mac_age; u32 bvi_sw_if_index; u32 n_sw_ifs; -}; - -/** \brief L2 bridge domain sw interface operational state response - @param bd_id - the bridge domain id - @param sw_if_index - sw_if_index in the domain - @param shg - split horizon group for the interface -*/ -define bridge_domain_sw_if_details -{ - u32 context; - u32 bd_id; - u32 sw_if_index; - u8 shg; + vl_api_bridge_domain_sw_if_t sw_if_details[n_sw_ifs]; }; /** \brief Set bridge flags (such as L2_LEARN, L2_FWD, L2_FLOOD, diff --git a/src/vnet/l2/l2_api.c b/src/vnet/l2/l2_api.c index 7c4b0423..aa3dcb7e 100644 --- a/src/vnet/l2/l2_api.c +++ b/src/vnet/l2/l2_api.c @@ -36,6 +36,9 @@ #include #undef vl_endianfun +#define vl_api_bridge_domain_details_t_endian vl_noop_handler +#define vl_api_bridge_domain_details_t_print vl_noop_handler + /* instantiate all the print functions we know about */ #define vl_print(handle, ...) 
vlib_cli_output (handle, __VA_ARGS__) #define vl_printfun @@ -119,11 +122,11 @@ send_l2fib_table_entry (vpe_api_main_t * am, l2fib_entry_key_t * l2fe_key, l2fib_entry_result_t * l2fe_res, u32 context) { - vl_api_l2_fib_table_entry_t *mp; + vl_api_l2_fib_table_details_t *mp; mp = vl_msg_api_alloc (sizeof (*mp)); memset (mp, 0, sizeof (*mp)); - mp->_vl_msg_id = ntohs (VL_API_L2_FIB_TABLE_ENTRY); + mp->_vl_msg_id = ntohs (VL_API_L2_FIB_TABLE_DETAILS); mp->bd_id = ntohl (l2input_main.bd_configs[l2fe_key->fields.bd_index].bd_id); @@ -358,13 +361,18 @@ vl_api_bridge_domain_add_del_t_handler (vl_api_bridge_domain_add_del_t * mp) } static void -send_bridge_domain_details (unix_shared_memory_queue_t * q, +send_bridge_domain_details (l2input_main_t * l2im, + unix_shared_memory_queue_t * q, l2_bridge_domain_t * bd_config, u32 n_sw_ifs, u32 context) { vl_api_bridge_domain_details_t *mp; + l2_flood_member_t *m; + vl_api_bridge_domain_sw_if_t *sw_ifs; + l2_input_config_t *input_cfg; - mp = vl_msg_api_alloc (sizeof (*mp)); + mp = vl_msg_api_alloc (sizeof (*mp) + + (n_sw_ifs * sizeof (vl_api_bridge_domain_sw_if_t))); memset (mp, 0, sizeof (*mp)); mp->_vl_msg_id = ntohs (VL_API_BRIDGE_DOMAIN_DETAILS); mp->bd_id = ntohl (bd_config->bd_id); @@ -375,28 +383,18 @@ send_bridge_domain_details (unix_shared_memory_queue_t * q, mp->arp_term = bd_feature_arp_term (bd_config); mp->bvi_sw_if_index = ntohl (bd_config->bvi_sw_if_index); mp->mac_age = bd_config->mac_age; - mp->n_sw_ifs = ntohl (n_sw_ifs); mp->context = context; - vl_msg_api_send_shmem (q, (u8 *) & mp); -} - -static void -send_bd_sw_if_details (l2input_main_t * l2im, - unix_shared_memory_queue_t * q, - l2_flood_member_t * member, u32 bd_id, u32 context) -{ - vl_api_bridge_domain_sw_if_details_t *mp; - l2_input_config_t *input_cfg; - - mp = vl_msg_api_alloc (sizeof (*mp)); - memset (mp, 0, sizeof (*mp)); - mp->_vl_msg_id = ntohs (VL_API_BRIDGE_DOMAIN_SW_IF_DETAILS); - mp->bd_id = ntohl (bd_id); - mp->sw_if_index = ntohl (member->sw_if_index); - input_cfg = vec_elt_at_index (l2im->configs, member->sw_if_index); - mp->shg = input_cfg->shg; - mp->context = context; + sw_ifs = (vl_api_bridge_domain_sw_if_t *) mp->sw_if_details; + vec_foreach (m, bd_config->members) + { + sw_ifs->sw_if_index = ntohl (m->sw_if_index); + input_cfg = vec_elt_at_index (l2im->configs, m->sw_if_index); + sw_ifs->shg = input_cfg->shg; + sw_ifs++; + mp->n_sw_ifs++; + } + mp->n_sw_ifs = htonl (mp->n_sw_ifs); vl_msg_api_send_shmem (q, (u8 *) & mp); } @@ -434,18 +432,9 @@ vl_api_bridge_domain_dump_t_handler (vl_api_bridge_domain_dump_t * mp) l2input_bd_config_from_index (l2im, bd_index); /* skip dummy bd_id 0 */ if (bd_config && (bd_config->bd_id > 0)) - { - u32 n_sw_ifs; - l2_flood_member_t *m; - - n_sw_ifs = vec_len (bd_config->members); - send_bridge_domain_details (q, bd_config, n_sw_ifs, mp->context); - - vec_foreach (m, bd_config->members) - { - send_bd_sw_if_details (l2im, q, m, bd_config->bd_id, mp->context); - } - } + send_bridge_domain_details (l2im, q, bd_config, + vec_len (bd_config->members), + mp->context); } } diff --git a/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/test/FutureApiTest.java b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/test/FutureApiTest.java index 0d7c7471..63659f82 100644 --- a/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/test/FutureApiTest.java +++ b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/test/FutureApiTest.java @@ -67,8 +67,8 @@ public class FutureApiTest { } else { LOG.info( String.format( - "Received empty bridge-domain dump reply with 
list of bridge-domains: %s, %s", - reply.bridgeDomainDetails, reply.bridgeDomainSwIfDetails)); + "Received bridge-domain dump reply with list of bridge-domains: %s", + reply.bridgeDomainDetails)); } } diff --git a/src/vpp-api/java/jvpp/gen/jvppgen/util.py b/src/vpp-api/java/jvpp/gen/jvppgen/util.py index fc971c17..947fc31d 100644 --- a/src/vpp-api/java/jvpp/gen/jvppgen/util.py +++ b/src/vpp-api/java/jvpp/gen/jvppgen/util.py @@ -156,13 +156,6 @@ jni_field_accessors = {'u8': 'ByteField', # vpe.api calls that do not follow naming conventions and have to be handled exceptionally when finding reply -> request mapping # FIXME in vpe.api unconventional_naming_rep_req = { - 'cli_reply': 'cli_request', - 'vnet_summary_stats_reply': 'vnet_get_summary_stats', - # This below is actually a sub-details callback. We cannot derive the mapping of dump request - # belonging to this sub-details from naming conventions. We need special mapping - 'bridge_domain_sw_if_details': 'bridge_domain', - # This is standard dump call + details reply. However it's not called details but entry - 'l2_fib_table_entry': 'l2_fib_table' } # @@ -172,7 +165,7 @@ notification_messages_reused = ["sw_interface_set_flags"] # messages that must be ignored. These messages are INSUFFICIENTLY marked as disabled in vpe.api # FIXME -ignored_messages = ["is_address_reachable"] +ignored_messages = [] def is_notification(name): diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index 7e4c341e..60eb5331 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -104,7 +104,6 @@ #define foreach_vpe_api_msg \ _(WANT_OAM_EVENTS, want_oam_events) \ _(OAM_ADD_DEL, oam_add_del) \ -_(IS_ADDRESS_REACHABLE, is_address_reachable) \ _(SW_INTERFACE_SET_MPLS_ENABLE, sw_interface_set_mpls_enable) \ _(SW_INTERFACE_SET_VPATH, sw_interface_set_vpath) \ _(SW_INTERFACE_SET_L2_XCONNECT, sw_interface_set_l2_xconnect) \ @@ -118,7 +117,7 @@ _(RESET_FIB, reset_fib) \ _(CREATE_LOOPBACK, create_loopback) \ _(CREATE_LOOPBACK_INSTANCE, create_loopback_instance) \ _(CONTROL_PING, control_ping) \ -_(CLI_REQUEST, cli_request) \ +_(CLI, cli) \ _(CLI_INBAND, cli_inband) \ _(SET_ARP_NEIGHBOR_LIMIT, set_arp_neighbor_limit) \ _(L2_PATCH_ADD_DEL, l2_patch_add_del) \ @@ -693,82 +692,6 @@ out: REPLY_MACRO (VL_API_PROXY_ARP_INTFC_ENABLE_DISABLE_REPLY); } -static void -vl_api_is_address_reachable_t_handler (vl_api_is_address_reachable_t * mp) -{ -#if 0 - vpe_main_t *rm = &vpe_main; - ip4_main_t *im4 = &ip4_main; - ip6_main_t *im6 = &ip6_main; - ip_lookup_main_t *lm; - union - { - ip4_address_t ip4; - ip6_address_t ip6; - } addr; - u32 adj_index, sw_if_index; - vl_api_is_address_reachable_t *rmp; - ip_adjacency_t *adj; - unix_shared_memory_queue_t *q; - - q = vl_api_client_index_to_input_queue (mp->client_index); - if (!q) - { - increment_missing_api_client_counter (rm->vlib_main); - return; - } - - rmp = vl_msg_api_alloc (sizeof (*rmp)); - clib_memcpy (rmp, mp, sizeof (*rmp)); - - sw_if_index = mp->next_hop_sw_if_index; - clib_memcpy (&addr, mp->address, sizeof (addr)); - if (mp->is_ipv6) - { - lm = &im6->lookup_main; - adj_index = ip6_fib_lookup (im6, sw_if_index, &addr.ip6); - } - else - { - lm = &im4->lookup_main; - // FIXME NOT an ADJ - adj_index = ip4_fib_lookup (im4, sw_if_index, &addr.ip4); - } - if (adj_index == ~0) - { - rmp->is_error = 1; - goto send; - } - adj = ip_get_adjacency (lm, adj_index); - - if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE - && adj->rewrite_header.sw_if_index == sw_if_index) - { - rmp->is_known = 1; - } - else - { - if (adj->lookup_next_index 
== IP_LOOKUP_NEXT_ARP - && adj->rewrite_header.sw_if_index == sw_if_index) - { - if (mp->is_ipv6) - ip6_probe_neighbor (rm->vlib_main, &addr.ip6, sw_if_index); - else - ip4_probe_neighbor (rm->vlib_main, &addr.ip4, sw_if_index); - } - else if (adj->lookup_next_index == IP_LOOKUP_NEXT_DROP) - { - rmp->is_known = 1; - goto send; - } - rmp->is_known = 0; - } - -send: - vl_msg_api_send_shmem (q, (u8 *) & rmp); -#endif -} - static void vl_api_sw_interface_set_mpls_enable_t_handler (vl_api_sw_interface_set_mpls_enable_t * mp) @@ -828,7 +751,7 @@ vl_api_vnet_get_summary_stats_t_handler (vl_api_vnet_get_summary_stats_t * mp) { stats_main_t *sm = &stats_main; vnet_interface_main_t *im = sm->interface_main; - vl_api_vnet_summary_stats_reply_t *rmp; + vl_api_vnet_get_summary_stats_reply_t *rmp; vlib_combined_counter_main_t *cm; vlib_counter_t v; int i, which; @@ -842,7 +765,7 @@ vl_api_vnet_get_summary_stats_t_handler (vl_api_vnet_get_summary_stats_t * mp) return; rmp = vl_msg_api_alloc (sizeof (*rmp)); - rmp->_vl_msg_id = ntohs (VL_API_VNET_SUMMARY_STATS_REPLY); + rmp->_vl_msg_id = ntohs (VL_API_VNET_GET_SUMMARY_STATS_REPLY); rmp->context = mp->context; rmp->retval = 0; @@ -1115,7 +1038,7 @@ shmem_cli_output (uword arg, u8 * buffer, uword buffer_bytes) static void -vl_api_cli_request_t_handler (vl_api_cli_request_t * mp) +vl_api_cli_t_handler (vl_api_cli_t * mp) { vl_api_cli_reply_t *rp; unix_shared_memory_queue_t *q; diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c index c073c52d..9071883b 100644 --- a/src/vpp/api/custom_dump.c +++ b/src/vpp/api/custom_dump.c @@ -1656,12 +1656,12 @@ static void *vl_api_want_interface_events_t_print FINISH; } -static void *vl_api_cli_request_t_print - (vl_api_cli_request_t * mp, void *handle) +static void * +vl_api_cli_t_print (vl_api_cli_t * mp, void *handle) { u8 *s; - s = format (0, "SCRIPT: cli_request "); + s = format (0, "SCRIPT: cli "); FINISH; } @@ -3023,7 +3023,7 @@ _(DELETE_VHOST_USER_IF, delete_vhost_user_if) \ _(SW_INTERFACE_DUMP, sw_interface_dump) \ _(CONTROL_PING, control_ping) \ _(WANT_INTERFACE_EVENTS, want_interface_events) \ -_(CLI_REQUEST, cli_request) \ +_(CLI, cli) \ _(CLI_INBAND, cli_inband) \ _(MEMCLNT_CREATE, memclnt_create) \ _(SW_INTERFACE_VHOST_USER_DUMP, sw_interface_vhost_user_dump) \ diff --git a/src/vpp/api/summary_stats_client.c b/src/vpp/api/summary_stats_client.c index 03999567..2c81d667 100644 --- a/src/vpp/api/summary_stats_client.c +++ b/src/vpp/api/summary_stats_client.c @@ -101,8 +101,8 @@ vlib_cli_output (struct vlib_main_t *vm, char *fmt, ...) 
static void -vl_api_vnet_summary_stats_reply_t_handler (vl_api_vnet_summary_stats_reply_t * - mp) + vl_api_vnet_get_summary_stats_reply_t_handler + (vl_api_vnet_get_summary_stats_reply_t * mp) { test_main_t *tm = &test_main; static u8 *sb; @@ -134,7 +134,7 @@ vl_api_vnet_summary_stats_reply_t_handler (vl_api_vnet_summary_stats_reply_t * } #define foreach_api_msg \ -_(VNET_SUMMARY_STATS_REPLY, vnet_summary_stats_reply) +_(VNET_GET_SUMMARY_STATS_REPLY, vnet_get_summary_stats_reply) int connect_to_vpe (char *name) diff --git a/src/vpp/api/vpe.api b/src/vpp/api/vpe.api index 99ae4784..d3c7e985 100644 --- a/src/vpp/api/vpe.api +++ b/src/vpp/api/vpe.api @@ -135,25 +135,6 @@ autoreply define reset_vrf u32 vrf_id; }; -/** \brief Is Address Reachable request - DISABLED - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param next_hop_sw_if_index - index of interface used to get to next hop - @param is_ipv6 - 1 for IPv6, 0 for IPv4 - @param is_error - address not found or does not match intf - @param address[] - Address in question -*/ -define is_address_reachable -{ - u32 client_index; /* (api_main_t *) am->my_client_index */ - u32 context; - u32 next_hop_sw_if_index; - u8 is_known; /* on reply, this is the answer */ - u8 is_ipv6; - u8 is_error; /* address not found or does not match intf */ - u8 address[16]; -}; - /** \brief Want Stats, register for stats updates @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -256,7 +237,7 @@ define vnet_get_summary_stats @param total_bytes - @param vector_rate - */ -define vnet_summary_stats_reply +define vnet_get_summary_stats_reply { u32 context; i32 retval; @@ -414,7 +395,7 @@ define control_ping_reply @param context - sender context, to match reply w/ request @param cmd_in_shmem - pointer to cli command string */ -define cli_request +define cli { u32 client_index; u32 context; -- cgit 1.2.3-korg From 630198f04916deb35c5b7774823ae1a5dd168a6c Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Mon, 22 May 2017 09:20:20 -0400 Subject: IPv6 Performance bugs - inline the FIB lookup function; this requires access to the bihash, so for files that use more than one type this casues problems. those files that include ip6_fib.h unnecessarily have been updated - better use of the feature arcs. ip6-lookup and interface-output are now sentinels (end-node-index in the cm speak) rather than enabled features. 
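For illustration only (the actual changes are in the hunks below), the effect on
the forwarding path and on interface enable/disable is roughly the following;
fib_index0 and ip60 are hypothetical locals, the two calls themselves mirror
code in this patch.

    /* the lookup is now an always_inline function in ip6_fib.h, so a
     * per-packet call like this compiles to an inline bihash search
     * rather than a function call */
    u32 lb_index0 = ip6_fib_table_fwding_lookup (&ip6_main, fib_index0,
                                                 &ip60->dst_address);

    /* enabling/disabling ip6 on an interface now toggles the ip6-drop
     * feature; ip6-lookup stays the arc's end node (sentinel) */
    vnet_feature_enable_disable ("ip6-unicast", "ip6-drop", sw_if_index,
                                 !is_enable, 0, 0);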
Change-Id: I9d1375fee63f7dbb2d327da6124d8e60b63367ec Signed-off-by: Neale Ranns --- src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c | 1 - src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c | 2 +- src/vnet/dhcp/dhcp6_proxy_node.c | 9 +++--- src/vnet/fib/fib.h | 2 -- src/vnet/fib/ip6_fib.c | 39 ----------------------- src/vnet/fib/ip6_fib.h | 42 +++++++++++++++++++++++-- src/vnet/ip/ip6_forward.c | 32 +++++++++---------- src/vnet/vxlan-gpe/vxlan_gpe.c | 8 ++--- src/vpp/api/api.c | 2 -- 9 files changed, 63 insertions(+), 74 deletions(-) (limited to 'src/vpp/api') diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c b/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c index f334c983..d90cd5e4 100644 --- a/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c +++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c index cfc550cd..8558c505 100644 --- a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c +++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/vnet/dhcp/dhcp6_proxy_node.c b/src/vnet/dhcp/dhcp6_proxy_node.c index de73154d..885313a5 100644 --- a/src/vnet/dhcp/dhcp6_proxy_node.c +++ b/src/vnet/dhcp/dhcp6_proxy_node.c @@ -19,9 +19,9 @@ #include #include #include -#include #include #include +#include static char * dhcpv6_proxy_error_strings[] = { #define dhcpv6_proxy_error(n,s) s, @@ -966,7 +966,7 @@ static u8 * format_dhcp6_proxy_server (u8 * s, va_list * args) { dhcp_proxy_t * proxy = va_arg (*args, dhcp_proxy_t *); - ip6_fib_t *server_fib; + fib_table_t *server_fib; dhcp_server_t *server; ip6_mfib_t *rx_fib; @@ -985,9 +985,10 @@ format_dhcp6_proxy_server (u8 * s, va_list * args) vec_foreach(server, proxy->dhcp_servers) { - server_fib = ip6_fib_get(server->server_fib_index); + server_fib = fib_table_get(server->server_fib_index, + FIB_PROTOCOL_IP6); s = format (s, "%u,%U ", - server_fib->table_id, + server_fib->ft_table_id, format_ip46_address, &server->dhcp_server, IP46_TYPE_ANY); } diff --git a/src/vnet/fib/fib.h b/src/vnet/fib/fib.h index 7cf1d136..ec97c565 100644 --- a/src/vnet/fib/fib.h +++ b/src/vnet/fib/fib.h @@ -646,7 +646,5 @@ #include #include -#include -#include #endif diff --git a/src/vnet/fib/ip6_fib.c b/src/vnet/fib/ip6_fib.c index 4a24c212..527f9114 100644 --- a/src/vnet/fib/ip6_fib.c +++ b/src/vnet/fib/ip6_fib.c @@ -341,45 +341,6 @@ ip6_fib_table_entry_insert (u32 fib_index, compute_prefix_lengths_in_search_order (table); } -u32 -ip6_fib_table_fwding_lookup (ip6_main_t * im, - u32 fib_index, - const ip6_address_t * dst) -{ - const ip6_fib_table_instance_t *table; - int i, len; - int rv; - BVT(clib_bihash_kv) kv, value; - u64 fib; - - table = &ip6_main.ip6_table[IP6_FIB_TABLE_FWDING]; - len = vec_len (table->prefix_lengths_in_search_order); - - kv.key[0] = dst->as_u64[0]; - kv.key[1] = dst->as_u64[1]; - fib = ((u64)((fib_index))<<32); - - for (i = 0; i < len; i++) - { - int dst_address_length = table->prefix_lengths_in_search_order[i]; - ip6_address_t * mask = &ip6_main.fib_masks[dst_address_length]; - - ASSERT(dst_address_length >= 0 && dst_address_length <= 128); - //As lengths are decreasing, masks are increasingly specific. 
- kv.key[0] &= mask->as_u64[0]; - kv.key[1] &= mask->as_u64[1]; - kv.key[2] = fib | dst_address_length; - - rv = BV(clib_bihash_search_inline_2)(&table->ip6_hash, &kv, &value); - if (rv == 0) - return value.value; - } - - /* default route is always present */ - ASSERT(0); - return 0; -} - u32 ip6_fib_table_fwding_lookup_with_if_index (ip6_main_t * im, u32 sw_if_index, const ip6_address_t * dst) diff --git a/src/vnet/fib/ip6_fib.h b/src/vnet/fib/ip6_fib.h index 2bf8ef78..9789da4f 100644 --- a/src/vnet/fib/ip6_fib.h +++ b/src/vnet/fib/ip6_fib.h @@ -53,9 +53,6 @@ extern void ip6_fib_table_fwding_dpo_remove(u32 fib_index, u32 ip6_fib_table_fwding_lookup_with_if_index(ip6_main_t * im, u32 sw_if_index, const ip6_address_t * dst); -u32 ip6_fib_table_fwding_lookup(ip6_main_t * im, - u32 fib_index, - const ip6_address_t * dst); /** * @brief Walk all entries in a FIB table @@ -66,6 +63,45 @@ extern void ip6_fib_table_walk(u32 fib_index, fib_table_walk_fn_t fn, void *ctx); +always_inline u32 +ip6_fib_table_fwding_lookup (ip6_main_t * im, + u32 fib_index, + const ip6_address_t * dst) +{ + const ip6_fib_table_instance_t *table; + int i, len; + int rv; + BVT(clib_bihash_kv) kv, value; + u64 fib; + + table = &ip6_main.ip6_table[IP6_FIB_TABLE_FWDING]; + len = vec_len (table->prefix_lengths_in_search_order); + + kv.key[0] = dst->as_u64[0]; + kv.key[1] = dst->as_u64[1]; + fib = ((u64)((fib_index))<<32); + + for (i = 0; i < len; i++) + { + int dst_address_length = table->prefix_lengths_in_search_order[i]; + ip6_address_t * mask = &ip6_main.fib_masks[dst_address_length]; + + ASSERT(dst_address_length >= 0 && dst_address_length <= 128); + //As lengths are decreasing, masks are increasingly specific. + kv.key[0] &= mask->as_u64[0]; + kv.key[1] &= mask->as_u64[1]; + kv.key[2] = fib | dst_address_length; + + rv = BV(clib_bihash_search_inline_2)(&table->ip6_hash, &kv, &value); + if (rv == 0) + return value.value; + } + + /* default route is always present */ + ASSERT(0); + return 0; +} + /** * @brief return the DPO that the LB stacks on. 
*/ diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c index 25714e48..28c84d1c 100644 --- a/src/vnet/ip/ip6_forward.c +++ b/src/vnet/ip/ip6_forward.c @@ -444,12 +444,11 @@ ip6_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable) return; } - vnet_feature_enable_disable ("ip6-unicast", "ip6-lookup", sw_if_index, - is_enable, 0, 0); - - vnet_feature_enable_disable ("ip6-multicast", "ip6-mfib-forward-lookup", - sw_if_index, is_enable, 0, 0); + vnet_feature_enable_disable ("ip6-unicast", "ip6-drop", sw_if_index, + !is_enable, 0, 0); + vnet_feature_enable_disable ("ip6-multicast", "ip6-drop", sw_if_index, + !is_enable, 0, 0); } /* get first interface address */ @@ -624,17 +623,17 @@ VNET_FEATURE_INIT (ip6_vxlan_bypass, static) = .runs_before = VNET_FEATURES ("ip6-lookup"), }; -VNET_FEATURE_INIT (ip6_lookup, static) = +VNET_FEATURE_INIT (ip6_drop, static) = { .arc_name = "ip6-unicast", - .node_name = "ip6-lookup", - .runs_before = VNET_FEATURES ("ip6-drop"), + .node_name = "ip6-drop", + .runs_before = VNET_FEATURES ("ip6-lookup"), }; -VNET_FEATURE_INIT (ip6_drop, static) = +VNET_FEATURE_INIT (ip6_lookup, static) = { .arc_name = "ip6-unicast", - .node_name = "ip6-drop", + .node_name = "ip6-lookup", .runs_before = 0, /*last feature*/ }; @@ -652,15 +651,15 @@ VNET_FEATURE_INIT (ip6_vpath_mc, static) = { .runs_before = VNET_FEATURES ("ip6-mfib-forward-lookup"), }; -VNET_FEATURE_INIT (ip6_mc_lookup, static) = { +VNET_FEATURE_INIT (ip6_drop_mc, static) = { .arc_name = "ip6-multicast", - .node_name = "ip6-mfib-forward-lookup", - .runs_before = VNET_FEATURES ("ip6-drop"), + .node_name = "ip6-drop", + .runs_before = VNET_FEATURES ("ip6-mfib-forward-lookup"), }; -VNET_FEATURE_INIT (ip6_drop_mc, static) = { +VNET_FEATURE_INIT (ip6_mc_lookup, static) = { .arc_name = "ip6-multicast", - .node_name = "ip6-drop", + .node_name = "ip6-mfib-forward-lookup", .runs_before = 0, /* last feature */ }; @@ -699,9 +698,6 @@ ip6_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add) vnet_feature_enable_disable ("ip6-multicast", "ip6-drop", sw_if_index, is_add, 0, 0); - vnet_feature_enable_disable ("ip6-output", "interface-output", sw_if_index, - is_add, 0, 0); - return /* no error */ 0; } diff --git a/src/vnet/vxlan-gpe/vxlan_gpe.c b/src/vnet/vxlan-gpe/vxlan_gpe.c index 2cba596f..1e674085 100644 --- a/src/vnet/vxlan-gpe/vxlan_gpe.c +++ b/src/vnet/vxlan-gpe/vxlan_gpe.c @@ -490,9 +490,9 @@ vxlan_gpe_add_del_tunnel_command_fn (vlib_main_t * vm, else if (unformat (line_input, "encap-vrf-id %d", &tmp)) { if (ipv6_set) - encap_fib_index = ip6_fib_index_from_table_id (tmp); + encap_fib_index = fib_table_find (FIB_PROTOCOL_IP6, tmp); else - encap_fib_index = ip4_fib_index_from_table_id (tmp); + encap_fib_index = fib_table_find (FIB_PROTOCOL_IP4, tmp); if (encap_fib_index == ~0) { @@ -503,9 +503,9 @@ vxlan_gpe_add_del_tunnel_command_fn (vlib_main_t * vm, else if (unformat (line_input, "decap-vrf-id %d", &tmp)) { if (ipv6_set) - decap_fib_index = ip6_fib_index_from_table_id (tmp); + decap_fib_index = fib_table_find (FIB_PROTOCOL_IP6, tmp); else - decap_fib_index = ip4_fib_index_from_table_id (tmp); + decap_fib_index = fib_table_find (FIB_PROTOCOL_IP4, tmp); if (decap_fib_index == ~0) { diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index 60eb5331..c1dcfb03 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -81,8 +81,6 @@ #include #include #include -#include -#include #include #include #include -- cgit 1.2.3-korg From 6899a30bd70f219cfd182dfb0e9ac96faf5d9892 Mon Sep 17 00:00:00 2001 
From: Pavel Kotucek Date: Thu, 8 Jun 2017 08:46:10 +0200 Subject: P2P Ethernet - API API for P2P Ethernet feature Change-Id: Id0280f42b9ce2428262e79c4dc309595037cd10e Signed-off-by: Pavel Kotucek --- src/vat/api_format.c | 107 ++++++++++++++++++++++++++++- src/vnet.am | 10 ++- src/vnet/ethernet/p2p_ethernet.api | 49 ++++++++++++++ src/vnet/ethernet/p2p_ethernet.c | 102 ++++++++++++++++++++++++++++ src/vnet/ethernet/p2p_ethernet.h | 23 +++++++ src/vnet/ethernet/p2p_ethernet_api.c | 128 +++++++++++++++++++++++++++++++++++ src/vnet/vnet_all_api_h.h | 1 + src/vpp/api/custom_dump.c | 28 +++++++- 8 files changed, 442 insertions(+), 6 deletions(-) create mode 100644 src/vnet/ethernet/p2p_ethernet.api create mode 100644 src/vnet/ethernet/p2p_ethernet.c create mode 100644 src/vnet/ethernet/p2p_ethernet.h create mode 100644 src/vnet/ethernet/p2p_ethernet_api.c (limited to 'src/vpp/api') diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 79be42c8..f33b4592 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -4445,7 +4445,9 @@ _(l2_interface_pbb_tag_rewrite_reply) \ _(punt_reply) \ _(feature_enable_disable_reply) \ _(sw_interface_tag_add_del_reply) \ -_(sw_interface_set_mtu_reply) +_(sw_interface_set_mtu_reply) \ +_(p2p_ethernet_add_reply) \ +_(p2p_ethernet_del_reply) #define _(n) \ static void vl_api_##n##_t_handler \ @@ -4720,7 +4722,9 @@ _(SW_INTERFACE_TAG_ADD_DEL_REPLY, sw_interface_tag_add_del_reply) \ _(L2_XCONNECT_DETAILS, l2_xconnect_details) \ _(SW_INTERFACE_SET_MTU_REPLY, sw_interface_set_mtu_reply) \ _(IP_NEIGHBOR_DETAILS, ip_neighbor_details) \ -_(SW_INTERFACE_GET_TABLE_REPLY, sw_interface_get_table_reply) +_(SW_INTERFACE_GET_TABLE_REPLY, sw_interface_get_table_reply) \ +_(P2P_ETHERNET_ADD_REPLY, p2p_ethernet_add_reply) \ +_(P2P_ETHERNET_DEL_REPLY, p2p_ethernet_del_reply) #define foreach_standalone_reply_msg \ _(SW_INTERFACE_SET_FLAGS, sw_interface_set_flags) \ @@ -18689,6 +18693,101 @@ api_sw_interface_set_mtu (vat_main_t * vam) return ret; } +static int +api_p2p_ethernet_add (vat_main_t * vam) +{ + unformat_input_t *i = vam->input; + vl_api_p2p_ethernet_add_t *mp; + u32 parent_if_index = ~0; + u8 remote_mac[6]; + u8 mac_set = 0; + int ret; + + memset (remote_mac, 0, sizeof (remote_mac)); + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "%U", api_unformat_sw_if_index, vam, &parent_if_index)) + ; + else if (unformat (i, "sw_if_index %d", &parent_if_index)) + ; + else + if (unformat + (i, "remote_mac %U", unformat_ethernet_address, remote_mac)) + mac_set++; + else + { + clib_warning ("parse error '%U'", format_unformat_error, i); + return -99; + } + } + + if (parent_if_index == ~0) + { + errmsg ("missing interface name or sw_if_index"); + return -99; + } + if (mac_set == 0) + { + errmsg ("missing remote mac address"); + return -99; + } + + M (P2P_ETHERNET_ADD, mp); + mp->parent_if_index = ntohl (parent_if_index); + clib_memcpy (mp->remote_mac, remote_mac, sizeof (remote_mac)); + + S (mp); + W (ret); + return ret; +} + +static int +api_p2p_ethernet_del (vat_main_t * vam) +{ + unformat_input_t *i = vam->input; + vl_api_p2p_ethernet_del_t *mp; + u32 parent_if_index = ~0; + u8 remote_mac[6]; + u8 mac_set = 0; + int ret; + + memset (remote_mac, 0, sizeof (remote_mac)); + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "%U", api_unformat_sw_if_index, vam, &parent_if_index)) + ; + else if (unformat (i, "sw_if_index %d", &parent_if_index)) + ; + else + if (unformat + (i, "remote_mac %U", 
unformat_ethernet_address, remote_mac)) + mac_set++; + else + { + clib_warning ("parse error '%U'", format_unformat_error, i); + return -99; + } + } + + if (parent_if_index == ~0) + { + errmsg ("missing interface name or sw_if_index"); + return -99; + } + if (mac_set == 0) + { + errmsg ("missing remote mac address"); + return -99; + } + + M (P2P_ETHERNET_DEL, mp); + mp->parent_if_index = ntohl (parent_if_index); + clib_memcpy (mp->remote_mac, remote_mac, sizeof (remote_mac)); + + S (mp); + W (ret); + return ret; +} static int q_or_quit (vat_main_t * vam) @@ -19444,7 +19543,9 @@ _(sw_interface_tag_add_del, " | sw_if_index tag " \ _(l2_xconnect_dump, "") \ _(sw_interface_set_mtu, " | sw_if_index mtu ") \ _(ip_neighbor_dump, "[ip6] | sw_if_index ") \ -_(sw_interface_get_table, " | sw_if_index [ipv6]") +_(sw_interface_get_table, " | sw_if_index [ipv6]") \ +_(p2p_ethernet_add, " | sw_if_index remote_mac ") \ +_(p2p_ethernet_del, " | sw_if_index remote_mac ") /* List of command functions, CLI names map directly to functions */ #define foreach_cli_function \ diff --git a/src/vnet.am b/src/vnet.am index 361a838b..b5ce6d5a 100644 --- a/src/vnet.am +++ b/src/vnet.am @@ -113,7 +113,9 @@ libvnet_la_SOURCES += \ vnet/ethernet/interface.c \ vnet/ethernet/node.c \ vnet/ethernet/pg.c \ - vnet/ethernet/sfp.c + vnet/ethernet/sfp.c \ + vnet/ethernet/p2p_ethernet.c \ + vnet/ethernet/p2p_ethernet_api.c nobase_include_HEADERS += \ vnet/ethernet/arp_packet.h \ @@ -121,7 +123,11 @@ nobase_include_HEADERS += \ vnet/ethernet/ethernet.h \ vnet/ethernet/packet.h \ vnet/ethernet/types.def \ - vnet/ethernet/sfp.h + vnet/ethernet/sfp.h \ + vnet/ethernet/p2p_ethernet.api.h \ + vnet/ethernet/p2p_ethernet.h + +API_FILES += vnet/ethernet/p2p_ethernet.api ######################################## # Layer 2 protocol: Ethernet bridging diff --git a/src/vnet/ethernet/p2p_ethernet.api b/src/vnet/ethernet/p2p_ethernet.api new file mode 100644 index 00000000..72a73423 --- /dev/null +++ b/src/vnet/ethernet/p2p_ethernet.api @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2015-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +define p2p_ethernet_add +{ + u32 client_index; + u32 context; + u32 parent_if_index; + u8 remote_mac[6]; +}; + +define p2p_ethernet_add_reply +{ + u32 context; + i32 retval; + u32 sw_if_index; +}; + +define p2p_ethernet_del +{ + u32 client_index; + u32 context; + u32 parent_if_index; + u8 remote_mac[6]; +}; + +define p2p_ethernet_del_reply +{ + u32 context; + i32 retval; +}; + +/* + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ \ No newline at end of file diff --git a/src/vnet/ethernet/p2p_ethernet.c b/src/vnet/ethernet/p2p_ethernet.c new file mode 100644 index 00000000..3c077318 --- /dev/null +++ b/src/vnet/ethernet/p2p_ethernet.c @@ -0,0 +1,102 @@ +/* + * p2p_ethernet.c: p2p ethernet + * + * Copyright (c) 2012 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +int +p2p_ethernet_add_del (vlib_main_t * vm, u32 parent_if_index, + u8 * client_mac, int is_add) +{ + return 0; +} + +static clib_error_t * +vnet_p2p_ethernet_add_del (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t *vnm = vnet_get_main (); + + int is_add = 1; + int remote_mac = 0; + u32 hw_if_index = ~0; + u8 client_mac[6]; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index)) + ; + else if (unformat (input, "%U", unformat_ethernet_address, &client_mac)) + remote_mac = 1; + else if (unformat (input, "del")) + is_add = 0; + else + break; + } + + if (hw_if_index == ~0) + return clib_error_return (0, "Please specify parent interface ..."); + if (!remote_mac) + return clib_error_return (0, "Please specify client MAC address ..."); + + u32 rv; + rv = p2p_ethernet_add_del (vm, hw_if_index, client_mac, is_add); + switch (rv) + { + case VNET_API_ERROR_BOND_SLAVE_NOT_ALLOWED: + return clib_error_return (0, + "not allowed as parent interface belongs to a BondEthernet interface"); + case -1: + return clib_error_return (0, + "p2p ethernet for given parent interface and client mac already exists"); + case -2: + return clib_error_return (0, + "couldn't create p2p ethernet subinterface"); + case -3: + return clib_error_return (0, + "p2p ethernet for given parent interface and client mac doesn't exist"); + default: + break; + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (p2p_ethernet_add_del_command, static) = +{ + .path = "p2p_ethernet ", + .function = vnet_p2p_ethernet_add_del, + .short_help = "p2p_ethernet [del]",}; +/* *INDENT-ON* */ + +static clib_error_t * +p2p_ethernet_init (vlib_main_t * vm) +{ + return 0; +} + +VLIB_INIT_FUNCTION (p2p_ethernet_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ethernet/p2p_ethernet.h b/src/vnet/ethernet/p2p_ethernet.h new file mode 100644 index 00000000..31b93d82 --- /dev/null +++ b/src/vnet/ethernet/p2p_ethernet.h @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef included_vnet_p2p_ethernet_h +#define included_vnet_p2p_ethernet_h + +#include +#include + +int p2p_ethernet_add_del (vlib_main_t * vm, u32 parent_if_index, u8 * client_mac, int is_add); + +#endif /* included_vnet_p2p_ethernet_h */ diff --git a/src/vnet/ethernet/p2p_ethernet_api.c b/src/vnet/ethernet/p2p_ethernet_api.c new file mode 100644 index 00000000..1d9eaeb0 --- /dev/null +++ b/src/vnet/ethernet/p2p_ethernet_api.c @@ -0,0 +1,128 @@ +/* + *------------------------------------------------------------------ + * p2p_ethernet_api.c - p2p ethernet api + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include +#include + +#include +#include + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include +#undef vl_printfun + +#include + +#define foreach_vpe_api_msg \ +_(P2P_ETHERNET_ADD, p2p_ethernet_add) \ +_(P2P_ETHERNET_DEL, p2p_ethernet_del) + +void +vl_api_p2p_ethernet_add_t_handler (vl_api_p2p_ethernet_add_t * mp) +{ + vl_api_p2p_ethernet_add_reply_t *rmp; + vlib_main_t *vm = vlib_get_main (); + int rv; + + u32 parent_if_index = htonl (mp->parent_if_index); + u8 remote_mac[6]; + + clib_memcpy (remote_mac, mp->remote_mac, 6); + rv = p2p_ethernet_add_del (vm, parent_if_index, remote_mac, 1); + + REPLY_MACRO (VL_API_P2P_ETHERNET_ADD_REPLY); +} + +void +vl_api_p2p_ethernet_del_t_handler (vl_api_p2p_ethernet_del_t * mp) +{ + vl_api_p2p_ethernet_del_reply_t *rmp; + vlib_main_t *vm = vlib_get_main (); + int rv; + + u32 parent_if_index = htonl (mp->parent_if_index); + u8 remote_mac[6]; + + clib_memcpy (remote_mac, mp->remote_mac, 6); + rv = p2p_ethernet_add_del (vm, parent_if_index, remote_mac, 0); + + REPLY_MACRO (VL_API_P2P_ETHERNET_DEL_REPLY); +} + +/* + * p2p_ethernet_api_hookup + * Add vpe's API message handlers to the table. + * vlib has alread mapped shared memory and + * added the client registration handlers. 
+ * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process() + */ +#define vl_msg_name_crc_list +#include +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (api_main_t * am) +{ +#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id); + foreach_vl_msg_name_crc_p2p_ethernet; +#undef _ +} + +static clib_error_t * +p2p_ethernet_api_hookup (vlib_main_t * vm) +{ + api_main_t *am = &api_main; + +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_vpe_api_msg; +#undef _ + + /* + * Set up the (msg_name, crc, message-id) table + */ + setup_message_id_table (am); + + return 0; +} + +VLIB_API_INIT_FUNCTION (p2p_ethernet_api_hookup); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/vnet_all_api_h.h b/src/vnet/vnet_all_api_h.h index 566e22ec..5da2acb7 100644 --- a/src/vnet/vnet_all_api_h.h +++ b/src/vnet/vnet_all_api_h.h @@ -56,6 +56,7 @@ #include #include #include +#include /* * fd.io coding-style-patch-verification: ON diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c index 9071883b..dc9fee9a 100644 --- a/src/vpp/api/custom_dump.c +++ b/src/vpp/api/custom_dump.c @@ -2932,6 +2932,30 @@ static void *vl_api_sw_interface_set_mtu_t_print FINISH; } +static void *vl_api_p2p_ethernet_add_t_print + (vl_api_p2p_ethernet_add_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: p2p_ethernet_add "); + s = format (s, "sw_if_index %d ", ntohl (mp->parent_if_index)); + s = format (s, "remote_mac %U ", format_ethernet_address, mp->remote_mac); + + FINISH; +} + +static void *vl_api_p2p_ethernet_del_t_print + (vl_api_p2p_ethernet_del_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: p2p_ethernet_del "); + s = format (s, "sw_if_index %d ", ntohl (mp->parent_if_index)); + s = format (s, "remote_mac %U ", format_ethernet_address, mp->remote_mac); + + FINISH; +} + #define foreach_custom_print_no_arg_function \ _(lisp_eid_table_vni_dump) \ _(lisp_map_resolver_dump) \ @@ -3112,7 +3136,9 @@ _(IP_FIB_DUMP, ip_fib_dump) \ _(IP6_FIB_DUMP, ip6_fib_dump) \ _(FEATURE_ENABLE_DISABLE, feature_enable_disable) \ _(SW_INTERFACE_TAG_ADD_DEL, sw_interface_tag_add_del) \ -_(SW_INTERFACE_SET_MTU, sw_interface_set_mtu) +_(SW_INTERFACE_SET_MTU, sw_interface_set_mtu) \ +_(P2P_ETHERNET_ADD, p2p_ethernet_add) \ +_(P2P_ETHERNET_DEL, p2p_ethernet_del) void vl_msg_api_custom_dump_configure (api_main_t * am) { -- cgit 1.2.3-korg From ef2a5bf0a31c9c0a94f9f497cb6353f46073e6ec Mon Sep 17 00:00:00 2001 From: Filip Tehlar Date: Tue, 30 May 2017 07:14:46 +0200 Subject: LISP: add NSH support Change-Id: I971c110ed126f1a24a963f9d3b88cf8f8c308816 Signed-off-by: Filip Tehlar --- src/tests/vnet/lisp-cp/test_lisp_types.c | 76 ++++++++++++- src/vat/api_format.c | 180 +++++++++++++++++++++++++++++-- src/vnet/lisp-cp/control.c | 139 +++++++++++++++++++++--- src/vnet/lisp-cp/control.h | 4 + src/vnet/lisp-cp/gid_dictionary.c | 79 +++++++++++++- src/vnet/lisp-cp/gid_dictionary.h | 17 +++ src/vnet/lisp-cp/lisp_cp_messages.h | 31 ++++++ src/vnet/lisp-cp/lisp_types.c | 108 ++++++++++++++++--- src/vnet/lisp-cp/lisp_types.h | 10 +- src/vnet/lisp-cp/one.api | 49 +++++++++ src/vnet/lisp-cp/one_api.c | 105 +++++++++++++++++- src/vnet/lisp-cp/one_cli.c | 58 +++++++++- src/vnet/lisp-gpe/interface.c | 8 +- src/vnet/lisp-gpe/lisp_gpe.h | 3 + src/vpp/api/test_client.c 
| 3 +- 15 files changed, 826 insertions(+), 44 deletions(-) (limited to 'src/vpp/api') diff --git a/src/tests/vnet/lisp-cp/test_lisp_types.c b/src/tests/vnet/lisp-cp/test_lisp_types.c index 21575015..7c55a9c1 100644 --- a/src/tests/vnet/lisp-cp/test_lisp_types.c +++ b/src/tests/vnet/lisp-cp/test_lisp_types.c @@ -90,7 +90,7 @@ static clib_error_t * test_gid_parse_ip_pref () { clib_error_t * error = 0; gid_address_t _gid_addr, * gid_addr = &_gid_addr; - gid_address_t _gid_addr_copy, * gid_addr_copy = &_gid_addr_copy; + gid_address_t _gid_addr_copy, * copy = &_gid_addr_copy; u8 data[] = { 0x00, 0x01, /* AFI = IPv4 */ @@ -99,8 +99,8 @@ static clib_error_t * test_gid_parse_ip_pref () u32 len = gid_address_parse (data, gid_addr); _assert (6 == len); - gid_address_copy (gid_addr_copy, gid_addr); - _assert (0 == gid_address_cmp (gid_addr_copy, gid_addr)); + gid_address_copy (copy, gid_addr); + _assert (0 == gid_address_cmp (copy, gid_addr)); done: return error; } @@ -127,6 +127,74 @@ done: return error; } +static clib_error_t * +test_gid_write_nsh (void) +{ + clib_error_t * error = 0; + + u8 * b = clib_mem_alloc(500); + memset(b, 0, 500); + + gid_address_t g = + { + .vni = 0, + .nsh.spi = 0x112233, + .nsh.si = 0x42, + .type = GID_ADDR_NSH, + }; + + u16 len = gid_address_put (b, &g); + + u8 expected[] = + { + 0x40, 0x03, 0x00, 0x00, /* AFI = LCAF*/ + 0x11, 0x00, 0x00, 0x04, /* type = SPI LCAF, length = 4 */ + + /* Service Path ID, Service index */ + 0x11, 0x22, 0x33, 0x42, /* SPI, SI */ + }; + + _assert (sizeof (expected) == len); + _assert (0 == memcmp (expected, b, len)); +done: + clib_mem_free (b); + return error; +} + +static clib_error_t * +test_gid_parse_nsh () +{ + clib_error_t * error = 0; + gid_address_t _gid_addr, * gid_addr = &_gid_addr; + gid_address_t _gid_addr_copy, * copy = &_gid_addr_copy; + + memset (gid_addr, 0, sizeof (gid_addr[0])); + memset (copy, 0, sizeof (copy[0])); + + u8 data[] = + { + 0x40, 0x03, 0x00, 0x00, /* AFI = LCAF*/ + 0x11, 0x00, 0x00, 0x04, /* type = SPI LCAF, length = 4 */ + + /* Service Path ID, Service index */ + 0x55, 0x99, 0x42, 0x09, /* SPI, SI */ + }; + + u32 len = gid_address_parse (data, gid_addr); + _assert (sizeof (data) == len); + gid_address_copy (copy, gid_addr); + _assert (0 == gid_address_cmp (gid_addr, copy)); + _assert (GID_ADDR_NSH == gid_address_type (copy)); + _assert (0 == gid_address_vni (copy)); + _assert (gid_address_nsh_spi (copy) == 0x559942); + _assert (gid_address_nsh_si (copy) == 0x09); + +done: + gid_address_free (copy); + gid_address_free (gid_addr); + return error; +} + static clib_error_t * test_gid_parse_lcaf () { clib_error_t * error = 0; @@ -555,6 +623,8 @@ done: _(gid_parse_ip_pref) \ _(gid_parse_mac) \ _(gid_parse_lcaf) \ + _(gid_parse_nsh) \ + _(gid_write_nsh) \ _(mac_address_write) \ _(gid_address_write) \ _(src_dst_serdes) \ diff --git a/src/vat/api_format.c b/src/vat/api_format.c index f33b4592..766624a0 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -2583,6 +2583,26 @@ static void vec_free (ls_name); } +typedef struct +{ + u32 spi; + u8 si; +} __attribute__ ((__packed__)) lisp_nsh_api_t; + +uword +unformat_nsh_address (unformat_input_t * input, va_list * args) +{ + lisp_nsh_api_t *nsh = va_arg (*args, lisp_nsh_api_t *); + return unformat (input, "SPI:%d SI:%d", &nsh->spi, &nsh->si); +} + +u8 * +format_nsh_address_vat (u8 * s, va_list * args) +{ + nsh_t *a = va_arg (*args, nsh_t *); + return format (s, "SPI:%d SI:%d", clib_net_to_host_u32 (a->spi), a->si); +} + static u8 * format_lisp_flat_eid (u8 * s, 
va_list * args) { @@ -2598,6 +2618,8 @@ format_lisp_flat_eid (u8 * s, va_list * args) return format (s, "%U/%d", format_ip6_address, eid, eid_len); case 2: return format (s, "%U", format_ethernet_address, eid); + case 3: + return format (s, "%U", format_nsh_address_vat, eid); } return 0; } @@ -2672,13 +2694,26 @@ vl_api_one_eid_table_details_t_handler_json (vl_api_one_eid_table_details_t clib_net_to_host_u32 (mp->locator_set_index)); vat_json_object_add_uint (node, "is_local", mp->is_local ? 1 : 0); - eid = format (0, "%U", format_lisp_eid_vat, - mp->eid_type, - mp->eid, - mp->eid_prefix_len, - mp->seid, mp->seid_prefix_len, mp->is_src_dst); - vec_add1 (eid, 0); - vat_json_object_add_string_copy (node, "eid", eid); + if (mp->eid_type == 3) + { + vat_json_node_t *nsh_json = vat_json_object_add (node, "eid"); + vat_json_init_object (nsh_json); + lisp_nsh_api_t *nsh = (lisp_nsh_api_t *) mp->eid; + vat_json_object_add_uint (nsh_json, "spi", + clib_net_to_host_u32 (nsh->spi)); + vat_json_object_add_uint (nsh_json, "si", nsh->si); + } + else + { + eid = format (0, "%U", format_lisp_eid_vat, + mp->eid_type, + mp->eid, + mp->eid_prefix_len, + mp->seid, mp->seid_prefix_len, mp->is_src_dst); + vec_add1 (eid, 0); + vat_json_object_add_string_copy (node, "eid", eid); + vec_free (eid); + } vat_json_object_add_uint (node, "vni", clib_net_to_host_u32 (mp->vni)); vat_json_object_add_uint (node, "ttl", clib_net_to_host_u32 (mp->ttl)); vat_json_object_add_uint (node, "authoritative", (mp->authoritative)); @@ -2689,7 +2724,6 @@ vl_api_one_eid_table_details_t_handler_json (vl_api_one_eid_table_details_t clib_net_to_host_u16 (mp->key_id)); vat_json_object_add_string_copy (node, "key", mp->key); } - vec_free (eid); } static void @@ -3665,6 +3699,52 @@ static void vam->result_ready = 1; } +static void + vl_api_show_one_nsh_mapping_reply_t_handler + (vl_api_show_one_nsh_mapping_reply_t * mp) +{ + vat_main_t *vam = &vat_main; + i32 retval = ntohl (mp->retval); + + if (0 <= retval) + { + print (vam->ofp, "%-20s%-16s", + mp->is_set ? "set" : "not-set", + mp->is_set ? (char *) mp->locator_set_name : ""); + } + + vam->retval = retval; + vam->result_ready = 1; +} + +static void + vl_api_show_one_nsh_mapping_reply_t_handler_json + (vl_api_show_one_nsh_mapping_reply_t * mp) +{ + vat_main_t *vam = &vat_main; + vat_json_node_t node; + u8 *status = 0; + + status = format (0, "%s", mp->is_set ? 
"yes" : "no"); + vec_add1 (status, 0); + + vat_json_init_object (&node); + vat_json_object_add_string_copy (&node, "is_set", status); + if (mp->is_set) + { + vat_json_object_add_string_copy (&node, "locator_set", + mp->locator_set_name); + } + + vec_free (status); + + vat_json_print (vam->ofp, &node); + vat_json_free (&node); + + vam->retval = ntohl (mp->retval); + vam->result_ready = 1; +} + static void vl_api_show_one_pitr_reply_t_handler (vl_api_show_one_pitr_reply_t * mp) { @@ -4672,6 +4752,7 @@ _(ONE_ADD_DEL_MAP_REQUEST_ITR_RLOCS_REPLY, \ one_add_del_map_request_itr_rlocs_reply) \ _(ONE_GET_MAP_REQUEST_ITR_RLOCS_REPLY, \ one_get_map_request_itr_rlocs_reply) \ +_(SHOW_ONE_NSH_MAPPING_REPLY, show_one_nsh_mapping_reply) \ _(SHOW_ONE_PITR_REPLY, show_one_pitr_reply) \ _(SHOW_ONE_USE_PETR_REPLY, show_one_use_petr_reply) \ _(SHOW_ONE_MAP_REQUEST_MODE_REPLY, show_one_map_request_mode_reply) \ @@ -14044,6 +14125,12 @@ unformat_lisp_eid_vat (unformat_input_t * input, va_list * args) { a->type = 2; /* mac type */ } + else if (unformat (input, "%U", unformat_nsh_address, a->addr)) + { + a->type = 3; /* NSH type */ + lisp_nsh_api_t *nsh = (lisp_nsh_api_t *) a->addr; + nsh->spi = clib_host_to_net_u32 (nsh->spi); + } else { return 0; @@ -14068,6 +14155,8 @@ lisp_eid_size_vat (u8 type) return 16; case 2: return 6; + case 3: + return 5; } return 0; } @@ -15166,6 +15255,50 @@ api_one_pitr_set_locator_set (vat_main_t * vam) #define api_lisp_pitr_set_locator_set api_one_pitr_set_locator_set +static int +api_one_nsh_set_locator_set (vat_main_t * vam) +{ + u8 ls_name_set = 0; + unformat_input_t *input = vam->input; + vl_api_one_nsh_set_locator_set_t *mp; + u8 is_add = 1; + u8 *ls_name = 0; + int ret; + + /* Parse args required to build the message */ + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "del")) + is_add = 0; + else if (unformat (input, "ls %s", &ls_name)) + ls_name_set = 1; + else + { + errmsg ("parse error '%U'", format_unformat_error, input); + return -99; + } + } + + if (!ls_name_set && is_add) + { + errmsg ("locator-set name not set!"); + return -99; + } + + M (ONE_NSH_SET_LOCATOR_SET, mp); + + mp->is_add = is_add; + clib_memcpy (mp->ls_name, ls_name, vec_len (ls_name)); + vec_free (ls_name); + + /* send */ + S (mp); + + /* wait for reply */ + W (ret); + return ret; +} + static int api_show_one_pitr (vat_main_t * vam) { @@ -15245,6 +15378,26 @@ api_one_use_petr (vat_main_t * vam) #define api_lisp_use_petr api_one_use_petr +static int +api_show_one_nsh_mapping (vat_main_t * vam) +{ + vl_api_show_one_use_petr_t *mp; + int ret; + + if (!vam->json_output) + { + print (vam->ofp, "%=20s", "local ONE NSH mapping:"); + } + + M (SHOW_ONE_NSH_MAPPING, mp); + /* send it... */ + S (mp); + + /* Wait for a reply... 
*/ + W (ret); + return ret; +} + static int api_show_one_use_petr (vat_main_t * vam) { @@ -16031,6 +16184,7 @@ api_one_eid_table_dump (vat_main_t * vam) u32 prefix_length = ~0, t, vni = 0; u8 filter = 0; int ret; + lisp_nsh_api_t nsh; while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) { @@ -16051,6 +16205,11 @@ api_one_eid_table_dump (vat_main_t * vam) eid_set = 1; eid_type = 2; } + else if (unformat (i, "eid %U", unformat_nsh_address, &nsh)) + { + eid_set = 1; + eid_type = 3; + } else if (unformat (i, "vni %d", &t)) { vni = t; @@ -16097,6 +16256,9 @@ api_one_eid_table_dump (vat_main_t * vam) case 2: clib_memcpy (mp->eid, mac, sizeof (mac)); break; + case 3: + clib_memcpy (mp->eid, &nsh, sizeof (nsh)); + break; default: errmsg ("unknown EID type %d!", eid_type); return -99; @@ -19423,12 +19585,14 @@ _(one_eid_table_map_dump, "l2|l3") \ _(one_map_resolver_dump, "") \ _(one_map_server_dump, "") \ _(one_adjacencies_get, "vni ") \ +_(one_nsh_set_locator_set, "[del] ls ") \ _(show_one_rloc_probe_state, "") \ _(show_one_map_register_state, "") \ _(show_one_status, "") \ _(one_stats_dump, "") \ _(one_stats_flush, "") \ _(one_get_map_request_itr_rlocs, "") \ +_(show_one_nsh_mapping, "") \ _(show_one_pitr, "") \ _(show_one_use_petr, "") \ _(show_one_map_request_mode, "") \ diff --git a/src/vnet/lisp-cp/control.c b/src/vnet/lisp-cp/control.c index 19bbd618..db78678d 100644 --- a/src/vnet/lisp-cp/control.c +++ b/src/vnet/lisp-cp/control.c @@ -28,6 +28,8 @@ #include #include +#define MAX_VALUE_U24 0xffffff + lisp_cp_main_t lisp_control_main; u8 *format_lisp_cp_input_trace (u8 * s, va_list * args); @@ -697,6 +699,20 @@ vnet_lisp_map_cache_add_del (vnet_lisp_add_del_mapping_args_t * a, mapping_t *m, *old_map; u32 **eid_indexes; + if (gid_address_type (&a->eid) == GID_ADDR_NSH) + { + if (gid_address_vni (&a->eid) != 0) + { + clib_warning ("Supported only default VNI for NSH!"); + return VNET_API_ERROR_INVALID_ARGUMENT; + } + if (gid_address_nsh_spi (&a->eid) > MAX_VALUE_U24) + { + clib_warning ("SPI is greater than 24bit!"); + return VNET_API_ERROR_INVALID_ARGUMENT; + } + } + mi = gid_dictionary_lookup (&lcm->mapping_index_by_gid, &a->eid); old_map = mi != ~0 ? pool_elt_at_index (lcm->mapping_pool, mi) : 0; if (a->is_add) @@ -812,7 +828,7 @@ vnet_lisp_add_del_local_mapping (vnet_lisp_add_del_mapping_args_t * a, else if (GID_ADDR_MAC == type) dp_table = hash_get (lcm->bd_id_by_vni, vni); - if (!dp_table) + if (!dp_table && GID_ADDR_NSH != type) { clib_warning ("vni %d not associated to a %s!", vni, GID_ADDR_IP_PREFIX == type ? "vrf" : "bd"); @@ -1329,8 +1345,23 @@ vnet_lisp_add_del_adjacency (vnet_lisp_add_del_adjacency_args_t * a) { /* check if source eid has an associated mapping. If pitr mode is on, * just use the pitr's mapping */ - local_mi = lcm->lisp_pitr ? 
lcm->pitr_map_index : - gid_dictionary_lookup (&lcm->mapping_index_by_gid, &a->leid); + if (lcm->lisp_pitr) + local_mi = lcm->pitr_map_index; + else + { + if (gid_address_type (&a->reid) == GID_ADDR_NSH) + { + if (lcm->nsh_map_index == ~0) + local_mi = GID_LOOKUP_MISS; + else + local_mi = lcm->nsh_map_index; + } + else + { + local_mi = gid_dictionary_lookup (&lcm->mapping_index_by_gid, + &a->leid); + } + } if (GID_LOOKUP_MISS == local_mi) { @@ -1370,6 +1401,57 @@ vnet_lisp_set_map_request_mode (u8 mode) return 0; } +int +vnet_lisp_nsh_set_locator_set (u8 * locator_set_name, u8 is_add) +{ + lisp_cp_main_t *lcm = vnet_lisp_cp_get_main (); + lisp_gpe_main_t *lgm = vnet_lisp_gpe_get_main (); + u32 locator_set_index = ~0; + mapping_t *m; + uword *p; + + if (vnet_lisp_enable_disable_status () == 0) + { + clib_warning ("LISP is disabled!"); + return VNET_API_ERROR_LISP_DISABLED; + } + + if (is_add) + { + if (lcm->nsh_map_index == (u32) ~ 0) + { + p = hash_get_mem (lcm->locator_set_index_by_name, locator_set_name); + if (!p) + { + clib_warning ("locator-set %v doesn't exist", locator_set_name); + return -1; + } + locator_set_index = p[0]; + + pool_get (lcm->mapping_pool, m); + memset (m, 0, sizeof *m); + m->locator_set_index = locator_set_index; + m->local = 1; + m->nsh_set = 1; + lcm->nsh_map_index = m - lcm->mapping_pool; + + if (~0 == vnet_lisp_gpe_add_nsh_iface (lgm)) + return -1; + } + } + else + { + if (lcm->nsh_map_index != (u32) ~ 0) + { + /* remove NSH mapping */ + pool_put_index (lcm->mapping_pool, lcm->nsh_map_index); + lcm->nsh_map_index = ~0; + vnet_lisp_gpe_del_nsh_iface (lgm); + } + } + return 0; +} + int vnet_lisp_pitr_set_locator_set (u8 * locator_set_name, u8 is_add) { @@ -2667,7 +2749,7 @@ _send_encapsulated_map_request (lisp_cp_main_t * lcm, } /* get locator-set for seid */ - if (!lcm->lisp_pitr) + if (!lcm->lisp_pitr && gid_address_type (deid) != GID_ADDR_NSH) { map_index = gid_dictionary_lookup (&lcm->mapping_index_by_gid, seid); if (map_index == ~0) @@ -2690,9 +2772,24 @@ _send_encapsulated_map_request (lisp_cp_main_t * lcm, } else { - map_index = lcm->pitr_map_index; - map = pool_elt_at_index (lcm->mapping_pool, lcm->pitr_map_index); - ls_index = map->locator_set_index; + if (lcm->lisp_pitr) + { + map = pool_elt_at_index (lcm->mapping_pool, lcm->pitr_map_index); + ls_index = map->locator_set_index; + } + else + { + if (lcm->nsh_map_index == (u32) ~ 0) + { + clib_warning ("No locator-set defined for NSH!"); + return -1; + } + else + { + map = pool_elt_at_index (lcm->mapping_pool, lcm->nsh_map_index); + ls_index = map->locator_set_index; + } + } } /* overwrite locator set if map-request itr-rlocs configured */ @@ -2843,6 +2940,7 @@ get_src_and_dst_eids_from_buffer (lisp_cp_main_t * lcm, vlib_buffer_t * b, gid_address_t * src, gid_address_t * dst, u16 type) { + ethernet_header_t *eh; u32 vni = 0; memset (src, 0, sizeof (*src)); @@ -2873,7 +2971,6 @@ get_src_and_dst_eids_from_buffer (lisp_cp_main_t * lcm, vlib_buffer_t * b, } else if (LISP_AFI_MAC == type) { - ethernet_header_t *eh; ethernet_arp_header_t *ah; eh = vlib_buffer_get_current (b); @@ -2906,8 +3003,19 @@ get_src_and_dst_eids_from_buffer (lisp_cp_main_t * lcm, vlib_buffer_t * b, } else if (LISP_AFI_LCAF == type) { - /* Eventually extend this to support NSH and other */ - ASSERT (0); + lisp_nsh_hdr_t *nh; + eh = vlib_buffer_get_current (b); + + if (clib_net_to_host_u16 (eh->type) == ETHERNET_TYPE_NSH) + { + nh = (lisp_nsh_hdr_t *) (((u8 *) eh) + sizeof (*eh)); + u32 spi = clib_net_to_host_u32 (nh->spi_si << 8); + u8 si 
= (u8) clib_net_to_host_u32 (nh->spi_si); + gid_address_nsh_spi (dst) = spi; + gid_address_nsh_si (dst) = si; + + gid_address_type (dst) = GID_ADDR_NSH; + } } } @@ -3009,8 +3117,14 @@ lisp_cp_lookup_inline (vlib_main_t * vm, } else { - si = gid_dictionary_lookup (&lcm->mapping_index_by_gid, - &src); + if (GID_ADDR_NSH != gid_address_type (&dst)) + { + si = gid_dictionary_lookup (&lcm->mapping_index_by_gid, + &src); + } + else + si = lcm->nsh_map_index; + if (~0 != si) { dp_add_fwd_entry_from_mt (si, di); @@ -3862,6 +3976,7 @@ lisp_cp_init (vlib_main_t * vm) u64 now = clib_cpu_time_now (); timing_wheel_init (&lcm->wheel, now, vm->clib_time.clocks_per_second); + lcm->nsh_map_index = ~0; return 0; } diff --git a/src/vnet/lisp-cp/control.h b/src/vnet/lisp-cp/control.h index feb8cfa4..ad90b526 100644 --- a/src/vnet/lisp-cp/control.h +++ b/src/vnet/lisp-cp/control.h @@ -211,6 +211,9 @@ typedef struct /* LISP PITR mode */ u8 lisp_pitr; + /* mapping index for NSH */ + u32 nsh_map_index; + /* map request mode */ u8 map_request_mode; @@ -353,6 +356,7 @@ u8 vnet_lisp_rloc_probe_state_get (void); int vnet_lisp_add_del_l2_arp_entry (gid_address_t * key, u8 * mac, u8 is_add); u32 *vnet_lisp_l2_arp_bds_get (void); lisp_api_l2_arp_entry_t *vnet_lisp_l2_arp_entries_get_by_bd (u32 bd); +int vnet_lisp_nsh_set_locator_set (u8 * locator_set_name, u8 is_add); map_records_arg_t *parse_map_reply (vlib_buffer_t * b); diff --git a/src/vnet/lisp-cp/gid_dictionary.c b/src/vnet/lisp-cp/gid_dictionary.c index 80d59faf..cf9a741a 100644 --- a/src/vnet/lisp-cp/gid_dictionary.c +++ b/src/vnet/lisp-cp/gid_dictionary.c @@ -345,6 +345,14 @@ make_arp_key (BVT (clib_bihash_kv) * kv, u32 bd, ip4_address_t * addr) kv->key[2] = (u64) 0; } +static void +make_nsh_key (BVT (clib_bihash_kv) * kv, u32 vni, u32 spi, u8 si) +{ + kv->key[0] = (u64) vni; + kv->key[1] = (u64) spi; + kv->key[2] = (u64) si; +} + static u64 arp_lookup (gid_l2_arp_table_t * db, u32 bd, ip4_address_t * key) { @@ -360,6 +368,21 @@ arp_lookup (gid_l2_arp_table_t * db, u32 bd, ip4_address_t * key) return GID_LOOKUP_MISS_L2; } +static u32 +nsh_lookup (gid_nsh_table_t * db, u32 vni, u32 spi, u8 si) +{ + int rv; + BVT (clib_bihash_kv) kv, value; + + make_nsh_key (&kv, vni, spi, si); + rv = BV (clib_bihash_search_inline_2) (&db->nsh_lookup_table, &kv, &value); + + if (rv == 0) + return value.value; + + return GID_LOOKUP_MISS; +} + u64 gid_dictionary_lookup (gid_dictionary_t * db, gid_address_t * key) { @@ -393,6 +416,9 @@ gid_dictionary_lookup (gid_dictionary_t * db, gid_address_t * key) case GID_ADDR_ARP: return arp_lookup (&db->arp_table, gid_address_arp_bd (key), &gid_address_arp_ip4 (key)); + case GID_ADDR_NSH: + return nsh_lookup (&db->nsh_table, gid_address_vni (key), + gid_address_nsh_spi (key), gid_address_nsh_si (key)); default: clib_warning ("address type %d not supported!", gid_address_type (key)); break; @@ -432,6 +458,9 @@ gid_dictionary_sd_lookup (gid_dictionary_t * db, gid_address_t * dst, break; } break; + case GID_ADDR_NSH: + return gid_dictionary_lookup (db, dst); + break; default: clib_warning ("address type %d not supported!", gid_address_type (dst)); break; @@ -860,7 +889,7 @@ add_del_sd (gid_dictionary_t * db, u32 vni, source_dest_t * key, u32 value, return ~0; } -static u32 +static u64 add_del_arp (gid_l2_arp_table_t * db, u32 bd, ip4_address_t * key, u64 value, u8 is_add) { @@ -885,6 +914,31 @@ add_del_arp (gid_l2_arp_table_t * db, u32 bd, ip4_address_t * key, u64 value, return old_val; } +static u32 +add_del_nsh (gid_nsh_table_t * db, u32 vni, 
u32 spi, u8 si, u32 value, + u8 is_add) +{ + BVT (clib_bihash_kv) kv, result; + u32 old_val = ~0; + + make_nsh_key (&kv, vni, spi, si); + if (BV (clib_bihash_search) (&db->nsh_lookup_table, &kv, &result) == 0) + old_val = result.value; + + if (is_add) + { + kv.value = value; + BV (clib_bihash_add_del) (&db->nsh_lookup_table, &kv, 1 /* is_add */ ); + db->count++; + } + else + { + BV (clib_bihash_add_del) (&db->nsh_lookup_table, &kv, 0 /* is_add */ ); + db->count--; + } + return old_val; +} + u32 gid_dictionary_add_del (gid_dictionary_t * db, gid_address_t * key, u64 value, u8 is_add) @@ -903,6 +957,11 @@ gid_dictionary_add_del (gid_dictionary_t * db, gid_address_t * key, u64 value, case GID_ADDR_ARP: return add_del_arp (&db->arp_table, gid_address_arp_bd (key), &gid_address_arp_ip4 (key), value, is_add); + case GID_ADDR_NSH: + return add_del_nsh (&db->nsh_table, gid_address_vni (key), + gid_address_nsh_spi (key), gid_address_nsh_si (key), + value, is_add); + default: clib_warning ("address type %d not supported!", gid_address_type (key)); break; @@ -944,6 +1003,23 @@ arp_lookup_init (gid_l2_arp_table_t * db) db->arp_lookup_table_size); } +static void +nsh_lookup_init (gid_nsh_table_t * db) +{ + if (db->nsh_lookup_table_nbuckets == 0) + db->nsh_lookup_table_nbuckets = MAC_LOOKUP_DEFAULT_HASH_NUM_BUCKETS; + + db->nsh_lookup_table_nbuckets = + 1 << max_log2 (db->nsh_lookup_table_nbuckets); + + if (db->nsh_lookup_table_size == 0) + db->nsh_lookup_table_size = MAC_LOOKUP_DEFAULT_HASH_MEMORY_SIZE; + + BV (clib_bihash_init) (&db->nsh_lookup_table, "nsh lookup table", + db->nsh_lookup_table_nbuckets, + db->nsh_lookup_table_size); +} + void gid_dictionary_init (gid_dictionary_t * db) { @@ -951,6 +1027,7 @@ gid_dictionary_init (gid_dictionary_t * db) ip6_lookup_init (&db->dst_ip6_table); mac_lookup_init (&db->sd_mac_table); arp_lookup_init (&db->arp_table); + nsh_lookup_init (&db->nsh_table); } /* diff --git a/src/vnet/lisp-cp/gid_dictionary.h b/src/vnet/lisp-cp/gid_dictionary.h index 9612fb13..51806bd6 100644 --- a/src/vnet/lisp-cp/gid_dictionary.h +++ b/src/vnet/lisp-cp/gid_dictionary.h @@ -40,6 +40,10 @@ #define ARP_LOOKUP_DEFAULT_HASH_NUM_BUCKETS (64 * 1024) #define ARP_LOOKUP_DEFAULT_HASH_MEMORY_SIZE (32<<20) +/* Default size of the NSH hash table */ +#define NSH_LOOKUP_DEFAULT_HASH_NUM_BUCKETS (64 * 1024) +#define NSH_LOOKUP_DEFAULT_HASH_MEMORY_SIZE (32<<20) + typedef void (*foreach_subprefix_match_cb_t) (u32, void *); typedef struct @@ -84,6 +88,16 @@ typedef struct gid_mac_table u64 count; } gid_mac_table_t; +typedef struct gid_nsh_table +{ + BVT (clib_bihash) nsh_lookup_table; + + /* nsh lookup table config parameters */ + u32 nsh_lookup_table_nbuckets; + uword nsh_lookup_table_size; + u64 count; +} gid_nsh_table_t; + typedef struct { BVT (clib_bihash) arp_lookup_table; @@ -97,6 +111,9 @@ typedef struct /** L2 ARP table */ gid_l2_arp_table_t arp_table; + /** NSH lookup table */ + gid_nsh_table_t nsh_table; + /** destination IP LPM ip4 lookup table */ gid_ip4_table_t dst_ip4_table; diff --git a/src/vnet/lisp-cp/lisp_cp_messages.h b/src/vnet/lisp-cp/lisp_cp_messages.h index 278f60e1..69510a0e 100644 --- a/src/vnet/lisp-cp/lisp_cp_messages.h +++ b/src/vnet/lisp-cp/lisp_cp_messages.h @@ -473,6 +473,22 @@ typedef struct _lcaf_src_dst_hdr_t #define LCAF_SD_SRC_ML(_h) (_h)->src_mask_len #define LCAF_SD_DST_ML(_h) (_h)->dst_mask_len +/* + * SPI LCAF + * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * 
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Service Path ID | Service index | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +typedef struct _lcaf_spi_hdr_t +{ + u32 spi_si; +} __attribute__ ((__packed__)) lcaf_spi_hdr_t; + +#define LCAF_SPI_SI(_h) (_h)->spi_si + /* * The Map-Register message format is: * @@ -602,6 +618,21 @@ typedef struct #define MNOTIFY_AUTH_DATA_LEN(h_) (MREG_HDR_CAST(h_))->auth_data_len #define MNOTIFY_DATA(h_) (MREG_HDR_CAST(h_))->data +/* + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |Ver|O|C|R|R|R|R|R|R| Length | MD type=0x1 | Next Protocol | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Service Path Identifer | Service Index | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ + +typedef struct +{ + u32 header; + u32 spi_si; +} __attribute__ ((__packed__)) lisp_nsh_hdr_t; + #endif /* VNET_LISP_GPE_LISP_CP_MESSAGES_H_ */ /* diff --git a/src/vnet/lisp-cp/lisp_types.c b/src/vnet/lisp-cp/lisp_types.c index 85cefae0..e50c3aa1 100644 --- a/src/vnet/lisp-cp/lisp_types.c +++ b/src/vnet/lisp-cp/lisp_types.c @@ -31,16 +31,28 @@ typedef int (*cmp_fct) (void *, void *); size_to_write_fct size_to_write_fcts[GID_ADDR_TYPES] = { ip_prefix_size_to_write, lcaf_size_to_write, mac_size_to_write, - sd_size_to_write, nsh_size_to_write + sd_size_to_write, nsh_size_to_write, 0 /* arp */ , no_addr_size_to_write }; + serdes_fct write_fcts[GID_ADDR_TYPES] = - { ip_prefix_write, lcaf_write, mac_write, sd_write, nsh_write }; + { ip_prefix_write, lcaf_write, mac_write, sd_write, nsh_write, 0 /* arp */ , + no_addr_write +}; + cast_fct cast_fcts[GID_ADDR_TYPES] = - { ip_prefix_cast, lcaf_cast, mac_cast, sd_cast, nsh_cast }; + { ip_prefix_cast, lcaf_cast, mac_cast, sd_cast, nsh_cast, 0 /* arp */ , + no_addr_cast +}; + addr_len_fct addr_len_fcts[GID_ADDR_TYPES] = - { ip_prefix_length, lcaf_length, mac_length, sd_length, nsh_length }; + { ip_prefix_length, lcaf_length, mac_length, sd_length, nsh_length, + 0 /* arp */ , no_addr_length +}; + copy_fct copy_fcts[GID_ADDR_TYPES] = - { ip_prefix_copy, lcaf_copy, mac_copy, sd_copy, nsh_copy }; + { ip_prefix_copy, lcaf_copy, mac_copy, sd_copy, nsh_copy, 0 /* arp */ , + no_addr_copy +}; #define foreach_lcaf_type \ _(1, no_addr) \ @@ -55,7 +67,12 @@ copy_fct copy_fcts[GID_ADDR_TYPES] = _(0, NULL) \ _(0, NULL) \ _(0, NULL) \ - _(1, sd) + _(1, sd) \ + _(0, NULL) \ + _(0, NULL) \ + _(0, NULL) \ + _(0, NULL) \ + _(1, nsh) #define _(cond, name) \ u16 name ## _write (u8 * p, void * a); \ @@ -254,11 +271,12 @@ format_gid_address (u8 * s, va_list * args) case GID_ADDR_MAC: return format (s, "[%d] %U", gid_address_vni (a), format_mac_address, &gid_address_mac (a)); - case GID_ADDR_NSH: - return format (s, "%U", format_nsh_address, &gid_address_nsh (a)); case GID_ADDR_ARP: return format (s, "[%d, %U]", gid_address_arp_bd (a), format_ip4_address, &gid_address_arp_ip4 (a)); + case GID_ADDR_NSH: + return format (s, "%U", format_nsh_address, &gid_address_nsh (a)); + default: clib_warning ("Can't format gid type %d", type); return 0; @@ -287,7 +305,7 @@ unformat_fid_address (unformat_input_t * i, va_list * args) else if (unformat (i, "%U", unformat_nsh_address, &nsh)) { fid_addr_type (a) = FID_ADDR_NSH; - nsh_copy (&fid_addr_nsh (a), mac); + nsh_copy (&fid_addr_nsh (a), &nsh); } else return 0; @@ -673,6 +691,38 @@ do { \ dst += _sum; \ } while (0); +void +nsh_free (void 
*a) +{ + /* nothing to do */ +} + +u16 +nsh_parse (u8 * p, void *a) +{ + lcaf_spi_hdr_t *h = (lcaf_spi_hdr_t *) p; + gid_address_t *g = a; + + gid_address_type (g) = GID_ADDR_NSH; + gid_address_nsh_spi (g) = clib_net_to_host_u32 (LCAF_SPI_SI (h)) >> 8; + gid_address_nsh_si (g) = (u8) clib_net_to_host_u32 (LCAF_SPI_SI (h)); + + return sizeof (lcaf_spi_hdr_t); +} + +int +nsh_cmp (void *a1, void *a2) +{ + nsh_t *n1 = a1; + nsh_t *n2 = a2; + + if (n1->spi != n2->spi) + return 1; + if (n1->si != n2->si) + return 1; + return 0; +} + u16 sd_parse (u8 * p, void *a) { @@ -1094,6 +1144,12 @@ mac_cast (gid_address_t * a) return &gid_address_mac (a); } +void * +no_addr_cast (gid_address_t * a) +{ + return (void *) a; +} + void * sd_cast (gid_address_t * a) { @@ -1227,8 +1283,33 @@ sd_write (u8 * p, void *a) u16 nsh_write (u8 * p, void *a) { - clib_warning ("not done"); - return 0; + lcaf_spi_hdr_t spi; + lcaf_hdr_t lcaf; + gid_address_t *g = a; + u16 size = 0; + + ASSERT (gid_address_type (g) == GID_ADDR_NSH); + + memset (&lcaf, 0, sizeof (lcaf)); + memset (&spi, 0, sizeof (spi)); + + LCAF_TYPE (&lcaf) = LCAF_NSH; + LCAF_LENGTH (&lcaf) = clib_host_to_net_u16 (sizeof (lcaf_spi_hdr_t)); + + u32 s = clib_host_to_net_u32 (gid_address_nsh_spi (g) << 8 | + gid_address_nsh_si (g)); + LCAF_SPI_SI (&spi) = s; + + *(u16 *) p = clib_host_to_net_u16 (LISP_AFI_LCAF); + size += sizeof (u16); + + clib_memcpy (p + size, &lcaf, sizeof (lcaf)); + size += sizeof (lcaf); + + clib_memcpy (p + size, &spi, sizeof (spi)); + size += sizeof (spi); + + return size; } u16 @@ -1354,7 +1435,7 @@ mac_size_to_write (void *a) u16 nsh_size_to_write (void *a) { - return sizeof (u16) + 4; + return sizeof (u16) + sizeof (lcaf_hdr_t) + sizeof (lcaf_spi_hdr_t); } u8 @@ -1581,6 +1662,9 @@ gid_address_cmp (gid_address_t * a1, gid_address_t * a2) case GID_ADDR_SRC_DST: cmp = sd_cmp (&gid_address_sd (a1), &gid_address_sd (a2)); break; + case GID_ADDR_NSH: + cmp = nsh_cmp (&gid_address_nsh (a1), &gid_address_nsh (a2)); + break; default: break; } diff --git a/src/vnet/lisp-cp/lisp_types.h b/src/vnet/lisp-cp/lisp_types.h index f5d2a676..b7ad0f27 100644 --- a/src/vnet/lisp-cp/lisp_types.h +++ b/src/vnet/lisp-cp/lisp_types.h @@ -102,6 +102,7 @@ typedef enum LCAF_AFI_LIST_TYPE, LCAF_INSTANCE_ID, LCAF_SOURCE_DEST = 12, + LCAF_NSH = 17, LCAF_TYPES } lcaf_type_t; @@ -166,6 +167,9 @@ typedef struct u8 si; } nsh_t; +#define nsh_spi(_a) (_a)->spi +#define nsh_si(_a) (_a)->si + typedef struct { ip4_address_t addr; @@ -258,6 +262,8 @@ void gid_address_ip_set (gid_address_t * dst, void *src, u8 version); #define gid_address_lcaf(_a) (_a)->lcaf #define gid_address_mac(_a) (_a)->mac #define gid_address_nsh(_a) (_a)->nsh +#define gid_address_nsh_spi(_a) nsh_spi(&gid_address_nsh(_a)) +#define gid_address_nsh_si(_a) nsh_si(&gid_address_nsh(_a)) #define gid_address_vni(_a) (_a)->vni #define gid_address_vni_mask(_a) (_a)->vni_mask #define gid_address_sd_dst_ippref(_a) sd_dst_ippref(&(_a)->sd) @@ -275,6 +281,7 @@ void gid_address_ip_set (gid_address_t * dst, void *src, u8 version); /* 'sub'address functions */ #define foreach_gid_address_type_fcns \ + _(no_addr) \ _(ip_prefix) \ _(lcaf) \ _(mac) \ @@ -350,7 +357,8 @@ typedef struct /* valid only for remote mappings */ u8 is_static:1; u8 pitr_set:1; - u8 rsvd:4; + u8 nsh_set:1; + u8 rsvd:3; u8 *key; diff --git a/src/vnet/lisp-cp/one.api b/src/vnet/lisp-cp/one.api index 7d07cc47..31811a34 100644 --- a/src/vnet/lisp-cp/one.api +++ b/src/vnet/lisp-cp/one.api @@ -146,6 +146,20 @@ autoreply define one_enable_disable u8 
is_en; }; +/** \brief configure or delete ONE NSH mapping + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param ls_name - locator set name + @param is_add - add locator set if non-zero; delete otherwise +*/ +autoreply define one_nsh_set_locator_set +{ + u32 client_index; + u32 context; + u8 is_add; + u8 ls_name[64]; +}; + /** \brief configure or disable ONE PITR node @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -325,6 +339,12 @@ typeonly manual_endian manual_print define one_remote_locator 0 : ipv4 1 : ipv6 2 : mac + 3 : NSH : both information (service path ID and service index) are + encoded in 'eid' field in a following way: + + |4 B |1 B | + ----------- + |SPI | SI | @param deid - dst EID @param seid - src EID, valid only if is_src_dst is enabled @param rloc_num - number of remote locators @@ -596,6 +616,12 @@ define one_eid_table_details 0: EID is IPv4 1: EID is IPv6 2: EID is ethernet address + 3 : NSH : both information (service path ID and service index) are + encoded in 'eid' field in a following way: + + |4 B |1 B | + ----------- + |SPI | SI | @param eid - endpoint identifier @param filter - filter type; Support values: @@ -787,6 +813,29 @@ define one_get_map_request_itr_rlocs_reply u8 locator_set_name[64]; }; +/** \brief Request for ONE NSH mapping + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define show_one_nsh_mapping +{ + u32 client_index; + u32 context; +}; + +/** \brief Reply for ONE NSH mapping + @param context - sender context, to match reply w/ request + @param is_set - is ONE NSH mapping set + @param locator_set_name - name of the locator_set if NSH mapping is set +*/ +define show_one_nsh_mapping_reply +{ + u32 context; + i32 retval; + u8 is_set; + u8 locator_set_name[64]; +}; + /** \brief Request for ONE PITR status @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/lisp-cp/one_api.c b/src/vnet/lisp-cp/one_api.c index 724e58df..18235fa4 100644 --- a/src/vnet/lisp-cp/one_api.c +++ b/src/vnet/lisp-cp/one_api.c @@ -90,6 +90,7 @@ _(ONE_MAP_REGISTER_ENABLE_DISABLE, one_map_register_enable_disable) \ _(ONE_ADD_DEL_REMOTE_MAPPING, one_add_del_remote_mapping) \ _(ONE_ADD_DEL_ADJACENCY, one_add_del_adjacency) \ _(ONE_PITR_SET_LOCATOR_SET, one_pitr_set_locator_set) \ +_(ONE_NSH_SET_LOCATOR_SET, one_nsh_set_locator_set) \ _(ONE_MAP_REQUEST_MODE, one_map_request_mode) \ _(ONE_EID_TABLE_ADD_DEL_MAP, one_eid_table_add_del_map) \ _(ONE_LOCATOR_SET_DUMP, one_locator_set_dump) \ @@ -100,6 +101,7 @@ _(ONE_MAP_SERVER_DUMP, one_map_server_dump) \ _(ONE_EID_TABLE_MAP_DUMP, one_eid_table_map_dump) \ _(ONE_EID_TABLE_VNI_DUMP, one_eid_table_vni_dump) \ _(ONE_ADJACENCIES_GET, one_adjacencies_get) \ +_(SHOW_ONE_NSH_MAPPING, show_one_nsh_mapping) \ _(SHOW_ONE_RLOC_PROBE_STATE, show_one_rloc_probe_state) \ _(SHOW_ONE_MAP_REGISTER_STATE, show_one_map_register_state) \ _(SHOW_ONE_STATUS, show_one_status) \ @@ -225,10 +227,18 @@ vl_api_one_add_del_locator_t_handler (vl_api_one_add_del_locator_t * mp) REPLY_MACRO (VL_API_ONE_ADD_DEL_LOCATOR_REPLY); } +typedef struct +{ + u32 spi; + u8 si; +} __attribute__ ((__packed__)) lisp_nsh_api_t; + static int unformat_one_eid_api (gid_address_t * dst, u32 vni, u8 type, void *src, u8 len) { + lisp_nsh_api_t *nsh; + switch (type) { case 0: /* 
ipv4 */ @@ -247,6 +257,12 @@ unformat_one_eid_api (gid_address_t * dst, u32 vni, u8 type, void *src, gid_address_type (dst) = GID_ADDR_MAC; clib_memcpy (&gid_address_mac (dst), src, 6); break; + case 3: /* NSH */ + gid_address_type (dst) = GID_ADDR_NSH; + nsh = src; + gid_address_nsh_spi (dst) = clib_net_to_host_u32 (nsh->spi); + gid_address_nsh_si (dst) = nsh->si; + break; default: /* unknown type */ return VNET_API_ERROR_INVALID_VALUE; @@ -276,6 +292,12 @@ vl_api_one_add_del_local_eid_t_handler (vl_api_one_add_del_local_eid_t * mp) if (rv) goto out; + if (gid_address_type (eid) == GID_ADDR_NSH) + { + rv = VNET_API_ERROR_INVALID_VALUE; + goto out; + } + name = format (0, "%s", mp->locator_set_name); p = hash_get_mem (lcm->locator_set_index_by_name, name); if (!p) @@ -408,6 +430,21 @@ vl_api_one_map_request_mode_t_handler (vl_api_one_map_request_mode_t * mp) REPLY_MACRO (VL_API_ONE_MAP_REQUEST_MODE_REPLY); } +static void +vl_api_one_nsh_set_locator_set_t_handler (vl_api_one_nsh_set_locator_set_t + * mp) +{ + vl_api_one_nsh_set_locator_set_reply_t *rmp; + int rv = 0; + u8 *ls_name = 0; + + ls_name = format (0, "%s", mp->ls_name); + rv = vnet_lisp_nsh_set_locator_set (ls_name, mp->is_add); + vec_free (ls_name); + + REPLY_MACRO (VL_API_ONE_PITR_SET_LOCATOR_SET_REPLY); +} + static void vl_api_one_pitr_set_locator_set_t_handler (vl_api_one_pitr_set_locator_set_t * mp) @@ -781,7 +818,7 @@ send_one_eid_table_details (mapping_t * mapit, u8 *mac = 0; ip_prefix_t *ip_prefix = NULL; - if (mapit->pitr_set) + if (mapit->pitr_set || mapit->nsh_set) return; switch (filter) @@ -851,6 +888,13 @@ send_one_eid_table_details (mapping_t * mapit, rmp->eid_type = 2; /* l2 mac type */ clib_memcpy (rmp->eid, mac, 6); break; + case GID_ADDR_NSH: + rmp->eid_type = 3; /* NSH type */ + lisp_nsh_api_t nsh; + nsh.spi = clib_host_to_net_u32 (gid_address_nsh_spi (gid)); + nsh.si = gid_address_nsh_si (gid); + clib_memcpy (rmp->eid, &nsh, sizeof (nsh)); + break; default: ASSERT (0); } @@ -1071,6 +1115,7 @@ one_adjacency_copy (vl_api_one_adjacency_t * dst, lisp_adjacency_t * adjs) lisp_adjacency_t *adj; vl_api_one_adjacency_t a; u32 i, n = vec_len (adjs); + lisp_nsh_api_t nsh; for (i = 0; i < n; i++) { @@ -1100,6 +1145,15 @@ one_adjacency_copy (vl_api_one_adjacency_t * dst, lisp_adjacency_t * adjs) mac_copy (a.reid, gid_address_mac (&adj->reid)); mac_copy (a.leid, gid_address_mac (&adj->leid)); break; + case GID_ADDR_NSH: + a.eid_type = 3; /* NSH type */ + nsh.spi = clib_host_to_net_u32 (gid_address_nsh_spi (&adj->reid)); + nsh.si = gid_address_nsh_si (&adj->reid); + clib_memcpy (a.reid, &nsh, sizeof (nsh)); + + nsh.spi = clib_host_to_net_u32 (gid_address_nsh_spi (&adj->leid)); + nsh.si = gid_address_nsh_si (&adj->leid); + clib_memcpy (a.leid, &nsh, sizeof (nsh)); default: ASSERT (0); } @@ -1255,6 +1309,55 @@ static void vec_free (tmp_str); } +static void +vl_api_show_one_nsh_mapping_t_handler (vl_api_show_one_nsh_mapping_t * mp) +{ + unix_shared_memory_queue_t *q = NULL; + vl_api_show_one_nsh_mapping_reply_t *rmp = NULL; + lisp_cp_main_t *lcm = vnet_lisp_cp_get_main (); + mapping_t *m; + locator_set_t *ls = 0; + u8 *tmp_str = 0; + u8 is_set = 0; + int rv = 0; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + { + return; + } + + if (lcm->nsh_map_index == (u32) ~ 0) + { + tmp_str = format (0, "N/A"); + } + else + { + m = pool_elt_at_index (lcm->mapping_pool, lcm->nsh_map_index); + if (~0 != m->locator_set_index) + { + ls = + pool_elt_at_index (lcm->locator_set_pool, m->locator_set_index); + 
tmp_str = format (0, "%s", ls->name); + is_set = 1; + } + else + { + tmp_str = format (0, "N/A"); + } + } + vec_add1 (tmp_str, 0); + + /* *INDENT-OFF* */ + REPLY_MACRO2(VL_API_SHOW_ONE_NSH_MAPPING_REPLY, + ({ + rmp->is_set = is_set; + strncpy((char *) rmp->locator_set_name, (char *) tmp_str, + ARRAY_LEN(rmp->locator_set_name) - 1); + })); + /* *INDENT-ON* */ +} + static void vl_api_show_one_pitr_t_handler (vl_api_show_one_pitr_t * mp) { diff --git a/src/vnet/lisp-cp/one_cli.c b/src/vnet/lisp-cp/one_cli.c index db6a6c83..e3fbf5a1 100644 --- a/src/vnet/lisp-cp/one_cli.c +++ b/src/vnet/lisp-cp/one_cli.c @@ -721,6 +721,62 @@ VLIB_CLI_COMMAND (one_show_map_resolvers_command) = { }; /* *INDENT-ON* */ +static clib_error_t * +lisp_nsh_set_locator_set_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u8 locator_name_set = 0; + u8 *locator_set_name = 0; + u8 is_add = 1; + unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *error = 0; + int rv = 0; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "ls %_%v%_", &locator_set_name)) + locator_name_set = 1; + else if (unformat (line_input, "disable")) + is_add = 0; + else + { + error = clib_error_return (0, "parse error"); + goto done; + } + } + + if (!locator_name_set) + { + clib_warning ("No locator set specified!"); + goto done; + } + + rv = vnet_lisp_nsh_set_locator_set (locator_set_name, is_add); + if (0 != rv) + { + error = clib_error_return (0, "failed to %s NSH mapping!", + is_add ? "add" : "delete"); + } + +done: + vec_free (locator_set_name); + unformat_free (line_input); + return error; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (one_nsh_set_locator_set_command) = { + .path = "one nsh-mapping", + .short_help = "one nsh-mapping [del] ls ", + .function = lisp_nsh_set_locator_set_command_fn, +}; +/* *INDENT-ON* */ + static clib_error_t * lisp_pitr_set_locator_set_command_fn (vlib_main_t * vm, @@ -923,7 +979,7 @@ lisp_show_eid_table_command_fn (vlib_main_t * vm, /* *INDENT-OFF* */ pool_foreach (mapit, lcm->mapping_pool, ({ - if (mapit->pitr_set) + if (mapit->pitr_set || mapit->nsh_set) continue; locator_set_t * ls = pool_elt_at_index (lcm->locator_set_pool, diff --git a/src/vnet/lisp-gpe/interface.c b/src/vnet/lisp-gpe/interface.c index 94703abc..52b939f0 100644 --- a/src/vnet/lisp-gpe/interface.c +++ b/src/vnet/lisp-gpe/interface.c @@ -742,7 +742,7 @@ lisp_gpe_del_l2_iface (lisp_gpe_main_t * lgm, u32 vni, u32 bd_id) * @return sw_if_index. 
*/ u32 -lisp_gpe_add_nsh_iface (lisp_gpe_main_t * lgm) +vnet_lisp_gpe_add_nsh_iface (lisp_gpe_main_t * lgm) { vnet_main_t *vnm = lgm->vnet_main; tunnel_lookup_t *nsh_ifaces = &lgm->nsh_ifaces; @@ -782,7 +782,7 @@ lisp_gpe_add_nsh_iface (lisp_gpe_main_t * lgm) * */ void -lisp_gpe_del_nsh_iface (lisp_gpe_main_t * lgm) +vnet_lisp_gpe_del_nsh_iface (lisp_gpe_main_t * lgm) { tunnel_lookup_t *nsh_ifaces = &lgm->nsh_ifaces; uword *hip; @@ -851,7 +851,7 @@ lisp_gpe_add_del_iface_command_fn (vlib_main_t * vm, unformat_input_t * input, { if (is_add) { - if (~0 == lisp_gpe_add_nsh_iface (&lisp_gpe_main)) + if (~0 == vnet_lisp_gpe_add_nsh_iface (&lisp_gpe_main)) { error = clib_error_return (0, "NSH interface not created"); goto done; @@ -859,7 +859,7 @@ lisp_gpe_add_del_iface_command_fn (vlib_main_t * vm, unformat_input_t * input, } else { - lisp_gpe_del_nsh_iface (&lisp_gpe_main); + vnet_lisp_gpe_del_nsh_iface (&lisp_gpe_main); } goto done; } diff --git a/src/vnet/lisp-gpe/lisp_gpe.h b/src/vnet/lisp-gpe/lisp_gpe.h index 660f8a66..5eafdd55 100644 --- a/src/vnet/lisp-gpe/lisp_gpe.h +++ b/src/vnet/lisp-gpe/lisp_gpe.h @@ -324,6 +324,9 @@ vnet_api_error_t vnet_lisp_stats_enable_disable (u8 enable); lisp_api_stats_t *vnet_lisp_get_stats (void); int vnet_lisp_flush_stats (void); int vnet_gpe_add_del_native_fwd_rpath (vnet_gpe_native_fwd_rpath_args_t * a); +u32 vnet_lisp_gpe_add_nsh_iface (lisp_gpe_main_t * lgm); +void vnet_lisp_gpe_del_nsh_iface (lisp_gpe_main_t * lgm); + #endif /* included_vnet_lisp_gpe_h */ /* diff --git a/src/vpp/api/test_client.c b/src/vpp/api/test_client.c index 231b3c18..844b9702 100644 --- a/src/vpp/api/test_client.c +++ b/src/vpp/api/test_client.c @@ -245,6 +245,7 @@ static void vl_api_ip_neighbor_add_del_reply_t_handler fformat (stdout, "ip neighbor add del reply %d\n", ntohl (mp->retval)); } +#if 0 static void vl_api_vnet_interface_counters_t_handler (vl_api_vnet_interface_counters_t * mp) @@ -332,6 +333,7 @@ vl_api_vnet_interface_counters_t_handler (vl_api_vnet_interface_counters_t * } } } +#endif /* Format an IP4 address. */ u8 * @@ -578,7 +580,6 @@ _(WANT_STATS_REPLY, want_stats_reply) \ _(WANT_OAM_EVENTS_REPLY, want_oam_events_reply) \ _(OAM_EVENT, oam_event) \ _(OAM_ADD_DEL_REPLY, oam_add_del_reply) \ -_(VNET_INTERFACE_COUNTERS, vnet_interface_counters) \ _(VNET_IP4_FIB_COUNTERS, vnet_ip4_fib_counters) \ _(VNET_IP6_FIB_COUNTERS, vnet_ip6_fib_counters) \ _(IP_ADD_DEL_ROUTE_REPLY, ip_add_del_route_reply) \ -- cgit 1.2.3-korg From 04ffd0ad83b2d87edb669a9d76eee85f5c589564 Mon Sep 17 00:00:00 2001 From: Hongjun Ni Date: Fri, 23 Jun 2017 00:18:40 +0800 Subject: VPP crash on creating vxlan gpe interface. 
VPP-875 Change-Id: I6b19634ecb03860a7624d9408e09b52e95f47aef Signed-off-by: Hongjun Ni --- src/vat/api_format.c | 108 +++++++++++++++++++++++++++++++++---- src/vnet/vxlan-gpe/vxlan_gpe.api | 1 - src/vnet/vxlan-gpe/vxlan_gpe.c | 3 ++ src/vnet/vxlan-gpe/vxlan_gpe_api.c | 13 +++-- src/vpp/api/custom_dump.c | 12 ++++- 5 files changed, 118 insertions(+), 19 deletions(-) (limited to 'src/vpp/api') diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 0a38353c..937d7c5d 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -1795,6 +1795,40 @@ static void vl_api_vxlan_add_del_tunnel_reply_t_handler_json vam->result_ready = 1; } +static void vl_api_vxlan_gpe_add_del_tunnel_reply_t_handler + (vl_api_vxlan_gpe_add_del_tunnel_reply_t * mp) +{ + vat_main_t *vam = &vat_main; + i32 retval = ntohl (mp->retval); + if (vam->async_mode) + { + vam->async_errors += (retval < 0); + } + else + { + vam->retval = retval; + vam->sw_if_index = ntohl (mp->sw_if_index); + vam->result_ready = 1; + } +} + +static void vl_api_vxlan_gpe_add_del_tunnel_reply_t_handler_json + (vl_api_vxlan_gpe_add_del_tunnel_reply_t * mp) +{ + vat_main_t *vam = &vat_main; + vat_json_node_t node; + + vat_json_init_object (&node); + vat_json_object_add_int (&node, "retval", ntohl (mp->retval)); + vat_json_object_add_uint (&node, "sw_if_index", ntohl (mp->sw_if_index)); + + vat_json_print (vam->ofp, &node); + vat_json_free (&node); + + vam->retval = ntohl (mp->retval); + vam->result_ready = 1; +} + static void vl_api_gre_add_del_tunnel_reply_t_handler (vl_api_gre_add_del_tunnel_reply_t * mp) { @@ -4480,6 +4514,7 @@ _(sw_interface_set_table_reply) \ _(sw_interface_set_mpls_enable_reply) \ _(sw_interface_set_vpath_reply) \ _(sw_interface_set_vxlan_bypass_reply) \ +_(sw_interface_set_vxlan_gpe_bypass_reply) \ _(sw_interface_set_l2_bridge_reply) \ _(bridge_domain_add_del_reply) \ _(sw_interface_set_l2_xconnect_reply) \ @@ -4578,7 +4613,6 @@ _(gpe_enable_disable_reply) \ _(gpe_set_encap_mode_reply) \ _(gpe_add_del_iface_reply) \ _(gpe_add_del_native_fwd_rpath_reply) \ -_(vxlan_gpe_add_del_tunnel_reply) \ _(af_packet_delete_reply) \ _(policer_classify_set_interface_reply) \ _(netmap_create_reply) \ @@ -4651,6 +4685,7 @@ _(SW_INTERFACE_SET_TABLE_REPLY, sw_interface_set_table_reply) \ _(SW_INTERFACE_SET_MPLS_ENABLE_REPLY, sw_interface_set_mpls_enable_reply) \ _(SW_INTERFACE_SET_VPATH_REPLY, sw_interface_set_vpath_reply) \ _(SW_INTERFACE_SET_VXLAN_BYPASS_REPLY, sw_interface_set_vxlan_bypass_reply) \ +_(SW_INTERFACE_SET_VXLAN_GPE_BYPASS_REPLY, sw_interface_set_vxlan_gpe_bypass_reply) \ _(SW_INTERFACE_SET_L2_XCONNECT_REPLY, \ sw_interface_set_l2_xconnect_reply) \ _(SW_INTERFACE_SET_L2_BRIDGE_REPLY, \ @@ -6039,6 +6074,7 @@ api_sw_interface_set_vxlan_bypass (vat_main_t * vam) return ret; } + static int api_sw_interface_set_l2_xconnect (vat_main_t * vam) { @@ -12002,6 +12038,8 @@ api_vxlan_gpe_add_del_tunnel (vat_main_t * vam) u8 ipv4_set = 0, ipv6_set = 0; u8 local_set = 0; u8 remote_set = 0; + u8 grp_set = 0; + u32 mcast_sw_if_index = ~0; u32 encap_vrf_id = 0; u32 decap_vrf_id = 0; u8 protocol = ~0; @@ -12009,6 +12047,12 @@ api_vxlan_gpe_add_del_tunnel (vat_main_t * vam) u8 vni_set = 0; int ret; + /* Can't "universally zero init" (={0}) due to GCC bug 53119 */ + memset (&local4, 0, sizeof local4); + memset (&remote4, 0, sizeof remote4); + memset (&local6, 0, sizeof local6); + memset (&remote6, 0, sizeof remote6); + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { if (unformat (line_input, "del")) @@ -12037,6 
+12081,35 @@ api_vxlan_gpe_add_del_tunnel (vat_main_t * vam) remote_set = 1; ipv6_set = 1; } + else if (unformat (line_input, "group %U %U", + unformat_ip4_address, &remote4, + api_unformat_sw_if_index, vam, &mcast_sw_if_index)) + { + grp_set = remote_set = 1; + ipv4_set = 1; + } + else if (unformat (line_input, "group %U", + unformat_ip4_address, &remote4)) + { + grp_set = remote_set = 1; + ipv4_set = 1; + } + else if (unformat (line_input, "group %U %U", + unformat_ip6_address, &remote6, + api_unformat_sw_if_index, vam, &mcast_sw_if_index)) + { + grp_set = remote_set = 1; + ipv6_set = 1; + } + else if (unformat (line_input, "group %U", + unformat_ip6_address, &remote6)) + { + grp_set = remote_set = 1; + ipv6_set = 1; + } + else + if (unformat (line_input, "mcast_sw_if_index %u", &mcast_sw_if_index)) + ; else if (unformat (line_input, "encap-vrf-id %d", &encap_vrf_id)) ; else if (unformat (line_input, "decap-vrf-id %d", &decap_vrf_id)) @@ -12068,6 +12141,11 @@ api_vxlan_gpe_add_del_tunnel (vat_main_t * vam) errmsg ("tunnel remote address not specified"); return -99; } + if (grp_set && mcast_sw_if_index == ~0) + { + errmsg ("tunnel nonexistent multicast device"); + return -99; + } if (ipv4_set && ipv6_set) { errmsg ("both IPv4 and IPv6 addresses specified"); @@ -12094,6 +12172,7 @@ api_vxlan_gpe_add_del_tunnel (vat_main_t * vam) clib_memcpy (&mp->remote, &remote4, sizeof (remote4)); } + mp->mcast_sw_if_index = ntohl (mcast_sw_if_index); mp->encap_vrf_id = ntohl (encap_vrf_id); mp->decap_vrf_id = ntohl (decap_vrf_id); mp->protocol = protocol; @@ -12110,16 +12189,19 @@ static void vl_api_vxlan_gpe_tunnel_details_t_handler (vl_api_vxlan_gpe_tunnel_details_t * mp) { vat_main_t *vam = &vat_main; + ip46_address_t local = to_ip46 (mp->is_ipv6, mp->local); + ip46_address_t remote = to_ip46 (mp->is_ipv6, mp->remote); - print (vam->ofp, "%11d%24U%24U%13d%12d%14d%14d", + print (vam->ofp, "%11d%24U%24U%13d%12d%19d%14d%14d", ntohl (mp->sw_if_index), - format_ip46_address, &(mp->local[0]), - format_ip46_address, &(mp->remote[0]), - ntohl (mp->vni), - ntohl (mp->protocol), + format_ip46_address, &local, IP46_TYPE_ANY, + format_ip46_address, &remote, IP46_TYPE_ANY, + ntohl (mp->vni), mp->protocol, + ntohl (mp->mcast_sw_if_index), ntohl (mp->encap_vrf_id), ntohl (mp->decap_vrf_id)); } + static void vl_api_vxlan_gpe_tunnel_details_t_handler_json (vl_api_vxlan_gpe_tunnel_details_t * mp) { @@ -12153,6 +12235,8 @@ static void vl_api_vxlan_gpe_tunnel_details_t_handler_json } vat_json_object_add_uint (node, "vni", ntohl (mp->vni)); vat_json_object_add_uint (node, "protocol", ntohl (mp->protocol)); + vat_json_object_add_uint (node, "mcast_sw_if_index", + ntohl (mp->mcast_sw_if_index)); vat_json_object_add_uint (node, "encap_vrf_id", ntohl (mp->encap_vrf_id)); vat_json_object_add_uint (node, "decap_vrf_id", ntohl (mp->decap_vrf_id)); vat_json_object_add_uint (node, "is_ipv6", mp->is_ipv6 ? 
1 : 0); @@ -12184,9 +12268,9 @@ api_vxlan_gpe_tunnel_dump (vat_main_t * vam) if (!vam->json_output) { - print (vam->ofp, "%11s%24s%24s%13s%15s%14s%14s", + print (vam->ofp, "%11s%24s%24s%13s%15s%19s%14s%14s", "sw_if_index", "local", "remote", "vni", - "protocol", "encap_vrf_id", "decap_vrf_id"); + "protocol", "mcast_sw_if_index", "encap_vrf_id", "decap_vrf_id"); } /* Get list of vxlan-tunnel interfaces */ @@ -12204,6 +12288,7 @@ api_vxlan_gpe_tunnel_dump (vat_main_t * vam) return ret; } + u8 * format_l2_fib_mac_address (u8 * s, va_list * args) { @@ -19673,9 +19758,10 @@ _(delete_vhost_user_if, " | sw_if_index ") \ _(sw_interface_vhost_user_dump, "") \ _(show_version, "") \ _(vxlan_gpe_add_del_tunnel, \ - "local remote vni \n" \ - "[encap-vrf-id ] [decap-vrf-id ] [next-ip4][next-ip6]" \ - "[next-ethernet] [next-nsh]\n") \ + "local remote | group \n" \ + "{ | mcast_sw_if_index } }\n" \ + "vni [encap-vrf-id ] [decap-vrf-id ]\n" \ + "[next-ip4][next-ip6][next-ethernet] [next-nsh] [del]\n") \ _(vxlan_gpe_tunnel_dump, "[ | sw_if_index ]") \ _(l2_fib_table_dump, "bd_id ") \ _(interface_name_renumber, \ diff --git a/src/vnet/vxlan-gpe/vxlan_gpe.api b/src/vnet/vxlan-gpe/vxlan_gpe.api index 41b10316..04082d69 100644 --- a/src/vnet/vxlan-gpe/vxlan_gpe.api +++ b/src/vnet/vxlan-gpe/vxlan_gpe.api @@ -53,7 +53,6 @@ define vxlan_gpe_tunnel_details u32 mcast_sw_if_index; u32 encap_vrf_id; u32 decap_vrf_id; - u8 is_ipv6; }; diff --git a/src/vnet/vxlan-gpe/vxlan_gpe.c b/src/vnet/vxlan-gpe/vxlan_gpe.c index 3a92c88f..fab887ad 100644 --- a/src/vnet/vxlan-gpe/vxlan_gpe.c +++ b/src/vnet/vxlan-gpe/vxlan_gpe.c @@ -83,6 +83,9 @@ u8 * format_vxlan_gpe_tunnel (u8 * s, va_list * args) s = format (s, "next-protocol unknown %d", t->protocol); } + if (ip46_address_is_multicast (&t->remote)) + s = format (s, "mcast_sw_if_index %d ", t->mcast_sw_if_index); + s = format (s, " fibs: (encap %d, decap %d)", t->encap_fib_index, t->decap_fib_index); diff --git a/src/vnet/vxlan-gpe/vxlan_gpe_api.c b/src/vnet/vxlan-gpe/vxlan_gpe_api.c index 3675fc55..8e268418 100644 --- a/src/vnet/vxlan-gpe/vxlan_gpe_api.c +++ b/src/vnet/vxlan-gpe/vxlan_gpe_api.c @@ -47,7 +47,7 @@ #define foreach_vpe_api_msg \ _(SW_INTERFACE_SET_VXLAN_GPE_BYPASS, sw_interface_set_vxlan_gpe_bypass) \ _(VXLAN_GPE_ADD_DEL_TUNNEL, vxlan_gpe_add_del_tunnel) \ -_(VXLAN_GPE_TUNNEL_DUMP, vxlan_gpe_tunnel_dump) \ +_(VXLAN_GPE_TUNNEL_DUMP, vxlan_gpe_tunnel_dump) static void vl_api_sw_interface_set_vxlan_gpe_bypass_t_handler @@ -156,15 +156,15 @@ static void send_vxlan_gpe_tunnel_details rmp->_vl_msg_id = ntohs (VL_API_VXLAN_GPE_TUNNEL_DETAILS); if (is_ipv6) { - memcpy (rmp->local, &(t->local.ip6), 16); - memcpy (rmp->remote, &(t->remote.ip6), 16); + memcpy (rmp->local, &(t->local.ip6.as_u8), 16); + memcpy (rmp->remote, &(t->remote.ip6.as_u8), 16); rmp->encap_vrf_id = htonl (im6->fibs[t->encap_fib_index].ft_table_id); rmp->decap_vrf_id = htonl (im6->fibs[t->decap_fib_index].ft_table_id); } else { - memcpy (rmp->local, &(t->local.ip4), 4); - memcpy (rmp->remote, &(t->remote.ip4), 4); + memcpy (rmp->local, &(t->local.ip4.as_u8), 4); + memcpy (rmp->remote, &(t->remote.ip4.as_u8), 4); rmp->encap_vrf_id = htonl (im4->fibs[t->encap_fib_index].ft_table_id); rmp->decap_vrf_id = htonl (im4->fibs[t->decap_fib_index].ft_table_id); } @@ -250,6 +250,9 @@ vxlan_gpe_api_hookup (vlib_main_t * vm) foreach_vpe_api_msg; #undef _ + am->api_trace_cfg[VL_API_VXLAN_GPE_ADD_DEL_TUNNEL].size += + 17 * sizeof (u32); + /* * Set up the (msg_name, crc, message-id) table */ diff --git 
a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c index dc9fee9a..3ac8874e 100644 --- a/src/vpp/api/custom_dump.c +++ b/src/vpp/api/custom_dump.c @@ -1703,10 +1703,18 @@ static void *vl_api_vxlan_gpe_add_del_tunnel_t_print s = format (0, "SCRIPT: vxlan_gpe_add_del_tunnel "); - s = format (s, "local %U ", format_ip46_address, &mp->local, mp->is_ipv6); + ip46_address_t local = to_ip46 (mp->is_ipv6, mp->local); + ip46_address_t remote = to_ip46 (mp->is_ipv6, mp->remote); - s = format (s, "remote %U ", format_ip46_address, &mp->remote, mp->is_ipv6); + u8 is_grp = ip46_address_is_multicast (&remote); + char *remote_name = is_grp ? "group" : "remote"; + s = format (s, "local %U ", format_ip46_address, &local, IP46_TYPE_ANY); + s = format (s, "%s %U ", remote_name, format_ip46_address, + &remote, IP46_TYPE_ANY); + + if (is_grp) + s = format (s, "mcast_sw_if_index %d ", ntohl (mp->mcast_sw_if_index)); s = format (s, "protocol %d ", ntohl (mp->protocol)); s = format (s, "vni %d ", ntohl (mp->vni)); -- cgit 1.2.3-korg From 99a0e60eb6f6acd7eabd5a4cb7ded1e0419ccd54 Mon Sep 17 00:00:00 2001 From: Steve Shin Date: Sat, 1 Jul 2017 04:16:20 +0000 Subject: Add API support for LLDP config/interface set Add API methods to configure LLDP and set interface to enable/disable. Also add port description TLV for LLDP. Change-Id: Ib959d488c2ab8a0069f143558871f41fcc43a5d3 Signed-off-by: Steve Shin --- src/vat/api_format.c | 95 ++++++++++++++++++++++++++++- src/vnet.am | 9 ++- src/vnet/lldp/lldp.api | 47 +++++++++++++++ src/vnet/lldp/lldp.h | 32 ++++++++++ src/vnet/lldp/lldp_api.c | 144 ++++++++++++++++++++++++++++++++++++++++++++ src/vnet/lldp/lldp_cli.c | 86 +++++++++++++++----------- src/vnet/lldp/lldp_doc.md | 10 +-- src/vnet/lldp/lldp_node.h | 3 + src/vnet/lldp/lldp_output.c | 20 +++++- src/vnet/vnet_all_api_h.h | 1 + src/vpp/api/vpe.api | 1 + 11 files changed, 401 insertions(+), 47 deletions(-) create mode 100644 src/vnet/lldp/lldp.api create mode 100644 src/vnet/lldp/lldp.h create mode 100644 src/vnet/lldp/lldp_api.c (limited to 'src/vpp/api') diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 937d7c5d..40eca8c5 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -4633,7 +4633,9 @@ _(feature_enable_disable_reply) \ _(sw_interface_tag_add_del_reply) \ _(sw_interface_set_mtu_reply) \ _(p2p_ethernet_add_reply) \ -_(p2p_ethernet_del_reply) +_(p2p_ethernet_del_reply) \ +_(lldp_config_reply) \ +_(sw_interface_set_lldp_reply) #define _(n) \ static void vl_api_##n##_t_handler \ @@ -4915,7 +4917,9 @@ _(SW_INTERFACE_SET_MTU_REPLY, sw_interface_set_mtu_reply) \ _(IP_NEIGHBOR_DETAILS, ip_neighbor_details) \ _(SW_INTERFACE_GET_TABLE_REPLY, sw_interface_get_table_reply) \ _(P2P_ETHERNET_ADD_REPLY, p2p_ethernet_add_reply) \ -_(P2P_ETHERNET_DEL_REPLY, p2p_ethernet_del_reply) +_(P2P_ETHERNET_DEL_REPLY, p2p_ethernet_del_reply) \ +_(LLDP_CONFIG_REPLY, lldp_config_reply) \ +_(SW_INTERFACE_SET_LLDP_REPLY, sw_interface_set_lldp_reply) #define foreach_standalone_reply_msg \ _(SW_INTERFACE_SET_FLAGS, sw_interface_set_flags) \ @@ -19232,6 +19236,88 @@ api_p2p_ethernet_del (vat_main_t * vam) return ret; } +static int +api_lldp_config (vat_main_t * vam) +{ + unformat_input_t *i = vam->input; + vl_api_lldp_config_t *mp; + int tx_hold = 0; + int tx_interval = 0; + u8 *sys_name = NULL; + int ret; + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "system-name %s", &sys_name)) + ; + else if (unformat (i, "tx-hold %d", &tx_hold)) + ; + else if (unformat (i, "tx-interval %d", 
&tx_interval)) + ; + else + { + clib_warning ("parse error '%U'", format_unformat_error, i); + return -99; + } + } + + vec_add1 (sys_name, 0); + + M (LLDP_CONFIG, mp); + mp->tx_hold = htonl (tx_hold); + mp->tx_interval = htonl (tx_interval); + clib_memcpy (mp->system_name, sys_name, vec_len (sys_name)); + vec_free (sys_name); + + S (mp); + W (ret); + return ret; +} + +static int +api_sw_interface_set_lldp (vat_main_t * vam) +{ + unformat_input_t *i = vam->input; + vl_api_sw_interface_set_lldp_t *mp; + u32 sw_if_index = ~0; + u32 enable = 1; + u8 *port_desc = NULL; + int ret; + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "disable")) + enable = 0; + else + if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index)) + ; + else if (unformat (i, "sw_if_index %d", &sw_if_index)) + ; + else if (unformat (i, "port-desc %s", &port_desc)) + ; + else + break; + } + + if (sw_if_index == ~0) + { + errmsg ("missing interface name or sw_if_index"); + return -99; + } + + /* Construct the API message */ + vec_add1 (port_desc, 0); + M (SW_INTERFACE_SET_LLDP, mp); + mp->sw_if_index = ntohl (sw_if_index); + mp->enable = enable; + clib_memcpy (mp->port_desc, port_desc, vec_len (port_desc)); + vec_free (port_desc); + + S (mp); + W (ret); + return ret; +} + static int q_or_quit (vat_main_t * vam) { @@ -19994,7 +20080,10 @@ _(sw_interface_set_mtu, " | sw_if_index mtu ") \ _(ip_neighbor_dump, "[ip6] | sw_if_index ") \ _(sw_interface_get_table, " | sw_if_index [ipv6]") \ _(p2p_ethernet_add, " | sw_if_index remote_mac ") \ -_(p2p_ethernet_del, " | sw_if_index remote_mac ") +_(p2p_ethernet_del, " | sw_if_index remote_mac ") \ +_(lldp_config, "system-name tx-hold tx-interval ") \ +_(sw_interface_set_lldp, \ + " | sw_if_index [port-desc ] [disable]") /* List of command functions, CLI names map directly to functions */ #define foreach_cli_function \ diff --git a/src/vnet.am b/src/vnet.am index b5ce6d5a..ebcf0a0a 100644 --- a/src/vnet.am +++ b/src/vnet.am @@ -269,10 +269,15 @@ libvnet_la_SOURCES += \ vnet/lldp/lldp_input.c \ vnet/lldp/lldp_node.c \ vnet/lldp/lldp_output.c \ - vnet/lldp/lldp_cli.c + vnet/lldp/lldp_cli.c \ + vnet/lldp/lldp_api.c nobase_include_HEADERS += \ - vnet/lldp/lldp_protocol.h + vnet/lldp/lldp_protocol.h \ + vnet/lldp/lldp.h \ + vnet/lldp/lldp.api.h + +API_FILES += vnet/lldp/lldp.api ######################################## # Layer 2/3 "classify" diff --git a/src/vnet/lldp/lldp.api b/src/vnet/lldp/lldp.api new file mode 100644 index 00000000..02fe32ca --- /dev/null +++ b/src/vnet/lldp/lldp.api @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** \brief configure global parameter for LLDP + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param system_name - VPP system name + @param tx_hold - multiplier for tx_interval when setting time-to-live (TTL) + value in the LLDP packets + @param tx_interval - time interval, in seconds, between each LLDP frames +*/ +autoreply define lldp_config +{ + u32 client_index; + u32 context; + u8 system_name[256]; + u32 tx_hold; + u32 tx_interval; +}; + +/** \brief Interface set LLDP request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - interface for which to enable/disable LLDP + @param port_desc - local port description + @param enable - if non-zero enable, else disable +*/ +autoreply define sw_interface_set_lldp +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u8 port_desc[256]; + u8 enable; +}; diff --git a/src/vnet/lldp/lldp.h b/src/vnet/lldp/lldp.h new file mode 100644 index 00000000..473c2021 --- /dev/null +++ b/src/vnet/lldp/lldp.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @file + * @brief LLDP external definition + */ +#ifndef __included_lldp_h__ +#define __included_lldp_h__ + +typedef enum lldp_cfg_err +{ + lldp_ok, + lldp_not_supported, + lldp_invalid_arg, +} lldp_cfg_err_t; + +lldp_cfg_err_t lldp_cfg_intf_set (u32 hw_if_index, u8 ** port_desc, int enable); +lldp_cfg_err_t lldp_cfg_set (u8 ** host, int hold_time, int tx_interval); + +#endif /* __included_lldp_h__ */ diff --git a/src/vnet/lldp/lldp_api.c b/src/vnet/lldp/lldp_api.c new file mode 100644 index 00000000..bdada897 --- /dev/null +++ b/src/vnet/lldp/lldp_api.c @@ -0,0 +1,144 @@ +/* + *------------------------------------------------------------------ + * lldp_api.c - lldp api + * + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include +#include + +#include +#include +#include + +#include + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) 
vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include +#undef vl_printfun + +#include + +#define foreach_vpe_api_msg \ +_(LLDP_CONFIG, lldp_config) \ +_(SW_INTERFACE_SET_LLDP, sw_interface_set_lldp) + +static void +vl_api_lldp_config_t_handler (vl_api_lldp_config_t * mp) +{ + vl_api_lldp_config_reply_t *rmp; + int rv = 0; + u8 *sys_name = 0; + + vec_validate (sys_name, strlen ((char *) mp->system_name) - 1); + strncpy ((char *) sys_name, (char *) mp->system_name, vec_len (sys_name)); + + if (lldp_cfg_set (&sys_name, ntohl (mp->tx_hold), + ntohl (mp->tx_interval)) != lldp_ok) + { + vec_free (sys_name); + rv = VNET_API_ERROR_INVALID_VALUE; + } + + REPLY_MACRO (VL_API_LLDP_CONFIG_REPLY); +} + +static void +vl_api_sw_interface_set_lldp_t_handler (vl_api_sw_interface_set_lldp_t * mp) +{ + vl_api_sw_interface_set_lldp_reply_t *rmp; + int rv = 0; + u8 *port_desc = 0; + + vec_validate (port_desc, strlen ((char *) mp->port_desc) - 1); + strncpy ((char *) port_desc, (char *) mp->port_desc, vec_len (port_desc)); + + VALIDATE_SW_IF_INDEX (mp); + + if (lldp_cfg_intf_set (ntohl (mp->sw_if_index), &port_desc, + mp->enable) != lldp_ok) + { + vec_free (port_desc); + rv = VNET_API_ERROR_INVALID_VALUE; + } + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_SW_INTERFACE_SET_LLDP_REPLY); +} + + +/* + * * lldp_api_hookup + * * Add vpe's API message handlers to the table. + * * vlib has alread mapped shared memory and + * * added the client registration handlers. + * * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process() + * */ +#define vl_msg_name_crc_list +#include +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (api_main_t * am) +{ +#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id); + foreach_vl_msg_name_crc_lldp; +#undef _ +} + +static clib_error_t * +lldp_api_hookup (vlib_main_t * vm) +{ + api_main_t *am = &api_main; + +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_vpe_api_msg; +#undef _ + + /* + * * Set up the (msg_name, crc, message-id) table + * */ + setup_message_id_table (am); + + return 0; +} + +VLIB_API_INIT_FUNCTION (lldp_api_hookup); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/lldp/lldp_cli.c b/src/vnet/lldp/lldp_cli.c index d6d84bfd..af18f90f 100644 --- a/src/vnet/lldp/lldp_cli.c +++ b/src/vnet/lldp/lldp_cli.c @@ -19,19 +19,13 @@ * */ #include +#include #include #ifndef ETHER_ADDR_LEN #include #endif -typedef enum lldp_cfg_err -{ - lldp_ok, - lldp_not_supported, - lldp_invalid_arg, -} lldp_cfg_err_t; - static clib_error_t * lldp_cfg_err_to_clib_err (lldp_cfg_err_t e) { @@ -48,8 +42,8 @@ lldp_cfg_err_to_clib_err (lldp_cfg_err_t e) return 0; } -static lldp_cfg_err_t -lldp_cfg_intf_set (u32 hw_if_index, int enable) +lldp_cfg_err_t +lldp_cfg_intf_set (u32 hw_if_index, u8 ** port_desc, int enable) { lldp_main_t *lm = &lldp_main; vnet_main_t *vnm = lm->vnet_main; @@ -68,9 +62,16 @@ lldp_cfg_intf_set (u32 hw_if_index, int enable) if (n) { /* already enabled */ - return 0; + return lldp_ok; } n = lldp_create_intf (lm, hw_if_index); + + if (port_desc && *port_desc) + { + n->port_desc = *port_desc; + *port_desc = NULL; + } + const vnet_sw_interface_t *sw = vnet_get_sw_interface (lm->vnet_main, hi->sw_if_index); if (sw->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) @@ -84,7 +85,7 @@ lldp_cfg_intf_set 
(u32 hw_if_index, int enable) lldp_delete_intf (lm, n); } - return 0; + return lldp_ok; } static clib_error_t * @@ -93,24 +94,33 @@ lldp_intf_cmd (vlib_main_t * vm, unformat_input_t * input, { lldp_main_t *lm = &lldp_main; vnet_main_t *vnm = lm->vnet_main; - u32 hw_if_index; - int enable = 0; + u32 sw_if_index = (u32) ~ 0; + int enable = 1; + u8 *port_desc = NULL; - if (unformat (input, "%U %U", unformat_vnet_hw_interface, vnm, &hw_if_index, - unformat_vlib_enable_disable, &enable)) - { - return - lldp_cfg_err_to_clib_err (lldp_cfg_intf_set (hw_if_index, enable)); - } - else + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + if (unformat (input, "sw_if_index %d", &sw_if_index)) + ; + if (unformat + (input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index)) + ; + else if (unformat (input, "disable")) + enable = 0; + else if (unformat (input, "port-desc %s", &port_desc)) + ; + else + break; } - return 0; + + if (sw_if_index == (u32) ~ 0) + return clib_error_return (0, "Interface name is invalid!"); + + return lldp_cfg_err_to_clib_err (lldp_cfg_intf_set (sw_if_index, + &port_desc, enable)); } -static lldp_cfg_err_t +lldp_cfg_err_t lldp_cfg_set (u8 ** host, int hold_time, int tx_interval) { lldp_main_t *lm = &lldp_main; @@ -208,7 +218,8 @@ out: /* *INDENT-OFF* */ VLIB_CLI_COMMAND(set_interface_lldp_cmd, static) = { .path = "set interface lldp", - .short_help = "set interface lldp (enable | disable) ", + .short_help = "set interface lldp | sw_if_index " + " [port-desc ] [disable]", .function = lldp_intf_cmd, }; @@ -499,23 +510,26 @@ format_lldp_intfs_detail (u8 * s, vlib_main_t * vm, const lldp_main_t * lm) else if (now < n->last_heard + n->ttl) { s = format(s, - "\nInterface name: %s\nInterface/peer state: " - "active\nPeer chassis ID: %U\nRemote port ID: %U\nLast " - "packet sent: %U\nLast packet received: %U\n", - hw->name, format_lldp_chassis_id, n->chassis_id_subtype, - n->chassis_id, vec_len(n->chassis_id), 1, + "\nInterface name: %s\nPort Desc: %s\nInterface/peer " + "state: active\nPeer chassis ID: %U\nRemote port ID:" + " %U\nLast packet sent: %U\nLast packet received: %U\n", + hw->name, n->port_desc, format_lldp_chassis_id, + n->chassis_id_subtype, n->chassis_id, + vec_len(n->chassis_id), 1, format_lldp_port_id, n->port_id_subtype, n->port_id, vec_len(n->port_id), 1, format_time_ago, n->last_sent, now, format_time_ago, n->last_heard, now); } else { - s = format(s, "\nInterface name: %s\nInterface/peer state: " - "inactive(timeout)\nLast known peer chassis ID: " - "%U\nLast known peer port ID: %U\nLast packet sent: " - "%U\nLast packet received: %U\n", - hw->name, format_lldp_chassis_id, n->chassis_id_subtype, - n->chassis_id, vec_len(n->chassis_id), 1, + s = format(s, + "\nInterface name: %s\nPort Desc: %s\nInterface/peer " + "state: inactive(timeout)\nLast known peer chassis ID:" + "%U\nLast known peer port ID: %U\nLast packet sent: " + "%U\nLast packet received: %U\n", + hw->name, n->port_desc, format_lldp_chassis_id, + n->chassis_id_subtype, n->chassis_id, + vec_len(n->chassis_id), 1, format_lldp_port_id, n->port_id_subtype, n->port_id, vec_len(n->port_id), 1, format_time_ago, n->last_sent, now, format_time_ago, n->last_heard, now); diff --git a/src/vnet/lldp/lldp_doc.md b/src/vnet/lldp/lldp_doc.md index bac480a5..717de898 100644 --- a/src/vnet/lldp/lldp_doc.md +++ b/src/vnet/lldp/lldp_doc.md @@ -27,10 +27,12 @@ tx-interval: time interval between sending out LLDP packets Per 
interface setting is done using the "set interface lldp" command -set interface lldp (enable | disable) +set interface lldp | if_index [port-desc ] [disable] interface: the name of the interface for which to enable/disable LLDP - +if_index: sw interface index can be used if interface name is not used. +port-desc: port description +disable: LLDP feature can be enabled or disabled per interface. ### Configuration example @@ -38,9 +40,9 @@ Configure system-name as "VPP" and transmit interval to 10 seconds: set lldp system-name VPP tx-interval 10 -Enable LLDP on interface TenGigabitEthernet5/0/1 +Enable LLDP on interface TenGigabitEthernet5/0/1 with port description -set interface lldp TenGigabitEthernet5/0/1 enable +set interface lldp TenGigabitEthernet5/0/1 port-desc vtf:eth0 ### Operational data diff --git a/src/vnet/lldp/lldp_node.h b/src/vnet/lldp/lldp_node.h index 477ca7dc..14a10e33 100644 --- a/src/vnet/lldp/lldp_node.h +++ b/src/vnet/lldp/lldp_node.h @@ -43,6 +43,9 @@ typedef struct lldp_intf lldp_port_id_subtype_t port_id_subtype; lldp_chassis_id_subtype_t chassis_id_subtype; + /* Local info */ + u8 *port_desc; + } lldp_intf_t; typedef struct diff --git a/src/vnet/lldp/lldp_output.c b/src/vnet/lldp/lldp_output.c index 6cb26270..950b79ae 100644 --- a/src/vnet/lldp/lldp_output.c +++ b/src/vnet/lldp/lldp_output.c @@ -74,6 +74,20 @@ lldp_add_ttl (const lldp_main_t * lm, u8 ** t0p, int shutdown) *t0p += STRUCT_SIZE_OF (lldp_tlv_t, head) + len; } +static void +lldp_add_port_desc (const lldp_main_t * lm, lldp_intf_t * n, u8 ** t0p) +{ + const size_t len = vec_len (n->port_desc); + if (len) + { + lldp_tlv_t *t = (lldp_tlv_t *) * t0p; + lldp_tlv_set_code (t, LLDP_TLV_NAME (port_desc)); + lldp_tlv_set_length (t, len); + clib_memcpy (t->v, n->port_desc, len); + *t0p += STRUCT_SIZE_OF (lldp_tlv_t, head) + len; + } +} + static void lldp_add_sys_name (const lldp_main_t * lm, u8 ** t0p) { @@ -99,11 +113,12 @@ lldp_add_pdu_end (u8 ** t0p) static void lldp_add_tlvs (lldp_main_t * lm, vnet_hw_interface_t * hw, u8 ** t0p, - int shutdown) + int shutdown, lldp_intf_t * n) { lldp_add_chassis_id (hw, t0p); lldp_add_port_id (hw, t0p); lldp_add_ttl (lm, t0p, shutdown); + lldp_add_port_desc (lm, n, t0p); lldp_add_sys_name (lm, t0p); lldp_add_pdu_end (t0p); } @@ -139,7 +154,7 @@ lldp_send_ethernet (lldp_main_t * lm, lldp_intf_t * n, int shutdown) t0 = data; /* add TLVs */ - lldp_add_tlvs (lm, hw, &t0, shutdown); + lldp_add_tlvs (lm, hw, &t0, shutdown, n); /* Set the outbound packet length */ b0 = vlib_get_buffer (vm, bi0); @@ -167,6 +182,7 @@ lldp_delete_intf (lldp_main_t * lm, lldp_intf_t * n) hash_unset (lm->intf_by_hw_if_index, n->hw_if_index); vec_free (n->chassis_id); vec_free (n->port_id); + vec_free (n->port_desc); pool_put (lm->intfs, n); } } diff --git a/src/vnet/vnet_all_api_h.h b/src/vnet/vnet_all_api_h.h index 5da2acb7..c1eff61f 100644 --- a/src/vnet/vnet_all_api_h.h +++ b/src/vnet/vnet_all_api_h.h @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include diff --git a/src/vpp/api/vpe.api b/src/vpp/api/vpe.api index d3c7e985..2a763d19 100644 --- a/src/vpp/api/vpe.api +++ b/src/vpp/api/vpe.api @@ -26,6 +26,7 @@ * IP APIs: see .../src/vnet/ip/{ip.api, ip_api.c} * TAP APIs: see .../src/vnet/unix/{tap.api, tap_api.c} * VXLAN APIs: see .../src/vnet/vxlan/{vxlan.api, vxlan_api.c} + * LLDP APIs: see .../src/vnet/lldp/{lldp.api, lldp_api.c} * AF-PACKET APIs: see ... /vnet/devices/af_packet/{af_packet.api, af_packet_api.c} * NETMAP APIs: see ... 
/src/vnet/devices/netmap/{netmap.api, netmap_api.c} * VHOST-USER APIs: see .../vnet/devices/virtio/{vhost_user.api, vhost_user_api.c} -- cgit 1.2.3-korg From 75e2f2ac39871554c05a9a240ac26a6028ee3e99 Mon Sep 17 00:00:00 2001 From: Eyal Bari Date: Mon, 10 Jul 2017 10:12:13 +0300 Subject: API:fix arp/ND event messages - remove context context causes the message to be treated as a reply by the python API Change-Id: Icf4d051a69f5a2cb9be5879accfe030ebcd650a8 Signed-off-by: Eyal Bari --- src/vpp/api/api.c | 3 +-- src/vpp/api/vpe.api | 4 ---- 2 files changed, 1 insertion(+), 6 deletions(-) (limited to 'src/vpp/api') diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index c1dcfb03..de9a2477 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -1520,9 +1520,9 @@ vl_api_want_ip4_arp_events_t_handler (vl_api_want_ip4_arp_events_t * mp) } memset (event, 0, sizeof (*event)); + /* Python API expects events to have no context */ event->_vl_msg_id = ntohs (VL_API_IP4_ARP_EVENT); event->client_index = mp->client_index; - event->context = mp->context; event->address = mp->address; event->pid = mp->pid; if (mp->address == 0) @@ -1568,7 +1568,6 @@ vl_api_want_ip6_nd_events_t_handler (vl_api_want_ip6_nd_events_t * mp) event->_vl_msg_id = ntohs (VL_API_IP6_ND_EVENT); event->client_index = mp->client_index; - event->context = mp->context; clib_memcpy (event->address, mp->address, 16); event->pid = mp->pid; if (ip6_address_is_zero ((ip6_address_t *) mp->address)) diff --git a/src/vpp/api/vpe.api b/src/vpp/api/vpe.api index 2a763d19..ec5ffbe9 100644 --- a/src/vpp/api/vpe.api +++ b/src/vpp/api/vpe.api @@ -710,7 +710,6 @@ autoreply define want_ip4_arp_events /** \brief Tell client about an ip4 arp resolution event @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request @param address - the exact ip4 address of interest @param pid - client pid registered to receive notification @param sw_if_index - interface which received ARP packet @@ -720,7 +719,6 @@ autoreply define want_ip4_arp_events define ip4_arp_event { u32 client_index; - u32 context; u32 address; u32 pid; u32 sw_if_index; @@ -746,7 +744,6 @@ autoreply define want_ip6_nd_events /** \brief Tell client about an ip6 nd resolution or mac/ip event @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request @param pid - client pid registered to receive notification @param sw_if_index - interface which received ARP packet @param address - the exact ip6 address of interest @@ -756,7 +753,6 @@ autoreply define want_ip6_nd_events define ip6_nd_event { u32 client_index; - u32 context; u32 pid; u32 sw_if_index; u8 address[16]; -- cgit 1.2.3-korg From d756b35032cdf7fdaaf0d6611388a54d32d72e92 Mon Sep 17 00:00:00 2001 From: Dave Wallace Date: Mon, 3 Jul 2017 13:11:38 -0400 Subject: Fix unlinking of /dev/shm files. 
- api-segment prefix not used when unlinking shm files - unlink root region on exit if no clients referenced - stale reference to freed segment name - don't add fake client to /db unless CLIB_DEBUG > 2 - turn off the gmond plugin - clean up unused vars in vpp/api Change-Id: I66451fcfd6ee64a12466c2d6c209050e3cdb74b7 Signed-off-by: Dave Wallace Signed-off-by: Dave Barach --- Makefile | 9 +++++++-- build-data/platforms/vpp.mk | 2 +- src/svm/ssvm.c | 11 ++++++++++- src/svm/svm.c | 38 +++++++++++++++++++++++++++++++------- src/svm/svm_fifo_segment.c | 2 +- src/svm/svmdb.c | 15 ++++++++++----- src/vlib/buffer.h | 18 +++++++++--------- src/vpp.am | 6 +++--- src/vpp/api/api.c | 1 - src/vpp/api/api_main.c | 2 -- src/vpp/api/custom_dump.c | 8 +------- src/vpp/stats/stats.c | 1 - 12 files changed, 73 insertions(+), 40 deletions(-) (limited to 'src/vpp/api') diff --git a/Makefile b/Makefile index 0d21f335..46c51dd8 100644 --- a/Makefile +++ b/Makefile @@ -55,8 +55,10 @@ else ifeq ($(filter rhel centos fedora opensuse,$(OS_ID)),$(OS_ID)) PKG=rpm endif +# +libganglia1-dev if building the gmond plugin + DEB_DEPENDS = curl build-essential autoconf automake bison libssl-dev ccache -DEB_DEPENDS += debhelper dkms git libtool libganglia1-dev libapr1-dev dh-systemd +DEB_DEPENDS += debhelper dkms git libtool libapr1-dev dh-systemd DEB_DEPENDS += libconfuse-dev git-review exuberant-ctags cscope pkg-config DEB_DEPENDS += lcov chrpath autoconf nasm indent DEB_DEPENDS += python-all python-dev python-virtualenv python-pip libffi6 @@ -79,9 +81,12 @@ else RPM_DEPENDS += python-virtualenv RPM_DEPENDS_GROUPS = 'Development Tools' endif + +# +ganglia-devel if building the ganglia plugin + RPM_DEPENDS += chrpath libffi-devel rpm-build RPM_DEPENDS += https://kojipkgs.fedoraproject.org//packages/nasm/2.12.02/2.fc26/x86_64/nasm-2.12.02-2.fc26.x86_64.rpm -EPEL_DEPENDS = libconfuse-devel ganglia-devel epel-rpm-macros +EPEL_DEPENDS = libconfuse-devel epel-rpm-macros ifeq ($(filter rhel centos,$(OS_ID)),$(OS_ID)) EPEL_DEPENDS += lcov else diff --git a/build-data/platforms/vpp.mk b/build-data/platforms/vpp.mk index 4577fa2e..acbe0e7f 100644 --- a/build-data/platforms/vpp.mk +++ b/build-data/platforms/vpp.mk @@ -36,7 +36,7 @@ vpp_uses_dpdk = yes # Uncoment to enable building unit tests # vpp_enable_tests = yes -vpp_root_packages = vpp gmod +vpp_root_packages = vpp # DPDK configuration parameters # vpp_uses_dpdk_cryptodev_sw = yes diff --git a/src/svm/ssvm.c b/src/svm/ssvm.c index 6cda1f27..23e3cf44 100644 --- a/src/svm/ssvm.c +++ b/src/svm/ssvm.c @@ -29,6 +29,9 @@ ssvm_master_init (ssvm_private_t * ssvm, u32 master_index) if (ssvm->ssvm_size == 0) return SSVM_API_ERROR_NO_SIZE; + if (CLIB_DEBUG > 1) + clib_warning ("[%d] creating segment '%s'", getpid (), ssvm->name); + ssvm_filename = format (0, "/dev/shm/%s%c", ssvm->name, 0); unlink ((char *) ssvm_filename); @@ -176,12 +179,18 @@ ssvm_delete (ssvm_private_t * ssvm) fn = format (0, "/dev/shm/%s%c", ssvm->name, 0); + if (CLIB_DEBUG > 1) + clib_warning ("[%d] unlinking ssvm (%s) backing file '%s'", getpid (), + ssvm->name, fn); + /* Throw away the backing file */ if (unlink ((char *) fn) < 0) clib_unix_warning ("unlink segment '%s'", ssvm->name); - munmap ((void *) ssvm->requested_va, ssvm->ssvm_size); vec_free (fn); + vec_free (ssvm->name); + + munmap ((void *) ssvm->requested_va, ssvm->ssvm_size); } diff --git a/src/svm/svm.c b/src/svm/svm.c index c96135cf..600fa744 100644 --- a/src/svm/svm.c +++ b/src/svm/svm.c @@ -458,14 +458,15 @@ svm_map_region (svm_map_region_args_t * a) 
struct stat stat; struct timespec ts, tsrem; - if (CLIB_DEBUG > 1) - clib_warning ("[%d] map region %s", getpid (), a->name); - ASSERT ((a->size & ~(MMAP_PAGESIZE - 1)) == a->size); ASSERT (a->name); shm_name = shm_name_from_svm_map_region_args (a); + if (CLIB_DEBUG > 1) + clib_warning ("[%d] map region %s: shm_open (%s)", + getpid (), a->name, shm_name); + svm_fd = shm_open ((char *) shm_name, O_RDWR | O_CREAT | O_EXCL, 0777); if (svm_fd >= 0) @@ -947,6 +948,29 @@ svm_region_find_or_create (svm_map_region_args_t * a) return (rp); } +void +svm_region_unlink (svm_region_t * rp) +{ + svm_map_region_args_t _a, *a = &_a; + svm_main_region_t *mp; + u8 *shm_name; + + ASSERT (root_rp); + ASSERT (rp); + ASSERT (vec_c_string_is_terminated (rp->region_name)); + + mp = root_rp->data_base; + ASSERT (mp); + + a->root_path = (char *) mp->root_path; + a->name = rp->region_name; + shm_name = shm_name_from_svm_map_region_args (a); + if (CLIB_DEBUG > 1) + clib_warning ("[%d] shm_unlink (%s)", getpid (), shm_name); + shm_unlink ((const char *) shm_name); + vec_free (shm_name); +} + /* * svm_region_unmap * @@ -1056,7 +1080,7 @@ found: vec_free (name); region_unlock (rp); - shm_unlink (rp->region_name); + svm_region_unlink (rp); munmap ((void *) virtual_base, virtual_size); region_unlock (root_rp); svm_pop_heap (oldheap); @@ -1071,9 +1095,6 @@ found: /* * svm_region_exit - * There is no clean way to unlink the - * root region when all clients go away, - * so remove the pid entry and call it a day. */ void svm_region_exit () @@ -1116,6 +1137,9 @@ svm_region_exit () found: + if (vec_len (root_rp->client_pids) == 0) + svm_region_unlink (root_rp); + region_unlock (root_rp); svm_pop_heap (oldheap); diff --git a/src/svm/svm_fifo_segment.c b/src/svm/svm_fifo_segment.c index 69d4ecb9..c80374a7 100644 --- a/src/svm/svm_fifo_segment.c +++ b/src/svm/svm_fifo_segment.c @@ -105,7 +105,7 @@ svm_fifo_segment_create (svm_fifo_segment_create_args_t * a) s->ssvm.ssvm_size = a->segment_size; s->ssvm.i_am_master = 1; s->ssvm.my_pid = getpid (); - s->ssvm.name = (u8 *) a->segment_name; + s->ssvm.name = format (0, "%s", a->segment_name); s->ssvm.requested_va = sm->next_baseva; rv = ssvm_master_init (&s->ssvm, s - sm->segments); diff --git a/src/svm/svmdb.c b/src/svm/svmdb.c index 03dfe7c3..043b0924 100644 --- a/src/svm/svmdb.c +++ b/src/svm/svmdb.c @@ -106,11 +106,16 @@ svmdb_map (svmdb_map_args_t * dba) } /* Nope, it's our problem... */ - /* Add a bogus client (pid=0) so the svm won't be deallocated */ - oldheap = svm_push_pvt_heap (db_rp); - vec_add1 (client->db_rp->client_pids, 0); - svm_pop_heap (oldheap); - + if (CLIB_DEBUG > 2) + { + /* Add a bogus client (pid=0) so the svm won't be deallocated */ + clib_warning + ("[%d] adding fake client (pid=0) so '%s' won't be unlinked", + getpid (), db_rp->region_name); + oldheap = svm_push_pvt_heap (db_rp); + vec_add1 (client->db_rp->client_pids, 0); + svm_pop_heap (oldheap); + } oldheap = svm_push_data_heap (db_rp); vec_validate (hp, 0); diff --git a/src/vlib/buffer.h b/src/vlib/buffer.h index c810db4e..77528e77 100644 --- a/src/vlib/buffer.h +++ b/src/vlib/buffer.h @@ -87,17 +87,17 @@ typedef struct /* any change to the following line requres update of * vlib_buffer_get_free_list_index(...) and * vlib_buffer_set_free_list_index(...) 
functions */ -#define VLIB_BUFFER_FREE_LIST_INDEX_MASK ((1 << 4) - 1) +#define VLIB_BUFFER_FREE_LIST_INDEX_MASK ((1 << 5) - 1) -#define VLIB_BUFFER_IS_TRACED (1 << 4) -#define VLIB_BUFFER_LOG2_NEXT_PRESENT (5) +#define VLIB_BUFFER_IS_TRACED (1 << 5) +#define VLIB_BUFFER_LOG2_NEXT_PRESENT (6) #define VLIB_BUFFER_NEXT_PRESENT (1 << VLIB_BUFFER_LOG2_NEXT_PRESENT) -#define VLIB_BUFFER_IS_RECYCLED (1 << 6) -#define VLIB_BUFFER_TOTAL_LENGTH_VALID (1 << 7) -#define VLIB_BUFFER_REPL_FAIL (1 << 8) -#define VLIB_BUFFER_RECYCLE (1 << 9) -#define VLIB_BUFFER_FLOW_REPORT (1 << 10) -#define VLIB_BUFFER_EXT_HDR_VALID (1 << 11) +#define VLIB_BUFFER_IS_RECYCLED (1 << 7) +#define VLIB_BUFFER_TOTAL_LENGTH_VALID (1 << 8) +#define VLIB_BUFFER_REPL_FAIL (1 << 9) +#define VLIB_BUFFER_RECYCLE (1 << 10) +#define VLIB_BUFFER_FLOW_REPORT (1 << 11) +#define VLIB_BUFFER_EXT_HDR_VALID (1 << 12) /* User defined buffer flags. */ #define LOG2_VLIB_BUFFER_FLAG_USER(n) (32 - (n)) diff --git a/src/vpp.am b/src/vpp.am index 614bd26a..10a4e311 100644 --- a/src/vpp.am +++ b/src/vpp.am @@ -33,11 +33,11 @@ if WITH_APICLI vpp/api/plugin.h endif -# comment out to disable stats upload to gmond +# uncomment to enable stats upload to gmond +# bin_vpp_SOURCES += \ +# vpp/api/gmon.c bin_vpp_CFLAGS = @APICLI@ -bin_vpp_SOURCES += \ - vpp/api/gmon.c nobase_include_HEADERS += \ vpp/api/vpe_all_api_h.h \ diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index de9a2477..4e892431 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -686,7 +686,6 @@ static void BAD_SW_IF_INDEX_LABEL; -out: REPLY_MACRO (VL_API_PROXY_ARP_INTFC_ENABLE_DISABLE_REPLY); } diff --git a/src/vpp/api/api_main.c b/src/vpp/api/api_main.c index ac09cd15..c355a5fd 100644 --- a/src/vpp/api/api_main.c +++ b/src/vpp/api/api_main.c @@ -232,8 +232,6 @@ unformat_sw_if_index (unformat_input_t * input, va_list * args) u32 *result = va_arg (*args, u32 *); vnet_main_t *vnm = vnet_get_main (); u32 sw_if_index = ~0; - u8 *if_name; - uword *p; if (unformat (input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index)) { diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c index 3ac8874e..7f3a58d9 100644 --- a/src/vpp/api/custom_dump.c +++ b/src/vpp/api/custom_dump.c @@ -1174,7 +1174,6 @@ static void *vl_api_sr_policy_mod_t_print (vl_api_sr_policy_mod_t * mp, void *handle) { u8 *s; - u32 weight; ip6_address_t *segments = 0, *seg; ip6_address_t *this_address = (ip6_address_t *) mp->segments; @@ -1216,8 +1215,6 @@ static void *vl_api_sr_policy_del_t_print u8 *s; s = format (0, "SCRIPT: sr_policy_del "); - u8 bsid_addr[16]; - u32 sr_policy_index; s = format (s, "To be delivered. 
Good luck."); FINISH; } @@ -2432,7 +2429,7 @@ static void *vl_api_lisp_add_del_remote_mapping_t_print (vl_api_lisp_add_del_remote_mapping_t * mp, void *handle) { u8 *s; - u32 i, rloc_num = 0; + u32 rloc_num = 0; s = format (0, "SCRIPT: lisp_add_del_remote_mapping "); @@ -2574,7 +2571,6 @@ static void *vl_api_lisp_add_del_locator_set_t_print (vl_api_lisp_add_del_locator_set_t * mp, void *handle) { u8 *s; - u32 loc_num = 0, i; s = format (0, "SCRIPT: lisp_add_del_locator_set "); @@ -2583,8 +2579,6 @@ static void *vl_api_lisp_add_del_locator_set_t_print s = format (s, "locator-set %s ", mp->locator_set_name); - loc_num = clib_net_to_host_u32 (mp->locator_num); - FINISH; } diff --git a/src/vpp/stats/stats.c b/src/vpp/stats/stats.c index 38821da7..422b7b3b 100644 --- a/src/vpp/stats/stats.c +++ b/src/vpp/stats/stats.c @@ -577,7 +577,6 @@ do_ip4_fibs (stats_main_t * sm) ip4_route_t *r; fib_table_t *fib; ip4_fib_t *v4_fib; - ip_lookup_main_t *lm = &im4->lookup_main; static uword *results; vl_api_vnet_ip4_fib_counters_t *mp = 0; u32 items_this_message; -- cgit 1.2.3-korg From 001fd406df771f1cf73ca0dea440c8bde309e077 Mon Sep 17 00:00:00 2001 From: Eyal Bari Date: Sun, 16 Jul 2017 09:34:53 +0300 Subject: SPAN:add l2 mirror added span feature nodes for l2-input / l2-output Change-Id: Ib6e0ce60d0811901b6edd70209e6a4c4a35cd8ff Signed-off-by: Eyal Bari --- src/vat/api_format.c | 6 +- src/vnet/l2/l2_input.h | 5 +- src/vnet/l2/l2_output.c | 13 +- src/vnet/l2/l2_output.h | 8 +- src/vnet/span/node.c | 198 ++++++++++++++++++-------- src/vnet/span/span.api | 1 + src/vnet/span/span.c | 148 +++++++++++--------- src/vnet/span/span.h | 25 +++- src/vnet/span/span_api.c | 15 +- src/vpp/api/custom_dump.c | 3 + test/test_span.py | 348 +++++++++++++++++++++++++++++++++++++++++----- test/vpp_papi_provider.py | 7 +- 12 files changed, 606 insertions(+), 171 deletions(-) (limited to 'src/vpp/api') diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 40eca8c5..932e162d 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -18082,6 +18082,7 @@ api_sw_interface_span_enable_disable (vat_main_t * vam) u32 dst_sw_if_index = ~0; u8 state = 3; int ret; + u8 is_l2 = 0; while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) { @@ -18104,6 +18105,8 @@ api_sw_interface_span_enable_disable (vat_main_t * vam) state = 2; else if (unformat (i, "both")) state = 3; + else if (unformat (i, "l2")) + is_l2 = 1; else break; } @@ -18113,6 +18116,7 @@ api_sw_interface_span_enable_disable (vat_main_t * vam) mp->sw_if_index_from = htonl (src_sw_if_index); mp->sw_if_index_to = htonl (dst_sw_if_index); mp->state = state; + mp->is_l2 = is_l2; S (mp); W (ret); @@ -20044,7 +20048,7 @@ _(set_ipfix_classify_stream, "[domain ] [src_port ]") \ _(ipfix_classify_stream_dump, "") \ _(ipfix_classify_table_add_del, "table ip4|ip6 [tcp|udp]") \ _(ipfix_classify_table_dump, "") \ -_(sw_interface_span_enable_disable, "[src | src_sw_if_index ] [disable | [[dst | dst_sw_if_index ] [both|rx|tx]]]") \ +_(sw_interface_span_enable_disable, "[l2] [src | src_sw_if_index ] [disable | [[dst | dst_sw_if_index ] [both|rx|tx]]]") \ _(sw_interface_span_dump, "") \ _(get_next_index, "node-name next-node-name ") \ _(pg_create_interface, "if_id ") \ diff --git a/src/vnet/l2/l2_input.h b/src/vnet/l2/l2_input.h index 244ef445..e6b3bc7f 100644 --- a/src/vnet/l2/l2_input.h +++ b/src/vnet/l2/l2_input.h @@ -102,7 +102,7 @@ l2input_bd_config (u32 bd_index) /* L2 input features */ -/* Mappings from feature ID to graph node name */ +/* Mappings from feature ID to 
graph node name in reverse order */ #define foreach_l2input_feat \ _(DROP, "feature-bitmap-drop") \ _(XCONNECT, "l2-output") \ @@ -116,7 +116,8 @@ l2input_bd_config (u32 bd_index) _(VPATH, "vpath-input-l2") \ _(ACL, "l2-input-acl") \ _(POLICER_CLAS, "l2-policer-classify") \ - _(INPUT_CLASSIFY, "l2-input-classify") + _(INPUT_CLASSIFY, "l2-input-classify") \ + _(SPAN, "span-l2-input") /* Feature bitmap positions */ typedef enum diff --git a/src/vnet/l2/l2_output.c b/src/vnet/l2/l2_output.c index b3537a35..fbee590c 100644 --- a/src/vnet/l2/l2_output.c +++ b/src/vnet/l2/l2_output.c @@ -195,8 +195,6 @@ l2output_vtr (vlib_node_runtime_t * node, l2_output_config_t * config, } -static vlib_node_registration_t l2output_node; - static_always_inline uword l2output_node_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame, int do_trace) @@ -477,7 +475,7 @@ l2output_node_fn (vlib_main_t * vm, } /* *INDENT-OFF* */ -VLIB_REGISTER_NODE (l2output_node,static) = { +VLIB_REGISTER_NODE (l2output_node) = { .function = l2output_node_fn, .name = "l2-output", .vector_size = sizeof (u32), @@ -495,6 +493,8 @@ VLIB_REGISTER_NODE (l2output_node,static) = { [L2OUTPUT_NEXT_BAD_INTF] = "l2-output-bad-intf", }, }; + +VLIB_NODE_FUNCTION_MULTIARCH (l2output_node, l2output_node_fn); /* *INDENT-ON* */ @@ -601,11 +601,12 @@ VLIB_REGISTER_NODE (l2output_bad_intf_node,static) = { [0] = "error-drop", }, }; -/* *INDENT-ON* */ +VLIB_NODE_FUNCTION_MULTIARCH (l2output_bad_intf_node, l2output_bad_intf_node_fn); +/* *INDENT-ON* */ -VLIB_NODE_FUNCTION_MULTIARCH (l2output_node, l2output_node_fn) - clib_error_t *l2output_init (vlib_main_t * vm) +static clib_error_t * +l2output_init (vlib_main_t * vm) { l2output_main_t *mp = &l2output_main; diff --git a/src/vnet/l2/l2_output.h b/src/vnet/l2/l2_output.h index 6da3e303..a54b8d67 100644 --- a/src/vnet/l2/l2_output.h +++ b/src/vnet/l2/l2_output.h @@ -77,12 +77,14 @@ typedef struct l2output_main_t l2output_main; +extern vlib_node_registration_t l2output_node; + /* L2 output features */ -/* Mappings from feature ID to graph node name */ +/* Mappings from feature ID to graph node name in reverse order */ #define foreach_l2output_feat \ _(OUTPUT, "interface-output") \ - _(SPAN, "feature-bitmap-drop") \ + _(SPAN, "span-l2-output") \ _(CFM, "feature-bitmap-drop") \ _(QOS, "feature-bitmap-drop") \ _(ACL, "l2-output-acl") \ @@ -103,6 +105,8 @@ typedef enum L2OUTPUT_N_FEAT, } l2output_feat_t; +STATIC_ASSERT (L2OUTPUT_N_FEAT <= 32, "too many l2 output features"); + /* Feature bit masks */ typedef enum { diff --git a/src/vnet/span/node.c b/src/vnet/span/node.c index 3a461b0a..9d83d4ef 100644 --- a/src/vnet/span/node.c +++ b/src/vnet/span/node.c @@ -18,6 +18,9 @@ #include #include +#include +#include +#include #include #include @@ -59,21 +62,19 @@ static char *span_error_strings[] = { static_always_inline void span_mirror (vlib_main_t * vm, vlib_node_runtime_t * node, u32 sw_if_index0, - vlib_buffer_t * b0, vlib_frame_t ** mirror_frames, int is_rx) + vlib_buffer_t * b0, vlib_frame_t ** mirror_frames, + vlib_rx_or_tx_t rxtx, span_feat_t sf) { vlib_buffer_t *c0; span_main_t *sm = &span_main; vnet_main_t *vnm = &vnet_main; - span_interface_t *si0 = 0; u32 *to_mirror_next = 0; u32 i; - si0 = vec_elt_at_index (sm->interfaces, sw_if_index0); + span_interface_t *si0 = vec_elt_at_index (sm->interfaces, sw_if_index0); + span_mirror_t *sm0 = &si0->mirror_rxtx[sf][rxtx]; - if (is_rx != 0 && si0->num_rx_mirror_ports == 0) - return; - - if (is_rx == 0 && si0->num_tx_mirror_ports == 0) + if 
(sm0->num_mirror_ports == 0) return; /* Don't do it again */ @@ -81,10 +82,15 @@ span_mirror (vlib_main_t * vm, vlib_node_runtime_t * node, u32 sw_if_index0, return; /* *INDENT-OFF* */ - clib_bitmap_foreach (i, is_rx ? si0->rx_mirror_ports : si0->tx_mirror_ports, ( + clib_bitmap_foreach (i, sm0->mirror_ports, ( { if (mirror_frames[i] == 0) - mirror_frames[i] = vnet_get_frame_to_sw_interface (vnm, i); + { + if (sf == SPAN_FEAT_L2) + mirror_frames[i] = vlib_get_frame_to_node (vnm->vlib_main, l2output_node.index); + else + mirror_frames[i] = vnet_get_frame_to_sw_interface (vnm, i); + } to_mirror_next = vlib_frame_vector_args (mirror_frames[i]); to_mirror_next += mirror_frames[i]->n_vectors; /* This can fail */ @@ -93,6 +99,8 @@ span_mirror (vlib_main_t * vm, vlib_node_runtime_t * node, u32 sw_if_index0, { vnet_buffer (c0)->sw_if_index[VLIB_TX] = i; c0->flags |= VNET_BUFFER_F_SPAN_CLONE; + if (sf == SPAN_FEAT_L2) + vnet_buffer (c0)->l2.feature_bitmap = L2OUTPUT_FEAT_OUTPUT; to_mirror_next[0] = vlib_get_buffer_index (vm, c0); mirror_frames[i]->n_vectors++; if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) @@ -108,7 +116,8 @@ span_mirror (vlib_main_t * vm, vlib_node_runtime_t * node, u32 sw_if_index0, static_always_inline uword span_node_inline_fn (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * frame, int is_rx) + vlib_frame_t * frame, vlib_rx_or_tx_t rxtx, + span_feat_t sf) { span_main_t *sm = &span_main; vnet_main_t *vnm = &vnet_main; @@ -117,7 +126,6 @@ span_node_inline_fn (vlib_main_t * vm, vlib_node_runtime_t * node, u32 next_index; u32 sw_if_index; static __thread vlib_frame_t **mirror_frames = 0; - vlib_rx_or_tx_t rxtx = is_rx ? VLIB_RX : VLIB_TX; from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -156,11 +164,33 @@ span_node_inline_fn (vlib_main_t * vm, vlib_node_runtime_t * node, sw_if_index0 = vnet_buffer (b0)->sw_if_index[rxtx]; sw_if_index1 = vnet_buffer (b1)->sw_if_index[rxtx]; - span_mirror (vm, node, sw_if_index0, b0, mirror_frames, is_rx); - span_mirror (vm, node, sw_if_index1, b1, mirror_frames, is_rx); - - vnet_feature_next (sw_if_index0, &next0, b0); - vnet_feature_next (sw_if_index1, &next1, b1); + span_mirror (vm, node, sw_if_index0, b0, mirror_frames, rxtx, sf); + span_mirror (vm, node, sw_if_index1, b1, mirror_frames, rxtx, sf); + + switch (sf) + { + case SPAN_FEAT_L2: + if (rxtx == VLIB_RX) + { + next0 = vnet_l2_feature_next (b0, sm->l2_input_next, + L2INPUT_FEAT_SPAN); + next1 = vnet_l2_feature_next (b1, sm->l2_input_next, + L2INPUT_FEAT_SPAN); + } + else + { + next0 = vnet_l2_feature_next (b0, sm->l2_output_next, + L2OUTPUT_FEAT_SPAN); + next1 = vnet_l2_feature_next (b1, sm->l2_output_next, + L2OUTPUT_FEAT_SPAN); + } + break; + case SPAN_FEAT_DEVICE: + default: + vnet_feature_next (sw_if_index0, &next0, b0); + vnet_feature_next (sw_if_index1, &next1, b1); + break; + } /* verify speculative enqueue, maybe switch current next frame */ vlib_validate_buffer_enqueue_x2 (vm, node, next_index, @@ -184,9 +214,23 @@ span_node_inline_fn (vlib_main_t * vm, vlib_node_runtime_t * node, b0 = vlib_get_buffer (vm, bi0); sw_if_index0 = vnet_buffer (b0)->sw_if_index[rxtx]; - span_mirror (vm, node, sw_if_index0, b0, mirror_frames, is_rx); - - vnet_feature_next (sw_if_index0, &next0, b0); + span_mirror (vm, node, sw_if_index0, b0, mirror_frames, rxtx, sf); + + switch (sf) + { + case SPAN_FEAT_L2: + if (rxtx == VLIB_RX) + next0 = vnet_l2_feature_next (b0, sm->l2_input_next, + L2INPUT_FEAT_SPAN); + else + next0 = vnet_l2_feature_next (b0, 
sm->l2_output_next, + L2OUTPUT_FEAT_SPAN); + break; + case SPAN_FEAT_DEVICE: + default: + vnet_feature_next (sw_if_index0, &next0, b0); + break; + } /* verify speculative enqueue, maybe switch current next frame */ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, @@ -199,11 +243,14 @@ span_node_inline_fn (vlib_main_t * vm, vlib_node_runtime_t * node, for (sw_if_index = 0; sw_if_index < vec_len (mirror_frames); sw_if_index++) { - if (mirror_frames[sw_if_index] == 0) + vlib_frame_t *f = mirror_frames[sw_if_index]; + if (f == 0) continue; - vnet_put_frame_to_sw_interface (vnm, sw_if_index, - mirror_frames[sw_if_index]); + if (sf == SPAN_FEAT_L2) + vlib_put_frame_to_node (vnm->vlib_main, l2output_node.index, f); + else + vnet_put_frame_to_sw_interface (vnm, sw_if_index, f); mirror_frames[sw_if_index] = 0; } vlib_node_increment_counter (vm, span_node.index, SPAN_ERROR_HITS, @@ -213,62 +260,103 @@ span_node_inline_fn (vlib_main_t * vm, vlib_node_runtime_t * node, } static uword -span_input_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * frame) +span_device_input_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame) { - return span_node_inline_fn (vm, node, frame, 1); + return span_node_inline_fn (vm, node, frame, VLIB_RX, SPAN_FEAT_DEVICE); } static uword -span_output_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * frame) +span_device_output_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame) { - return span_node_inline_fn (vm, node, frame, 0); + return span_node_inline_fn (vm, node, frame, VLIB_TX, SPAN_FEAT_DEVICE); } -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (span_input_node) = { - .function = span_input_node_fn, - .name = "span-input", - .vector_size = sizeof (u32), - .format_trace = format_span_trace, - .type = VLIB_NODE_TYPE_INTERNAL, +static uword +span_l2_input_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return span_node_inline_fn (vm, node, frame, VLIB_RX, SPAN_FEAT_L2); +} - .n_errors = ARRAY_LEN(span_error_strings), - .error_strings = span_error_strings, +static uword +span_l2_output_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return span_node_inline_fn (vm, node, frame, VLIB_TX, SPAN_FEAT_L2); +} - .n_next_nodes = 0, +#define span_node_defs \ + .vector_size = sizeof (u32), \ + .format_trace = format_span_trace, \ + .type = VLIB_NODE_TYPE_INTERNAL, \ + .n_errors = ARRAY_LEN(span_error_strings), \ + .error_strings = span_error_strings, \ + .n_next_nodes = 0, \ + .next_nodes = { \ + [0] = "error-drop" \ + } - /* edit / add dispositions here */ - .next_nodes = { - [0] = "error-drop", - }, +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (span_input_node) = { + span_node_defs, + .function = span_device_input_node_fn, + .name = "span-input", }; -VLIB_NODE_FUNCTION_MULTIARCH (span_input_node, span_input_node_fn) +VLIB_NODE_FUNCTION_MULTIARCH (span_input_node, span_device_input_node_fn) VLIB_REGISTER_NODE (span_output_node) = { - .function = span_output_node_fn, + span_node_defs, + .function = span_device_output_node_fn, .name = "span-output", - .vector_size = sizeof (u32), - .format_trace = format_span_trace, - .type = VLIB_NODE_TYPE_INTERNAL, +}; - .n_errors = ARRAY_LEN(span_error_strings), - .error_strings = span_error_strings, +VLIB_NODE_FUNCTION_MULTIARCH (span_output_node, span_device_output_node_fn) - .n_next_nodes = 0, +VLIB_REGISTER_NODE (span_l2_input_node) = { + span_node_defs, + .function = 
span_l2_input_node_fn, + .name = "span-l2-input", +}; - /* edit / add dispositions here */ - .next_nodes = { - [0] = "error-drop", - }, +VLIB_NODE_FUNCTION_MULTIARCH (span_l2_input_node, span_l2_input_node_fn) + +VLIB_REGISTER_NODE (span_l2_output_node) = { + span_node_defs, + .function = span_l2_output_node_fn, + .name = "span-l2-output", }; -VLIB_NODE_FUNCTION_MULTIARCH (span_output_node, span_output_node_fn) +VLIB_NODE_FUNCTION_MULTIARCH (span_l2_output_node, span_l2_output_node_fn) + +clib_error_t *span_init (vlib_main_t * vm) +{ + span_main_t *sm = &span_main; + + sm->vlib_main = vm; + sm->vnet_main = vnet_get_main (); + + /* Initialize the feature next-node indexes */ + feat_bitmap_init_next_nodes (vm, + span_l2_input_node.index, + L2INPUT_N_FEAT, + l2input_get_feat_names (), + sm->l2_input_next); + + feat_bitmap_init_next_nodes (vm, + span_l2_output_node.index, + L2OUTPUT_N_FEAT, + l2output_get_feat_names (), + sm->l2_output_next); + return 0; +} +VLIB_INIT_FUNCTION (span_init); /* *INDENT-ON* */ +#undef span_node_defs /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/span/span.api b/src/vnet/span/span.api index 914fd8d0..2a762ac2 100644 --- a/src/vnet/span/span.api +++ b/src/vnet/span/span.api @@ -27,6 +27,7 @@ autoreply define sw_interface_span_enable_disable { u32 sw_if_index_from; u32 sw_if_index_to; u8 state; + u8 is_l2; }; /** \brief SPAN dump request diff --git a/src/vnet/span/span.c b/src/vnet/span/span.c index c5b43e34..6ecd1789 100644 --- a/src/vnet/span/span.c +++ b/src/vnet/span/span.c @@ -16,60 +16,81 @@ #include #include #include +#include +#include #include +typedef enum +{ + SPAN_DISABLE = 0, + SPAN_RX = 1, + SPAN_TX = 2, + SPAN_BOTH = SPAN_RX | SPAN_TX +} span_state_t; + +static_always_inline u32 +span_dst_set (span_mirror_t * sm, u32 dst_sw_if_index, int enable) +{ + sm->mirror_ports = + clib_bitmap_set (sm->mirror_ports, dst_sw_if_index, enable); + u32 last = sm->num_mirror_ports; + sm->num_mirror_ports = clib_bitmap_count_set_bits (sm->mirror_ports); + return last; +} + int span_add_delete_entry (vlib_main_t * vm, - u32 src_sw_if_index, u32 dst_sw_if_index, u8 state) + u32 src_sw_if_index, u32 dst_sw_if_index, u8 state, + span_feat_t sf) { span_main_t *sm = &span_main; - span_interface_t *si; - u32 new_num_rx_mirror_ports, new_num_tx_mirror_ports; - if (state > 3) + if (state > SPAN_BOTH) return VNET_API_ERROR_UNIMPLEMENTED; if ((src_sw_if_index == ~0) || (dst_sw_if_index == ~0 && state > 0) || (src_sw_if_index == dst_sw_if_index)) return VNET_API_ERROR_INVALID_INTERFACE; - vnet_sw_interface_t *sw_if; - - sw_if = vnet_get_sw_interface (vnet_get_main (), src_sw_if_index); - if (sw_if->type == VNET_SW_INTERFACE_TYPE_SUB) - return VNET_API_ERROR_UNIMPLEMENTED; - vec_validate_aligned (sm->interfaces, src_sw_if_index, CLIB_CACHE_LINE_BYTES); - si = vec_elt_at_index (sm->interfaces, src_sw_if_index); - - si->rx_mirror_ports = clib_bitmap_set (si->rx_mirror_ports, dst_sw_if_index, - (state & 1) != 0); - si->tx_mirror_ports = clib_bitmap_set (si->tx_mirror_ports, dst_sw_if_index, - (state & 2) != 0); - new_num_rx_mirror_ports = clib_bitmap_count_set_bits (si->rx_mirror_ports); - new_num_tx_mirror_ports = clib_bitmap_count_set_bits (si->tx_mirror_ports); + span_interface_t *si = vec_elt_at_index (sm->interfaces, src_sw_if_index); - if (new_num_rx_mirror_ports == 1 && si->num_rx_mirror_ports == 0) - vnet_feature_enable_disable ("device-input", "span-input", - src_sw_if_index, 1, 0, 0); + int rx = ! !(state & SPAN_RX); + int tx = ! 
!(state & SPAN_TX); - if (new_num_rx_mirror_ports == 0 && si->num_rx_mirror_ports == 1) - vnet_feature_enable_disable ("device-input", "span-input", - src_sw_if_index, 0, 0, 0); + span_mirror_t *rxm = &si->mirror_rxtx[sf][VLIB_RX]; + span_mirror_t *txm = &si->mirror_rxtx[sf][VLIB_TX]; - if (new_num_rx_mirror_ports == 1 && si->num_rx_mirror_ports == 0) - vnet_feature_enable_disable ("interface-output", "span-output", - src_sw_if_index, 1, 0, 0); + u32 last_rx_ports_count = span_dst_set (rxm, dst_sw_if_index, rx); + u32 last_tx_ports_count = span_dst_set (txm, dst_sw_if_index, tx); - if (new_num_rx_mirror_ports == 0 && si->num_rx_mirror_ports == 1) - vnet_feature_enable_disable ("interface-output", "span-output", - src_sw_if_index, 0, 0, 0); + int enable_rx = last_rx_ports_count == 0 && rxm->num_mirror_ports == 1; + int disable_rx = last_rx_ports_count == 1 && rxm->num_mirror_ports == 0; + int enable_tx = last_tx_ports_count == 0 && txm->num_mirror_ports == 1; + int disable_tx = last_tx_ports_count == 1 && txm->num_mirror_ports == 0; - si->num_rx_mirror_ports = new_num_rx_mirror_ports; - si->num_tx_mirror_ports = new_num_tx_mirror_ports; + switch (sf) + { + case SPAN_FEAT_DEVICE: + if (enable_rx || disable_rx) + vnet_feature_enable_disable ("device-input", "span-input", + src_sw_if_index, rx, 0, 0); + if (enable_tx || disable_tx) + vnet_feature_enable_disable ("interface-output", "span-output", + src_sw_if_index, tx, 0, 0); + break; + case SPAN_FEAT_L2: + if (enable_rx || disable_rx) + l2input_intf_bitmap_enable (src_sw_if_index, L2INPUT_FEAT_SPAN, rx); + if (enable_tx || disable_tx) + l2output_intf_bitmap_enable (src_sw_if_index, L2OUTPUT_FEAT_SPAN, tx); + break; + default: + return VNET_API_ERROR_UNIMPLEMENTED; + } if (dst_sw_if_index > sm->max_sw_if_index) sm->max_sw_if_index = dst_sw_if_index; @@ -85,7 +106,8 @@ set_interface_span_command_fn (vlib_main_t * vm, span_main_t *sm = &span_main; u32 src_sw_if_index = ~0; u32 dst_sw_if_index = ~0; - u8 state = 3; + u8 state = SPAN_BOTH; + span_feat_t sf = SPAN_FEAT_DEVICE; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { @@ -96,19 +118,21 @@ set_interface_span_command_fn (vlib_main_t * vm, sm->vnet_main, &dst_sw_if_index)) ; else if (unformat (input, "disable")) - state = 0; + state = SPAN_DISABLE; else if (unformat (input, "rx")) - state = 1; + state = SPAN_RX; else if (unformat (input, "tx")) - state = 2; + state = SPAN_TX; else if (unformat (input, "both")) - state = 3; + state = SPAN_BOTH; + else if (unformat (input, "l2")) + sf = SPAN_FEAT_L2; else break; } int rv = - span_add_delete_entry (vm, src_sw_if_index, dst_sw_if_index, state); + span_add_delete_entry (vm, src_sw_if_index, dst_sw_if_index, state, sf); if (rv == VNET_API_ERROR_INVALID_INTERFACE) return clib_error_return (0, "Invalid interface"); return 0; @@ -117,7 +141,7 @@ set_interface_span_command_fn (vlib_main_t * vm, /* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_interface_span_command, static) = { .path = "set interface span", - .short_help = "set interface span [disable | destination [both|rx|tx]]", + .short_help = "set interface span [l2] {disable | destination [both|rx|tx]}", .function = set_interface_span_command_fn, }; /* *INDENT-ON* */ @@ -136,31 +160,44 @@ show_interfaces_span_command_fn (vlib_main_t * vm, /* *INDENT-OFF* */ vec_foreach (si, sm->interfaces) - if (si->num_rx_mirror_ports || si->num_tx_mirror_ports) + { + span_mirror_t * drxm = &si->mirror_rxtx[SPAN_FEAT_DEVICE][VLIB_RX]; + span_mirror_t * dtxm = &si->mirror_rxtx[SPAN_FEAT_DEVICE][VLIB_TX]; + + 
span_mirror_t * lrxm = &si->mirror_rxtx[SPAN_FEAT_L2][VLIB_RX]; + span_mirror_t * ltxm = &si->mirror_rxtx[SPAN_FEAT_L2][VLIB_TX]; + + if (drxm->num_mirror_ports || dtxm->num_mirror_ports || + lrxm->num_mirror_ports || ltxm->num_mirror_ports) { - clib_bitmap_t *b; u32 i; - b = clib_bitmap_dup_or (si->rx_mirror_ports, si->tx_mirror_ports); + clib_bitmap_t *d = clib_bitmap_dup_or (drxm->mirror_ports, dtxm->mirror_ports); + clib_bitmap_t *l = clib_bitmap_dup_or (lrxm->mirror_ports, ltxm->mirror_ports); + clib_bitmap_t *b = clib_bitmap_dup_or (d, l); if (header) { - vlib_cli_output (vm, "%-40s %s", "Source interface", - "Mirror interface (direction)"); + vlib_cli_output (vm, "%-20s %-20s %6s %6s", "Source", "Destination", + "Device", "L2"); header = 0; } s = format (s, "%U", format_vnet_sw_if_index_name, vnm, si - sm->interfaces); clib_bitmap_foreach (i, b, ( { - int state; - state = (clib_bitmap_get (si->rx_mirror_ports, i) + - clib_bitmap_get (si->tx_mirror_ports, i) * 2); + int device = (clib_bitmap_get (drxm->mirror_ports, i) + + clib_bitmap_get (dtxm->mirror_ports, i) * 2); + int l2 = (clib_bitmap_get (lrxm->mirror_ports, i) + + clib_bitmap_get (ltxm->mirror_ports, i) * 2); - vlib_cli_output (vm, "%-40v %U (%s)", s, + vlib_cli_output (vm, "%-20v %-20U (%6s) (%6s)", s, format_vnet_sw_if_index_name, vnm, i, - states[state]); + states[device], states[l2]); vec_reset_length (s); })); clib_bitmap_free (b); + clib_bitmap_free (l); + clib_bitmap_free (d); + } } /* *INDENT-ON* */ vec_free (s); @@ -175,19 +212,6 @@ VLIB_CLI_COMMAND (show_interfaces_span_command, static) = { }; /* *INDENT-ON* */ -static clib_error_t * -span_init (vlib_main_t * vm) -{ - span_main_t *sm = &span_main; - - sm->vlib_main = vm; - sm->vnet_main = vnet_get_main (); - - return 0; -} - -VLIB_INIT_FUNCTION (span_init); - /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/span/span.h b/src/vnet/span/span.h index a98b010b..10de8272 100644 --- a/src/vnet/span/span.h +++ b/src/vnet/span/span.h @@ -18,17 +18,32 @@ #include #include +#include + +typedef enum +{ + SPAN_FEAT_DEVICE, + SPAN_FEAT_L2, + SPAN_FEAT_N +} span_feat_t; + +typedef struct +{ + clib_bitmap_t *mirror_ports; + u32 num_mirror_ports; +} span_mirror_t; typedef struct { - clib_bitmap_t *rx_mirror_ports; - clib_bitmap_t *tx_mirror_ports; - u32 num_rx_mirror_ports; - u32 num_tx_mirror_ports; + span_mirror_t mirror_rxtx[SPAN_FEAT_N][VLIB_N_RX_TX]; } span_interface_t; typedef struct { + /* l2 feature Next nodes */ + u32 l2_input_next[32]; + u32 l2_output_next[32]; + /* per-interface vector of span instances */ span_interface_t *interfaces; @@ -52,7 +67,7 @@ typedef struct int span_add_delete_entry (vlib_main_t * vm, u32 src_sw_if_index, - u32 dst_sw_if_index, u8 is_add); + u32 dst_sw_if_index, u8 state, span_feat_t sf); /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/span/span_api.c b/src/vnet/span/span_api.c index b4565663..69fa8e97 100644 --- a/src/vnet/span/span_api.c +++ b/src/vnet/span/span_api.c @@ -56,7 +56,8 @@ static void vlib_main_t *vm = vlib_get_main (); rv = span_add_delete_entry (vm, ntohl (mp->sw_if_index_from), - ntohl (mp->sw_if_index_to), mp->state); + ntohl (mp->sw_if_index_to), mp->state, + mp->is_l2 ? 
SPAN_FEAT_L2 : SPAN_FEAT_DEVICE); REPLY_MACRO (VL_API_SW_INTERFACE_SPAN_ENABLE_DISABLE_REPLY); } @@ -76,11 +77,14 @@ vl_api_sw_interface_span_dump_t_handler (vl_api_sw_interface_span_dump_t * mp) /* *INDENT-OFF* */ vec_foreach (si, sm->interfaces) - if (si->num_rx_mirror_ports || si->num_tx_mirror_ports) + { + span_mirror_t * drxm = &si->mirror_rxtx[SPAN_FEAT_DEVICE][VLIB_RX]; + span_mirror_t * dtxm = &si->mirror_rxtx[SPAN_FEAT_DEVICE][VLIB_TX]; + if (drxm->num_mirror_ports || dtxm->num_mirror_ports) { clib_bitmap_t *b; u32 i; - b = clib_bitmap_dup_or (si->rx_mirror_ports, si->tx_mirror_ports); + b = clib_bitmap_dup_or (drxm->mirror_ports, dtxm->mirror_ports); clib_bitmap_foreach (i, b, ( { rmp = vl_msg_api_alloc (sizeof (*rmp)); @@ -90,13 +94,14 @@ vl_api_sw_interface_span_dump_t_handler (vl_api_sw_interface_span_dump_t * mp) rmp->sw_if_index_from = htonl (si - sm->interfaces); rmp->sw_if_index_to = htonl (i); - rmp->state = (u8) (clib_bitmap_get (si->rx_mirror_ports, i) + - clib_bitmap_get (si->tx_mirror_ports, i) * 2); + rmp->state = (u8) (clib_bitmap_get (drxm->mirror_ports, i) + + clib_bitmap_get (dtxm->mirror_ports, i) * 2); vl_msg_api_send_shmem (q, (u8 *) & rmp); })); clib_bitmap_free (b); } + } /* *INDENT-ON* */ } diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c index 7f3a58d9..55a362a3 100644 --- a/src/vpp/api/custom_dump.c +++ b/src/vpp/api/custom_dump.c @@ -2227,6 +2227,9 @@ static void *vl_api_sw_interface_span_enable_disable_t_print s = format (s, "src_sw_if_index %u ", ntohl (mp->sw_if_index_from)); s = format (s, "dst_sw_if_index %u ", ntohl (mp->sw_if_index_to)); + if (mp->is_l2) + s = format (s, "l2 "); + switch (mp->state) { case 0: diff --git a/test/test_span.py b/test/test_span.py index d8b65252..f2529e8f 100644 --- a/test/test_span.py +++ b/test/test_span.py @@ -3,63 +3,121 @@ import unittest from scapy.packet import Raw -from scapy.layers.l2 import Ether +from scapy.layers.l2 import Ether, Dot1Q, GRE from scapy.layers.inet import IP, UDP +from scapy.layers.vxlan import VXLAN from framework import VppTestCase, VppTestRunner from util import Host, ppp +from vpp_sub_interface import VppDot1QSubint, VppDot1ADSubint +from vpp_gre_interface import VppGreInterface, VppGre6Interface +from vpp_papi_provider import L2_VTR_OP +from collections import namedtuple + +Tag = namedtuple('Tag', ['dot1', 'vlan']) +DOT1AD = 0x88A8 +DOT1Q = 0x8100 class TestSpan(VppTestCase): """ SPAN Test Case """ - # Test variables - hosts_nr = 10 # Number of hosts - pkts_per_burst = 257 # Number of packets per burst - @classmethod def setUpClass(cls): super(TestSpan, cls).setUpClass() - - def setUp(self): - super(TestSpan, self).setUp() - + # Test variables + cls.hosts_nr = 10 # Number of hosts + cls.pkts_per_burst = 257 # Number of packets per burst # create 3 pg interfaces - self.create_pg_interfaces(range(3)) + cls.create_pg_interfaces(range(3)) + cls.bd_id = 55 + cls.sub_if = VppDot1QSubint(cls, cls.pg0, 100) + cls.dst_sub_if = VppDot1QSubint(cls, cls.pg2, 300) + cls.dst_sub_if.set_vtr(L2_VTR_OP.L2_POP_1, tag=300) # packet flows mapping pg0 -> pg1, pg2 -> pg3, etc. 
- self.flows = dict() - self.flows[self.pg0] = [self.pg1] + cls.flows = dict() + cls.flows[cls.pg0] = [cls.pg1] # packet sizes - self.pg_if_packet_sizes = [64, 512] # , 1518, 9018] + cls.pg_if_packet_sizes = [64, 512] # , 1518, 9018] - self.interfaces = list(self.pg_interfaces) + cls.interfaces = list(cls.pg_interfaces) # Create host MAC and IPv4 lists - # self.MY_MACS = dict() - # self.MY_IP4S = dict() - self.create_host_lists(TestSpan.hosts_nr) - - # Create bi-directional cross-connects between pg0 and pg1 - self.vapi.sw_interface_set_l2_xconnect( - self.pg0.sw_if_index, self.pg1.sw_if_index, enable=1) - self.vapi.sw_interface_set_l2_xconnect( - self.pg1.sw_if_index, self.pg0.sw_if_index, enable=1) + # cls.MY_MACS = dict() + # cls.MY_IP4S = dict() + cls.create_host_lists(cls.hosts_nr) # setup all interfaces - for i in self.interfaces: + for i in cls.interfaces: i.admin_up() i.config_ip4() i.resolve_arp() - # Enable SPAN on pg0 (mirrored to pg2) - self.vapi.sw_interface_span_enable_disable( - self.pg0.sw_if_index, self.pg2.sw_if_index) + cls.vxlan = cls.vapi.vxlan_add_del_tunnel( + src_addr=cls.pg2.local_ip4n, + dst_addr=cls.pg2.remote_ip4n, + vni=1111, + is_add=1) + + def setUp(self): + super(TestSpan, self).setUp() + self.reset_packet_infos() def tearDown(self): super(TestSpan, self).tearDown() + if not self.vpp_dead: + self.logger.info(self.vapi.ppcli("show interface span")) + + def xconnect(self, a, b, is_add=1): + self.vapi.sw_interface_set_l2_xconnect(a, b, enable=is_add) + self.vapi.sw_interface_set_l2_xconnect(b, a, enable=is_add) + + def bridge(self, sw_if_index, is_add=1): + self.vapi.sw_interface_set_l2_bridge( + sw_if_index, bd_id=self.bd_id, enable=is_add) + + def _remove_tag(self, packet, vlan, tag_type): + self.assertEqual(packet.type, tag_type) + payload = packet.payload + self.assertEqual(payload.vlan, vlan) + inner_type = payload.type + payload = payload.payload + packet.remove_payload() + packet.add_payload(payload) + packet.type = inner_type + + def remove_tags(self, packet, tags): + for t in tags: + self._remove_tag(packet, t.vlan, t.dot1) + return packet + def decap_gre(self, pkt): + """ + Decapsulate the original payload frame by removing GRE header + """ + self.assertEqual(pkt[Ether].src, self.pg2.local_mac) + self.assertEqual(pkt[Ether].dst, self.pg2.remote_mac) + + self.assertEqual(pkt[IP].src, self.pg2.local_ip4) + self.assertEqual(pkt[IP].dst, self.pg2.remote_ip4) + + return pkt[GRE].payload + + def decap_vxlan(self, pkt): + """ + Decapsulate the original payload frame by removing VXLAN header + """ + self.assertEqual(pkt[Ether].src, self.pg2.local_mac) + self.assertEqual(pkt[Ether].dst, self.pg2.remote_mac) + + self.assertEqual(pkt[IP].src, self.pg2.local_ip4) + self.assertEqual(pkt[IP].dst, self.pg2.remote_ip4) + + return pkt[VXLAN].payload + + @classmethod def create_host_lists(self, count): """ Method to create required number of MAC and IPv4 addresses. 
Create required number of host MAC addresses and distribute them among @@ -81,9 +139,9 @@ class TestSpan(VppTestCase): "172.17.1%02x.%u" % (pg_if.sw_if_index, j)) hosts.append(host) - def create_stream(self, src_if, packet_sizes): + def create_stream(self, src_if, packet_sizes, do_dot1=False): pkts = [] - for i in range(0, TestSpan.pkts_per_burst): + for i in range(0, self.pkts_per_burst): dst_if = self.flows[src_if][0] pkt_info = self.create_packet_info(src_if, dst_if) payload = self.info_to_payload(pkt_info) @@ -91,6 +149,8 @@ class TestSpan(VppTestCase): IP(src=src_if.remote_ip4, dst=dst_if.remote_ip4) / UDP(sport=1234, dport=1234) / Raw(payload)) + if do_dot1: + p = self.sub_if.add_dot1_layer(p) pkt_info.data = p.copy() size = packet_sizes[(i / 2) % len(packet_sizes)] self.extend_packet(p, size) @@ -161,8 +221,8 @@ class TestSpan(VppTestCase): "Port %u: Packet expected from source %u didn't" " arrive" % (dst_sw_if_index, i.sw_if_index)) - def test_span(self): - """ SPAN test + def test_device_span(self): + """ SPAN device rx mirror test Test scenario: 1. config @@ -173,10 +233,17 @@ class TestSpan(VppTestCase): burst of packets per interface """ + # Create bi-directional cross-connects between pg0 and pg1 + self.xconnect(self.pg0.sw_if_index, self.pg1.sw_if_index) # Create incoming packet streams for packet-generator interfaces pkts = self.create_stream(self.pg0, self.pg_if_packet_sizes) self.pg0.add_stream(pkts) + # Enable SPAN on pg0 (mirrored to pg2) + self.vapi.sw_interface_span_enable_disable( + self.pg0.sw_if_index, self.pg2.sw_if_index) + + self.logger.info(self.vapi.ppcli("show interface span")) # Enable packet capturing and start packet sending self.pg_enable_capture(self.pg_interfaces) self.pg_start() @@ -190,6 +257,225 @@ class TestSpan(VppTestCase): self.pg1.get_capture(), self.pg2.get_capture(pg2_expected)) + # Disable SPAN on pg0 (mirrored to pg2) + self.vapi.sw_interface_span_enable_disable( + self.pg0.sw_if_index, self.pg2.sw_if_index, state=0) + self.xconnect(self.pg0.sw_if_index, self.pg1.sw_if_index, is_add=0) + + def test_span_l2_rx(self): + """ SPAN l2 rx mirror test """ + + self.sub_if.admin_up() + + self.bridge(self.pg2.sw_if_index) + # Create bi-directional cross-connects between pg0 and pg1 + self.xconnect(self.sub_if.sw_if_index, self.pg1.sw_if_index) + # Create incoming packet streams for packet-generator interfaces + pkts = self.create_stream( + self.pg0, self.pg_if_packet_sizes, do_dot1=True) + self.pg0.add_stream(pkts) + + # Enable SPAN on pg0 (mirrored to pg2) + self.vapi.sw_interface_span_enable_disable( + self.sub_if.sw_if_index, self.pg2.sw_if_index, is_l2=1) + + self.logger.info(self.vapi.ppcli("show interface span")) + # Enable packet capturing and start packet sending + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + # Verify packets outgoing packet streams on mirrored interface (pg2) + self.logger.info("Verifying capture on interfaces %s and %s" % + (self.pg1.name, self.pg2.name)) + pg2_expected = self.get_packet_count_for_if_idx(self.pg1.sw_if_index) + pg1_pkts = self.pg1.get_capture() + pg2_pkts = self.pg2.get_capture(pg2_expected) + self.verify_capture( + self.pg1, + pg1_pkts, + pg2_pkts) + + self.bridge(self.pg2.sw_if_index, is_add=0) + # Disable SPAN on pg0 (mirrored to pg2) + self.vapi.sw_interface_span_enable_disable( + self.sub_if.sw_if_index, self.pg2.sw_if_index, state=0, is_l2=1) + self.xconnect(self.sub_if.sw_if_index, self.pg1.sw_if_index, is_add=0) + + def test_span_l2_rx_dst_vxlan(self): + """ SPAN l2 rx mirror into 
vxlan test """ + + self.sub_if.admin_up() + self.vapi.sw_interface_set_flags(self.vxlan.sw_if_index, + admin_up_down=1) + + self.bridge(self.vxlan.sw_if_index, is_add=1) + # Create bi-directional cross-connects between pg0 and pg1 + self.xconnect(self.sub_if.sw_if_index, self.pg1.sw_if_index) + # Create incoming packet streams for packet-generator interfaces + pkts = self.create_stream( + self.pg0, self.pg_if_packet_sizes, do_dot1=True) + self.pg0.add_stream(pkts) + + # Enable SPAN on pg0 sub if (mirrored to vxlan) + self.vapi.sw_interface_span_enable_disable( + self.sub_if.sw_if_index, self.vxlan.sw_if_index, is_l2=1) + + self.logger.info(self.vapi.ppcli("show interface span")) + # Enable packet capturing and start packet sending + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + # Verify packets outgoing packet streams on mirrored interface (pg2) + self.logger.info("Verifying capture on interfaces %s and %s" % + (self.pg1.name, self.pg2.name)) + pg2_expected = self.get_packet_count_for_if_idx(self.pg1.sw_if_index) + pg1_pkts = self.pg1.get_capture() + pg2_pkts = [self.decap_vxlan(p) + for p in self.pg2.get_capture(pg2_expected)] + self.verify_capture( + self.pg1, + pg1_pkts, + pg2_pkts) + + self.bridge(self.vxlan.sw_if_index, is_add=0) + # Disable SPAN on pg0 sub if (mirrored to vxlan) + self.vapi.sw_interface_span_enable_disable( + self.sub_if.sw_if_index, self.vxlan.sw_if_index, state=0, is_l2=1) + self.xconnect(self.sub_if.sw_if_index, self.pg1.sw_if_index, is_add=0) + + def test_span_l2_rx_dst_gre_subif_vtr(self): + """ SPAN l2 rx mirror into gre-subif+vtr """ + + self.sub_if.admin_up() + + gre_if = VppGreInterface(self, self.pg2.local_ip4, + self.pg2.remote_ip4, + is_teb=1) + + gre_if.add_vpp_config() + gre_if.admin_up() + + gre_sub_if = VppDot1QSubint(self, gre_if, 500) + gre_sub_if.set_vtr(L2_VTR_OP.L2_POP_1, tag=500) + gre_sub_if.admin_up() + + self.bridge(gre_sub_if.sw_if_index) + # Create bi-directional cross-connects between pg0 and pg1 + self.xconnect(self.sub_if.sw_if_index, self.pg1.sw_if_index, is_add=1) + + # Create incoming packet streams for packet-generator interfaces + pkts = self.create_stream( + self.pg0, self.pg_if_packet_sizes, do_dot1=True) + self.pg0.add_stream(pkts) + + self.vapi.sw_interface_span_enable_disable( + self.sub_if.sw_if_index, gre_sub_if.sw_if_index, is_l2=1) + + # Enable packet capturing and start packet sending + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + # Verify packets outgoing packet streams on mirrored interface (pg2) + self.logger.info("Verifying capture on interfaces %s and %s" % + (self.pg1.name, self.pg2.name)) + pg2_expected = self.get_packet_count_for_if_idx(self.pg1.sw_if_index) + pg1_pkts = self.pg1.get_capture() + pg2_pkts = self.pg2.get_capture(pg2_expected) + pg2_decaped = [self.remove_tags(self.decap_gre( + p), [Tag(dot1=DOT1Q, vlan=500)]) for p in pg2_pkts] + self.verify_capture( + self.pg1, + pg1_pkts, + pg2_decaped) + + self.bridge(gre_sub_if.sw_if_index, is_add=0) + # Disable SPAN on pg0 sub if + self.vapi.sw_interface_span_enable_disable( + self.sub_if.sw_if_index, gre_sub_if.sw_if_index, state=0, + is_l2=1) + gre_if.remove_vpp_config() + self.xconnect(self.sub_if.sw_if_index, self.pg1.sw_if_index, is_add=0) + + def test_span_l2_rx_dst_vtr(self): + """ SPAN l2 rx mirror into subif+vtr """ + + self.sub_if.admin_up() + self.dst_sub_if.admin_up() + + self.bridge(self.dst_sub_if.sw_if_index) + # Create bi-directional cross-connects between pg0 and pg1 + self.xconnect(self.sub_if.sw_if_index, 
self.pg1.sw_if_index, is_add=1) + + # Create incoming packet streams for packet-generator interfaces + pkts = self.create_stream( + self.pg0, self.pg_if_packet_sizes, do_dot1=True) + self.pg0.add_stream(pkts) + + self.vapi.sw_interface_span_enable_disable( + self.sub_if.sw_if_index, self.dst_sub_if.sw_if_index, is_l2=1) + + # Enable packet capturing and start packet sending + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + # Verify packets outgoing packet streams on mirrored interface (pg2) + self.logger.info("Verifying capture on interfaces %s and %s" % + (self.pg1.name, self.pg2.name)) + pg2_expected = self.get_packet_count_for_if_idx(self.pg1.sw_if_index) + pg1_pkts = self.pg1.get_capture() + pg2_pkts = self.pg2.get_capture(pg2_expected) + pg2_untagged = [self.remove_tags(p, [Tag(dot1=DOT1Q, vlan=300)]) + for p in pg2_pkts] + self.verify_capture( + self.pg1, + pg1_pkts, + pg2_untagged) + + self.bridge(self.dst_sub_if.sw_if_index, is_add=0) + # Disable SPAN on pg0 sub if (mirrored to vxlan) + self.vapi.sw_interface_span_enable_disable( + self.sub_if.sw_if_index, self.dst_sub_if.sw_if_index, state=0, + is_l2=1) + self.xconnect(self.sub_if.sw_if_index, self.pg1.sw_if_index, is_add=0) + + def test_l2_tx_span(self): + """ SPAN l2 tx mirror test """ + + self.sub_if.admin_up() + self.bridge(self.pg2.sw_if_index) + # Create bi-directional cross-connects between pg0 and pg1 + self.xconnect(self.sub_if.sw_if_index, self.pg1.sw_if_index) + # Create incoming packet streams for packet-generator interfaces + pkts = self.create_stream( + self.pg0, self.pg_if_packet_sizes, do_dot1=True) + self.pg0.add_stream(pkts) + + # Enable SPAN on pg0 (mirrored to pg2) + self.vapi.sw_interface_span_enable_disable( + self.pg1.sw_if_index, self.pg2.sw_if_index, is_l2=1, state=2) + + self.logger.info(self.vapi.ppcli("show interface span")) + # Enable packet capturing and start packet sending + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + # Verify packets outgoing packet streams on mirrored interface (pg2) + self.logger.info("Verifying capture on interfaces %s and %s" % + (self.pg1.name, self.pg2.name)) + pg2_expected = self.get_packet_count_for_if_idx(self.pg1.sw_if_index) + pg1_pkts = self.pg1.get_capture() + pg2_pkts = self.pg2.get_capture(pg2_expected) + self.verify_capture( + self.pg1, + pg1_pkts, + pg2_pkts) + + self.bridge(self.pg2.sw_if_index, is_add=0) + # Disable SPAN on pg0 (mirrored to pg2) + self.vapi.sw_interface_span_enable_disable( + self.pg1.sw_if_index, self.pg2.sw_if_index, state=0, is_l2=1) + self.xconnect(self.sub_if.sw_if_index, self.pg1.sw_if_index, is_add=0) + if __name__ == '__main__': unittest.main(testRunner=VppTestRunner) diff --git a/test/vpp_papi_provider.py b/test/vpp_papi_provider.py index 11e16e49..204d9e31 100644 --- a/test/vpp_papi_provider.py +++ b/test/vpp_papi_provider.py @@ -847,17 +847,20 @@ class VppPapiProvider(object): ) def sw_interface_span_enable_disable( - self, sw_if_index_from, sw_if_index_to, state=1): + self, sw_if_index_from, sw_if_index_to, state=1, is_l2=0): """ :param sw_if_index_from: :param sw_if_index_to: :param state: + :param is_l2: """ return self.api(self.papi.sw_interface_span_enable_disable, {'sw_if_index_from': sw_if_index_from, 'sw_if_index_to': sw_if_index_to, - 'state': state}) + 'state': state, + 'is_l2': is_l2, + }) def gre_tunnel_add_del(self, src_address, -- cgit 1.2.3-korg From 5b311202b82a827c712d2cb7604c56049266adc9 Mon Sep 17 00:00:00 2001 From: Eyal Bari Date: Mon, 31 Jul 2017 13:12:30 +0300 Subject: 
SPAN/API:enable L2 dump Change-Id: Icea1dff33aae35a85ae1a7ed1900a0abb3fe4b6b Signed-off-by: Eyal Bari --- src/vat/api_format.c | 13 ++++++++++++- src/vnet/span/span.api | 3 +++ src/vnet/span/span_api.c | 13 +++++++------ src/vpp/api/custom_dump.c | 3 +++ 4 files changed, 25 insertions(+), 7 deletions(-) (limited to 'src/vpp/api') diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 932e162d..5a0c4580 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -18207,11 +18207,22 @@ static void static int api_sw_interface_span_dump (vat_main_t * vam) { + unformat_input_t *input = vam->input; vl_api_sw_interface_span_dump_t *mp; vl_api_control_ping_t *mp_ping; + u8 is_l2 = 0; int ret; + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "l2")) + is_l2 = 1; + else + break; + } + M (SW_INTERFACE_SPAN_DUMP, mp); + mp->is_l2 = is_l2; S (mp); /* Use a control ping for synchronization */ @@ -20049,7 +20060,7 @@ _(ipfix_classify_stream_dump, "") \ _(ipfix_classify_table_add_del, "table ip4|ip6 [tcp|udp]") \ _(ipfix_classify_table_dump, "") \ _(sw_interface_span_enable_disable, "[l2] [src | src_sw_if_index ] [disable | [[dst | dst_sw_if_index ] [both|rx|tx]]]") \ -_(sw_interface_span_dump, "") \ +_(sw_interface_span_dump, "[l2]") \ _(get_next_index, "node-name next-node-name ") \ _(pg_create_interface, "if_id ") \ _(pg_capture, "if_id pcap count [disable]") \ diff --git a/src/vnet/span/span.api b/src/vnet/span/span.api index 2a762ac2..03cd60ec 100644 --- a/src/vnet/span/span.api +++ b/src/vnet/span/span.api @@ -20,6 +20,7 @@ @param sw_if_index_from - interface to be mirorred @param sw_if_index_to - interface where the traffic is mirrored @param state - 0 = disabled, 1 = rx enabled, 2 = tx enabled, 3 tx & rx enabled + @param is_l2 - 0 = mirror at hw device level, 1 = mirror at L2 */ autoreply define sw_interface_span_enable_disable { u32 client_index; @@ -33,10 +34,12 @@ autoreply define sw_interface_span_enable_disable { /** \brief SPAN dump request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request + @param is_l2 - 0 = hw device level, 1 = L2 */ define sw_interface_span_dump { u32 client_index; u32 context; + u8 is_l2; }; /** \brief Reply to SPAN dump request diff --git a/src/vnet/span/span_api.c b/src/vnet/span/span_api.c index 69fa8e97..64a71a2e 100644 --- a/src/vnet/span/span_api.c +++ b/src/vnet/span/span_api.c @@ -75,16 +75,17 @@ vl_api_sw_interface_span_dump_t_handler (vl_api_sw_interface_span_dump_t * mp) if (!q) return; + span_feat_t sf = mp->is_l2 ? 
SPAN_FEAT_L2 : SPAN_FEAT_DEVICE; /* *INDENT-OFF* */ vec_foreach (si, sm->interfaces) { - span_mirror_t * drxm = &si->mirror_rxtx[SPAN_FEAT_DEVICE][VLIB_RX]; - span_mirror_t * dtxm = &si->mirror_rxtx[SPAN_FEAT_DEVICE][VLIB_TX]; - if (drxm->num_mirror_ports || dtxm->num_mirror_ports) + span_mirror_t * rxm = &si->mirror_rxtx[sf][VLIB_RX]; + span_mirror_t * txm = &si->mirror_rxtx[sf][VLIB_TX]; + if (rxm->num_mirror_ports || txm->num_mirror_ports) { clib_bitmap_t *b; u32 i; - b = clib_bitmap_dup_or (drxm->mirror_ports, dtxm->mirror_ports); + b = clib_bitmap_dup_or (rxm->mirror_ports, txm->mirror_ports); clib_bitmap_foreach (i, b, ( { rmp = vl_msg_api_alloc (sizeof (*rmp)); @@ -94,8 +95,8 @@ vl_api_sw_interface_span_dump_t_handler (vl_api_sw_interface_span_dump_t * mp) rmp->sw_if_index_from = htonl (si - sm->interfaces); rmp->sw_if_index_to = htonl (i); - rmp->state = (u8) (clib_bitmap_get (drxm->mirror_ports, i) + - clib_bitmap_get (dtxm->mirror_ports, i) * 2); + rmp->state = (u8) (clib_bitmap_get (rxm->mirror_ports, i) + + clib_bitmap_get (txm->mirror_ports, i) * 2); vl_msg_api_send_shmem (q, (u8 *) & rmp); })); diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c index 55a362a3..520361f6 100644 --- a/src/vpp/api/custom_dump.c +++ b/src/vpp/api/custom_dump.c @@ -2258,6 +2258,9 @@ vl_api_sw_interface_span_dump_t_print (vl_api_sw_interface_span_dump_t * mp, s = format (0, "SCRIPT: sw_interface_span_dump "); + if (mp->is_l2) + s = format (s, "l2 "); + FINISH; } -- cgit 1.2.3-korg From 8d00fff8dff4e449767601645422e03df92a83af Mon Sep 17 00:00:00 2001 From: John Lo Date: Thu, 3 Aug 2017 00:35:36 -0400 Subject: Add support for API client to receive L2 MAC events Added APIs want_l2_macs_events and l2_macs_event to allow an API client to receive notification events from VPP for MAC learned or aged in L2FIB. Only one API client is allowed for L2 MAC events. The want_l2_macs_events API allow caller to specify MAC learn limit, event scan delay and max number of MACs that can be included in a event message. These parameters should be choosen properly as to not have too many MAC events sent by VPP and overwhelm the API share memory. They can all be left as 0's so VPP will setup reasonable defaults which are: 1000 learn limit, 100 msec scan delay and 100 MACs per event message. If want_l2_macs_events is never called, VPP learning and aging should behave as before except that MAC entries provisioned by API or CLI will not be aged, even if it is not set as static_mac. These non static MACs, however, can be overwritten by MAC learning on a MAC move as a leared MAC. Only learned MACs are subject to aging. 
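As a rough illustration of the intended client-side usage (not part of this patch): a test or client built on the VppPapiProvider pattern shown elsewhere in this series could register for the events roughly as sketched below. The wrapper-style call, the chosen parameter values, and the already-connected "vapi" object are illustrative assumptions; only the want_l2_macs_events message fields themselves come from this change.

    # Hypothetical sketch: register this process for L2 MAC events.
    # Passing zeros for the numeric fields keeps the VPP defaults
    # (learn limit 1000, scan delay 100 msec, 100 MACs per event message).
    import os

    def register_l2_mac_events(vapi):
        # vapi is assumed to be a connected VppPapiProvider-like object.
        return vapi.api(vapi.papi.want_l2_macs_events,
                        {'enable_disable': 1,
                         'pid': os.getpid(),
                         'learn_limit': 500,       # cap learned MACs at 500
                         'scan_delay': 5,          # 5 x 10 msec = 50 msec between event scans
                         'max_macs_in_event': 5})  # 5 x 10 = 50 MAC entries per event message

    # A matching event handler would decode l2_macs_event messages, each
    # carrying up to max_macs_in_event mac_entry records with sw_if_index,
    # mac_addr and is_del (0 = newly learned, 1 = aged out).
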
Change-Id: Ia3757a80cf8adb2811a089d2eafbd6439461285c Signed-off-by: John Lo --- src/vat/api_format.c | 86 +++++++++- src/vnet/api_errno.h | 5 +- src/vnet/l2/l2.api | 65 +++++++- src/vnet/l2/l2_api.c | 83 +++++++++- src/vnet/l2/l2_bd.c | 37 +---- src/vnet/l2/l2_fib.c | 394 ++++++++++++++++++++++++++++++++++------------ src/vnet/l2/l2_fib.h | 37 ++++- src/vnet/l2/l2_fwd.c | 3 +- src/vnet/l2/l2_learn.c | 30 ++-- src/vnet/l2/l2_learn.h | 5 + src/vpp/api/custom_dump.c | 33 +++- 11 files changed, 605 insertions(+), 173 deletions(-) (limited to 'src/vpp/api') diff --git a/src/vat/api_format.c b/src/vat/api_format.c index bbd97ba1..27286686 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -1283,6 +1283,30 @@ vl_api_ip6_nd_event_t_handler_json (vl_api_ip6_nd_event_t * mp) /* JSON output not supported */ } +static void +vl_api_l2_macs_event_t_handler (vl_api_l2_macs_event_t * mp) +{ + u32 n_macs = ntohl (mp->n_macs); + errmsg ("L2MAC event recived with pid %d cl-idx %d for %d macs: \n", + ntohl (mp->pid), mp->client_index, n_macs); + int i; + for (i = 0; i < n_macs; i++) + { + vl_api_mac_entry_t *mac = &mp->mac[i]; + errmsg (" [%d] sw_if_index %d mac_addr %U is_del %d \n", + i + 1, ntohl (mac->sw_if_index), + format_ethernet_address, mac->mac_addr, mac->is_del); + if (i == 1000) + break; + } +} + +static void +vl_api_l2_macs_event_t_handler_json (vl_api_l2_macs_event_t * mp) +{ + /* JSON output not supported */ +} + #define vl_api_bridge_domain_details_t_endian vl_noop_handler #define vl_api_bridge_domain_details_t_print vl_noop_handler @@ -4597,6 +4621,7 @@ _(modify_vhost_user_if_reply) \ _(delete_vhost_user_if_reply) \ _(want_ip4_arp_events_reply) \ _(want_ip6_nd_events_reply) \ +_(want_l2_macs_events_reply) \ _(input_acl_set_interface_reply) \ _(ipsec_spd_add_del_reply) \ _(ipsec_interface_add_del_spd_reply) \ @@ -4813,6 +4838,8 @@ _(WANT_IP4_ARP_EVENTS_REPLY, want_ip4_arp_events_reply) \ _(IP4_ARP_EVENT, ip4_arp_event) \ _(WANT_IP6_ND_EVENTS_REPLY, want_ip6_nd_events_reply) \ _(IP6_ND_EVENT, ip6_nd_event) \ +_(WANT_L2_MACS_EVENTS_REPLY, want_l2_macs_events_reply) \ +_(L2_MACS_EVENT, l2_macs_event) \ _(INPUT_ACL_SET_INTERFACE_REPLY, input_acl_set_interface_reply) \ _(IP_ADDRESS_DETAILS, ip_address_details) \ _(IP_DETAILS, ip_details) \ @@ -6607,8 +6634,9 @@ api_l2_flags (vat_main_t * vam) unformat_input_t *i = vam->input; vl_api_l2_flags_t *mp; u32 sw_if_index; - u32 feature_bitmap = 0; + u32 flags = 0; u8 sw_if_index_set = 0; + u8 is_set = 0; int ret; /* Parse args required to build the message */ @@ -6628,13 +6656,19 @@ api_l2_flags (vat_main_t * vam) break; } else if (unformat (i, "learn")) - feature_bitmap |= L2INPUT_FEAT_LEARN; + flags |= L2_LEARN; else if (unformat (i, "forward")) - feature_bitmap |= L2INPUT_FEAT_FWD; + flags |= L2_FWD; else if (unformat (i, "flood")) - feature_bitmap |= L2INPUT_FEAT_FLOOD; + flags |= L2_FLOOD; else if (unformat (i, "uu-flood")) - feature_bitmap |= L2INPUT_FEAT_UU_FLOOD; + flags |= L2_UU_FLOOD; + else if (unformat (i, "arp-term")) + flags |= L2_ARP_TERM; + else if (unformat (i, "off")) + is_set = 0; + else if (unformat (i, "disable")) + is_set = 0; else break; } @@ -6648,7 +6682,8 @@ api_l2_flags (vat_main_t * vam) M (L2_FLAGS, mp); mp->sw_if_index = ntohl (sw_if_index); - mp->feature_bitmap = ntohl (feature_bitmap); + mp->feature_bitmap = ntohl (flags); + mp->is_set = is_set; S (mp); W (ret); @@ -12534,6 +12569,42 @@ api_want_ip6_nd_events (vat_main_t * vam) return ret; } +static int +api_want_l2_macs_events (vat_main_t * vam) +{ + 
unformat_input_t *line_input = vam->input; + vl_api_want_l2_macs_events_t *mp; + u8 enable_disable = 1; + u32 scan_delay = 0; + u32 max_macs_in_event = 0; + u32 learn_limit = 0; + int ret; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "learn-limit %d", &learn_limit)) + ; + else if (unformat (line_input, "scan-delay %d", &scan_delay)) + ; + else if (unformat (line_input, "max-entries %d", &max_macs_in_event)) + ; + else if (unformat (line_input, "disable")) + enable_disable = 0; + else + break; + } + + M (WANT_L2_MACS_EVENTS, mp); + mp->enable_disable = enable_disable; + mp->pid = htonl (getpid ()); + mp->learn_limit = htonl (learn_limit); + mp->scan_delay = (u8) scan_delay; + mp->max_macs_in_event = (u8) (max_macs_in_event / 10); + S (mp); + W (ret); + return ret; +} + static int api_input_acl_set_interface (vat_main_t * vam) { @@ -19831,7 +19902,7 @@ _(l2fib_add_del, \ _(l2fib_flush_bd, "bd_id ") \ _(l2fib_flush_int, " | sw_if_index ") \ _(l2_flags, \ - "sw_if | sw_if_index [learn] [forward] [uu-flood] [flood]\n") \ + "sw_if | sw_if_index [learn] [forward] [uu-flood] [flood] [arp-term] [disable]\n") \ _(bridge_flags, \ "bd_id [learn] [forward] [uu-flood] [flood] [arp-term] [disable]\n") \ _(tap_connect, \ @@ -19974,6 +20045,7 @@ _(input_acl_set_interface, \ " [l2-table ] [del]") \ _(want_ip4_arp_events, "address [del]") \ _(want_ip6_nd_events, "address [del]") \ +_(want_l2_macs_events, "[disable] [learn-limit ] [scan-delay ] [max-entries ]") \ _(ip_address_dump, "(ipv4 | ipv6) ( | sw_if_index )") \ _(ip_dump, "ipv4 | ipv6") \ _(ipsec_spd_add_del, "spd_id [del]") \ diff --git a/src/vnet/api_errno.h b/src/vnet/api_errno.h index 747c65e7..22522f34 100644 --- a/src/vnet/api_errno.h +++ b/src/vnet/api_errno.h @@ -112,8 +112,9 @@ _(BD_ALREADY_EXISTS, -119, "Bridge domain already exists") \ _(BD_IN_USE, -120, "Bridge domain has member interfaces") \ _(BD_NOT_MODIFIABLE, -121, "Bridge domain 0 can't be deleted/modified") \ _(BD_ID_EXCEED_MAX, -122, "Bridge domain ID exceed 16M limit") \ -_(UNSUPPORTED, -123, "Unsupported") \ -_(SUBIF_DOESNT_EXIST, -124, "Subinterface doesn't exist") +_(SUBIF_DOESNT_EXIST, -123, "Subinterface doesn't exist") \ +_(L2_MACS_EVENT_CLINET_PRESENT, -124, "Client already exist for L2 MACs events") \ +_(UNSUPPORTED, -125, "Unsupported") typedef enum { diff --git a/src/vnet/l2/l2.api b/src/vnet/l2/l2.api index bb3990c6..e508bfb5 100644 --- a/src/vnet/l2/l2.api +++ b/src/vnet/l2/l2.api @@ -133,12 +133,64 @@ autoreply define l2fib_add_del u8 bvi_mac; }; -/** \brief Set L2 flags request !!! 
TODO - need more info, feature bits in l2_input.h +/** \brief Register to recive L2 MAC events for leanred and aged MAC + Will also change MAC learn limit to L2LEARN_INFORM_LIMIT + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param learn_limit - MAC learn limit, 0 => default to 1000 + @param scan_delay - event scan delay in 10 msec unit, 0 => default to 100 msec + @param max_macs_in_event - in units of 10 mac entries, 0 => default to 100 entries + @param enable_disable - 1 => register for MAC events, 0 => cancel registration + @param pid - sender's pid +*/ +autoreply define want_l2_macs_events +{ + u32 client_index; + u32 context; + u32 learn_limit; + u8 scan_delay; + u8 max_macs_in_event; + u8 enable_disable; + u32 pid; +}; + +/** \brief Entry for learned or aged MAC in L2 MAC Events + @param sw_if_index - sw_if_index in the domain + @param mac_addr - mac_address + @is_del - 0 => newly learned MAC, 1 => aged out MAC +*/ +typeonly define mac_entry +{ + u32 sw_if_index; + u8 mac_addr[6]; + u8 is_del; + u8 spare; +}; + +/** \brief L2 MAC event for a list of learned or aged MACs + @param client_index - opaque cookie to identify the sender + @param pid - client pid registered to receive notification + @param n_macs - number of learned/aged MAC enntries + @param mac - array of learned/aged MAC entries +*/ +define l2_macs_event +{ + u32 client_index; + u32 pid; + u32 n_macs; + vl_api_mac_entry_t mac[n_macs]; +}; + +/** \brief Set interface L2 flags (such as L2_LEARN, L2_FWD, + L2_FLOOD, L2_UU_FLOOD, or L2_ARP_TERM bits). This can be used + to disable one or more of the features represented by the + flag bits on an interface to override what is set as default + for all interfaces in the bridge domain @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - interface @param is_set - if non-zero, set the bits, else clear them - @param feature_bitmap - non-zero bits to set or clear + @param feature_bitmap - non-zero bits (as above) to set or clear */ define l2_flags { @@ -149,9 +201,10 @@ define l2_flags u32 feature_bitmap; }; -/** \brief Set L2 bits response +/** \brief Set interface L2 flags response @param context - sender context, to match reply w/ request @param retval - return code for the set l2 bits request + @param resulting_feature_bitmap - the internal l2 feature bitmap after the request is implemented */ define l2_flags_reply { @@ -250,12 +303,12 @@ manual_print manual_endian define bridge_domain_details }; /** \brief Set bridge flags (such as L2_LEARN, L2_FWD, L2_FLOOD, - L2_UU_FLOOD, or L2_ARP_TERM) request + L2_UU_FLOOD, or L2_ARP_TERM bits) request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param bd_id - the bridge domain to set the flags for @param is_set - if non-zero, set the flags, else clear them - @param feature_bitmap - bits that are non-zero to set or clear + @param feature_bitmap - bits (as above) that are non-zero to set or clear */ define bridge_flags { @@ -269,7 +322,7 @@ define bridge_flags /** \brief Set bridge flags response @param context - sender context, to match reply w/ request @param retval - return code for the set bridge flags request - @param resulting_feature_bitmap - the feature bitmap value after the request is implemented + @param resulting_feature_bitmap - the internal L2 feature bitmap after the request is implemented */ 
define bridge_flags_reply { diff --git a/src/vnet/l2/l2_api.c b/src/vnet/l2/l2_api.c index a0b40d6d..c81cbad7 100644 --- a/src/vnet/l2/l2_api.c +++ b/src/vnet/l2/l2_api.c @@ -25,6 +25,7 @@ #include #include #include +#include #include @@ -55,6 +56,7 @@ _(L2FIB_FLUSH_ALL, l2fib_flush_all) \ _(L2FIB_FLUSH_INT, l2fib_flush_int) \ _(L2FIB_FLUSH_BD, l2fib_flush_bd) \ _(L2FIB_ADD_DEL, l2fib_add_del) \ +_(WANT_L2_MACS_EVENTS, want_l2_macs_events) \ _(L2_FLAGS, l2_flags) \ _(BRIDGE_DOMAIN_ADD_DEL, bridge_domain_add_del) \ _(BRIDGE_DOMAIN_DUMP, bridge_domain_dump) \ @@ -221,8 +223,8 @@ vl_api_l2fib_add_del_t_handler (vl_api_l2fib_add_del_t * mp) goto bad_sw_if_index; } } - u32 static_mac = mp->static_mac ? 1 : 0; - u32 bvi_mac = mp->bvi_mac ? 1 : 0; + u8 static_mac = mp->static_mac ? 1 : 0; + u8 bvi_mac = mp->bvi_mac ? 1 : 0; l2fib_add_fwd_entry (mac, bd_index, sw_if_index, static_mac, bvi_mac); } @@ -237,6 +239,58 @@ vl_api_l2fib_add_del_t_handler (vl_api_l2fib_add_del_t * mp) REPLY_MACRO (VL_API_L2FIB_ADD_DEL_REPLY); } +static void +vl_api_want_l2_macs_events_t_handler (vl_api_want_l2_macs_events_t * mp) +{ + int rv = 0; + vl_api_want_l2_macs_events_reply_t *rmp; + l2learn_main_t *lm = &l2learn_main; + l2fib_main_t *fm = &l2fib_main; + u32 pid = ntohl (mp->pid); + u32 learn_limit = ntohl (mp->learn_limit); + + if (mp->enable_disable) + { + if (lm->client_pid == 0) + { + lm->client_pid = pid; + lm->client_index = mp->client_index; + + if (mp->max_macs_in_event) + fm->max_macs_in_event = mp->max_macs_in_event * 10; + else + fm->max_macs_in_event = L2FIB_EVENT_MAX_MACS_DEFAULT; + + if (mp->scan_delay) + fm->event_scan_delay = (f64) (mp->scan_delay) * 10e-3; + else + fm->event_scan_delay = L2FIB_EVENT_SCAN_DELAY_DEFAULT; + + /* change learn limit and flush all learned MACs */ + if (learn_limit && (learn_limit < L2LEARN_DEFAULT_LIMIT)) + lm->global_learn_limit = learn_limit; + else + lm->global_learn_limit = L2FIB_EVENT_LEARN_LIMIT_DEFAULT; + + l2fib_flush_all_mac (vlib_get_main ()); + } + else if (lm->client_pid != pid) + { + rv = VNET_API_ERROR_L2_MACS_EVENT_CLINET_PRESENT; + goto exit; + } + } + else if (lm->client_pid) + { + lm->client_pid = 0; + lm->client_index = 0; + lm->global_learn_limit = L2LEARN_DEFAULT_LIMIT; + } + +exit: + REPLY_MACRO (VL_API_WANT_L2_MACS_EVENTS_REPLY); +} + static void vl_api_l2fib_flush_int_t_handler (vl_api_l2fib_flush_int_t * mp) { @@ -293,8 +347,25 @@ vl_api_l2_flags_t_handler (vl_api_l2_flags_t * mp) VALIDATE_SW_IF_INDEX (mp); u32 sw_if_index = ntohl (mp->sw_if_index); - u32 flags = ntohl (mp->feature_bitmap) & L2INPUT_VALID_MASK; - rbm = l2input_intf_bitmap_enable (sw_if_index, flags, mp->is_set); + u32 flags = ntohl (mp->feature_bitmap); + u32 bitmap = 0; + + if (flags & L2_LEARN) + bitmap |= L2INPUT_FEAT_LEARN; + + if (flags & L2_FWD) + bitmap |= L2INPUT_FEAT_FWD; + + if (flags & L2_FLOOD) + bitmap |= L2INPUT_FEAT_FLOOD; + + if (flags & L2_UU_FLOOD) + bitmap |= L2INPUT_FEAT_UU_FLOOD; + + if (flags & L2_ARP_TERM) + bitmap |= L2INPUT_FEAT_ARP_TERM; + + rbm = l2input_intf_bitmap_enable (sw_if_index, bitmap, mp->is_set); BAD_SW_IF_INDEX_LABEL; @@ -455,13 +526,13 @@ vl_api_bridge_flags_t_handler (vl_api_bridge_flags_t * mp) goto out; } - bd_set_flags (vm, bd_index, flags, mp->is_set); + u32 bitmap = bd_set_flags (vm, bd_index, flags, mp->is_set); out: /* *INDENT-OFF* */ REPLY_MACRO2(VL_API_BRIDGE_FLAGS_REPLY, ({ - rmp->resulting_feature_bitmap = ntohl(flags); + rmp->resulting_feature_bitmap = ntohl(bitmap); })); /* *INDENT-ON* */ } diff --git a/src/vnet/l2/l2_bd.c 
b/src/vnet/l2/l2_bd.c index a87d02f2..6e0db058 100644 --- a/src/vnet/l2/l2_bd.c +++ b/src/vnet/l2/l2_bd.c @@ -263,7 +263,7 @@ bd_set_flags (vlib_main_t * vm, u32 bd_index, u32 flags, u32 enable) bd_config->feature_bitmap &= ~feature_bitmap; } - return 0; + return bd_config->feature_bitmap; } /** @@ -328,12 +328,7 @@ bd_learn (vlib_main_t * vm, } /* set the bridge domain flag */ - if (bd_set_flags (vm, bd_index, L2_LEARN, enable)) - { - error = - clib_error_return (0, "bridge-domain id %d out of range", bd_index); - goto done; - } + bd_set_flags (vm, bd_index, L2_LEARN, enable); done: return error; @@ -397,12 +392,7 @@ bd_fwd (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) } /* set the bridge domain flag */ - if (bd_set_flags (vm, bd_index, L2_FWD, enable)) - { - error = - clib_error_return (0, "bridge-domain id %d out of range", bd_index); - goto done; - } + bd_set_flags (vm, bd_index, L2_FWD, enable); done: return error; @@ -468,12 +458,7 @@ bd_flood (vlib_main_t * vm, } /* set the bridge domain flag */ - if (bd_set_flags (vm, bd_index, L2_FLOOD, enable)) - { - error = - clib_error_return (0, "bridge-domain id %d out of range", bd_index); - goto done; - } + bd_set_flags (vm, bd_index, L2_FLOOD, enable); done: return error; @@ -538,12 +523,7 @@ bd_uu_flood (vlib_main_t * vm, } /* set the bridge domain flag */ - if (bd_set_flags (vm, bd_index, L2_UU_FLOOD, enable)) - { - error = - clib_error_return (0, "bridge-domain id %d out of range", bd_index); - goto done; - } + bd_set_flags (vm, bd_index, L2_UU_FLOOD, enable); done: return error; @@ -605,12 +585,7 @@ bd_arp_term (vlib_main_t * vm, enable = 0; /* set the bridge domain flag */ - if (bd_set_flags (vm, bd_index, L2_ARP_TERM, enable)) - { - error = - clib_error_return (0, "bridge-domain id %d out of range", bd_index); - goto done; - } + bd_set_flags (vm, bd_index, L2_ARP_TERM, enable); done: return error; diff --git a/src/vnet/l2/l2_fib.c b/src/vnet/l2/l2_fib.c index 7e59b098..8aa0ac29 100644 --- a/src/vnet/l2/l2_fib.c +++ b/src/vnet/l2/l2_fib.c @@ -31,6 +31,17 @@ #include +#include +#include + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + /** * @file * @brief Ethernet MAC Address FIB Table Management. 
@@ -117,6 +128,7 @@ show_l2fib (vlib_main_t * vm, int i, j, k; u8 verbose = 0; u8 raw = 0; + u8 learn = 0; u32 bd_id, bd_index = ~0; u8 now = (u8) (vlib_time_now (vm) / 60); u8 *s = 0; @@ -127,12 +139,18 @@ show_l2fib (vlib_main_t * vm, verbose = 1; else if (unformat (input, "bd_index %d", &bd_index)) verbose = 1; + else if (unformat (input, "learn")) + { + learn = 1; + verbose = 0; + } else if (unformat (input, "bd_id %d", &bd_id)) { uword *p = hash_get (bdm->bd_index_by_bd_id, bd_id); if (p) { - verbose = 1; + if (learn == 0) + verbose = 1; bd_index = p[0]; } else @@ -155,7 +173,7 @@ show_l2fib (vlib_main_t * vm, if (v->kvp[k].key == ~0ULL && v->kvp[k].value == ~0ULL) continue; - if (verbose && first_entry) + if ((verbose || learn) && first_entry) { first_entry = 0; vlib_cli_output (vm, @@ -168,13 +186,19 @@ show_l2fib (vlib_main_t * vm, key.raw = v->kvp[k].key; result.raw = v->kvp[k].value; - if (verbose + if ((verbose || learn) & ((bd_index >> 31) || (bd_index == key.fields.bd_index))) { + if (learn && result.fields.age_not) + { + total_entries++; + continue; /* skip provisioned macs */ + } + bd_config = vec_elt_at_index (l2input_main.bd_configs, key.fields.bd_index); - if (bd_config->mac_age && !result.fields.static_mac) + if (bd_config->mac_age && !result.fields.age_not) { i16 delta = now - result.fields.timestamp; delta += delta < 0 ? 256 : 0; @@ -206,9 +230,19 @@ show_l2fib (vlib_main_t * vm, if (total_entries == 0) vlib_cli_output (vm, "no l2fib entries"); else - vlib_cli_output (vm, - "%lld l2fib entries with %d learned (or non-static) entries", - total_entries, l2learn_main.global_learn_count); + { + l2learn_main_t *lm = &l2learn_main; + vlib_cli_output (vm, "L2FIB total/learned entries: %d/%d " + "Last scan time: %.4esec Learn limit: %d ", + total_entries, lm->global_learn_count, + msm->age_scan_duration, lm->global_learn_limit); + if (lm->client_pid) + vlib_cli_output (vm, "L2MAC events client PID: %d " + "Last e-scan time: %.4esec Delay: %.2esec " + "Max macs in event: %d", + lm->client_pid, msm->evt_scan_duration, + msm->event_scan_delay, msm->max_macs_in_event); + } if (raw) vlib_cli_output (vm, "Raw Hash Table:\n%U\n", @@ -242,7 +276,7 @@ show_l2fib (vlib_main_t * vm, /* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_l2fib_cli, static) = { .path = "show l2fib", - .short_help = "show l2fib [verbose | bd_id | bd_index | raw]", + .short_help = "show l2fib [verbose | learn | bd_id | bd_index | raw", .function = show_l2fib, }; /* *INDENT-ON* */ @@ -309,36 +343,39 @@ l2fib_cur_seq_num (u32 bd_index, u32 sw_if_index) */ void l2fib_add_entry (u64 mac, u32 bd_index, - u32 sw_if_index, u32 static_mac, u32 filter_mac, u32 bvi_mac) + u32 sw_if_index, u8 static_mac, u8 filter_mac, u8 bvi_mac) { l2fib_entry_key_t key; l2fib_entry_result_t result; __attribute__ ((unused)) u32 bucket_contents; - l2fib_main_t *mp = &l2fib_main; + l2fib_main_t *fm = &l2fib_main; + l2learn_main_t *lm = &l2learn_main; BVT (clib_bihash_kv) kv; /* set up key */ key.raw = l2fib_make_key ((u8 *) & mac, bd_index); + /* check if entry alread exist */ + if (BV (clib_bihash_search) (&fm->mac_table, &kv, &kv)) + { + /* decrement counter if overwriting a learned mac */ + result.raw = kv.value; + if ((result.fields.age_not == 0) && (lm->global_learn_count)) + lm->global_learn_count--; + } + /* set up result */ result.raw = 0; /* clear all fields */ result.fields.sw_if_index = sw_if_index; result.fields.static_mac = static_mac; result.fields.filter = filter_mac; result.fields.bvi = bvi_mac; - if (!static_mac) - result.fields.sn 
= l2fib_cur_seq_num (bd_index, sw_if_index); + result.fields.age_not = 1; /* no aging for provisioned entry */ kv.key = key.raw; kv.value = result.raw; - BV (clib_bihash_add_del) (&mp->mac_table, &kv, 1 /* is_add */ ); - - /* increment counter if dynamically learned mac */ - if (result.fields.static_mac == 0) - { - l2learn_main.global_learn_count++; - } + BV (clib_bihash_add_del) (&fm->mac_table, &kv, 1 /* is_add */ ); } /** @@ -630,13 +667,8 @@ l2fib_del_entry_by_key (u64 raw_key) result.raw = kv.value; /* decrement counter if dynamically learned mac */ - if (result.fields.static_mac == 0) - { - if (l2learn_main.global_learn_count > 0) - { - l2learn_main.global_learn_count--; - } - } + if ((result.fields.age_not == 0) && (l2learn_main.global_learn_count)) + l2learn_main.global_learn_count--; /* Remove entry from hash table */ BV (clib_bihash_add_del) (&mp->mac_table, &kv, 0 /* is_add */ ); @@ -910,111 +942,273 @@ BVT (clib_bihash) * get_mac_table (void) return &mp->mac_table; } +static_always_inline void * +allocate_mac_evt_buf (u32 client, u32 client_index) +{ + l2fib_main_t *fm = &l2fib_main; + vl_api_l2_macs_event_t *mp = vl_msg_api_alloc + (sizeof (*mp) + (fm->max_macs_in_event * sizeof (vl_api_mac_entry_t))); + mp->_vl_msg_id = htons (VL_API_L2_MACS_EVENT); + mp->pid = htonl (client); + mp->client_index = client_index; + return mp; +} + +static_always_inline f64 +l2fib_scan (vlib_main_t * vm, f64 start_time, u8 event_only) +{ + l2fib_main_t *fm = &l2fib_main; + l2learn_main_t *lm = &l2learn_main; + + BVT (clib_bihash) * h = &fm->mac_table; + int i, j, k; + f64 last_start = start_time; + f64 accum_t = 0; + f64 delta_t = 0; + u32 evt_idx = 0; + u32 learn_count = 0; + u32 client = lm->client_pid; + u32 cl_idx = lm->client_index; + vl_api_l2_macs_event_t *mp = 0; + unix_shared_memory_queue_t *q = 0; + + if (client) + { + mp = allocate_mac_evt_buf (client, cl_idx); + q = vl_api_client_index_to_input_queue (lm->client_index); + } + + for (i = 0; i < h->nbuckets; i++) + { + /* allow no more than 20us without a pause */ + delta_t = vlib_time_now (vm) - last_start; + if (delta_t > 20e-6) + { + vlib_process_suspend (vm, 100e-6); /* suspend for 100 us */ + last_start = vlib_time_now (vm); + accum_t += delta_t; + } + + if (i < (h->nbuckets - 3)) + { + BVT (clib_bihash_bucket) * b = &h->buckets[i + 3]; + CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, LOAD); + b = &h->buckets[i + 1]; + if (b->offset) + { + BVT (clib_bihash_value) * v = + BV (clib_bihash_get_value) (h, b->offset); + CLIB_PREFETCH (v, CLIB_CACHE_LINE_BYTES, LOAD); + } + } + + BVT (clib_bihash_bucket) * b = &h->buckets[i]; + if (b->offset == 0) + continue; + BVT (clib_bihash_value) * v = BV (clib_bihash_get_value) (h, b->offset); + for (j = 0; j < (1 << b->log2_pages); j++) + { + for (k = 0; k < BIHASH_KVP_PER_PAGE; k++) + { + if (v->kvp[k].key == ~0ULL && v->kvp[k].value == ~0ULL) + continue; + + l2fib_entry_key_t key = {.raw = v->kvp[k].key }; + l2fib_entry_result_t result = {.raw = v->kvp[k].value }; + + if (result.fields.age_not == 0) + learn_count++; + + if (PREDICT_FALSE (evt_idx >= fm->max_macs_in_event)) + { + /* evet message full, sent it and start a new one */ + if (q && (q->cursize < q->maxsize)) + { + mp->n_macs = htonl (evt_idx); + vl_msg_api_send_shmem (q, (u8 *) & mp); + mp = allocate_mac_evt_buf (client, cl_idx); + } + else + { + clib_warning ("MAC event to pid %d queue stuffed!" 
+ " %d MAC entries lost", client, evt_idx); + } + evt_idx = 0; + } + + if (client) + { + if (result.fields.lrn_evt) + { + /* copy mac entry to event msg */ + clib_memcpy (mp->mac[evt_idx].mac_addr, key.fields.mac, + 6); + mp->mac[evt_idx].is_del = 0; + mp->mac[evt_idx].sw_if_index = + htonl (result.fields.sw_if_index); + /* clear event bit and update mac entry */ + result.fields.lrn_evt = 0; + BVT (clib_bihash_kv) kv; + kv.key = key.raw; + kv.value = result.raw; + BV (clib_bihash_add_del) (&fm->mac_table, &kv, 1); + evt_idx++; + continue; /* skip aging */ + } + } + + if (event_only || result.fields.age_not) + continue; /* skip aging - static_mac alsways age_not */ + + /* start aging processing */ + u32 bd_index = key.fields.bd_index; + u32 sw_if_index = result.fields.sw_if_index; + u16 sn = l2fib_cur_seq_num (bd_index, sw_if_index).as_u16; + if (result.fields.sn.as_u16 != sn) + goto age_out; /* stale mac */ + + l2_bridge_domain_t *bd_config = + vec_elt_at_index (l2input_main.bd_configs, bd_index); + + if (bd_config->mac_age == 0) + continue; /* skip aging */ + + i16 delta = (u8) (start_time / 60) - result.fields.timestamp; + delta += delta < 0 ? 256 : 0; + + if (delta < bd_config->mac_age) + continue; /* still valid */ + + age_out: + if (client) + { + /* copy mac entry to event msg */ + clib_memcpy (mp->mac[evt_idx].mac_addr, key.fields.mac, 6); + mp->mac[evt_idx].is_del = 1; + mp->mac[evt_idx].sw_if_index = + htonl (result.fields.sw_if_index); + evt_idx++; + } + /* delete mac entry */ + BVT (clib_bihash_kv) kv; + kv.key = key.raw; + BV (clib_bihash_add_del) (&fm->mac_table, &kv, 0); + learn_count--; + } + v++; + } + } + + /* keep learn count consistent */ + l2learn_main.global_learn_count = learn_count; + + if (mp) + { + /* send any outstanding mac event message else free message buffer */ + if (evt_idx) + { + if (q && (q->cursize < q->maxsize)) + { + mp->n_macs = htonl (evt_idx); + vl_msg_api_send_shmem (q, (u8 *) & mp); + } + else + { + clib_warning ("MAC event to pid %d queue stuffed!" 
+ " %d MAC entries lost", client, evt_idx); + vl_msg_api_free (mp); + } + } + else + vl_msg_api_free (mp); + } + return delta_t + accum_t; +} + +/* Type of scan */ +#define SCAN_MAC_AGE 0 +#define SCAN_MAC_EVENT 1 + +/* Maximum f64 value */ +#define TIME_MAX (1.7976931348623157e+308) + static uword l2fib_mac_age_scanner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) { uword event_type, *event_data = 0; - l2fib_main_t *msm = &l2fib_main; + l2fib_main_t *fm = &l2fib_main; + l2learn_main_t *lm = &l2learn_main; bool enabled = 0; - f64 start_time, last_run_duration = 0, t; + bool scan = SCAN_MAC_AGE; /* SCAN_FOR_AGE or SCAN_FOR_EVENT */ + f64 start_time, next_age_scan_time = TIME_MAX; while (1) { if (enabled) - vlib_process_wait_for_event_or_clock (vm, 60 - last_run_duration); + { + if (lm->client_pid) /* mac event client waiting */ + vlib_process_wait_for_event_or_clock (vm, fm->event_scan_delay); + else /* agin only */ + { + f64 t = next_age_scan_time - vlib_time_now (vm); + if (t < fm->event_scan_delay) + t = fm->event_scan_delay; + vlib_process_wait_for_event_or_clock (vm, t); + } + } else vlib_process_wait_for_event (vm); event_type = vlib_process_get_events (vm, &event_data); vec_reset_length (event_data); + start_time = vlib_time_now (vm); + switch (event_type) { - case ~0: + case ~0: /* timer expired */ + if ((lm->client_pid == 0) || (start_time >= next_age_scan_time)) + { + scan = SCAN_MAC_AGE; + if (enabled) + next_age_scan_time = start_time + L2FIB_AGE_SCAN_INTERVAL; + else + next_age_scan_time = TIME_MAX; + } + else + scan = SCAN_MAC_EVENT; break; + case L2_MAC_AGE_PROCESS_EVENT_START: + scan = SCAN_MAC_AGE; + next_age_scan_time = start_time + L2FIB_AGE_SCAN_INTERVAL; enabled = 1; break; + case L2_MAC_AGE_PROCESS_EVENT_STOP: enabled = 0; + next_age_scan_time = TIME_MAX; + l2fib_main.age_scan_duration = 0; + l2fib_main.evt_scan_duration = 0; continue; + case L2_MAC_AGE_PROCESS_EVENT_ONE_PASS: - enabled = 0; + scan = SCAN_MAC_AGE; + if (enabled) + next_age_scan_time = start_time + L2FIB_AGE_SCAN_INTERVAL; + else + next_age_scan_time = TIME_MAX; break; + default: ASSERT (0); } - last_run_duration = start_time = vlib_time_now (vm); - BVT (clib_bihash) * h = &msm->mac_table; - int i, j, k; - for (i = 0; i < h->nbuckets; i++) - { - /* Allow no more than 10us without a pause */ - t = vlib_time_now (vm); - if (t > start_time + 10e-6) - { - vlib_process_suspend (vm, 100e-6); /* suspend for 100 us */ - start_time = vlib_time_now (vm); - } - - if (i < (h->nbuckets - 3)) - { - BVT (clib_bihash_bucket) * b = &h->buckets[i + 3]; - CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, LOAD); - b = &h->buckets[i + 1]; - if (b->offset) - { - BVT (clib_bihash_value) * v = - BV (clib_bihash_get_value) (h, b->offset); - CLIB_PREFETCH (v, CLIB_CACHE_LINE_BYTES, LOAD); - } - } - - BVT (clib_bihash_bucket) * b = &h->buckets[i]; - if (b->offset == 0) - continue; - BVT (clib_bihash_value) * v = - BV (clib_bihash_get_value) (h, b->offset); - for (j = 0; j < (1 << b->log2_pages); j++) - { - for (k = 0; k < BIHASH_KVP_PER_PAGE; k++) - { - if (v->kvp[k].key == ~0ULL && v->kvp[k].value == ~0ULL) - continue; - - l2fib_entry_key_t key = {.raw = v->kvp[k].key }; - l2fib_entry_result_t result = {.raw = v->kvp[k].value }; - - if (result.fields.static_mac) - continue; - - u32 bd_index = key.fields.bd_index; - u32 sw_if_index = result.fields.sw_if_index; - u16 sn = l2fib_cur_seq_num (bd_index, sw_if_index).as_u16; - if (result.fields.sn.as_u16 != sn) - { - l2fib_del_entry_by_key (key.raw); - continue; - } - 
l2_bridge_domain_t *bd_config = - vec_elt_at_index (l2input_main.bd_configs, bd_index); - - if (bd_config->mac_age == 0) - continue; - - i16 delta = - (u8) (start_time / 60) - result.fields.timestamp; - delta += delta < 0 ? 256 : 0; - - if (delta > bd_config->mac_age) - l2fib_del_entry_by_key (key.raw); - } - v++; - } - } - last_run_duration = vlib_time_now (vm) - last_run_duration; + if (scan == SCAN_MAC_EVENT) + l2fib_main.evt_scan_duration = l2fib_scan (vm, start_time, 1); + else + l2fib_main.age_scan_duration = l2fib_scan (vm, start_time, 0); } return 0; } diff --git a/src/vnet/l2/l2_fib.h b/src/vnet/l2/l2_fib.h index ee6f0dc5..49a8b5b6 100644 --- a/src/vnet/l2/l2_fib.h +++ b/src/vnet/l2/l2_fib.h @@ -27,6 +27,18 @@ #define L2FIB_NUM_BUCKETS (64 * 1024) #define L2FIB_MEMORY_SIZE (256<<20) +/* Ager scan interval is 1 minute for aging */ +#define L2FIB_AGE_SCAN_INTERVAL (60.0) + +/* MAC event scan delay is 100 msec unless specified by MAC event client */ +#define L2FIB_EVENT_SCAN_DELAY_DEFAULT (0.1) + +/* Max MACs in a event message is 100 unless specified by MAC event client */ +#define L2FIB_EVENT_MAX_MACS_DEFAULT (100) + +/* MAC event learn limit is 1000 unless specified by MAC event client */ +#define L2FIB_EVENT_LEARN_LIMIT_DEFAULT (1000) + typedef struct { @@ -36,6 +48,16 @@ typedef struct /* per swif vector of sequence number for interface based flush of MACs */ u8 *swif_seq_num; + /* last event or ager scan duration */ + f64 evt_scan_duration; + f64 age_scan_duration; + + /* delay between event scans, default to 100 msec */ + f64 event_scan_delay; + + /* max macs in evet message, default to 100 entries */ + u32 max_macs_in_event; + /* convenience variables */ vlib_main_t *vlib_main; vnet_main_t *vnet_main; @@ -89,12 +111,15 @@ typedef struct { struct { - u32 sw_if_index; /* output sw_if_index (L3 interface if bvi==1) */ + u32 sw_if_index; /* output sw_if_index (L3 intf if bvi==1) */ - u8 static_mac:1; /* static mac, no dataplane learning */ + u8 static_mac:1; /* static mac, no MAC move */ + u8 age_not:1; /* not subject to age */ u8 bvi:1; /* mac is for a bridged virtual interface */ u8 filter:1; /* drop packets to/from this mac */ - u8 unused1:5; + u8 lrn_evt:1; /* MAC learned to be sent in L2 MAC event */ + u8 unused:3; + u8 timestamp; /* timestamp for aging */ l2fib_seq_num_t sn; /* bd/int seq num */ } fields; @@ -348,11 +373,11 @@ void l2fib_clear_table (void); void l2fib_add_entry (u64 mac, u32 bd_index, - u32 sw_if_index, u32 static_mac, u32 drop_mac, u32 bvi_mac); + u32 sw_if_index, u8 static_mac, u8 drop_mac, u8 bvi_mac); static inline void -l2fib_add_fwd_entry (u64 mac, u32 bd_index, u32 sw_if_index, u32 static_mac, - u32 bvi_mac) +l2fib_add_fwd_entry (u64 mac, u32 bd_index, u32 sw_if_index, u8 static_mac, + u8 bvi_mac) { l2fib_add_entry (mac, bd_index, sw_if_index, static_mac, 0, bvi_mac); } diff --git a/src/vnet/l2/l2_fwd.c b/src/vnet/l2/l2_fwd.c index 8140728b..2bb7307c 100644 --- a/src/vnet/l2/l2_fwd.c +++ b/src/vnet/l2/l2_fwd.c @@ -141,8 +141,9 @@ l2fwd_process (vlib_main_t * vm, vnet_buffer (b0)->sw_if_index[VLIB_TX] = result0->fields.sw_if_index; *next0 = L2FWD_NEXT_L2_OUTPUT; int l2fib_seq_num_valid = 1; + /* check l2fib seq num for stale entries */ - if (!result0->fields.static_mac) + if (!result0->fields.age_not) { l2fib_seq_num_t in_sn = {.as_u16 = vnet_buffer (b0)->l2.l2fib_sn }; l2fib_seq_num_t expected_sn = { diff --git a/src/vnet/l2/l2_learn.c b/src/vnet/l2/l2_learn.c index 65406292..47c036b0 100644 --- a/src/vnet/l2/l2_learn.c +++ b/src/vnet/l2/l2_learn.c 
@@ -123,11 +123,9 @@ l2learn_process (vlib_node_runtime_t * node, /* Check mac table lookup result */ if (PREDICT_TRUE (result0->fields.sw_if_index == sw_if_index0)) { - /* - * The entry was in the table, and the sw_if_index matched, the normal case - */ + /* Entry in L2FIB with matching sw_if_index matched - normal fast path */ counter_base[L2LEARN_ERROR_HIT] += 1; - int update = !result0->fields.static_mac && + int update = !result0->fields.age_not && /* static_mac always age_not */ (result0->fields.timestamp != timestamp || result0->fields.sn.as_u16 != vnet_buffer (b0)->l2.l2fib_sn); @@ -136,10 +134,10 @@ l2learn_process (vlib_node_runtime_t * node, } else if (result0->raw == ~0) { - /* The entry was not in table, so add it */ + /* Entry not in L2FIB - add it */ counter_base[L2LEARN_ERROR_MISS] += 1; - if (msm->global_learn_count == msm->global_learn_limit) + if (msm->global_learn_count >= msm->global_learn_limit) { /* * Global limit reached. Do not learn the mac but forward the packet. @@ -149,15 +147,22 @@ l2learn_process (vlib_node_runtime_t * node, return; } + /* Do not learn if mac is 0 */ + l2fib_entry_key_t key = *key0; + key.fields.bd_index = 0; + if (key.raw == 0) + return; + /* It is ok to learn */ msm->global_learn_count++; result0->raw = 0; /* clear all fields */ result0->fields.sw_if_index = sw_if_index0; + result0->fields.lrn_evt = (msm->client_pid != 0); cached_key->raw = ~0; /* invalidate the cache */ } else { - /* The entry was in the table, but with the wrong sw_if_index mapping (mac move) */ + /* Entry in L2FIB with different sw_if_index - mac move or filter */ if (result0->fields.filter) { ASSERT (result0->fields.sw_if_index == ~0); @@ -167,8 +172,6 @@ l2learn_process (vlib_node_runtime_t * node, return; } - counter_base[L2LEARN_ERROR_MAC_MOVE] += 1; - if (result0->fields.static_mac) { /* @@ -185,6 +188,13 @@ l2learn_process (vlib_node_runtime_t * node, * TODO: check global/bridge domain/interface learn limits */ result0->fields.sw_if_index = sw_if_index0; + if (result0->fields.age_not) /* The mac was provisioned */ + { + msm->global_learn_count++; + result0->fields.age_not = 0; + } + result0->fields.lrn_evt = (msm->client_pid != 0); + counter_base[L2LEARN_ERROR_MAC_MOVE] += 1; } /* Update the entry */ @@ -479,7 +489,7 @@ VLIB_NODE_FUNCTION_MULTIARCH (l2learn_node, l2learn_node_fn) * Set the default number of dynamically learned macs to the number * of buckets. 
*/ - mp->global_learn_limit = L2FIB_NUM_BUCKETS * 16; + mp->global_learn_limit = L2LEARN_DEFAULT_LIMIT; return 0; } diff --git a/src/vnet/l2/l2_learn.h b/src/vnet/l2/l2_learn.h index 0d95de04..000ab59e 100644 --- a/src/vnet/l2/l2_learn.h +++ b/src/vnet/l2/l2_learn.h @@ -34,6 +34,10 @@ typedef struct /* maximum number of dynamically learned mac entries */ u32 global_learn_limit; + /* client waiting for L2 MAC events for learned and aged MACs */ + u32 client_pid; + u32 client_index; + /* Next nodes for each feature */ u32 feat_next_node_index[32]; @@ -42,6 +46,7 @@ typedef struct vnet_main_t *vnet_main; } l2learn_main_t; +#define L2LEARN_DEFAULT_LIMIT (L2FIB_NUM_BUCKETS * 16) l2learn_main_t l2learn_main; diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c index 520361f6..a57799cb 100644 --- a/src/vpp/api/custom_dump.c +++ b/src/vpp/api/custom_dump.c @@ -373,10 +373,19 @@ vl_api_l2_flags_t_print (vl_api_l2_flags_t * mp, void *handle) s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); -#define _(a,b) \ - if (flags & L2INPUT_FEAT_ ## a) s = format (s, #a " "); - foreach_l2input_feat; -#undef _ + if (flags & L2_LEARN) + s = format (s, "learn "); + if (flags & L2_FWD) + s = format (s, "forward "); + if (flags & L2_FLOOD) + s = format (s, "flood "); + if (flags & L2_UU_FLOOD) + s = format (s, "uu-flood "); + if (flags & L2_ARP_TERM) + s = format (s, "arp-term "); + + if (mp->is_set == 0) + s = format (s, "clear "); FINISH; } @@ -1783,6 +1792,21 @@ static void *vl_api_want_ip6_nd_events_t_print FINISH; } +static void *vl_api_want_l2_macs_events_t_print + (vl_api_want_l2_macs_events_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: want_l2_macs_events "); + s = format (s, "learn-limit %d ", ntohl (mp->learn_limit)); + s = format (s, "scan-delay %d ", (u32) mp->scan_delay); + s = format (s, "max-entries %d ", (u32) mp->max_macs_in_event * 10); + if (mp->enable_disable == 0) + s = format (s, "disable"); + + FINISH; +} + static void *vl_api_input_acl_set_interface_t_print (vl_api_input_acl_set_interface_t * mp, void *handle) { @@ -3066,6 +3090,7 @@ _(VXLAN_GPE_TUNNEL_DUMP, vxlan_gpe_tunnel_dump) \ _(INTERFACE_NAME_RENUMBER, interface_name_renumber) \ _(WANT_IP4_ARP_EVENTS, want_ip4_arp_events) \ _(WANT_IP6_ND_EVENTS, want_ip6_nd_events) \ +_(WANT_L2_MACS_EVENTS, want_l2_macs_events) \ _(INPUT_ACL_SET_INTERFACE, input_acl_set_interface) \ _(IP_ADDRESS_DUMP, ip_address_dump) \ _(IP_DUMP, ip_dump) \ -- cgit 1.2.3-korg From a07bd708002a9c3d3c584f0d692deed1a758b517 Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Mon, 7 Aug 2017 07:53:49 -0700 Subject: Dedicated SW Interface Event Change-Id: I06a10a4291e61aec3f1396d2514ed6fe3901897a Signed-off-by: Neale Ranns Signed-off-by: Marek Gradzki --- src/vat/api_format.c | 17 ++++++----------- src/vnet/devices/virtio/vhost_user_api.c | 6 +++--- src/vnet/interface.api | 19 ++++++++++++++++--- src/vnet/interface_api.c | 8 ++++---- src/vnet/unix/tap_api.c | 6 +++--- .../CallbackJVppFacadeNotificationExample.java | 2 +- .../examples/CallbackNotificationApiExample.java | 18 ++++++------------ .../core/examples/FutureApiNotificationExample.java | 2 +- .../fd/vpp/jvpp/core/examples/NotificationUtils.java | 5 ++--- src/vpp-api/java/jvpp/gen/jvppgen/util.py | 3 +-- src/vpp/api/custom_dump.c | 20 ++++++++++++++++++++ test/vpp_papi_provider.py | 9 ++------- 12 files changed, 65 insertions(+), 50 deletions(-) (limited to 'src/vpp/api') diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 009cf173..ddcd5621 100644 --- 
a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -972,8 +972,8 @@ static void vl_api_sw_interface_details_t_handler_json } #if VPP_API_TEST_BUILTIN == 0 -static void vl_api_sw_interface_set_flags_t_handler - (vl_api_sw_interface_set_flags_t * mp) +static void vl_api_sw_interface_event_t_handler + (vl_api_sw_interface_event_t * mp) { vat_main_t *vam = &vat_main; if (vam->interface_event_display) @@ -984,8 +984,8 @@ static void vl_api_sw_interface_set_flags_t_handler } #endif -static void vl_api_sw_interface_set_flags_t_handler_json - (vl_api_sw_interface_set_flags_t * mp) +static void vl_api_sw_interface_event_t_handler_json + (vl_api_sw_interface_event_t * mp) { /* JSON output not supported */ } @@ -5026,7 +5026,7 @@ _(LLDP_CONFIG_REPLY, lldp_config_reply) \ _(SW_INTERFACE_SET_LLDP_REPLY, sw_interface_set_lldp_reply) #define foreach_standalone_reply_msg \ -_(SW_INTERFACE_SET_FLAGS, sw_interface_set_flags) \ +_(SW_INTERFACE_EVENT, sw_interface_event) \ _(VNET_INTERFACE_SIMPLE_COUNTERS, vnet_interface_simple_counters) \ _(VNET_INTERFACE_COMBINED_COUNTERS, vnet_interface_combined_counters) \ _(VNET_IP4_FIB_COUNTERS, vnet_ip4_fib_counters) \ @@ -5772,7 +5772,7 @@ api_sw_interface_set_flags (vat_main_t * vam) vl_api_sw_interface_set_flags_t *mp; u32 sw_if_index; u8 sw_if_index_set = 0; - u8 admin_up = 0, link_up = 0; + u8 admin_up = 0; int ret; /* Parse args required to build the message */ @@ -5782,10 +5782,6 @@ api_sw_interface_set_flags (vat_main_t * vam) admin_up = 1; else if (unformat (i, "admin-down")) admin_up = 0; - else if (unformat (i, "link-up")) - link_up = 1; - else if (unformat (i, "link-down")) - link_up = 0; else if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index)) sw_if_index_set = 1; @@ -5805,7 +5801,6 @@ api_sw_interface_set_flags (vat_main_t * vam) M (SW_INTERFACE_SET_FLAGS, mp); mp->sw_if_index = ntohl (sw_if_index); mp->admin_up_down = admin_up; - mp->link_up_down = link_up; /* send it... */ S (mp); diff --git a/src/vnet/devices/virtio/vhost_user_api.c b/src/vnet/devices/virtio/vhost_user_api.c index 8dbd032b..3f0aac9e 100644 --- a/src/vnet/devices/virtio/vhost_user_api.c +++ b/src/vnet/devices/virtio/vhost_user_api.c @@ -52,11 +52,11 @@ _(SW_INTERFACE_VHOST_USER_DUMP, sw_interface_vhost_user_dump) * WARNING: replicated pending api refactor completion */ static void -send_sw_interface_flags_deleted (vpe_api_main_t * am, +send_sw_interface_event_deleted (vpe_api_main_t * am, unix_shared_memory_queue_t * q, u32 sw_if_index) { - vl_api_sw_interface_set_flags_t *mp; + vl_api_sw_interface_event_t *mp; mp = vl_msg_api_alloc (sizeof (*mp)); memset (mp, 0, sizeof (*mp)); @@ -143,7 +143,7 @@ vl_api_delete_vhost_user_if_t_handler (vl_api_delete_vhost_user_if_t * mp) return; vnet_clear_sw_interface_tag (vnm, sw_if_index); - send_sw_interface_flags_deleted (vam, q, sw_if_index); + send_sw_interface_event_deleted (vam, q, sw_if_index); } } diff --git a/src/vnet/interface.api b/src/vnet/interface.api index 14ff6d5a..a1890706 100644 --- a/src/vnet/interface.api +++ b/src/vnet/interface.api @@ -4,7 +4,6 @@ @param sw_if_index - index of the interface to set flags on @param admin_up_down - set the admin state, 1 = up, 0 = down @param link_up_down - Oper state sent on change event, not used in config. 
- @param deleted - interface was deleted */ autoreply define sw_interface_set_flags { @@ -13,8 +12,6 @@ autoreply define sw_interface_set_flags u32 sw_if_index; /* 1 = up, 0 = down */ u8 admin_up_down; - u8 link_up_down; - u8 deleted; }; /** \brief Set interface MTU @@ -31,6 +28,22 @@ autoreply define sw_interface_set_mtu u16 mtu; }; +/** \brief Interface Event generated by want_interface_events + @param context - sender context, to match reply w/ request + @param sw_if_index - index of the interface of the event + @param admin_up_down - The administrative state; 1 = up, 0 = down + @param link_up_down - The operational state; 1 = up, 0 = down + @param deleted - interface was deleted +*/ +define sw_interface_event +{ + u32 context; + u32 sw_if_index; + u8 admin_up_down; + u8 link_up_down; + u8 deleted; +}; + /** \brief Register for interface events @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/interface_api.c b/src/vnet/interface_api.c index ab0b255a..c56fef68 100644 --- a/src/vnet/interface_api.c +++ b/src/vnet/interface_api.c @@ -571,18 +571,18 @@ event_data_cmp (void *a1, void *a2) } static void -send_sw_interface_flags (vpe_api_main_t * am, +send_sw_interface_event (vpe_api_main_t * am, unix_shared_memory_queue_t * q, vnet_sw_interface_t * swif) { - vl_api_sw_interface_set_flags_t *mp; + vl_api_sw_interface_event_t *mp; vnet_main_t *vnm = am->vnet_main; vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, swif->sw_if_index); mp = vl_msg_api_alloc (sizeof (*mp)); memset (mp, 0, sizeof (*mp)); - mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_SET_FLAGS); + mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_EVENT); mp->sw_if_index = ntohl (swif->sw_if_index); mp->admin_up_down = (swif->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ? 
1 : 0; @@ -638,7 +638,7 @@ link_state_process (vlib_main_t * vm, event_data[i])) { swif = vnet_get_sw_interface (vnm, event_data[i]); - send_sw_interface_flags (vam, q, swif); + send_sw_interface_event (vam, q, swif); } } })); diff --git a/src/vnet/unix/tap_api.c b/src/vnet/unix/tap_api.c index 9b8d52a6..7e812c4f 100644 --- a/src/vnet/unix/tap_api.c +++ b/src/vnet/unix/tap_api.c @@ -59,11 +59,11 @@ _(SW_INTERFACE_TAP_DUMP, sw_interface_tap_dump) * WARNING: replicated pending api refactor completion */ static void -send_sw_interface_flags_deleted (vpe_api_main_t * am, +send_sw_interface_event_deleted (vpe_api_main_t * am, unix_shared_memory_queue_t * q, u32 sw_if_index) { - vl_api_sw_interface_set_flags_t *mp; + vl_api_sw_interface_event_t *mp; mp = vl_msg_api_alloc (sizeof (*mp)); memset (mp, 0, sizeof (*mp)); @@ -196,7 +196,7 @@ vl_api_tap_delete_t_handler (vl_api_tap_delete_t * mp) vl_msg_api_send_shmem (q, (u8 *) & rmp); if (!rv) - send_sw_interface_flags_deleted (vam, q, sw_if_index); + send_sw_interface_event_deleted (vam, q, sw_if_index); } static void diff --git a/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/CallbackJVppFacadeNotificationExample.java b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/CallbackJVppFacadeNotificationExample.java index b8b108b6..308dad9f 100644 --- a/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/CallbackJVppFacadeNotificationExample.java +++ b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/CallbackJVppFacadeNotificationExample.java @@ -36,7 +36,7 @@ public class CallbackJVppFacadeNotificationExample { System.out.println("Successfully connected to VPP"); final AutoCloseable notificationListenerReg = - jvppCallbackFacade.getNotificationRegistry().registerSwInterfaceSetFlagsNotificationCallback( + jvppCallbackFacade.getNotificationRegistry().registerSwInterfaceEventNotificationCallback( NotificationUtils::printNotification ); diff --git a/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/CallbackNotificationApiExample.java b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/CallbackNotificationApiExample.java index 6ee2de31..7d56b7ea 100644 --- a/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/CallbackNotificationApiExample.java +++ b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/CallbackNotificationApiExample.java @@ -26,10 +26,9 @@ import io.fd.vpp.jvpp.JVppRegistry; import io.fd.vpp.jvpp.JVppRegistryImpl; import io.fd.vpp.jvpp.VppCallbackException; import io.fd.vpp.jvpp.core.JVppCoreImpl; -import io.fd.vpp.jvpp.core.callback.SwInterfaceSetFlagsCallback; -import io.fd.vpp.jvpp.core.callback.SwInterfaceSetFlagsNotificationCallback; +import io.fd.vpp.jvpp.core.callback.SwInterfaceEventNotificationCallback; import io.fd.vpp.jvpp.core.callback.WantInterfaceEventsCallback; -import io.fd.vpp.jvpp.core.dto.SwInterfaceSetFlagsNotification; +import io.fd.vpp.jvpp.core.dto.SwInterfaceEventNotification; import io.fd.vpp.jvpp.core.dto.SwInterfaceSetFlagsReply; import io.fd.vpp.jvpp.core.dto.WantInterfaceEventsReply; @@ -65,12 +64,12 @@ public class CallbackNotificationApiExample { testCallbackApi(); } - private static class TestCallback implements SwInterfaceSetFlagsNotificationCallback, - WantInterfaceEventsCallback, SwInterfaceSetFlagsCallback { + private static class TestCallback implements SwInterfaceEventNotificationCallback, + WantInterfaceEventsCallback { @Override - public void onSwInterfaceSetFlagsNotification( - final SwInterfaceSetFlagsNotification msg) { + public void 
onSwInterfaceEventNotification( + final SwInterfaceEventNotification msg) { printNotification(msg); } @@ -79,11 +78,6 @@ public class CallbackNotificationApiExample { System.out.println("Interface notification stream updated"); } - @Override - public void onSwInterfaceSetFlagsReply(final SwInterfaceSetFlagsReply swInterfaceSetFlagsReply) { - System.out.println("Interface flags set successfully"); - } - @Override public void onError(VppCallbackException ex) { System.out.printf("Received onError exception in getNodeIndexCallback: call=%s, reply=%d, context=%d%n", diff --git a/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/FutureApiNotificationExample.java b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/FutureApiNotificationExample.java index f445dcc8..7460401e 100644 --- a/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/FutureApiNotificationExample.java +++ b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/FutureApiNotificationExample.java @@ -33,7 +33,7 @@ public class FutureApiNotificationExample { final FutureJVppCoreFacade jvppFacade = new FutureJVppCoreFacade(registry, new JVppCoreImpl()); final AutoCloseable notificationListenerReg = jvppFacade.getNotificationRegistry() - .registerSwInterfaceSetFlagsNotificationCallback(NotificationUtils::printNotification)) { + .registerSwInterfaceEventNotificationCallback(NotificationUtils::printNotification)) { System.out.println("Successfully connected to VPP"); jvppFacade.wantInterfaceEvents(getEnableInterfaceNotificationsReq()).toCompletableFuture().get(); System.out.println("Interface events started"); diff --git a/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/NotificationUtils.java b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/NotificationUtils.java index 7791cafe..d3f9dd2c 100644 --- a/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/NotificationUtils.java +++ b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/NotificationUtils.java @@ -18,14 +18,14 @@ package io.fd.vpp.jvpp.core.examples; import java.io.PrintStream; import io.fd.vpp.jvpp.core.dto.SwInterfaceSetFlags; -import io.fd.vpp.jvpp.core.dto.SwInterfaceSetFlagsNotification; +import io.fd.vpp.jvpp.core.dto.SwInterfaceEventNotification; import io.fd.vpp.jvpp.core.dto.WantInterfaceEvents; final class NotificationUtils { private NotificationUtils() {} - static PrintStream printNotification(final SwInterfaceSetFlagsNotification msg) { + static PrintStream printNotification(final SwInterfaceEventNotification msg) { return System.out.printf("Received interface notification: ifc: %s%n", msg); } @@ -33,7 +33,6 @@ final class NotificationUtils { final SwInterfaceSetFlags swInterfaceSetFlags = new SwInterfaceSetFlags(); swInterfaceSetFlags.swIfIndex = 0; swInterfaceSetFlags.adminUpDown = 1; - swInterfaceSetFlags.deleted = 0; return swInterfaceSetFlags; } diff --git a/src/vpp-api/java/jvpp/gen/jvppgen/util.py b/src/vpp-api/java/jvpp/gen/jvppgen/util.py index 947fc31d..42394419 100644 --- a/src/vpp-api/java/jvpp/gen/jvppgen/util.py +++ b/src/vpp-api/java/jvpp/gen/jvppgen/util.py @@ -161,7 +161,6 @@ unconventional_naming_rep_req = { # # FIXME no convention in the naming of events (notifications) in vpe.api notifications_message_suffixes = ("event", "counters") -notification_messages_reused = ["sw_interface_set_flags"] # messages that must be ignored. 
These messages are INSUFFICIENTLY marked as disabled in vpe.api # FIXME @@ -170,7 +169,7 @@ ignored_messages = [] def is_notification(name): """ Returns true if the structure is a notification regardless of its no other use """ - return is_just_notification(name) or name.lower() in notification_messages_reused + return is_just_notification(name) def is_just_notification(name): diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c index a57799cb..0342476a 100644 --- a/src/vpp/api/custom_dump.c +++ b/src/vpp/api/custom_dump.c @@ -103,6 +103,22 @@ static void *vl_api_sw_interface_set_flags_t_print s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + if (mp->admin_up_down) + s = format (s, "admin-up "); + else + s = format (s, "admin-down "); + + FINISH; +} + +static void *vl_api_sw_interface_event_t_print + (vl_api_sw_interface_event_t * mp, void *handle) +{ + u8 *s; + s = format (0, "SCRIPT: sw_interface_event "); + + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + if (mp->admin_up_down) s = format (s, "admin-up "); else @@ -113,6 +129,9 @@ static void *vl_api_sw_interface_set_flags_t_print else s = format (s, "link-down"); + if (mp->deleted) + s = format (s, " deleted"); + FINISH; } @@ -3010,6 +3029,7 @@ foreach_custom_print_no_arg_function _(CREATE_LOOPBACK, create_loopback) \ _(CREATE_LOOPBACK_INSTANCE, create_loopback_instance) \ _(SW_INTERFACE_SET_FLAGS, sw_interface_set_flags) \ +_(SW_INTERFACE_EVENT, sw_interface_event) \ _(SW_INTERFACE_ADD_DEL_ADDRESS, sw_interface_add_del_address) \ _(SW_INTERFACE_SET_TABLE, sw_interface_set_table) \ _(SW_INTERFACE_SET_MPLS_ENABLE, sw_interface_set_mpls_enable) \ diff --git a/test/vpp_papi_provider.py b/test/vpp_papi_provider.py index 4d017c1f..c99d4583 100644 --- a/test/vpp_papi_provider.py +++ b/test/vpp_papi_provider.py @@ -550,21 +550,16 @@ class VppPapiProvider(object): 'tag1': tag1, 'tag2': tag2}) - def sw_interface_set_flags(self, sw_if_index, admin_up_down, - link_up_down=0, deleted=0): + def sw_interface_set_flags(self, sw_if_index, admin_up_down): """ :param admin_up_down: :param sw_if_index: - :param link_up_down: (Default value = 0) - :param deleted: (Default value = 0) """ return self.api(self.papi.sw_interface_set_flags, {'sw_if_index': sw_if_index, - 'admin_up_down': admin_up_down, - 'link_up_down': link_up_down, - 'deleted': deleted}) + 'admin_up_down': admin_up_down}) def create_subif(self, sw_if_index, sub_id, outer_vlan, inner_vlan, no_tags=0, one_tag=0, two_tags=0, dot1ad=0, exact_match=0, -- cgit 1.2.3-korg From 3bbcfab119483ef07543242df2c4bb9b4c82b9ac Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Tue, 15 Aug 2017 19:03:44 -0400 Subject: TCP source address automation - v6 support - Non-default VRF ID collection - Break up ip source address list into CLI + API-friendly functions - Automate proxy arp / proxy nd configuration - Automate local adjacency insertion - Binary API support Change-Id: Iede31184f65cc1ec8c414447d2d60a1334e3fe15 Signed-off-by: Dave Barach --- src/vat/api_format.c | 76 ++++++++++++++- src/vnet.am | 3 + src/vnet/tcp/tcp.api | 42 +++++++++ src/vnet/tcp/tcp.c | 230 +++++++++++++++++++++++++++++++++++++++++++--- src/vnet/tcp/tcp.h | 8 +- src/vnet/tcp/tcp_api.c | 119 ++++++++++++++++++++++++ src/vnet/vnet_all_api_h.h | 1 + src/vpp/api/custom_dump.c | 24 ++++- 8 files changed, 486 insertions(+), 17 deletions(-) create mode 100644 src/vnet/tcp/tcp.api create mode 100644 src/vnet/tcp/tcp_api.c (limited to 'src/vpp/api') diff --git a/src/vat/api_format.c b/src/vat/api_format.c 
index 9381ec5d..43d1eb3d 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -4733,7 +4733,8 @@ _(sw_interface_set_mtu_reply) \ _(p2p_ethernet_add_reply) \ _(p2p_ethernet_del_reply) \ _(lldp_config_reply) \ -_(sw_interface_set_lldp_reply) +_(sw_interface_set_lldp_reply) \ +_(tcp_configure_src_addresses_reply) #define _(n) \ static void vl_api_##n##_t_handler \ @@ -5027,7 +5028,8 @@ _(SW_INTERFACE_GET_TABLE_REPLY, sw_interface_get_table_reply) \ _(P2P_ETHERNET_ADD_REPLY, p2p_ethernet_add_reply) \ _(P2P_ETHERNET_DEL_REPLY, p2p_ethernet_del_reply) \ _(LLDP_CONFIG_REPLY, lldp_config_reply) \ -_(SW_INTERFACE_SET_LLDP_REPLY, sw_interface_set_lldp_reply) +_(SW_INTERFACE_SET_LLDP_REPLY, sw_interface_set_lldp_reply) \ +_(TCP_CONFIGURE_SRC_ADDRESSES_REPLY, tcp_configure_src_addresses_reply) #define foreach_standalone_reply_msg \ _(SW_INTERFACE_EVENT, sw_interface_event) \ @@ -19694,6 +19696,73 @@ api_sw_interface_set_lldp (vat_main_t * vam) return ret; } +static int +api_tcp_configure_src_addresses (vat_main_t * vam) +{ + vl_api_tcp_configure_src_addresses_t *mp; + unformat_input_t *i = vam->input; + ip4_address_t v4first, v4last; + ip6_address_t v6first, v6last; + u8 range_set = 0; + u32 vrf_id = 0; + int ret; + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "%U - %U", + unformat_ip4_address, &v4first, + unformat_ip4_address, &v4last)) + { + if (range_set) + { + errmsg ("one range per message (range already set)"); + return -99; + } + range_set = 1; + } + else if (unformat (i, "%U - %U", + unformat_ip6_address, &v6first, + unformat_ip6_address, &v6last)) + { + if (range_set) + { + errmsg ("one range per message (range already set)"); + return -99; + } + range_set = 2; + } + else if (unformat (i, "vrf %d", &vrf_id)) + ; + else + break; + } + + if (range_set == 0) + { + errmsg ("address range not set"); + return -99; + } + + M (TCP_CONFIGURE_SRC_ADDRESSES, mp); + mp->vrf_id = ntohl (vrf_id); + /* ipv6? 
*/ + if (range_set == 2) + { + mp->is_ipv6 = 1; + clib_memcpy (mp->first_address, &v6first, sizeof (v6first)); + clib_memcpy (mp->last_address, &v6last, sizeof (v6last)); + } + else + { + mp->is_ipv6 = 0; + clib_memcpy (mp->first_address, &v4first, sizeof (v4first)); + clib_memcpy (mp->last_address, &v4last, sizeof (v4last)); + } + S (mp); + W (ret); + return ret; +} + static int q_or_quit (vat_main_t * vam) { @@ -20467,7 +20536,8 @@ _(sw_interface_get_table, " | sw_if_index [ipv6]") \ _(p2p_ethernet_add, " | sw_if_index remote_mac sub_id ") \ _(p2p_ethernet_del, " | sw_if_index remote_mac ") \ _(lldp_config, "system-name tx-hold tx-interval ") \ -_(sw_interface_set_lldp, " | sw_if_index [port-desc ] [disable]") +_(sw_interface_set_lldp, " | sw_if_index [port-desc ] [disable]") \ +_(tcp_configure_src_addresses, "first-last [vrf ]") /* List of command functions, CLI names map directly to functions */ #define foreach_cli_function \ diff --git a/src/vnet.am b/src/vnet.am index ede0376d..9821069a 100644 --- a/src/vnet.am +++ b/src/vnet.am @@ -466,6 +466,7 @@ endif # Layer 4 protocol: tcp ######################################## libvnet_la_SOURCES += \ + vnet/tcp/tcp_api.c \ vnet/tcp/tcp_format.c \ vnet/tcp/tcp_pg.c \ vnet/tcp/tcp_syn_filter4.c \ @@ -485,6 +486,8 @@ nobase_include_HEADERS += \ vnet/tcp/tcp_debug.h \ vnet/tcp/tcp.h +API_FILES += vnet/tcp/tcp.api + ######################################## # Layer 4 protocol: udp ######################################## diff --git a/src/vnet/tcp/tcp.api b/src/vnet/tcp/tcp.api new file mode 100644 index 00000000..093a5a89 --- /dev/null +++ b/src/vnet/tcp/tcp.api @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2015-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** \brief Configure TCP source addresses, for active-open TCP sessions + + TCP src/dst ports are 16 bits, with the low-order 1024 ports + reserved. So, it's necessary to provide a considerable number of + source IP addresses if one wishes to initiate a large number of + connections. + + Each of those addresses needs to have a receive adjacency - + either a /32 or a /128 - and vpp needs to answer (proxy) arps or + neighbor discovery requests for the addresses. + + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_ipv6 - 1 for ipv6, 0 for ipv4 + @param vrf_id - fib table / vrf id for local adjacencies + @param first_address - first address that TCP will use + @param last_address - last address that TCP will use +*/ +autoreply define tcp_configure_src_addresses { + u32 client_index; + u32 context; + u8 is_ipv6; + u32 vrf_id; + u8 first_address[16]; + u8 last_address[16]; + }; + diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c index d1690022..1d10f9bb 100644 --- a/src/vnet/tcp/tcp.c +++ b/src/vnet/tcp/tcp.c @@ -13,10 +13,17 @@ * limitations under the License. 
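(A rough sizing example for the tcp_configure_src_addresses description above: with the low-order 1024 ports reserved, one source address can contribute at most 65536 - 1024 = 64512 source ports, i.e. about 64.5k concurrent active-open connections toward a single destination address and port. Sustaining roughly one million such connections would therefore need at least ceil(1000000 / 64512) = 16 source addresses, which is why the message carries an address range rather than a single address.)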
*/ +/** + * @file + * @brief TCP host stack utilities + */ + #include #include #include #include +#include +#include #include tcp_main_t tcp_main; @@ -1347,6 +1354,7 @@ tcp_init (vlib_main_t * vm) { tcp_main_t *tm = vnet_get_tcp_main (); tm->is_enabled = 0; + tcp_api_reference (); return 0; } @@ -1375,15 +1383,191 @@ tcp_config_fn (vlib_main_t * vm, unformat_input_t * input) VLIB_CONFIG_FUNCTION (tcp_config_fn, "tcp"); + +/** + * \brief Configure an ipv4 source address range + * @param vm vlib_main_t pointer + * @param start first ipv4 address in the source address range + * @param end last ipv4 address in the source address range + * @param table_id VRF / table ID, 0 for the default FIB + * @return 0 if all OK, else an error indication from api_errno.h + */ + +int +tcp_configure_v4_source_address_range (vlib_main_t * vm, + ip4_address_t * start, + ip4_address_t * end, u32 table_id) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + vnet_main_t *vnm = vnet_get_main (); + u32 start_host_byte_order, end_host_byte_order; + fib_prefix_t prefix; + vnet_sw_interface_t *si; + fib_node_index_t fei; + u32 fib_index = 0; + u32 sw_if_index; + int rv; + int vnet_proxy_arp_add_del (ip4_address_t * lo_addr, + ip4_address_t * hi_addr, u32 fib_index, + int is_del); + + memset (&prefix, 0, sizeof (prefix)); + + fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id); + + if (fib_index == ~0) + return VNET_API_ERROR_NO_SUCH_FIB; + + start_host_byte_order = clib_net_to_host_u32 (start->as_u32); + end_host_byte_order = clib_net_to_host_u32 (end->as_u32); + + /* sanity check for reversed args or some such */ + if ((end_host_byte_order - start_host_byte_order) > (10 << 10)) + return VNET_API_ERROR_INVALID_ARGUMENT; + + /* Lookup the last address, to identify the interface involved */ + prefix.fp_len = 32; + prefix.fp_proto = FIB_PROTOCOL_IP4; + memcpy (&prefix.fp_addr.ip4, end, sizeof (ip4_address_t)); + + fei = fib_table_lookup (fib_index, &prefix); + + /* Couldn't find route to destination. Bail out. 
*/ + if (fei == FIB_NODE_INDEX_INVALID) + return VNET_API_ERROR_NEXT_HOP_NOT_IN_FIB; + + sw_if_index = fib_entry_get_resolving_interface (fei); + + /* Enable proxy arp on the interface */ + si = vnet_get_sw_interface (vnm, sw_if_index); + si->flags |= VNET_SW_INTERFACE_FLAG_PROXY_ARP; + + /* Configure proxy arp across the range */ + rv = vnet_proxy_arp_add_del (start, end, fib_index, 0 /* is_del */ ); + + if (rv) + return rv; + + do + { + dpo_id_t dpo = DPO_INVALID; + + vec_add1 (tm->ip4_src_addresses, start[0]); + + /* Add local adjacencies for the range */ + + receive_dpo_add_or_lock (DPO_PROTO_IP4, ~0 /* sw_if_index */ , + NULL, &dpo); + prefix.fp_len = 32; + prefix.fp_proto = FIB_PROTOCOL_IP4; + prefix.fp_addr.ip4.as_u32 = start->as_u32; + + fib_table_entry_special_dpo_update (fib_index, + &prefix, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_EXCLUSIVE, &dpo); + dpo_reset (&dpo); + + start_host_byte_order++; + start->as_u32 = clib_host_to_net_u32 (start_host_byte_order); + } + while (start_host_byte_order <= end_host_byte_order); + + return 0; +} + +/** + * \brief Configure an ipv6 source address range + * @param vm vlib_main_t pointer + * @param start first ipv6 address in the source address range + * @param end last ipv6 address in the source address range + * @param table_id VRF / table ID, 0 for the default FIB + * @return 0 if all OK, else an error indication from api_errno.h + */ + +int +tcp_configure_v6_source_address_range (vlib_main_t * vm, + ip6_address_t * start, + ip6_address_t * end, u32 table_id) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + fib_prefix_t prefix; + u32 fib_index = 0; + fib_node_index_t fei; + u32 sw_if_index; + + memset (&prefix, 0, sizeof (prefix)); + + fib_index = fib_table_find (FIB_PROTOCOL_IP6, table_id); + + if (fib_index == ~0) + return VNET_API_ERROR_NO_SUCH_FIB; + + while (1) + { + int i; + ip6_address_t tmp; + dpo_id_t dpo = DPO_INVALID; + + /* Remember this address */ + vec_add1 (tm->ip6_src_addresses, start[0]); + + /* Lookup the prefix, to identify the interface involved */ + prefix.fp_len = 128; + prefix.fp_proto = FIB_PROTOCOL_IP6; + memcpy (&prefix.fp_addr.ip6, start, sizeof (ip6_address_t)); + + fei = fib_table_lookup (fib_index, &prefix); + + /* Couldn't find route to destination. Bail out. */ + if (fei == FIB_NODE_INDEX_INVALID) + return VNET_API_ERROR_NEXT_HOP_NOT_IN_FIB; + + sw_if_index = fib_entry_get_resolving_interface (fei); + + if (sw_if_index == (u32) ~ 0) + return VNET_API_ERROR_NO_MATCHING_INTERFACE; + + /* Add a proxy neighbor discovery entry for this address */ + ip6_neighbor_proxy_add_del (sw_if_index, start, 0 /* is_del */ ); + + /* Add a receive adjacency for this address */ + receive_dpo_add_or_lock (DPO_PROTO_IP6, ~0 /* sw_if_index */ , + NULL, &dpo); + + fib_table_entry_special_dpo_update (fib_index, + &prefix, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_EXCLUSIVE, &dpo); + dpo_reset (&dpo); + + /* Done with the entire range? */ + if (!memcmp (start, end, sizeof (start[0]))) + break; + + /* Increment the address. DGMS. 
*/ + tmp = start[0]; + for (i = 15; i >= 0; i--) + { + tmp.as_u8[i] += 1; + if (tmp.as_u8[i] != 0) + break; + } + start[0] = tmp; + } + return 0; +} + static clib_error_t * tcp_src_address (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd_arg) { - tcp_main_t *tm = vnet_get_tcp_main (); ip4_address_t v4start, v4end; ip6_address_t v6start, v6end; + u32 table_id = 0; int v4set = 0; int v6set = 0; + int rv; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { @@ -1396,13 +1580,15 @@ tcp_src_address (vlib_main_t * vm, v4set = 1; } else if (unformat (input, "%U - %U", unformat_ip6_address, &v6start, - unformat_ip4_address, &v6end)) + unformat_ip6_address, &v6end)) v6set = 1; else if (unformat (input, "%U", unformat_ip6_address, &v6start)) { memcpy (&v6end, &v6start, sizeof (v4start)); v6set = 1; } + else if (unformat (input, "fib-table %d", &table_id)) + ; else break; } @@ -1412,21 +1598,41 @@ tcp_src_address (vlib_main_t * vm, if (v4set) { - u32 tmp; - - do + rv = tcp_configure_v4_source_address_range (vm, &v4start, &v4end, + table_id); + switch (rv) { - vec_add1 (tm->ip4_src_addresses, v4start); - tmp = clib_net_to_host_u32 (v4start.as_u32); - tmp++; - v4start.as_u32 = clib_host_to_net_u32 (tmp); + case 0: + break; + + case VNET_API_ERROR_NO_SUCH_FIB: + return clib_error_return (0, "Invalid table-id %d", table_id); + + case VNET_API_ERROR_INVALID_ARGUMENT: + return clib_error_return (0, "Invalid address range %U - %U", + format_ip4_address, &v4start, + format_ip4_address, &v4end); + default: + return clib_error_return (0, "error %d", rv); + break; } - while (clib_host_to_net_u32 (v4start.as_u32) <= - clib_host_to_net_u32 (v4end.as_u32)); } if (v6set) { - clib_warning ("v6 src address list unimplemented..."); + rv = tcp_configure_v6_source_address_range (vm, &v6start, &v6end, + table_id); + switch (rv) + { + case 0: + break; + + case VNET_API_ERROR_NO_SUCH_FIB: + return clib_error_return (0, "Invalid table-id %d", table_id); + + default: + return clib_error_return (0, "error %d", rv); + break; + } } return 0; } diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h index 8010b446..097cc8cf 100644 --- a/src/vnet/tcp/tcp.h +++ b/src/vnet/tcp/tcp.h @@ -465,7 +465,13 @@ void tcp_connection_del (tcp_connection_t * tc); int tcp_half_open_connection_cleanup (tcp_connection_t * tc); tcp_connection_t *tcp_connection_new (u8 thread_index); void tcp_connection_reset (tcp_connection_t * tc); - +int tcp_configure_v4_source_address_range (vlib_main_t * vm, + ip4_address_t * start, + ip4_address_t * end, u32 table_id); +int tcp_configure_v6_source_address_range (vlib_main_t * vm, + ip6_address_t * start, + ip6_address_t * end, u32 table_id); +void tcp_api_reference (void); u8 *format_tcp_connection_id (u8 * s, va_list * args); u8 *format_tcp_connection (u8 * s, va_list * args); u8 *format_tcp_scoreboard (u8 * s, va_list * args); diff --git a/src/vnet/tcp/tcp_api.c b/src/vnet/tcp/tcp_api.c new file mode 100644 index 00000000..4c3e49ee --- /dev/null +++ b/src/vnet/tcp/tcp_api.c @@ -0,0 +1,119 @@ +/* + *------------------------------------------------------------------ + * tcp_api.c - vnet tcp-layer apis + * + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
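To show how the two range helpers above are meant to be driven, here is a minimal, hypothetical C sketch that reserves a small v4 range in the default FIB at init time. The function name, the RFC 5737 documentation addresses, and table id 0 are illustrative assumptions only; the call itself follows the tcp_configure_v4_source_address_range() signature introduced in this patch.

#include <vnet/tcp/tcp.h>

static clib_error_t *
example_tcp_src_range_init (vlib_main_t * vm)
{
  ip4_address_t first, last;
  int rv;

  /* 192.0.2.1 .. 192.0.2.30, well inside the helper's range-size sanity check */
  first.as_u32 = clib_host_to_net_u32 (0xc0000201);
  last.as_u32 = clib_host_to_net_u32 (0xc000021e);

  rv = tcp_configure_v4_source_address_range (vm, &first, &last,
                                              0 /* table_id */);
  if (rv)
    return clib_error_return (0, "src address range rejected: %d", rv);

  return 0;
}

The same shape applies to the v6 helper; on error both return the VNET_API_ERROR_* codes that the CLI and binary API handlers translate for the user.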
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include +#include + +#include + +#include + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include +#undef vl_printfun + +#include + +#define foreach_tcp_api_msg \ +_(TCP_CONFIGURE_SRC_ADDRESSES, tcp_configure_src_addresses) + +static void + vl_api_tcp_configure_src_addresses_t_handler + (vl_api_tcp_configure_src_addresses_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_tcp_configure_src_addresses_reply_t *rmp; + u32 vrf_id; + int rv; + + vrf_id = clib_net_to_host_u32 (mp->vrf_id); + + if (mp->is_ipv6) + rv = tcp_configure_v6_source_address_range + (vm, + (ip6_address_t *) mp->first_address, + (ip6_address_t *) mp->last_address, vrf_id); + else + rv = tcp_configure_v4_source_address_range + (vm, + (ip4_address_t *) mp->first_address, + (ip4_address_t *) mp->last_address, vrf_id); + + REPLY_MACRO (VL_API_TCP_CONFIGURE_SRC_ADDRESSES_REPLY); +} + +#define vl_msg_name_crc_list +#include +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (api_main_t * am) +{ +#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id); + foreach_vl_msg_name_crc_tcp; +#undef _ +} + +static clib_error_t * +tcp_api_hookup (vlib_main_t * vm) +{ + api_main_t *am = &api_main; + +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_tcp_api_msg; +#undef _ + + /* + * Set up the (msg_name, crc, message-id) table + */ + setup_message_id_table (am); + + return 0; +} + +VLIB_API_INIT_FUNCTION (tcp_api_hookup); + +void +tcp_api_reference (void) +{ +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/vnet_all_api_h.h b/src/vnet/vnet_all_api_h.h index c1eff61f..0b225340 100644 --- a/src/vnet/vnet_all_api_h.h +++ b/src/vnet/vnet_all_api_h.h @@ -58,6 +58,7 @@ #include #include #include +#include /* * fd.io coding-style-patch-verification: ON diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c index 0342476a..1353fe28 100644 --- a/src/vpp/api/custom_dump.c +++ b/src/vpp/api/custom_dump.c @@ -3007,6 +3007,27 @@ static void *vl_api_p2p_ethernet_del_t_print FINISH; } +static void *vl_api_tcp_configure_src_addresses_t_print + (vl_api_tcp_configure_src_addresses_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: tcp_configure_src_addresses "); + if (mp->is_ipv6) + s = format (s, "%U - %U ", + format_ip6_address, (ip6_address_t *) mp->first_address, + format_ip6_address, (ip6_address_t *) mp->last_address); + else + s = format (s, "%U - %U ", + format_ip4_address, (ip4_address_t *) mp->first_address, + format_ip4_address, (ip4_address_t *) mp->last_address); + + if 
(mp->vrf_id) + s = format (s, "vrf %d ", ntohl (mp->vrf_id)); + + FINISH; +} + #define foreach_custom_print_no_arg_function \ _(lisp_eid_table_vni_dump) \ _(lisp_map_resolver_dump) \ @@ -3191,7 +3212,8 @@ _(FEATURE_ENABLE_DISABLE, feature_enable_disable) \ _(SW_INTERFACE_TAG_ADD_DEL, sw_interface_tag_add_del) \ _(SW_INTERFACE_SET_MTU, sw_interface_set_mtu) \ _(P2P_ETHERNET_ADD, p2p_ethernet_add) \ -_(P2P_ETHERNET_DEL, p2p_ethernet_del) +_(P2P_ETHERNET_DEL, p2p_ethernet_del) \ +_(TCP_CONFIGURE_SRC_ADDRESSES, tcp_configure_src_addresses) void vl_msg_api_custom_dump_configure (api_main_t * am) { -- cgit 1.2.3-korg From f7a55ad74c90928d86f1bbf56590d9571c1b828f Mon Sep 17 00:00:00 2001 From: Ole Troan Date: Tue, 16 May 2017 14:59:29 +0200 Subject: PUNT socket: External control plane processes connected via UNIX domain sockets. An external (to VPP) process can register (over the VPP binary API) to receive control plane packets over a UNIX domain socket. The packets are prepended with a packet descriptor containing meta-data (if_index of interface, etc). Currently only UDP is supported. The socket supports sending of packets/frames as well. The sent packet is prepended with a descriptor, telling VPP to route the packet (via ip4-lookup, ip6-lookup) or as an pre-formed Ethernet frame that is sent directly to interface-output. The intended use case for this is for an external DHCP client or a RIP implementation. New configuration option: punt { socket } To register use the punt_socket API message. TODO: - Add support for pre-routing. I.e send L3 packet to given TX interface, but do ARP/ND (ip[46]-rewrite) - Add test scripts - Support for abstract names (starting with \0) - Add rate limiting (COP) - Support for other protocols, e.g. IPv6 ND Change-Id: I4a0afc8020deebb3d9d74686dde694ee5bcb8d0f Signed-off-by: Ole Troan --- src/vnet/ip/punt.c | 506 ++++++++++++++++++++++++++++++++++++++++++++- src/vnet/ip/punt.h | 52 ++++- src/vnet/ip/punt_error.def | 8 + src/vpp/api/api.c | 64 ++++++ src/vpp/api/vpe.api | 33 +++ 5 files changed, 652 insertions(+), 11 deletions(-) (limited to 'src/vpp/api') diff --git a/src/vnet/ip/punt.c b/src/vnet/ip/punt.c index 48558401..92f76b8d 100644 --- a/src/vnet/ip/punt.c +++ b/src/vnet/ip/punt.c @@ -17,16 +17,27 @@ * @file * @brief Local TCP/IP stack punt infrastructure. * - * Provides a set of VPP nodes togather with the relevant APIs and CLI + * Provides a set of VPP nodes together with the relevant APIs and CLI * commands in order to adjust and dispatch packets from the VPP data plane * to the local TCP/IP stack */ + +#include #include #include #include #include +#include +#include + +#include +#include +#include +#include +#include +#include -#define foreach_punt_next \ +#define foreach_punt_next \ _ (PUNT, "error-punt") typedef enum @@ -37,8 +48,28 @@ typedef enum PUNT_N_NEXT, } punt_next_t; +enum punt_socket_rx_next_e +{ + PUNT_SOCKET_RX_NEXT_INTERFACE_OUTPUT, + PUNT_SOCKET_RX_NEXT_IP4_LOOKUP, + PUNT_SOCKET_RX_NEXT_IP6_LOOKUP, + PUNT_SOCKET_RX_N_NEXT +}; + vlib_node_registration_t udp4_punt_node; vlib_node_registration_t udp6_punt_node; +vlib_node_registration_t udp4_punt_socket_node; +vlib_node_registration_t udp6_punt_socket_node; +static vlib_node_registration_t punt_socket_rx_node; + +punt_main_t punt_main; + +char * +vnet_punt_get_server_pathname (void) +{ + punt_main_t *pm = &punt_main; + return pm->sun_path; +} /** @brief IPv4/IPv6 UDP punt node main loop. 
@@ -194,10 +225,389 @@ VLIB_REGISTER_NODE (udp6_punt_node) = { #undef _ }, }; -/* *INDENT-ON* */ VLIB_NODE_FUNCTION_MULTIARCH (udp6_punt_node, udp6_punt);; +/* *INDENT-ON* */ + +static struct sockaddr_un * +punt_socket_get (bool is_ip4, u16 port) +{ + punt_main_t *pm = &punt_main; + punt_client_t *v = is_ip4 ? pm->clients_by_dst_port4 : + pm->clients_by_dst_port6; + + u16 i = sparse_vec_index (v, port); + if (i == SPARSE_VEC_INVALID_INDEX) + return 0; + + return &vec_elt (v, i).caddr; +} + +static void +punt_socket_register (bool is_ip4, u8 protocol, u16 port, + char *client_pathname) +{ + punt_main_t *pm = &punt_main; + punt_client_t c, *n; + punt_client_t *v = is_ip4 ? pm->clients_by_dst_port4 : + pm->clients_by_dst_port6; + + memset (&c, 0, sizeof (c)); + memcpy (c.caddr.sun_path, client_pathname, sizeof (c.caddr.sun_path)); + c.caddr.sun_family = AF_UNIX; + c.port = port; + n = sparse_vec_validate (v, port); + n[0] = c; +} + +/* $$$$ Just leaves the mapping in place for now */ +static void +punt_socket_unregister (bool is_ip4, u8 protocol, u16 port) +{ + return; +} + +always_inline uword +udp46_punt_socket_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, bool is_ip4) +{ + u32 *buffers = vlib_frame_args (frame); + uword n_packets = frame->n_vectors; + struct iovec *iovecs = 0; + punt_main_t *pm = &punt_main; + int i; + + u32 node_index = is_ip4 ? udp4_punt_socket_node.index : + udp6_punt_socket_node.index; + + for (i = 0; i < n_packets; i++) + { + struct iovec *iov; + vlib_buffer_t *b; + uword l; + punt_packetdesc_t packetdesc; + + b = vlib_get_buffer (vm, buffers[i]); + + /* Reverse UDP Punt advance */ + udp_header_t *udp; + if (is_ip4) + { + vlib_buffer_advance (b, -(sizeof (ip4_header_t) + + sizeof (udp_header_t))); + ip4_header_t *ip = vlib_buffer_get_current (b); + udp = (udp_header_t *) (ip + 1); + } + else + { + vlib_buffer_advance (b, -(sizeof (ip6_header_t) + + sizeof (udp_header_t))); + ip6_header_t *ip = vlib_buffer_get_current (b); + udp = (udp_header_t *) (ip + 1); + } + + u16 port = clib_net_to_host_u16 (udp->dst_port); + + /* + * Find registerered client + * If no registered client, drop packet and count + */ + struct sockaddr_un *caddr; + caddr = punt_socket_get (is_ip4, port); + if (!caddr) + { + vlib_node_increment_counter (vm, node_index, + PUNT_ERROR_SOCKET_TX_ERROR, 1); + goto error; + } + + /* Re-set iovecs if present. */ + if (iovecs) + _vec_len (iovecs) = 0; + + /* Add packet descriptor */ + packetdesc.sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX]; + packetdesc.action = 0; + vec_add2 (iovecs, iov, 1); + iov->iov_base = &packetdesc; + iov->iov_len = sizeof (packetdesc); + + /** VLIB buffer chain -> Unix iovec(s). 
*/ + vlib_buffer_advance (b, -(sizeof (ethernet_header_t))); + vec_add2 (iovecs, iov, 1); + iov->iov_base = b->data + b->current_data; + iov->iov_len = l = b->current_length; + + if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT)) + { + do + { + b = vlib_get_buffer (vm, b->next_buffer); + + vec_add2 (iovecs, iov, 1); + + iov->iov_base = b->data + b->current_data; + iov->iov_len = b->current_length; + l += b->current_length; + } + while (b->flags & VLIB_BUFFER_NEXT_PRESENT); + } + + struct msghdr msg = { + .msg_name = caddr, + .msg_namelen = sizeof (*caddr), + .msg_iov = iovecs, + .msg_iovlen = vec_len (iovecs), + }; + + if (sendmsg (pm->socket_fd, &msg, 0) < l) + vlib_node_increment_counter (vm, node_index, + PUNT_ERROR_SOCKET_TX_ERROR, 1); + } + +error: + vlib_buffer_free_no_next (vm, buffers, n_packets); + + return n_packets; +} + +static uword +udp4_punt_socket (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * from_frame) +{ + return udp46_punt_socket_inline (vm, node, from_frame, true /* is_ip4 */ ); +} + +static uword +udp6_punt_socket (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * from_frame) +{ + return udp46_punt_socket_inline (vm, node, from_frame, false /* is_ip4 */ ); +} + + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (udp4_punt_socket_node) = { + .function = udp4_punt_socket, + .name = "ip4-udp-punt-socket", + .flags = VLIB_NODE_FLAG_IS_DROP, + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = PUNT_N_ERROR, + .error_strings = punt_error_strings, +}; +VLIB_REGISTER_NODE (udp6_punt_socket_node) = { + .function = udp6_punt_socket, + .name = "ip6-udp-punt-socket", + .flags = VLIB_NODE_FLAG_IS_DROP, + .vector_size = sizeof (u32), + .n_errors = PUNT_N_ERROR, + .error_strings = punt_error_strings, +}; +/* *INDENT-ON* */ + +typedef struct +{ + enum punt_action_e action; + u32 sw_if_index; +} punt_trace_t; + +static u8 * +format_punt_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + vnet_main_t *vnm = vnet_get_main (); + punt_trace_t *t = va_arg (*va, punt_trace_t *); + s = format (s, "%U Action: %d", format_vnet_sw_if_index_name, + vnm, t->sw_if_index, t->action); + return s; +} + +static uword +punt_socket_rx_fd (vlib_main_t * vm, vlib_node_runtime_t * node, u32 fd) +{ + const uword buffer_size = VLIB_BUFFER_DATA_SIZE; + u32 n_trace = vlib_get_trace_count (vm, node); + u32 next = node->cached_next_index; + u32 n_left_to_next, next_index; + u32 *to_next; + u32 error = PUNT_ERROR_NONE; + vlib_get_next_frame (vm, node, next, to_next, n_left_to_next); + + /* $$$$ Only dealing with one buffer at the time for now */ + + u32 bi; + vlib_buffer_t *b; + punt_packetdesc_t packetdesc; + ssize_t size; + struct iovec io[2]; + + if (vlib_buffer_alloc (vm, &bi, 1) != 1) + { + error = PUNT_ERROR_NOBUFFER; + goto error; + } + + b = vlib_get_buffer (vm, bi); + io[0].iov_base = &packetdesc; + io[0].iov_len = sizeof (packetdesc); + io[1].iov_base = b->data; + io[1].iov_len = buffer_size; + + size = readv (fd, io, 2); + /* We need at least the packet descriptor plus a header */ + if (size <= (int) (sizeof (packetdesc) + sizeof (ip4_header_t))) + { + vlib_buffer_free (vm, &bi, 1); + error = PUNT_ERROR_READV; + goto error; + } + + b->flags = VNET_BUFFER_F_LOCALLY_ORIGINATED; + b->current_length = size - sizeof (packetdesc); + + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b); + + switch (packetdesc.action) + { + case PUNT_L2: + vnet_buffer 
(b)->sw_if_index[VLIB_TX] = packetdesc.sw_if_index; + next_index = PUNT_SOCKET_RX_NEXT_INTERFACE_OUTPUT; + break; + + case PUNT_IP4_ROUTED: + vnet_buffer (b)->sw_if_index[VLIB_RX] = packetdesc.sw_if_index; + vnet_buffer (b)->sw_if_index[VLIB_TX] = ~0; + next_index = PUNT_SOCKET_RX_NEXT_IP4_LOOKUP; + break; + + case PUNT_IP6_ROUTED: + vnet_buffer (b)->sw_if_index[VLIB_RX] = packetdesc.sw_if_index; + vnet_buffer (b)->sw_if_index[VLIB_TX] = ~0; + next_index = PUNT_SOCKET_RX_NEXT_IP6_LOOKUP; + break; + + default: + error = PUNT_ERROR_ACTION; + vlib_buffer_free (vm, &bi, 1); + goto error; + } + + if (PREDICT_FALSE (n_trace > 0)) + { + punt_trace_t *t; + vlib_trace_buffer (vm, node, next_index, b, 1 /* follow_chain */ ); + vlib_set_trace_count (vm, node, --n_trace); + t = vlib_add_trace (vm, node, b, sizeof (*t)); + t->sw_if_index = packetdesc.sw_if_index; + t->action = packetdesc.action; + } + + to_next[0] = bi; + to_next++; + n_left_to_next--; + + vlib_validate_buffer_enqueue_x1 (vm, node, next, to_next, n_left_to_next, + bi, next_index); + vlib_put_next_frame (vm, node, next, n_left_to_next); + return 1; + +error: + vlib_node_increment_counter (vm, punt_socket_rx_node.index, error, 1); + return 0; +} + +static uword +punt_socket_rx (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + punt_main_t *pm = &punt_main; + u32 total_count = 0; + int i; + + for (i = 0; i < vec_len (pm->ready_fds); i++) + { + total_count += punt_socket_rx_fd (vm, node, pm->ready_fds[i]); + vec_del1 (pm->ready_fds, i); + } + return total_count; +} + +VLIB_REGISTER_NODE (punt_socket_rx_node, static) = +{ + .function = punt_socket_rx,.name = "punt-socket-rx",.type = + VLIB_NODE_TYPE_INPUT,.state = VLIB_NODE_STATE_INTERRUPT,.vector_size = + 1,.n_errors = PUNT_N_ERROR,.error_strings = + punt_error_strings,.n_next_nodes = PUNT_SOCKET_RX_N_NEXT,.next_nodes = + { +[PUNT_SOCKET_RX_NEXT_INTERFACE_OUTPUT] = "interface-output", + [PUNT_SOCKET_RX_NEXT_IP4_LOOKUP] = "ip4-lookup", + [PUNT_SOCKET_RX_NEXT_IP6_LOOKUP] = "ip6-lookup",},.format_trace = + format_punt_trace,}; + +static clib_error_t * +punt_socket_read_ready (unix_file_t * uf) +{ + vlib_main_t *vm = vlib_get_main (); + punt_main_t *pm = &punt_main; + + /** Schedule the rx node */ + vlib_node_set_interrupt_pending (vm, punt_socket_rx_node.index); + vec_add1 (pm->ready_fds, uf->file_descriptor); + + return 0; +} + +clib_error_t * +vnet_punt_socket_add (vlib_main_t * vm, u32 header_version, + bool is_ip4, u8 protocol, u16 port, + char *client_pathname) +{ + punt_main_t *pm = &punt_main; + + if (!pm->is_configured) + return clib_error_return (0, "socket is not configured"); + + if (header_version != PUNT_PACKETDESC_VERSION) + return clib_error_return (0, "Invalid packet descriptor version"); + + /* For now we only support UDP punt */ + if (protocol != IP_PROTOCOL_UDP) + return clib_error_return (0, + "only UDP protocol (%d) is supported, got %d", + IP_PROTOCOL_UDP, protocol); + + if (port == (u16) ~ 0) + return clib_error_return (0, "UDP port number required"); + + /* Register client */ + punt_socket_register (is_ip4, protocol, port, client_pathname); + + u32 node_index = is_ip4 ? 
udp4_punt_socket_node.index : + udp6_punt_socket_node.index; + + udp_register_dst_port (vm, port, node_index, is_ip4); + + return 0; +} + +clib_error_t * +vnet_punt_socket_del (vlib_main_t * vm, bool is_ip4, u8 l4_protocol, u16 port) +{ + punt_main_t *pm = &punt_main; + + if (!pm->is_configured) + return clib_error_return (0, "socket is not configured"); + + punt_socket_unregister (is_ip4, l4_protocol, port); + udp_unregister_dst_port (vm, port, is_ip4); + + return 0; +} + /** * @brief Request IP traffic punt to the local TCP/IP stack. * @@ -217,7 +627,7 @@ VLIB_NODE_FUNCTION_MULTIARCH (udp6_punt_node, udp6_punt);; */ clib_error_t * vnet_punt_add_del (vlib_main_t * vm, u8 ipv, u8 protocol, u16 port, - int is_add) + bool is_add) { /* For now we only support UDP punt */ if (protocol != IP_PROTOCOL_UDP) @@ -249,7 +659,6 @@ vnet_punt_add_del (vlib_main_t * vm, u8 ipv, u8 protocol, u16 port, return 0; } - else return clib_error_return (0, "punt delete is not supported yet"); } @@ -259,13 +668,13 @@ udp_punt_cli (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { u32 udp_port; - int is_add = 1; + bool is_add = true; clib_error_t *error; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { if (unformat (input, "del")) - is_add = 0; + is_add = false; if (unformat (input, "all")) { /* punt both IPv6 and IPv4 when used in CLI */ @@ -276,8 +685,8 @@ udp_punt_cli (vlib_main_t * vm, else if (unformat (input, "%d", &udp_port)) { /* punt both IPv6 and IPv4 when used in CLI */ - error = vnet_punt_add_del (vm, ~0, IP_PROTOCOL_UDP, - udp_port, is_add); + error = + vnet_punt_add_del (vm, ~0, IP_PROTOCOL_UDP, udp_port, is_add); if (error) clib_error_report (error); } @@ -314,6 +723,85 @@ VLIB_CLI_COMMAND (punt_udp_command, static) = { }; /* *INDENT-ON* */ +clib_error_t * +punt_init (vlib_main_t * vm) +{ + punt_main_t *pm = &punt_main; + + pm->clients_by_dst_port6 = sparse_vec_new + (sizeof (pm->clients_by_dst_port6[0]), + BITS (((udp_header_t *) 0)->dst_port)); + pm->clients_by_dst_port4 = sparse_vec_new + (sizeof (pm->clients_by_dst_port4[0]), + BITS (((udp_header_t *) 0)->dst_port)); + + pm->is_configured = false; + pm->interface_output_node = vlib_get_node_by_name (vm, + (u8 *) + "interface-output"); + return 0; +} + +VLIB_INIT_FUNCTION (punt_init); + +static clib_error_t * +punt_config (vlib_main_t * vm, unformat_input_t * input) +{ + punt_main_t *pm = &punt_main; + char *socket_path = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "socket %s", &socket_path)) + strncpy (pm->sun_path, socket_path, 108 - 1); + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + if (socket_path == 0) + return 0; + + /* UNIX domain socket */ + struct sockaddr_un addr; + if ((pm->socket_fd = socket (AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0)) == -1) + { + return clib_error_return (0, "socket error"); + } + + memset (&addr, 0, sizeof (addr)); + addr.sun_family = AF_UNIX; + if (*socket_path == '\0') + { + *addr.sun_path = '\0'; + strncpy (addr.sun_path + 1, socket_path + 1, + sizeof (addr.sun_path) - 2); + } + else + { + strncpy (addr.sun_path, socket_path, sizeof (addr.sun_path) - 1); + unlink (socket_path); + } + + if (bind (pm->socket_fd, (struct sockaddr *) &addr, sizeof (addr)) == -1) + { + return clib_error_return (0, "bind error"); + } + + /* Register socket */ + unix_main_t *um = &unix_main; + unix_file_t template = { 0 }; + template.read_function = punt_socket_read_ready; + 
template.file_descriptor = pm->socket_fd; + pm->unix_file_index = unix_file_add (um, &template); + + pm->is_configured = true; + + return 0; +} + +VLIB_CONFIG_FUNCTION (punt_config, "punt"); + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/ip/punt.h b/src/vnet/ip/punt.h index 09a9d4c5..0103249c 100644 --- a/src/vnet/ip/punt.h +++ b/src/vnet/ip/punt.h @@ -20,6 +20,7 @@ #ifndef included_punt_h #define included_punt_h +#include typedef enum { #define punt_error(n,s) PUNT_ERROR_##n, @@ -30,9 +31,56 @@ typedef enum clib_error_t *vnet_punt_add_del (vlib_main_t * vm, u8 ipv, - u8 protocol, u16 port, int is_add); + u8 protocol, u16 port, bool is_add); +clib_error_t *vnet_punt_socket_add (vlib_main_t * vm, u32 header_version, + bool is_ip4, u8 protocol, u16 port, + char *client_pathname); +clib_error_t *vnet_punt_socket_del (vlib_main_t * vm, bool is_ip4, + u8 l4_protocol, u16 port); +char *vnet_punt_get_server_pathname (void); -#endif /* included_punt_h */ +enum punt_action_e +{ + PUNT_L2 = 0, + PUNT_IP4_ROUTED, + PUNT_IP6_ROUTED, +}; + +/* + * Packet descriptor header. Version 1 + * If this header changes, the version must also change to notify clients. + */ +#define PUNT_PACKETDESC_VERSION 1 +typedef struct __attribute__ ((packed)) +{ + u32 sw_if_index; /* RX or TX interface */ + enum punt_action_e action; +} punt_packetdesc_t; + +/* + * Client registration + */ +typedef struct +{ + u16 port; + struct sockaddr_un caddr; +} punt_client_t; + +typedef struct +{ + int socket_fd; + char sun_path[sizeof (struct sockaddr_un)]; + punt_client_t *clients_by_dst_port4; + punt_client_t *clients_by_dst_port6; + u32 unix_file_index; + bool is_configured; + vlib_node_t *interface_output_node; + u32 *ready_fds; + u32 *rx_buffers; +} punt_main_t; +extern punt_main_t punt_main; + +#endif /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/ip/punt_error.def b/src/vnet/ip/punt_error.def index a76d7e7b..13afa2c7 100644 --- a/src/vnet/ip/punt_error.def +++ b/src/vnet/ip/punt_error.def @@ -17,3 +17,11 @@ punt_error (NONE, "no error") punt_error (UDP_PORT, "udp port punt") +punt_error (SOCKET_RX, "Socket RX") +punt_error (SOCKET_TX, "Socket TX") +punt_error (SOCKET_RX_ERROR, "Socket RX error") +punt_error (SOCKET_TX_ERROR, "Socket TX error") +punt_error (NOBUFFER, "buffer allocation failure") +punt_error (READV, "socket read failure") +punt_error (ACTION, "invalid packet descriptor") + diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index 4e892431..e8bc22ae 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -144,6 +144,8 @@ _(IP_SOURCE_AND_PORT_RANGE_CHECK_INTERFACE_ADD_DEL, \ ip_source_and_port_range_check_interface_add_del) \ _(DELETE_SUBIF, delete_subif) \ _(PUNT, punt) \ +_(PUNT_SOCKET_REGISTER, punt_socket_register) \ +_(PUNT_SOCKET_DEREGISTER, punt_socket_deregister) \ _(FEATURE_ENABLE_DISABLE, feature_enable_disable) #define QUOTE_(x) #x @@ -1940,6 +1942,68 @@ vl_api_punt_t_handler (vl_api_punt_t * mp) REPLY_MACRO (VL_API_PUNT_REPLY); } +static void +vl_api_punt_socket_register_t_handler (vl_api_punt_socket_register_t * mp) +{ + vl_api_punt_socket_register_reply_t *rmp; + vlib_main_t *vm = vlib_get_main (); + int rv = 0; + clib_error_t *error; + unix_shared_memory_queue_t *q; + u32 handle; + + error = vnet_punt_socket_add (vm, ntohl (mp->header_version), + mp->is_ip4, mp->l4_protocol, + ntohs (mp->l4_port), (char *) mp->pathname); + if (error) + { + rv = -1; + clib_error_report (error); + } + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (!q) 
+ return; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + rmp->_vl_msg_id = htons (VL_API_PUNT_SOCKET_REGISTER_REPLY); + rmp->context = mp->context; + rmp->retval = htonl (rv); + char *p = vnet_punt_get_server_pathname (); + /* Abstract pathnames start with \0 */ + memcpy ((char *) rmp->pathname, p, sizeof (rmp->pathname)); + vl_msg_api_send_shmem (q, (u8 *) & rmp); +} + +static void +vl_api_punt_socket_deregister_t_handler (vl_api_punt_socket_deregister_t * mp) +{ + vl_api_punt_socket_deregister_reply_t *rmp; + vlib_main_t *vm = vlib_get_main (); + int rv = 0; + clib_error_t *error; + unix_shared_memory_queue_t *q; + u32 handle; + + error = vnet_punt_socket_del (vm, mp->is_ip4, mp->l4_protocol, + ntohs (mp->l4_port)); + if (error) + { + rv = -1; + clib_error_report (error); + } + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (!q) + return; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + rmp->_vl_msg_id = htons (VL_API_PUNT_SOCKET_DEREGISTER_REPLY); + rmp->context = mp->context; + rmp->retval = htonl (rv); + vl_msg_api_send_shmem (q, (u8 *) & rmp); +} + static void vl_api_feature_enable_disable_t_handler (vl_api_feature_enable_disable_t * mp) { diff --git a/src/vpp/api/vpe.api b/src/vpp/api/vpe.api index ec5ffbe9..0dee1533 100644 --- a/src/vpp/api/vpe.api +++ b/src/vpp/api/vpe.api @@ -991,6 +991,39 @@ autoreply define punt { u16 l4_port; }; +/** \brief Punt traffic to the host via socket + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param header_version - expected meta data header version (currently 1) + @param is_ip4 - L3 protocol 1 - IPv4, 0 - IPv6 + @param l4_protocol - L4 protocol to be punted, only UDP (0x11) is supported + @param l4_port - TCP/UDP port to be punted +*/ +define punt_socket_register { + u32 client_index; + u32 context; + u32 header_version; + u8 is_ip4; + u8 l4_protocol; + u16 l4_port; + u8 pathname[64]; +}; + +define punt_socket_register_reply +{ + u32 context; + i32 retval; + u8 pathname[64]; +}; + +autoreply define punt_socket_deregister { + u32 client_index; + u32 context; + u8 is_ip4; + u8 l4_protocol; + u16 l4_port; +}; + /** \brief Feature path enable/disable request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request -- cgit 1.2.3-korg From 4802632dd1da02c021af99a179264f4a9a163f36 Mon Sep 17 00:00:00 2001 From: Ole Troan Date: Thu, 7 Sep 2017 10:04:41 +0200 Subject: Punt socket: Fix coverity error for pathname length mismatch between API and sun_path. 
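To make the socket contract concrete, a minimal external client might look like the hypothetical sketch below. It assumes VPP was started with a punt { socket <path> } stanza, that the client has already sent punt_socket_register over the binary API for the UDP port it wants (client pathnames are limited by the 108-byte sun_path noted just above), and that the packed action enum occupies 4 bytes; the pathname and error handling here are illustrative only.

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/un.h>

/* Mirrors punt_packetdesc_t: RX/TX sw_if_index plus the requested action.
   Assumes the enum is 4 bytes wide, as with the default GCC ABI. */
struct punt_desc
{
  uint32_t sw_if_index;
  uint32_t action;
} __attribute__ ((packed));

int
main (void)
{
  const char *path = "/tmp/punt_client.sock";   /* hypothetical pathname */
  struct sockaddr_un addr = { .sun_family = AF_UNIX };
  struct punt_desc desc;
  uint8_t pkt[2048];
  struct iovec io[2] = {
    {.iov_base = &desc,.iov_len = sizeof (desc)},
    {.iov_base = pkt,.iov_len = sizeof (pkt)},
  };
  int fd = socket (AF_UNIX, SOCK_DGRAM, 0);

  strncpy (addr.sun_path, path, sizeof (addr.sun_path) - 1);
  unlink (path);
  if (fd < 0 || bind (fd, (struct sockaddr *) &addr, sizeof (addr)) < 0)
    return 1;

  /* ... punt_socket_register for the desired UDP port goes here ... */

  for (;;)
    {
      ssize_t n = readv (fd, io, 2);
      if (n <= (ssize_t) sizeof (desc))
        continue;               /* datagram too short to carry a packet */
      printf ("punted %zd bytes, rx sw_if_index %u\n",
              (ssize_t) (n - sizeof (desc)), desc.sw_if_index);
      /* To inject traffic back, sendmsg() a descriptor (PUNT_L2,
         PUNT_IP4_ROUTED or PUNT_IP6_ROUTED plus a sw_if_index) followed
         by the packet to VPP's server socket. */
    }
}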
Change-Id: I69d67707540e075afe7bad0a70e57d9d80b1bc8e Signed-off-by: Ole Troan --- src/vpp/api/vpe.api | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/vpp/api') diff --git a/src/vpp/api/vpe.api b/src/vpp/api/vpe.api index 0dee1533..eda95ce5 100644 --- a/src/vpp/api/vpe.api +++ b/src/vpp/api/vpe.api @@ -1006,7 +1006,7 @@ define punt_socket_register { u8 is_ip4; u8 l4_protocol; u16 l4_port; - u8 pathname[64]; + u8 pathname[108]; /* Linux sun_path defined to be 108 bytes, see unix(7) */ }; define punt_socket_register_reply -- cgit 1.2.3-korg From 8a19f12a0cfe6d611f6e266931af691fb69a74ad Mon Sep 17 00:00:00 2001 From: "Keith Burns (alagalah)" Date: Sun, 6 Aug 2017 08:26:29 -0700 Subject: Allow individual stats API and introduce stats.api - want_interface_simple_stats - want_interface_combined_stats - want_ip4|6_fib|nbr_stats Change-Id: I4e97461def508958b3e429c3fe8859b36fef2d18 Signed-off-by: Keith Burns (alagalah) --- .gitignore | 1 + src/uri.am | 4 +- src/vlibapi/api_helper_macros.h | 1 - src/vpp.am | 12 +- src/vpp/api/api.c | 59 -- src/vpp/api/vpe.api | 112 +--- src/vpp/api/vpe_all_api_h.h | 3 + src/vpp/stats/stats.api | 222 +++++++ src/vpp/stats/stats.c | 1265 ++++++++++++++++++++++++++------------- src/vpp/stats/stats.h | 45 +- 10 files changed, 1132 insertions(+), 592 deletions(-) create mode 100644 src/vpp/stats/stats.api (limited to 'src/vpp/api') diff --git a/.gitignore b/.gitignore index 10aab98c..dabb31e8 100644 --- a/.gitignore +++ b/.gitignore @@ -55,6 +55,7 @@ test-driver *.iml .bootstrap.ok .settings +.autotools # stop autotools ignore # OSX and some IDE diff --git a/src/uri.am b/src/uri.am index ae6feb6f..660f897d 100644 --- a/src/uri.am +++ b/src/uri.am @@ -1,4 +1,4 @@ -# Copyright (c) 2016 Cisco and/or its affiliates. +# Copyright (c) 2017 Cisco and/or its affiliates. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at: @@ -58,7 +58,7 @@ vcl_test_server_SOURCES = uri/vcl_test_server.c vcl_test_server_LDADD = libvppcom.la vcl_test_client_SOURCES = uri/vcl_test_client.c -vcl_test_client_LDADD = libvppcom.la +vcl_test_client_LDADD = libvppcom.la sock_test_server_SOURCES = uri/sock_test_server.c sock_test_client_SOURCES = uri/sock_test_client.c diff --git a/src/vlibapi/api_helper_macros.h b/src/vlibapi/api_helper_macros.h index 2e29d4d7..b8a9978a 100644 --- a/src/vlibapi/api_helper_macros.h +++ b/src/vlibapi/api_helper_macros.h @@ -217,7 +217,6 @@ _(from_netconf_client) \ _(oam_events) \ _(bfd_events) -/* WARNING: replicated in vpp/stats.h */ typedef struct { u32 client_index; /* in memclnt registration pool */ diff --git a/src/vpp.am b/src/vpp.am index 10a4e311..9a07cefe 100644 --- a/src/vpp.am +++ b/src/vpp.am @@ -42,9 +42,11 @@ bin_vpp_CFLAGS = @APICLI@ nobase_include_HEADERS += \ vpp/api/vpe_all_api_h.h \ vpp/api/vpe_msg_enum.h \ + vpp/stats/stats.api.h \ vpp/api/vpe.api.h API_FILES += vpp/api/vpe.api +API_FILES += vpp/stats/stats.api BUILT_SOURCES += .version @@ -116,13 +118,13 @@ endif noinst_PROGRAMS += bin/summary_stats_client bin_summary_stats_client_SOURCES = \ - vpp/api/summary_stats_client.c + vpp/api/summary_stats_client.c bin_summary_stats_client_LDADD = \ - libvlibmemoryclient.la \ - libsvm.la \ - libvppinfra.la \ - -lpthread -lm -lrt + libvlibmemoryclient.la \ + libsvm.la \ + libvppinfra.la \ + -lpthread -lm -lrt bin_PROGRAMS += bin/vpp_get_metrics diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index e8bc22ae..f9c3129c 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -110,7 +110,6 @@ _(CREATE_VLAN_SUBIF, create_vlan_subif) \ _(CREATE_SUBIF, create_subif) \ _(PROXY_ARP_ADD_DEL, proxy_arp_add_del) \ _(PROXY_ARP_INTFC_ENABLE_DISABLE, proxy_arp_intfc_enable_disable) \ -_(VNET_GET_SUMMARY_STATS, vnet_get_summary_stats) \ _(RESET_FIB, reset_fib) \ _(CREATE_LOOPBACK, create_loopback) \ _(CREATE_LOOPBACK_INSTANCE, create_loopback_instance) \ @@ -745,64 +744,6 @@ vl_api_oam_add_del_t_handler (vl_api_oam_add_del_t * mp) REPLY_MACRO (VL_API_OAM_ADD_DEL_REPLY); } -static void -vl_api_vnet_get_summary_stats_t_handler (vl_api_vnet_get_summary_stats_t * mp) -{ - stats_main_t *sm = &stats_main; - vnet_interface_main_t *im = sm->interface_main; - vl_api_vnet_get_summary_stats_reply_t *rmp; - vlib_combined_counter_main_t *cm; - vlib_counter_t v; - int i, which; - u64 total_pkts[VLIB_N_RX_TX]; - u64 total_bytes[VLIB_N_RX_TX]; - - unix_shared_memory_queue_t *q = - vl_api_client_index_to_input_queue (mp->client_index); - - if (!q) - return; - - rmp = vl_msg_api_alloc (sizeof (*rmp)); - rmp->_vl_msg_id = ntohs (VL_API_VNET_GET_SUMMARY_STATS_REPLY); - rmp->context = mp->context; - rmp->retval = 0; - - memset (total_pkts, 0, sizeof (total_pkts)); - memset (total_bytes, 0, sizeof (total_bytes)); - - vnet_interface_counter_lock (im); - - vec_foreach (cm, im->combined_sw_if_counters) - { - which = cm - im->combined_sw_if_counters; - - for (i = 0; i < vlib_combined_counter_n_counters (cm); i++) - { - vlib_get_combined_counter (cm, i, &v); - total_pkts[which] += v.packets; - total_bytes[which] += v.bytes; - } - } - vnet_interface_counter_unlock (im); - - rmp->total_pkts[VLIB_RX] = clib_host_to_net_u64 (total_pkts[VLIB_RX]); - rmp->total_bytes[VLIB_RX] = clib_host_to_net_u64 (total_bytes[VLIB_RX]); - rmp->total_pkts[VLIB_TX] = clib_host_to_net_u64 (total_pkts[VLIB_TX]); - rmp->total_bytes[VLIB_TX] = clib_host_to_net_u64 (total_bytes[VLIB_TX]); - rmp->vector_rate = - 
clib_host_to_net_u64 (vlib_last_vector_length_per_node (sm->vlib_main)); - - vl_msg_api_send_shmem (q, (u8 *) & rmp); -} - -/* *INDENT-OFF* */ -typedef CLIB_PACKED (struct { - ip4_address_t address; - u32 address_length: 6; - u32 index:26; -}) ip4_route_t; -/* *INDENT-ON* */ static int ip4_reset_fib_t_handler (vl_api_reset_fib_t * mp) diff --git a/src/vpp/api/vpe.api b/src/vpp/api/vpe.api index eda95ce5..d68beae1 100644 --- a/src/vpp/api/vpe.api +++ b/src/vpp/api/vpe.api @@ -47,6 +47,7 @@ * DHCP APIs: see ... /src/vnet/dhcp/{dhcpk.api, dhcp_api.c} * COP APIs: see ... /src/vnet/cop/{cop.api, cop_api.c} * POLICER APIs: see ... /src/vnet/policer/{policer.api, policer_api.c} + * STATS APIs: see .../src/vpp/stats/{stats.api, stats.c} */ /** \brief Create a new subinterface with the given vlan id @@ -136,117 +137,6 @@ autoreply define reset_vrf u32 vrf_id; }; -/** \brief Want Stats, register for stats updates - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param enable_disable - 1 = enable stats, 0 = disable - @param pid - pid of process requesting stats updates -*/ -autoreply define want_stats -{ - u32 client_index; - u32 context; - u32 enable_disable; - u32 pid; -}; - -typeonly manual_print manual_endian define ip4_fib_counter -{ - u32 address; - u8 address_length; - u64 packets; - u64 bytes; -}; - -manual_print manual_endian define vnet_ip4_fib_counters -{ - u32 vrf_id; - u32 count; - vl_api_ip4_fib_counter_t c[count]; -}; - -typeonly manual_print manual_endian define ip4_nbr_counter -{ - u32 address; - u8 link_type; - u64 packets; - u64 bytes; -}; - -/** - * @brief Per-neighbour (i.e. per-adjacency) coutners - * @param count The size of the array of counters - * @param sw_if_index The interface the adjacency is on - * @param begin Flag to indicate this is the first set of stats for this - * interface. 
If this flag is not set the it is a continuation of - * stats for this interface - * @param c counters - */ -manual_print manual_endian define vnet_ip4_nbr_counters -{ - u32 count; - u32 sw_if_index; - u8 begin; - vl_api_ip4_nbr_counter_t c[count]; -}; - -typeonly manual_print manual_endian define ip6_fib_counter -{ - u64 address[2]; - u8 address_length; - u64 packets; - u64 bytes; -}; - -manual_print manual_endian define vnet_ip6_fib_counters -{ - u32 vrf_id; - u32 count; - vl_api_ip6_fib_counter_t c[count]; -}; - -typeonly manual_print manual_endian define ip6_nbr_counter -{ - u64 address[2]; - u8 link_type; - u64 packets; - u64 bytes; -}; - -manual_print manual_endian define vnet_ip6_nbr_counters -{ - u32 count; - u32 sw_if_index; - u8 begin; - vl_api_ip6_nbr_counter_t c[count]; -}; - -/** \brief Request for a single block of summary stats - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request -*/ -define vnet_get_summary_stats -{ - u32 client_index; - u32 context; -}; - -/** \brief Reply for vnet_get_summary_stats request - @param context - sender context, to match reply w/ request - @param retval - return code for request - @param total_pkts - - @param total_bytes - - @param vector_rate - -*/ -define vnet_get_summary_stats_reply -{ - u32 context; - i32 retval; - u64 total_pkts[2]; - u64 total_bytes[2]; - f64 vector_rate; -}; - /** \brief OAM event structure @param dst_address[] - @param state diff --git a/src/vpp/api/vpe_all_api_h.h b/src/vpp/api/vpe_all_api_h.h index 397cd807..d35a0535 100644 --- a/src/vpp/api/vpe_all_api_h.h +++ b/src/vpp/api/vpe_all_api_h.h @@ -28,6 +28,9 @@ /* Include the current layer (third) vpp API definition layer */ #include +/* Include stats APIs */ +#include + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vpp/stats/stats.api b/src/vpp/stats/stats.api new file mode 100644 index 00000000..7f745859 --- /dev/null +++ b/src/vpp/stats/stats.api @@ -0,0 +1,222 @@ +/* + * Copyright (c) 2015-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** \file + + This file defines the stats API +*/ + + +/** \brief Want Stats, enable/disable ALL stats updates + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param enable_disable - 1 = enable stats, 0 = disable + @param pid - pid of process requesting stats updates +*/ +autoreply define want_stats +{ + u32 client_index; + u32 context; + u32 enable_disable; + u32 pid; +}; + +/** \brief Want Interface Simple Stats, register for detailed interface stats + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param enable_disable - 1 = enable stats, 0 = disable + @param pid - pid of process requesting stats updates +*/ +autoreply define want_interface_simple_stats +{ + u32 client_index; + u32 context; + u32 enable_disable; + u32 pid; +}; + +/** \brief Want Interface Combined Stats, register for continuous stats + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param enable_disable - 1 = enable stats, 0 = disable + @param pid - pid of process requesting stats updates +*/ +autoreply define want_interface_combined_stats +{ + u32 client_index; + u32 context; + u32 enable_disable; + u32 pid; +}; + +/** \brief Want IP4 FIB Stats, register for continuous stats + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param enable_disable - 1 = enable stats, 0 = disable + @param pid - pid of process requesting stats updates +*/ +autoreply define want_ip4_fib_stats +{ + u32 client_index; + u32 context; + u32 enable_disable; + u32 pid; +}; + +/** \brief Want IP6 FIB Stats, register for continuous stats + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param enable_disable - 1 = enable stats, 0 = disable + @param pid - pid of process requesting stats updates +*/ +autoreply define want_ip6_fib_stats +{ + u32 client_index; + u32 context; + u32 enable_disable; + u32 pid; +}; + +/** \brief Want IP4 NBR Stats, register for continuous stats + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param enable_disable - 1 = enable stats, 0 = disable + @param pid - pid of process requesting stats updates +*/ +autoreply define want_ip4_nbr_stats +{ + u32 client_index; + u32 context; + u32 enable_disable; + u32 pid; +}; + +/** \brief Want IP6 NBR Stats, register for continuous stats + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param enable_disable - 1 = enable stats, 0 = disable + @param pid - pid of process requesting stats updates +*/ +autoreply define want_ip6_nbr_stats +{ + u32 client_index; + u32 context; + u32 enable_disable; + u32 pid; +}; + +typeonly manual_print manual_endian define ip4_fib_counter +{ + u32 address; + u8 address_length; + u64 packets; + u64 bytes; +}; + +manual_print manual_endian define vnet_ip4_fib_counters +{ + u32 vrf_id; + u32 count; + vl_api_ip4_fib_counter_t c[count]; +}; + +typeonly manual_print manual_endian define ip4_nbr_counter +{ + u32 address; + u8 link_type; + u64 packets; + u64 bytes; +}; + +/** + * @brief Per-neighbour (i.e. 
per-adjacency) counters
+ * @param count The size of the array of counters
+ * @param sw_if_index The interface the adjacency is on
+ * @param begin Flag to indicate this is the first set of stats for this
+ * interface. If this flag is not set then it is a continuation of
+ * stats for this interface
+ * @param c counters
+ */
+manual_print manual_endian define vnet_ip4_nbr_counters
+{
+ u32 count;
+ u32 sw_if_index;
+ u8 begin;
+ vl_api_ip4_nbr_counter_t c[count];
+};
+
+typeonly manual_print manual_endian define ip6_fib_counter
+{
+ u64 address[2];
+ u8 address_length;
+ u64 packets;
+ u64 bytes;
+};
+
+manual_print manual_endian define vnet_ip6_fib_counters
+{
+ u32 vrf_id;
+ u32 count;
+ vl_api_ip6_fib_counter_t c[count];
+};
+
+typeonly manual_print manual_endian define ip6_nbr_counter
+{
+ u64 address[2];
+ u8 link_type;
+ u64 packets;
+ u64 bytes;
+};
+
+manual_print manual_endian define vnet_ip6_nbr_counters
+{
+ u32 count;
+ u32 sw_if_index;
+ u8 begin;
+ vl_api_ip6_nbr_counter_t c[count];
+};
+
+
+/** \brief Request for a single block of summary stats
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define vnet_get_summary_stats
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Reply for vnet_get_summary_stats request
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for request
+ @param total_pkts -
+ @param total_bytes -
+ @param vector_rate -
+*/
+define vnet_get_summary_stats_reply
+{
+ u32 context;
+ i32 retval;
+ u64 total_pkts[2];
+ u64 total_bytes[2];
+ f64 vector_rate;
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vpp/stats/stats.c b/src/vpp/stats/stats.c
index b0fac73d..c78a8aef 100644
--- a/src/vpp/stats/stats.c
+++ b/src/vpp/stats/stats.c
@@ -14,10 +14,8 @@
 */
 #include
 #include
-#include
-#include
-#include
 #include
+#include
 #include
 #define STATS_DEBUG 0
@@ -48,11 +46,19 @@ stats_main_t stats_main;
 #define foreach_stats_msg \
 _(WANT_STATS, want_stats) \
 _(VNET_INTERFACE_SIMPLE_COUNTERS, vnet_interface_simple_counters) \
+_(WANT_INTERFACE_SIMPLE_STATS, want_interface_simple_stats) \
 _(VNET_INTERFACE_COMBINED_COUNTERS, vnet_interface_combined_counters) \
+_(WANT_INTERFACE_COMBINED_STATS, want_interface_combined_stats) \
 _(VNET_IP4_FIB_COUNTERS, vnet_ip4_fib_counters) \
+_(WANT_IP4_FIB_STATS, want_ip4_fib_stats) \
 _(VNET_IP6_FIB_COUNTERS, vnet_ip6_fib_counters) \
+_(WANT_IP6_FIB_STATS, want_ip6_fib_stats) \
 _(VNET_IP4_NBR_COUNTERS, vnet_ip4_nbr_counters) \
-_(VNET_IP6_NBR_COUNTERS, vnet_ip6_nbr_counters)
+_(WANT_IP4_NBR_STATS, want_ip4_nbr_stats) \
+_(VNET_IP6_NBR_COUNTERS, vnet_ip6_nbr_counters) \
+_(WANT_IP6_NBR_STATS, want_ip6_nbr_stats) \
+_(VNET_GET_SUMMARY_STATS, vnet_get_summary_stats)
+
 /* These constants ensure msg sizes <= 1024, aka ring allocation */
 #define SIMPLE_COUNTER_BATCH_SIZE 126
@@ -64,6 +70,110 @@ _(VNET_IP6_NBR_COUNTERS, vnet_ip6_nbr_counters)
 #define STATS_RELEASE_DELAY_NS (1000 * 1000 * 5) /* ns/us us/ms */
+u8 *
+format_vnet_interface_combined_counters (u8 * s, va_list * args)
+{
+ stats_main_t *sm = &stats_main;
+ vl_api_vnet_interface_combined_counters_t *mp =
+ va_arg (*args, vl_api_vnet_interface_combined_counters_t *);
+
+ char *counter_name;
+ u32 count, sw_if_index;
+ int i;
+ count = ntohl (mp->count);
+ sw_if_index = ntohl (mp->first_sw_if_index);
+
+ vlib_counter_t *vp;
+ u64 packets, bytes;
+ vp = (vlib_counter_t *) mp->data;
+
+ switch (mp->vnet_counter_type)
+ {
+
case VNET_INTERFACE_COUNTER_RX: + counter_name = "rx"; + break; + case VNET_INTERFACE_COUNTER_TX: + counter_name = "tx"; + break; + default: + counter_name = "bogus"; + break; + } + for (i = 0; i < count; i++) + { + packets = clib_mem_unaligned (&vp->packets, u64); + packets = clib_net_to_host_u64 (packets); + bytes = clib_mem_unaligned (&vp->bytes, u64); + bytes = clib_net_to_host_u64 (bytes); + vp++; + s = format (s, "%U.%s.packets %lld\n", + format_vnet_sw_if_index_name, + sm->vnet_main, sw_if_index, counter_name, packets); + s = format (s, "%U.%s.bytes %lld\n", + format_vnet_sw_if_index_name, + sm->vnet_main, sw_if_index, counter_name, bytes); + sw_if_index++; + } + return s; +} + +u8 * +format_vnet_interface_simple_counters (u8 * s, va_list * args) +{ + stats_main_t *sm = &stats_main; + vl_api_vnet_interface_simple_counters_t *mp = + va_arg (*args, vl_api_vnet_interface_simple_counters_t *); + char *counter_name; + u32 count, sw_if_index; + count = ntohl (mp->count); + sw_if_index = ntohl (mp->first_sw_if_index); + u64 *vp, v; + vp = (u64 *) mp->data; + int i; + + switch (mp->vnet_counter_type) + { + case VNET_INTERFACE_COUNTER_DROP: + counter_name = "drop"; + break; + case VNET_INTERFACE_COUNTER_PUNT: + counter_name = "punt"; + break; + case VNET_INTERFACE_COUNTER_IP4: + counter_name = "ip4"; + break; + case VNET_INTERFACE_COUNTER_IP6: + counter_name = "ip6"; + break; + case VNET_INTERFACE_COUNTER_RX_NO_BUF: + counter_name = "rx-no-buff"; + break; + case VNET_INTERFACE_COUNTER_RX_MISS: + counter_name = "rx-miss"; + break; + case VNET_INTERFACE_COUNTER_RX_ERROR: + counter_name = "rx-error (fifo-full)"; + break; + case VNET_INTERFACE_COUNTER_TX_ERROR: + counter_name = "tx-error (fifo-full)"; + break; + default: + counter_name = "bogus"; + break; + } + for (i = 0; i < count; i++) + { + v = clib_mem_unaligned (vp, u64); + v = clib_net_to_host_u64 (v); + vp++; + s = format (s, "%U.%s %lld\n", format_vnet_sw_if_index_name, + sm->vnet_main, sw_if_index, counter_name, v); + sw_if_index++; + } + + return s; +} + void dslock (stats_main_t * sm, int release_hint, int tag) { @@ -231,14 +341,6 @@ do_combined_interface_counters (stats_main_t * sm) vnet_interface_counter_unlock (im); } -/* from .../vnet/vnet/ip/lookup.c. 
Yuck */ -typedef CLIB_PACKED (struct - { - ip4_address_t address; -u32 address_length: 6; -u32 index: 26; - }) ip4_route_t; - static void ip46_fib_stats_delay (stats_main_t * sm, u32 sec, u32 nsec) { @@ -573,167 +675,177 @@ do_ip4_fibs (stats_main_t * sm) api_main_t *am = sm->api_main; vl_shmem_hdr_t *shmem_hdr = am->shmem_hdr; unix_shared_memory_queue_t *q = shmem_hdr->vl_input_queue; - static ip4_route_t *routes; ip4_route_t *r; fib_table_t *fib; ip4_fib_t *v4_fib; - static uword *results; + do_ip46_fibs_t *do_fibs; vl_api_vnet_ip4_fib_counters_t *mp = 0; u32 items_this_message; vl_api_ip4_fib_counter_t *ctrp = 0; u32 start_at_fib_index = 0; - int i; + int i, j, k; + + do_fibs = &sm->do_ip46_fibs; again: + vec_reset_length (do_fibs->fibs); /* *INDENT-OFF* */ pool_foreach (fib, im4->fibs, - ({ - /* We may have bailed out due to control-plane activity */ - while ((fib - im4->fibs) < start_at_fib_index) - continue; + ({vec_add1(do_fibs->fibs,fib);})); - v4_fib = pool_elt_at_index (im4->v4_fibs, fib->ft_index); + /* *INDENT-ON* */ - if (mp == 0) - { - items_this_message = IP4_FIB_COUNTER_BATCH_SIZE; - mp = vl_msg_api_alloc_as_if_client - (sizeof (*mp) + - items_this_message * sizeof (vl_api_ip4_fib_counter_t)); - mp->_vl_msg_id = ntohs (VL_API_VNET_IP4_FIB_COUNTERS); - mp->count = 0; - mp->vrf_id = ntohl (fib->ft_table_id); - ctrp = (vl_api_ip4_fib_counter_t *) mp->c; - } - else - { - /* happens if the last FIB was empty... */ - ASSERT (mp->count == 0); - mp->vrf_id = ntohl (fib->ft_table_id); - } + for (j = 0; j < vec_len (do_fibs->fibs); j++) + { + fib = do_fibs->fibs[j]; + /* We may have bailed out due to control-plane activity */ + while ((fib - im4->fibs) < start_at_fib_index) + continue; - dslock (sm, 0 /* release hint */ , 1 /* tag */ ); + v4_fib = pool_elt_at_index (im4->v4_fibs, fib->ft_index); - vec_reset_length (routes); - vec_reset_length (results); + if (mp == 0) + { + items_this_message = IP4_FIB_COUNTER_BATCH_SIZE; + mp = vl_msg_api_alloc_as_if_client + (sizeof (*mp) + + items_this_message * sizeof (vl_api_ip4_fib_counter_t)); + mp->_vl_msg_id = ntohs (VL_API_VNET_IP4_FIB_COUNTERS); + mp->count = 0; + mp->vrf_id = ntohl (fib->ft_table_id); + ctrp = (vl_api_ip4_fib_counter_t *) mp->c; + } + else + { + /* happens if the last FIB was empty... */ + ASSERT (mp->count == 0); + mp->vrf_id = ntohl (fib->ft_table_id); + } - for (i = 0; i < ARRAY_LEN (v4_fib->fib_entry_by_dst_address); i++) - { - uword *hash = v4_fib->fib_entry_by_dst_address[i]; - hash_pair_t *p; - ip4_route_t x; - - x.address_length = i; - - hash_foreach_pair (p, hash, - ({ - x.address.data_u32 = p->key; - x.index = p->value[0]; - - vec_add1 (routes, x); - if (sm->data_structure_lock->release_hint) - { - start_at_fib_index = fib - im4->fibs; - dsunlock (sm); - ip46_fib_stats_delay (sm, 0 /* sec */, - STATS_RELEASE_DELAY_NS); - mp->count = 0; - ctrp = (vl_api_ip4_fib_counter_t *)mp->c; - goto again; - } - })); - } + dslock (sm, 0 /* release hint */ , 1 /* tag */ ); - vec_foreach (r, routes) - { - vlib_counter_t c; - const dpo_id_t *dpo_id; - - dpo_id = fib_entry_contribute_ip_forwarding(r->index); - vlib_get_combined_counter (&load_balance_main.lbm_to_counters, - (u32)dpo_id->dpoi_index, &c); - /* - * If it has actually - * seen at least one packet, send it. 
- */ - if (c.packets > 0) - { + vec_reset_length (do_fibs->ip4routes); + vec_reset_length (do_fibs->results); - /* already in net byte order */ - ctrp->address = r->address.as_u32; - ctrp->address_length = r->address_length; - ctrp->packets = clib_host_to_net_u64 (c.packets); - ctrp->bytes = clib_host_to_net_u64 (c.bytes); - mp->count++; - ctrp++; + for (i = 0; i < ARRAY_LEN (v4_fib->fib_entry_by_dst_address); i++) + { + uword *hash = v4_fib->fib_entry_by_dst_address[i]; + hash_pair_t *p; + ip4_route_t x; + + vec_reset_length (do_fibs->pvec); + + x.address_length = i; + + hash_foreach_pair (p, hash, ( + { + vec_add1 (do_fibs->pvec, p);} + )); + for (k = 0; k < vec_len (do_fibs->pvec); k++) + { + p = do_fibs->pvec[k]; + x.address.data_u32 = p->key; + x.index = p->value[0]; + + vec_add1 (do_fibs->ip4routes, x); + if (sm->data_structure_lock->release_hint) + { + start_at_fib_index = fib - im4->fibs; + dsunlock (sm); + ip46_fib_stats_delay (sm, 0 /* sec */ , + STATS_RELEASE_DELAY_NS); + mp->count = 0; + ctrp = (vl_api_ip4_fib_counter_t *) mp->c; + goto again; + } + } + } - if (mp->count == items_this_message) - { - mp->count = htonl (items_this_message); - /* - * If the main thread's input queue is stuffed, - * drop the data structure lock (which the main thread - * may want), and take a pause. - */ - unix_shared_memory_queue_lock (q); - if (unix_shared_memory_queue_is_full (q)) - { - dsunlock (sm); - vl_msg_api_send_shmem_nolock (q, (u8 *) & mp); - unix_shared_memory_queue_unlock (q); - mp = 0; - ip46_fib_stats_delay (sm, 0 /* sec */ , - STATS_RELEASE_DELAY_NS); - goto again; - } - vl_msg_api_send_shmem_nolock (q, (u8 *) & mp); - unix_shared_memory_queue_unlock (q); - - items_this_message = IP4_FIB_COUNTER_BATCH_SIZE; - mp = vl_msg_api_alloc_as_if_client - (sizeof (*mp) + - items_this_message * sizeof (vl_api_ip4_fib_counter_t)); - mp->_vl_msg_id = ntohs (VL_API_VNET_IP4_FIB_COUNTERS); - mp->count = 0; - mp->vrf_id = ntohl (fib->ft_table_id); - ctrp = (vl_api_ip4_fib_counter_t *) mp->c; - } - } /* for each (mp or single) adj */ - if (sm->data_structure_lock->release_hint) - { - start_at_fib_index = fib - im4->fibs; - dsunlock (sm); - ip46_fib_stats_delay (sm, 0 /* sec */ , STATS_RELEASE_DELAY_NS); - mp->count = 0; - ctrp = (vl_api_ip4_fib_counter_t *) mp->c; - goto again; - } + vec_foreach (r, do_fibs->ip4routes) + { + vlib_counter_t c; + const dpo_id_t *dpo_id; + u32 index; + + dpo_id = fib_entry_contribute_ip_forwarding (r->index); + index = (u32) dpo_id->dpoi_index; + + vlib_get_combined_counter (&load_balance_main.lbm_to_counters, + index, &c); + /* + * If it has actually + * seen at least one packet, send it. + */ + if (c.packets > 0) + { + + /* already in net byte order */ + ctrp->address = r->address.as_u32; + ctrp->address_length = r->address_length; + ctrp->packets = clib_host_to_net_u64 (c.packets); + ctrp->bytes = clib_host_to_net_u64 (c.bytes); + mp->count++; + ctrp++; + + if (mp->count == items_this_message) + { + mp->count = htonl (items_this_message); + /* + * If the main thread's input queue is stuffed, + * drop the data structure lock (which the main thread + * may want), and take a pause. 
+ */ + unix_shared_memory_queue_lock (q); + if (unix_shared_memory_queue_is_full (q)) + { + dsunlock (sm); + vl_msg_api_send_shmem_nolock (q, (u8 *) & mp); + unix_shared_memory_queue_unlock (q); + mp = 0; + ip46_fib_stats_delay (sm, 0 /* sec */ , + STATS_RELEASE_DELAY_NS); + goto again; + } + vl_msg_api_send_shmem_nolock (q, (u8 *) & mp); + unix_shared_memory_queue_unlock (q); + + items_this_message = IP4_FIB_COUNTER_BATCH_SIZE; + mp = vl_msg_api_alloc_as_if_client + (sizeof (*mp) + + items_this_message * sizeof (vl_api_ip4_fib_counter_t)); + mp->_vl_msg_id = ntohs (VL_API_VNET_IP4_FIB_COUNTERS); + mp->count = 0; + mp->vrf_id = ntohl (fib->ft_table_id); + ctrp = (vl_api_ip4_fib_counter_t *) mp->c; + } + } /* for each (mp or single) adj */ + if (sm->data_structure_lock->release_hint) + { + start_at_fib_index = fib - im4->fibs; + dsunlock (sm); + ip46_fib_stats_delay (sm, 0 /* sec */ , STATS_RELEASE_DELAY_NS); + mp->count = 0; + ctrp = (vl_api_ip4_fib_counter_t *) mp->c; + goto again; + } } /* vec_foreach (routes) */ - dsunlock (sm); + dsunlock (sm); - /* Flush any data from this fib */ - if (mp->count) - { - mp->count = htonl (mp->count); - vl_msg_api_send_shmem (q, (u8 *) & mp); - mp = 0; - } - })); - /* *INDENT-ON* */ + /* Flush any data from this fib */ + if (mp->count) + { + mp->count = htonl (mp->count); + vl_msg_api_send_shmem (q, (u8 *) & mp); + mp = 0; + } + } /* If e.g. the last FIB had no reportable routes, free the buffer */ if (mp) vl_msg_api_free (mp); } -typedef struct -{ - ip6_address_t address; - u32 address_length; - u32 index; -} ip6_route_t; - typedef struct { u32 fib_index; @@ -769,137 +881,144 @@ do_ip6_fibs (stats_main_t * sm) api_main_t *am = sm->api_main; vl_shmem_hdr_t *shmem_hdr = am->shmem_hdr; unix_shared_memory_queue_t *q = shmem_hdr->vl_input_queue; - static ip6_route_t *routes; ip6_route_t *r; fib_table_t *fib; - static uword *results; + do_ip46_fibs_t *do_fibs; vl_api_vnet_ip6_fib_counters_t *mp = 0; u32 items_this_message; vl_api_ip6_fib_counter_t *ctrp = 0; u32 start_at_fib_index = 0; BVT (clib_bihash) * h = &im6->ip6_table[IP6_FIB_TABLE_FWDING].ip6_hash; add_routes_in_fib_arg_t _a, *a = &_a; + int i; + do_fibs = &sm->do_ip46_fibs; again: + vec_reset_length (do_fibs->fibs); /* *INDENT-OFF* */ pool_foreach (fib, im6->fibs, - ({ - /* We may have bailed out due to control-plane activity */ - while ((fib - im6->fibs) < start_at_fib_index) - continue; - - if (mp == 0) - { - items_this_message = IP6_FIB_COUNTER_BATCH_SIZE; - mp = vl_msg_api_alloc_as_if_client - (sizeof (*mp) + - items_this_message * sizeof (vl_api_ip6_fib_counter_t)); - mp->_vl_msg_id = ntohs (VL_API_VNET_IP6_FIB_COUNTERS); - mp->count = 0; - mp->vrf_id = ntohl (fib->ft_table_id); - ctrp = (vl_api_ip6_fib_counter_t *) mp->c; - } + ({vec_add1(do_fibs->fibs,fib);})); + /* *INDENT-ON* */ - dslock (sm, 0 /* release hint */ , 1 /* tag */ ); - vec_reset_length (routes); - vec_reset_length (results); + for (i = 0; i < vec_len (do_fibs->fibs); i++) + { + fib = do_fibs->fibs[i]; + /* We may have bailed out due to control-plane activity */ + while ((fib - im6->fibs) < start_at_fib_index) + continue; - a->fib_index = fib - im6->fibs; - a->routep = &routes; - a->sm = sm; + if (mp == 0) + { + items_this_message = IP6_FIB_COUNTER_BATCH_SIZE; + mp = vl_msg_api_alloc_as_if_client + (sizeof (*mp) + + items_this_message * sizeof (vl_api_ip6_fib_counter_t)); + mp->_vl_msg_id = ntohs (VL_API_VNET_IP6_FIB_COUNTERS); + mp->count = 0; + mp->vrf_id = ntohl (fib->ft_table_id); + ctrp = (vl_api_ip6_fib_counter_t *) mp->c; 
+ } - if (clib_setjmp (&sm->jmp_buf, 0) == 0) - { - start_at_fib_index = fib - im6->fibs; - BV (clib_bihash_foreach_key_value_pair) (h, add_routes_in_fib, a); - } - else - { - dsunlock (sm); - ip46_fib_stats_delay (sm, 0 /* sec */ , - STATS_RELEASE_DELAY_NS); - mp->count = 0; - ctrp = (vl_api_ip6_fib_counter_t *) mp->c; - goto again; - } + dslock (sm, 0 /* release hint */ , 1 /* tag */ ); - vec_foreach (r, routes) - { - vlib_counter_t c; - - vlib_get_combined_counter (&load_balance_main.lbm_to_counters, - r->index, &c); - /* - * If it has actually - * seen at least one packet, send it. - */ - if (c.packets > 0) - { - /* already in net byte order */ - ctrp->address[0] = r->address.as_u64[0]; - ctrp->address[1] = r->address.as_u64[1]; - ctrp->address_length = (u8) r->address_length; - ctrp->packets = clib_host_to_net_u64 (c.packets); - ctrp->bytes = clib_host_to_net_u64 (c.bytes); - mp->count++; - ctrp++; - - if (mp->count == items_this_message) - { - mp->count = htonl (items_this_message); - /* - * If the main thread's input queue is stuffed, - * drop the data structure lock (which the main thread - * may want), and take a pause. - */ - unix_shared_memory_queue_lock (q); - if (unix_shared_memory_queue_is_full (q)) - { - dsunlock (sm); - vl_msg_api_send_shmem_nolock (q, (u8 *) & mp); - unix_shared_memory_queue_unlock (q); - mp = 0; - ip46_fib_stats_delay (sm, 0 /* sec */ , - STATS_RELEASE_DELAY_NS); - goto again; - } - vl_msg_api_send_shmem_nolock (q, (u8 *) & mp); - unix_shared_memory_queue_unlock (q); - - items_this_message = IP6_FIB_COUNTER_BATCH_SIZE; - mp = vl_msg_api_alloc_as_if_client - (sizeof (*mp) + - items_this_message * sizeof (vl_api_ip6_fib_counter_t)); - mp->_vl_msg_id = ntohs (VL_API_VNET_IP6_FIB_COUNTERS); - mp->count = 0; - mp->vrf_id = ntohl (fib->ft_table_id); - ctrp = (vl_api_ip6_fib_counter_t *) mp->c; - } - } + vec_reset_length (do_fibs->ip6routes); + vec_reset_length (do_fibs->results); - if (sm->data_structure_lock->release_hint) - { - start_at_fib_index = fib - im6->fibs; - dsunlock (sm); - ip46_fib_stats_delay (sm, 0 /* sec */ , STATS_RELEASE_DELAY_NS); - mp->count = 0; - ctrp = (vl_api_ip6_fib_counter_t *) mp->c; - goto again; - } - } /* vec_foreach (routes) */ + a->fib_index = fib - im6->fibs; + a->routep = &do_fibs->ip6routes; + a->sm = sm; - dsunlock (sm); + if (clib_setjmp (&sm->jmp_buf, 0) == 0) + { + start_at_fib_index = fib - im6->fibs; + BV (clib_bihash_foreach_key_value_pair) (h, add_routes_in_fib, a); + } + else + { + dsunlock (sm); + ip46_fib_stats_delay (sm, 0 /* sec */ , + STATS_RELEASE_DELAY_NS); + mp->count = 0; + ctrp = (vl_api_ip6_fib_counter_t *) mp->c; + goto again; + } - /* Flush any data from this fib */ - if (mp->count) + vec_foreach (r, do_fibs->ip6routes) { - mp->count = htonl (mp->count); - vl_msg_api_send_shmem (q, (u8 *) & mp); - mp = 0; - } - })); - /* *INDENT-ON* */ + vlib_counter_t c; + + vlib_get_combined_counter (&load_balance_main.lbm_to_counters, + r->index, &c); + /* + * If it has actually + * seen at least one packet, send it. 
+ */ + if (c.packets > 0) + { + /* already in net byte order */ + ctrp->address[0] = r->address.as_u64[0]; + ctrp->address[1] = r->address.as_u64[1]; + ctrp->address_length = (u8) r->address_length; + ctrp->packets = clib_host_to_net_u64 (c.packets); + ctrp->bytes = clib_host_to_net_u64 (c.bytes); + mp->count++; + ctrp++; + + if (mp->count == items_this_message) + { + mp->count = htonl (items_this_message); + /* + * If the main thread's input queue is stuffed, + * drop the data structure lock (which the main thread + * may want), and take a pause. + */ + unix_shared_memory_queue_lock (q); + if (unix_shared_memory_queue_is_full (q)) + { + dsunlock (sm); + vl_msg_api_send_shmem_nolock (q, (u8 *) & mp); + unix_shared_memory_queue_unlock (q); + mp = 0; + ip46_fib_stats_delay (sm, 0 /* sec */ , + STATS_RELEASE_DELAY_NS); + goto again; + } + vl_msg_api_send_shmem_nolock (q, (u8 *) & mp); + unix_shared_memory_queue_unlock (q); + + items_this_message = IP6_FIB_COUNTER_BATCH_SIZE; + mp = vl_msg_api_alloc_as_if_client + (sizeof (*mp) + + items_this_message * sizeof (vl_api_ip6_fib_counter_t)); + mp->_vl_msg_id = ntohs (VL_API_VNET_IP6_FIB_COUNTERS); + mp->count = 0; + mp->vrf_id = ntohl (fib->ft_table_id); + ctrp = (vl_api_ip6_fib_counter_t *) mp->c; + } + } + + if (sm->data_structure_lock->release_hint) + { + start_at_fib_index = fib - im6->fibs; + dsunlock (sm); + ip46_fib_stats_delay (sm, 0 /* sec */ , STATS_RELEASE_DELAY_NS); + mp->count = 0; + ctrp = (vl_api_ip6_fib_counter_t *) mp->c; + goto again; + } + } /* vec_foreach (routes) */ + + dsunlock (sm); + + /* Flush any data from this fib */ + if (mp->count) + { + mp->count = htonl (mp->count); + vl_msg_api_send_shmem (q, (u8 *) & mp); + mp = 0; + } + } /* If e.g. the last FIB had no reportable routes, free the buffer */ if (mp) @@ -946,84 +1065,45 @@ static void vl_api_vnet_interface_simple_counters_t_handler (vl_api_vnet_interface_simple_counters_t * mp) { - vpe_client_registration_t *reg; + vpe_client_stats_registration_t *reg; stats_main_t *sm = &stats_main; unix_shared_memory_queue_t *q, *q_prev = NULL; vl_api_vnet_interface_simple_counters_t *mp_copy = NULL; u32 mp_size; - -#if STATS_DEBUG > 0 - char *counter_name; - u32 count, sw_if_index; int i; -#endif mp_size = sizeof (*mp) + (ntohl (mp->count) * sizeof (u64)); /* *INDENT-OFF* */ + vec_reset_length(sm->regs); pool_foreach(reg, sm->stats_registrations, ({ - q = vl_api_client_index_to_input_queue (reg->client_index); - if (q) - { - if (q_prev && (q_prev->cursize < q_prev->maxsize)) - { - mp_copy = vl_msg_api_alloc_as_if_client(mp_size); - clib_memcpy(mp_copy, mp, mp_size); - vl_msg_api_send_shmem (q_prev, (u8 *)&mp); - mp = mp_copy; - } - q_prev = q; - } + vec_add1(sm->regs,reg); })); /* *INDENT-ON* */ - -#if STATS_DEBUG > 0 - count = ntohl (mp->count); - sw_if_index = ntohl (mp->first_sw_if_index); - u64 *vp, v; - vp = (u64 *) mp->data; - - switch (mp->vnet_counter_type) - { - case VNET_INTERFACE_COUNTER_DROP: - counter_name = "drop"; - break; - case VNET_INTERFACE_COUNTER_PUNT: - counter_name = "punt"; - break; - case VNET_INTERFACE_COUNTER_IP4: - counter_name = "ip4"; - break; - case VNET_INTERFACE_COUNTER_IP6: - counter_name = "ip6"; - break; - case VNET_INTERFACE_COUNTER_RX_NO_BUF: - counter_name = "rx-no-buff"; - break; - case VNET_INTERFACE_COUNTER_RX_MISS: - , counter_name = "rx-miss"; - break; - case VNET_INTERFACE_COUNTER_RX_ERROR: - , counter_name = "rx-error (fifo-full)"; - break; - case VNET_INTERFACE_COUNTER_TX_ERROR: - , counter_name = "tx-error (fifo-full)"; - break; - 
default: - counter_name = "bogus"; - break; - } - for (i = 0; i < count; i++) + for (i = 0; i < vec_len (sm->regs); i++) { - v = clib_mem_unaligned (vp, u64); - v = clib_net_to_host_u64 (v); - vp++; - fformat (stdout, "%U.%s %lld\n", format_vnet_sw_if_index_name, - sm->vnet_main, sw_if_index, counter_name, v); - sw_if_index++; + reg = sm->regs[i]; + if (reg->stats_registrations & INTERFACE_SIMPLE_COUNTERS) + { + q = vl_api_client_index_to_input_queue (reg->client.client_index); + if (q) + { + if (q_prev && (q_prev->cursize < q_prev->maxsize)) + { + mp_copy = vl_msg_api_alloc_as_if_client (mp_size); + clib_memcpy (mp_copy, mp, mp_size); + vl_msg_api_send_shmem (q_prev, (u8 *) & mp); + mp = mp_copy; + } + q_prev = q; + } + } } +#if STATS_DEBUG > 0 + fformat (stdout, "%U\n", format_vnet_simple_counters, mp); #endif + if (q_prev && (q_prev->cursize < q_prev->maxsize)) { vl_msg_api_send_shmem (q_prev, (u8 *) & mp); @@ -1038,75 +1118,39 @@ static void vl_api_vnet_interface_combined_counters_t_handler (vl_api_vnet_interface_combined_counters_t * mp) { - vpe_client_registration_t *reg; + vpe_client_stats_registration_t *reg; stats_main_t *sm = &stats_main; unix_shared_memory_queue_t *q, *q_prev = NULL; vl_api_vnet_interface_combined_counters_t *mp_copy = NULL; u32 mp_size; -#if STATS_DEBUG > 0 - char *counter_name; - u32 count, sw_if_index; - int i; -#endif - mp_size = sizeof (*mp) + (ntohl (mp->count) * sizeof (vlib_counter_t)); /* *INDENT-OFF* */ pool_foreach(reg, sm->stats_registrations, ({ - q = vl_api_client_index_to_input_queue (reg->client_index); - if (q) - { - if (q_prev && (q_prev->cursize < q_prev->maxsize)) - { - mp_copy = vl_msg_api_alloc_as_if_client(mp_size); - clib_memcpy(mp_copy, mp, mp_size); - vl_msg_api_send_shmem (q_prev, (u8 *)&mp); - mp = mp_copy; - } - q_prev = q; - } + if (reg->stats_registrations & INTERFACE_COMBINED_COUNTERS) + { + q = vl_api_client_index_to_input_queue (reg->client.client_index); + if (q) + { + if (q_prev && (q_prev->cursize < q_prev->maxsize)) + { + mp_copy = vl_msg_api_alloc_as_if_client(mp_size); + clib_memcpy(mp_copy, mp, mp_size); + vl_msg_api_send_shmem (q_prev, (u8 *)&mp); + mp = mp_copy; + } + q_prev = q; + } + } })); /* *INDENT-ON* */ #if STATS_DEBUG > 0 - count = ntohl (mp->count); - sw_if_index = ntohl (mp->first_sw_if_index); - - vlib_counter_t *vp; - u64 packets, bytes; - vp = (vlib_counter_t *) mp->data; - - switch (mp->vnet_counter_type) - { - case VNET_INTERFACE_COUNTER_RX: - counter_name = "rx"; - break; - case VNET_INTERFACE_COUNTER_TX: - counter_name = "tx"; - break; - default: - counter_name = "bogus"; - break; - } - for (i = 0; i < count; i++) - { - packets = clib_mem_unaligned (&vp->packets, u64); - packets = clib_net_to_host_u64 (packets); - bytes = clib_mem_unaligned (&vp->bytes, u64); - bytes = clib_net_to_host_u64 (bytes); - vp++; - fformat (stdout, "%U.%s.packets %lld\n", - format_vnet_sw_if_index_name, - sm->vnet_main, sw_if_index, counter_name, packets); - fformat (stdout, "%U.%s.bytes %lld\n", - format_vnet_sw_if_index_name, - sm->vnet_main, sw_if_index, counter_name, bytes); - sw_if_index++; - } - + fformat (stdout, "%U\n", format_vnet_combined_counters, mp); #endif + if (q_prev && (q_prev->cursize < q_prev->maxsize)) { vl_msg_api_send_shmem (q_prev, (u8 *) & mp); @@ -1120,7 +1164,7 @@ static void static void vl_api_vnet_ip4_fib_counters_t_handler (vl_api_vnet_ip4_fib_counters_t * mp) { - vpe_client_registration_t *reg; + vpe_client_stats_registration_t *reg; stats_main_t *sm = &stats_main; unix_shared_memory_queue_t *q, 
*q_prev = NULL; vl_api_vnet_ip4_fib_counters_t *mp_copy = NULL; @@ -1132,17 +1176,20 @@ vl_api_vnet_ip4_fib_counters_t_handler (vl_api_vnet_ip4_fib_counters_t * mp) /* *INDENT-OFF* */ pool_foreach(reg, sm->stats_registrations, ({ - q = vl_api_client_index_to_input_queue (reg->client_index); - if (q) + if (reg->stats_registrations & IP4_FIB_COUNTERS) { - if (q_prev && (q_prev->cursize < q_prev->maxsize)) + q = vl_api_client_index_to_input_queue (reg->client.client_index); + if (q) { - mp_copy = vl_msg_api_alloc_as_if_client(mp_size); - clib_memcpy(mp_copy, mp, mp_size); - vl_msg_api_send_shmem (q_prev, (u8 *)&mp); - mp = mp_copy; + if (q_prev && (q_prev->cursize < q_prev->maxsize)) + { + mp_copy = vl_msg_api_alloc_as_if_client(mp_size); + clib_memcpy(mp_copy, mp, mp_size); + vl_msg_api_send_shmem (q_prev, (u8 *)&mp); + mp = mp_copy; + } + q_prev = q; } - q_prev = q; } })); /* *INDENT-ON* */ @@ -1159,7 +1206,7 @@ vl_api_vnet_ip4_fib_counters_t_handler (vl_api_vnet_ip4_fib_counters_t * mp) static void vl_api_vnet_ip4_nbr_counters_t_handler (vl_api_vnet_ip4_nbr_counters_t * mp) { - vpe_client_registration_t *reg; + vpe_client_stats_registration_t *reg; stats_main_t *sm = &stats_main; unix_shared_memory_queue_t *q, *q_prev = NULL; vl_api_vnet_ip4_nbr_counters_t *mp_copy = NULL; @@ -1171,17 +1218,20 @@ vl_api_vnet_ip4_nbr_counters_t_handler (vl_api_vnet_ip4_nbr_counters_t * mp) /* *INDENT-OFF* */ pool_foreach(reg, sm->stats_registrations, ({ - q = vl_api_client_index_to_input_queue (reg->client_index); - if (q) + if (reg->stats_registrations & IP4_NBR_COUNTERS) { - if (q_prev && (q_prev->cursize < q_prev->maxsize)) + q = vl_api_client_index_to_input_queue (reg->client.client_index); + if (q) { - mp_copy = vl_msg_api_alloc_as_if_client(mp_size); - clib_memcpy(mp_copy, mp, mp_size); - vl_msg_api_send_shmem (q_prev, (u8 *)&mp); - mp = mp_copy; + if (q_prev && (q_prev->cursize < q_prev->maxsize)) + { + mp_copy = vl_msg_api_alloc_as_if_client(mp_size); + clib_memcpy(mp_copy, mp, mp_size); + vl_msg_api_send_shmem (q_prev, (u8 *)&mp); + mp = mp_copy; + } + q_prev = q; } - q_prev = q; } })); /* *INDENT-ON* */ @@ -1198,7 +1248,7 @@ vl_api_vnet_ip4_nbr_counters_t_handler (vl_api_vnet_ip4_nbr_counters_t * mp) static void vl_api_vnet_ip6_fib_counters_t_handler (vl_api_vnet_ip6_fib_counters_t * mp) { - vpe_client_registration_t *reg; + vpe_client_stats_registration_t *reg; stats_main_t *sm = &stats_main; unix_shared_memory_queue_t *q, *q_prev = NULL; vl_api_vnet_ip6_fib_counters_t *mp_copy = NULL; @@ -1210,19 +1260,22 @@ vl_api_vnet_ip6_fib_counters_t_handler (vl_api_vnet_ip6_fib_counters_t * mp) /* *INDENT-OFF* */ pool_foreach(reg, sm->stats_registrations, ({ - q = vl_api_client_index_to_input_queue (reg->client_index); - if (q) + if (reg->stats_registrations & IP6_FIB_COUNTERS) { - if (q_prev && (q_prev->cursize < q_prev->maxsize)) + q = vl_api_client_index_to_input_queue (reg->client.client_index); + if (q) { - mp_copy = vl_msg_api_alloc_as_if_client(mp_size); - clib_memcpy(mp_copy, mp, mp_size); - vl_msg_api_send_shmem (q_prev, (u8 *)&mp); - mp = mp_copy; + if (q_prev && (q_prev->cursize < q_prev->maxsize)) + { + mp_copy = vl_msg_api_alloc_as_if_client(mp_size); + clib_memcpy(mp_copy, mp, mp_size); + vl_msg_api_send_shmem (q_prev, (u8 *)&mp); + mp = mp_copy; + } + q_prev = q; } - q_prev = q; } - })); + })); /* *INDENT-ON* */ if (q_prev && (q_prev->cursize < q_prev->maxsize)) { @@ -1237,7 +1290,7 @@ vl_api_vnet_ip6_fib_counters_t_handler (vl_api_vnet_ip6_fib_counters_t * mp) static void 
vl_api_vnet_ip6_nbr_counters_t_handler (vl_api_vnet_ip6_nbr_counters_t * mp) { - vpe_client_registration_t *reg; + vpe_client_stats_registration_t *reg; stats_main_t *sm = &stats_main; unix_shared_memory_queue_t *q, *q_prev = NULL; vl_api_vnet_ip6_nbr_counters_t *mp_copy = NULL; @@ -1249,17 +1302,20 @@ vl_api_vnet_ip6_nbr_counters_t_handler (vl_api_vnet_ip6_nbr_counters_t * mp) /* *INDENT-OFF* */ pool_foreach(reg, sm->stats_registrations, ({ - q = vl_api_client_index_to_input_queue (reg->client_index); - if (q) + if (reg->stats_registrations & IP6_NBR_COUNTERS) { - if (q_prev && (q_prev->cursize < q_prev->maxsize)) + q = vl_api_client_index_to_input_queue (reg->client.client_index); + if (q) { - mp_copy = vl_msg_api_alloc_as_if_client(mp_size); - clib_memcpy(mp_copy, mp, mp_size); - vl_msg_api_send_shmem (q_prev, (u8 *)&mp); - mp = mp_copy; + if (q_prev && (q_prev->cursize < q_prev->maxsize)) + { + mp_copy = vl_msg_api_alloc_as_if_client(mp_size); + clib_memcpy(mp_copy, mp, mp_size); + vl_msg_api_send_shmem (q_prev, (u8 *)&mp); + mp = mp_copy; + } + q_prev = q; } - q_prev = q; } })); /* *INDENT-ON* */ @@ -1277,7 +1333,7 @@ static void vl_api_want_stats_t_handler (vl_api_want_stats_t * mp) { stats_main_t *sm = &stats_main; - vpe_client_registration_t *rp; + vpe_client_stats_registration_t *rp; vl_api_want_stats_reply_t *rmp; uword *p; i32 retval = 0; @@ -1307,9 +1363,16 @@ vl_api_want_stats_t_handler (vl_api_want_stats_t * mp) goto reply; } pool_get (sm->stats_registrations, rp); - rp->client_index = mp->client_index; - rp->client_pid = mp->pid; - hash_set (sm->stats_registration_hash, rp->client_index, + rp->client.client_index = mp->client_index; + rp->client.client_pid = mp->pid; + rp->stats_registrations |= INTERFACE_SIMPLE_COUNTERS; + rp->stats_registrations |= INTERFACE_COMBINED_COUNTERS; + rp->stats_registrations |= IP4_FIB_COUNTERS; + rp->stats_registrations |= IP4_NBR_COUNTERS; + rp->stats_registrations |= IP6_FIB_COUNTERS; + rp->stats_registrations |= IP6_NBR_COUNTERS; + + hash_set (sm->stats_registration_hash, rp->client.client_index, rp - sm->stats_registrations); reply: @@ -1331,10 +1394,386 @@ reply: vl_msg_api_send_shmem (q, (u8 *) & rmp); } +static void + vl_api_want_interface_simple_stats_t_handler + (vl_api_want_interface_simple_stats_t * mp) +{ + stats_main_t *sm = &stats_main; + vpe_client_stats_registration_t *rp; + vl_api_want_interface_simple_stats_reply_t *rmp; + uword *p; + i32 retval = 0; + unix_shared_memory_queue_t *q; + + p = hash_get (sm->stats_registration_hash, mp->client_index); + + /* Disable case */ + if (mp->enable_disable == 0) + { + if (!p) // No client to disable + { + clib_warning ("pid %d: already disabled for stats...", mp->pid); + retval = 0; + goto reply; + } + + rp = pool_elt_at_index (sm->stats_registrations, p[0]); + if (!rp->stats_registrations & INTERFACE_SIMPLE_COUNTERS) // Client but doesn't want this. 
+ { + clib_warning + ("pid %d: already disabled for interface simple stats...", + mp->pid); + retval = 0; + goto reply; + } + else + { + rp->stats_registrations &= ~(INTERFACE_SIMPLE_COUNTERS); // Clear flag + if (rp->stats_registrations == 0) // Client isn't listening to anything else + { + pool_put (sm->stats_registrations, rp); + hash_unset (sm->stats_registration_hash, mp->client_index); + } + goto reply; + } + } + /* Enable case */ + /* Get client from pool */ + if (p) + rp = pool_elt_at_index (sm->stats_registrations, p[0]); + + if (!p || !rp) // Doesn't exist, make a new entry + { + pool_get (sm->stats_registrations, rp); + rp->client.client_index = mp->client_index; + rp->client.client_pid = mp->pid; + } + rp->stats_registrations |= INTERFACE_SIMPLE_COUNTERS; + hash_set (sm->stats_registration_hash, rp->client.client_index, + rp - sm->stats_registrations); + +reply: + if (pool_elts (sm->stats_registrations)) // Someone wants something, somewhere so enable globally for now. + sm->enable_poller = 1; + else + sm->enable_poller = 0; + + q = vl_api_client_index_to_input_queue (mp->client_index); + + if (!q) + return; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_WANT_INTERFACE_SIMPLE_STATS_REPLY); + rmp->context = mp->context; + rmp->retval = retval; + + vl_msg_api_send_shmem (q, (u8 *) & rmp); +} + +static void + vl_api_want_interface_combined_stats_t_handler + (vl_api_want_interface_combined_stats_t * mp) +{ + stats_main_t *sm = &stats_main; + vpe_client_stats_registration_t *rp; + vl_api_want_interface_combined_stats_reply_t *rmp; + uword *p; + i32 retval = 0; + unix_shared_memory_queue_t *q; + + p = hash_get (sm->stats_registration_hash, mp->client_index); + + /* Disable case */ + if (mp->enable_disable == 0) + { + if (!p) // No client to disable + { + clib_warning ("pid %d: already disabled for stats...", mp->pid); + retval = 0; + goto reply; + } + + rp = pool_elt_at_index (sm->stats_registrations, p[0]); + if (!rp->stats_registrations & INTERFACE_COMBINED_COUNTERS) // Client but doesn't want this. + { + clib_warning + ("pid %d: already disabled for interface COMBINED stats...", + mp->pid); + retval = 0; + goto reply; + } + else + { + rp->stats_registrations &= ~(INTERFACE_COMBINED_COUNTERS); // Clear flag + if (rp->stats_registrations == 0) // Client isn't listening to anything else + { + pool_put (sm->stats_registrations, rp); + hash_unset (sm->stats_registration_hash, mp->client_index); + } + goto reply; + } + } + /* Enable case */ + /* Get client from pool */ + if (p) + rp = pool_elt_at_index (sm->stats_registrations, p[0]); + + if (!p || !rp) // Doesn't exist, make a new entry + { + pool_get (sm->stats_registrations, rp); + rp->client.client_index = mp->client_index; + rp->client.client_pid = mp->pid; + } + rp->stats_registrations |= INTERFACE_COMBINED_COUNTERS; + hash_set (sm->stats_registration_hash, rp->client.client_index, + rp - sm->stats_registrations); + +reply: + if (pool_elts (sm->stats_registrations)) // Someone wants something, somewhere so enable globally for now. 
+ sm->enable_poller = 1; + else + sm->enable_poller = 0; + + q = vl_api_client_index_to_input_queue (mp->client_index); + + if (!q) + return; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_WANT_INTERFACE_COMBINED_STATS_REPLY); + rmp->context = mp->context; + rmp->retval = retval; + + vl_msg_api_send_shmem (q, (u8 *) & rmp); +} + +static void +vl_api_want_ip4_fib_stats_t_handler (vl_api_want_ip4_fib_stats_t * mp) +{ + stats_main_t *sm = &stats_main; + vpe_client_stats_registration_t *rp; + vl_api_want_ip4_fib_stats_reply_t *rmp; + uword *p; + i32 retval = 0; + unix_shared_memory_queue_t *q; + + p = hash_get (sm->stats_registration_hash, mp->client_index); + + /* Disable case */ + /* + $$$ FIXME: need std return codes. Still undecided if enabling already + enabled (and similar for disabled) is really a -'ve error condition or + if 0 is sufficient + */ + if (mp->enable_disable == 0) + { + if (!p) // No client to disable + { + clib_warning ("pid %d: already disabled for stats...", mp->pid); + retval = -3; + goto reply; + } + + rp = pool_elt_at_index (sm->stats_registrations, p[0]); + if (!rp->stats_registrations & IP4_FIB_COUNTERS) // Client but doesn't want this. + { + clib_warning ("pid %d: already disabled for interface ip4 fib...", + mp->pid); + retval = -2; + goto reply; + } + else + { + rp->stats_registrations &= ~(IP4_FIB_COUNTERS); // Clear flag + if (rp->stats_registrations == 0) // Client isn't listening to anything else + { + pool_put (sm->stats_registrations, rp); + hash_unset (sm->stats_registration_hash, mp->client_index); + } + goto reply; + } + } + /* Enable case */ + /* Get client from pool */ + if (p) + rp = pool_elt_at_index (sm->stats_registrations, p[0]); + + if (!p || !rp) // Doesn't exist, make a new entry + { + pool_get (sm->stats_registrations, rp); + rp->client.client_index = mp->client_index; + rp->client.client_pid = mp->pid; + } + rp->stats_registrations |= IP4_FIB_COUNTERS; + hash_set (sm->stats_registration_hash, rp->client.client_index, + rp - sm->stats_registrations); + +reply: + if (pool_elts (sm->stats_registrations)) // Someone wants something, somewhere so enable globally for now. + sm->enable_poller = 1; + else + sm->enable_poller = 0; + + q = vl_api_client_index_to_input_queue (mp->client_index); + + if (!q) + return; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_WANT_IP4_FIB_STATS_REPLY); + rmp->context = mp->context; + rmp->retval = retval; + + vl_msg_api_send_shmem (q, (u8 *) & rmp); +} + +static void +vl_api_want_ip6_fib_stats_t_handler (vl_api_want_ip6_fib_stats_t * mp) +{ + stats_main_t *sm = &stats_main; + vpe_client_stats_registration_t *rp; + vl_api_want_ip6_fib_stats_reply_t *rmp; + uword *p; + i32 retval = 0; + unix_shared_memory_queue_t *q; + + p = hash_get (sm->stats_registration_hash, mp->client_index); + + /* Disable case */ + /* + $$$ FIXME: need std return codes. Still undecided if enabling already + enabled (and similar for disabled) is really a -'ve error condition or + if 0 is sufficient + */ + if (mp->enable_disable == 0) + { + if (!p) // No client to disable + { + clib_warning ("pid %d: already disabled for stats...", mp->pid); + retval = -3; + goto reply; + } + + rp = pool_elt_at_index (sm->stats_registrations, p[0]); + if (!rp->stats_registrations & IP6_FIB_COUNTERS) // Client but doesn't want this. 
+ { + clib_warning ("pid %d: already disabled for interface ip6 fib...", + mp->pid); + retval = -2; + goto reply; + } + else + { + rp->stats_registrations &= ~(IP6_FIB_COUNTERS); // Clear flag + if (rp->stats_registrations == 0) // Client isn't listening to anything else + { + pool_put (sm->stats_registrations, rp); + hash_unset (sm->stats_registration_hash, mp->client_index); + } + goto reply; + } + } + /* Enable case */ + /* Get client from pool */ + if (p) + rp = pool_elt_at_index (sm->stats_registrations, p[0]); + + if (!p || !rp) // Doesn't exist, make a new entry + { + pool_get (sm->stats_registrations, rp); + rp->client.client_index = mp->client_index; + rp->client.client_pid = mp->pid; + } + rp->stats_registrations |= IP6_FIB_COUNTERS; + hash_set (sm->stats_registration_hash, rp->client.client_index, + rp - sm->stats_registrations); + +reply: + if (pool_elts (sm->stats_registrations)) // Someone wants something, somewhere so enable globally for now. + sm->enable_poller = 1; + else + sm->enable_poller = 0; + + q = vl_api_client_index_to_input_queue (mp->client_index); + + if (!q) + return; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_WANT_IP6_FIB_STATS_REPLY); + rmp->context = mp->context; + rmp->retval = retval; + + vl_msg_api_send_shmem (q, (u8 *) & rmp); +} + +/* FIXME - NBR stats broken - this will be fixed in subsequent patch */ +static void +vl_api_want_ip4_nbr_stats_t_handler (vl_api_want_ip4_nbr_stats_t * mp) +{ +} + +static void +vl_api_want_ip6_nbr_stats_t_handler (vl_api_want_ip6_nbr_stats_t * mp) +{ +} + +static void +vl_api_vnet_get_summary_stats_t_handler (vl_api_vnet_get_summary_stats_t * mp) +{ + stats_main_t *sm = &stats_main; + vnet_interface_main_t *im = sm->interface_main; + vl_api_vnet_get_summary_stats_reply_t *rmp; + vlib_combined_counter_main_t *cm; + vlib_counter_t v; + int i, which; + u64 total_pkts[VLIB_N_RX_TX]; + u64 total_bytes[VLIB_N_RX_TX]; + + unix_shared_memory_queue_t *q = + vl_api_client_index_to_input_queue (mp->client_index); + + if (!q) + return; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_VNET_GET_SUMMARY_STATS_REPLY); + rmp->context = mp->context; + rmp->retval = 0; + + memset (total_pkts, 0, sizeof (total_pkts)); + memset (total_bytes, 0, sizeof (total_bytes)); + + vnet_interface_counter_lock (im); + + vec_foreach (cm, im->combined_sw_if_counters) + { + which = cm - im->combined_sw_if_counters; + + for (i = 0; i < vlib_combined_counter_n_counters (cm); i++) + { + vlib_get_combined_counter (cm, i, &v); + total_pkts[which] += v.packets; + total_bytes[which] += v.bytes; + } + } + vnet_interface_counter_unlock (im); + + rmp->total_pkts[VLIB_RX] = clib_host_to_net_u64 (total_pkts[VLIB_RX]); + rmp->total_bytes[VLIB_RX] = clib_host_to_net_u64 (total_bytes[VLIB_RX]); + rmp->total_pkts[VLIB_TX] = clib_host_to_net_u64 (total_pkts[VLIB_TX]); + rmp->total_bytes[VLIB_TX] = clib_host_to_net_u64 (total_bytes[VLIB_TX]); + rmp->vector_rate = + clib_host_to_net_u64 (vlib_last_vector_length_per_node (sm->vlib_main)); + + vl_msg_api_send_shmem (q, (u8 *) & rmp); +} + int stats_memclnt_delete_callback (u32 client_index) { - vpe_client_registration_t *rp; + vpe_client_stats_registration_t *rp; stats_main_t *sm = &stats_main; uword *p; diff --git a/src/vpp/stats/stats.h b/src/vpp/stats/stats.h index 024dc78e..167b09bd 100644 --- a/src/vpp/stats/stats.h +++ b/src/vpp/stats/stats.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -35,6 +36,44 @@ typedef struct int 
tag; } data_structure_lock_t; +typedef struct +{ + vpe_client_registration_t client; + u8 stats_registrations; +#define INTERFACE_SIMPLE_COUNTERS (1 << 0) +#define INTERFACE_COMBINED_COUNTERS (1 << 1) +#define IP4_FIB_COUNTERS (1 << 2) +#define IP4_NBR_COUNTERS (1 << 3) +#define IP6_FIB_COUNTERS (1 << 4) +#define IP6_NBR_COUNTERS (1 << 5) + +} vpe_client_stats_registration_t; + +/* from .../vnet/vnet/ip/lookup.c. Yuck */ +typedef CLIB_PACKED (struct + { + ip4_address_t address; +u32 address_length: 6; +u32 index: 26; + }) ip4_route_t; + +typedef struct +{ + ip6_address_t address; + u32 address_length; + u32 index; +} ip6_route_t; + + +typedef struct +{ + ip4_route_t *ip4routes; + ip6_route_t *ip6routes; + fib_table_t **fibs; + hash_pair_t **pvec; + uword *results; +} do_ip46_fibs_t; + typedef struct { void *mheap; @@ -45,7 +84,8 @@ typedef struct u32 enable_poller; uword *stats_registration_hash; - vpe_client_registration_t *stats_registrations; + vpe_client_stats_registration_t *stats_registrations; + vpe_client_stats_registration_t **regs; /* control-plane data structure lock */ data_structure_lock_t *data_structure_lock; @@ -53,6 +93,9 @@ typedef struct /* bail out of FIB walk if set */ clib_longjmp_t jmp_buf; + /* Vectors for Distribution funcs: do_ip4_fibs and do_ip6_fibs. */ + do_ip46_fibs_t do_ip46_fibs; + /* convenience */ vlib_main_t *vlib_main; vnet_main_t *vnet_main; -- cgit 1.2.3-korg From 1500254bee11355bbd69cc1dd9705be4f002f2bd Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Sun, 10 Sep 2017 04:39:11 -0700 Subject: FIB table add/delete API part 2; - this adds the code to create an IP and MPLS table via the API. - but the enforcement that the table must be created before it is used is still missing, this is so that CSIT can pass. Change-Id: Id124d884ade6cb7da947225200e3bb193454c555 Signed-off-by: Neale Ranns --- src/plugins/nat/nat.c | 17 +- src/plugins/nat/nat64.c | 13 +- src/vnet/classify/vnet_classify.c | 16 +- src/vnet/dhcp/dhcp4_proxy_node.c | 9 +- src/vnet/dhcp/dhcp6_proxy_node.c | 9 +- src/vnet/dhcp/dhcp_proxy.c | 19 ++- src/vnet/dpo/lookup_dpo.c | 20 ++- src/vnet/dpo/mpls_label_dpo.c | 12 +- src/vnet/ethernet/arp.c | 127 +++++++++++---- src/vnet/fib/fib_api.h | 1 - src/vnet/fib/fib_entry.c | 15 +- src/vnet/fib/fib_entry.h | 1 + src/vnet/fib/fib_entry_src_mpls.c | 7 +- src/vnet/fib/fib_table.c | 43 +++-- src/vnet/fib/fib_table.h | 32 +++- src/vnet/fib/fib_test.c | 27 ++-- src/vnet/fib/ip4_fib.c | 41 +++-- src/vnet/fib/ip4_fib.h | 5 +- src/vnet/fib/ip6_fib.c | 41 +++-- src/vnet/fib/ip6_fib.h | 5 +- src/vnet/fib/mpls_fib.c | 16 +- src/vnet/fib/mpls_fib.h | 5 +- src/vnet/interface_api.c | 177 ++++++++++++++++---- src/vnet/ip/ip.h | 7 + src/vnet/ip/ip4.h | 13 ++ src/vnet/ip/ip4_forward.c | 101 +----------- src/vnet/ip/ip4_source_and_port_range_check.c | 11 +- src/vnet/ip/ip6.h | 13 ++ src/vnet/ip/ip6_forward.c | 103 +----------- src/vnet/ip/ip6_neighbor.c | 108 +++++++++---- src/vnet/ip/ip_api.c | 122 +++++++++++--- src/vnet/ip/lookup.c | 225 ++++++++++++++++++++++++++ src/vnet/lisp-gpe/interface.c | 11 +- src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c | 9 +- src/vnet/lisp-gpe/lisp_gpe_sub_interface.c | 11 +- src/vnet/mfib/ip4_mfib.c | 12 +- src/vnet/mfib/ip4_mfib.h | 5 +- src/vnet/mfib/ip6_mfib.c | 12 +- src/vnet/mfib/ip6_mfib.h | 5 +- src/vnet/mfib/mfib_entry.c | 11 ++ src/vnet/mfib/mfib_entry.h | 2 + src/vnet/mfib/mfib_table.c | 88 ++++++++-- src/vnet/mfib/mfib_table.h | 29 +++- src/vnet/mfib/mfib_test.c | 11 +- src/vnet/mfib/mfib_types.h | 8 +- src/vnet/mpls/interface.c | 26 
++- src/vnet/mpls/mpls.c | 76 ++++++++- src/vnet/mpls/mpls.h | 16 +- src/vnet/mpls/mpls_api.c | 66 ++++++-- src/vnet/srv6/sr_policy_rewrite.c | 6 +- src/vnet/srv6/sr_steering.c | 6 +- src/vpp/api/api.c | 5 +- src/vpp/api/custom_dump.c | 3 - test/test_dhcp.py | 24 ++- test/test_gre.py | 8 +- test/test_ip4.py | 11 +- test/test_ip4_vrf_multi_instance.py | 4 +- test/test_ip6.py | 7 +- test/test_ip6_vrf_multi_instance.py | 4 +- test/test_ip_mcast.py | 98 ++++++++++- test/test_mpls.py | 48 +++++- test/test_nat.py | 13 ++ test/test_neighbor.py | 66 +++++++- test/vpp_ip_route.py | 73 +++++++++ test/vpp_papi_provider.py | 46 ++++-- 65 files changed, 1643 insertions(+), 538 deletions(-) (limited to 'src/vpp/api') diff --git a/src/plugins/nat/nat.c b/src/plugins/nat/nat.c index aa7ef10a..8aecac6d 100644 --- a/src/plugins/nat/nat.c +++ b/src/plugins/nat/nat.c @@ -167,7 +167,8 @@ void snat_add_address (snat_main_t *sm, ip4_address_t *addr, u32 vrf_id) ap->addr = *addr; if (vrf_id != ~0) ap->fib_index = - fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id); + fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id, + FIB_SOURCE_PLUGIN_HI); else ap->fib_index = ~0; #define _(N, i, n, s) \ @@ -625,7 +626,8 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, return VNET_API_ERROR_INVALID_VALUE; fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, - vrf_id); + vrf_id, + FIB_SOURCE_PLUGIN_HI); /* Find external address in allocated addresses and reserve port for address and port pair mapping when dynamic translations enabled */ @@ -754,7 +756,7 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, if (!m) return VNET_API_ERROR_NO_SUCH_ENTRY; - fib_table_unlock (m->fib_index, FIB_PROTOCOL_IP4); + fib_table_unlock (m->fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_PLUGIN_HI); /* Free external address port */ if (!sm->static_mapping_only) @@ -874,7 +876,8 @@ int snat_del_address (snat_main_t *sm, ip4_address_t addr, u8 delete_sm) } if (a->fib_index != ~0) - fib_table_unlock(a->fib_index, FIB_PROTOCOL_IP4); + fib_table_unlock(a->fib_index, FIB_PROTOCOL_IP4, + FIB_SOURCE_PLUGIN_HI); /* Delete sessions using address */ if (a->busy_tcp_ports || a->busy_udp_ports || a->busy_icmp_ports) @@ -2151,10 +2154,12 @@ snat_config (vlib_main_t * vm, unformat_input_t * input) sm->max_translations_per_user = max_translations_per_user; sm->outside_vrf_id = outside_vrf_id; sm->outside_fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, - outside_vrf_id); + outside_vrf_id, + FIB_SOURCE_PLUGIN_HI); sm->inside_vrf_id = inside_vrf_id; sm->inside_fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, - inside_vrf_id); + inside_vrf_id, + FIB_SOURCE_PLUGIN_HI); sm->static_mapping_only = static_mapping_only; sm->static_mapping_connection_tracking = static_mapping_connection_tracking; diff --git a/src/plugins/nat/nat64.c b/src/plugins/nat/nat64.c index b04901fa..bfcfa9b3 100644 --- a/src/plugins/nat/nat64.c +++ b/src/plugins/nat/nat64.c @@ -107,7 +107,8 @@ nat64_add_del_pool_addr (ip4_address_t * addr, u32 vrf_id, u8 is_add) a->fib_index = 0; if (vrf_id != ~0) a->fib_index = - fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id); + fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id, + FIB_SOURCE_PLUGIN_HI); #define _(N, i, n, s) \ clib_bitmap_alloc (a->busy_##n##_port_bitmap, 65535); foreach_snat_protocol @@ -119,7 +120,8 @@ nat64_add_del_pool_addr (ip4_address_t * addr, u32 vrf_id, u8 is_add) return VNET_API_ERROR_NO_SUCH_ENTRY; if 
(a->fib_index) - fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP6); + fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP6, + FIB_SOURCE_PLUGIN_HI); #define _(N, id, n, s) \ clib_bitmap_free (a->busy_##n##_port_bitmap); @@ -353,8 +355,8 @@ nat64_add_del_static_bib_entry (ip6_address_t * in_addr, { nat64_main_t *nm = &nat64_main; nat64_db_bib_entry_t *bibe; - u32 fib_index = - fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id); + u32 fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id, + FIB_SOURCE_PLUGIN_HI); snat_protocol_t p = ip_proto_to_snat_proto (proto); ip46_address_t addr; int i; @@ -644,7 +646,8 @@ nat64_add_del_prefix (ip6_address_t * prefix, u8 plen, u32 vrf_id, u8 is_add) { vec_add2 (nm->pref64, p, 1); p->fib_index = - fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id); + fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id, + FIB_SOURCE_PLUGIN_HI); p->vrf_id = vrf_id; } diff --git a/src/vnet/classify/vnet_classify.c b/src/vnet/classify/vnet_classify.c index 879fba3c..57d86748 100644 --- a/src/vnet/classify/vnet_classify.c +++ b/src/vnet/classify/vnet_classify.c @@ -368,10 +368,10 @@ vnet_classify_entry_claim_resource (vnet_classify_entry_t *e) switch (e->action) { case CLASSIFY_ACTION_SET_IP4_FIB_INDEX: - fib_table_lock (e->metadata, FIB_PROTOCOL_IP4); + fib_table_lock (e->metadata, FIB_PROTOCOL_IP4, FIB_SOURCE_CLASSIFY); break; case CLASSIFY_ACTION_SET_IP6_FIB_INDEX: - fib_table_lock (e->metadata, FIB_PROTOCOL_IP6); + fib_table_lock (e->metadata, FIB_PROTOCOL_IP6, FIB_SOURCE_CLASSIFY); break; } } @@ -382,10 +382,10 @@ vnet_classify_entry_release_resource (vnet_classify_entry_t *e) switch (e->action) { case CLASSIFY_ACTION_SET_IP4_FIB_INDEX: - fib_table_unlock (e->metadata, FIB_PROTOCOL_IP4); + fib_table_unlock (e->metadata, FIB_PROTOCOL_IP4, FIB_SOURCE_CLASSIFY); break; case CLASSIFY_ACTION_SET_IP6_FIB_INDEX: - fib_table_unlock (e->metadata, FIB_PROTOCOL_IP6); + fib_table_unlock (e->metadata, FIB_PROTOCOL_IP6, FIB_SOURCE_CLASSIFY); break; } } @@ -2096,9 +2096,13 @@ int vnet_classify_add_del_session (vnet_classify_main_t * cm, e->flags = 0; e->action = action; if (e->action == CLASSIFY_ACTION_SET_IP4_FIB_INDEX) - e->metadata = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, metadata); + e->metadata = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, + metadata, + FIB_SOURCE_CLASSIFY); else if (e->action == CLASSIFY_ACTION_SET_IP6_FIB_INDEX) - e->metadata = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, metadata); + e->metadata = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, + metadata, + FIB_SOURCE_CLASSIFY); else e->metadata = 0; diff --git a/src/vnet/dhcp/dhcp4_proxy_node.c b/src/vnet/dhcp/dhcp4_proxy_node.c index 1b59cdea..339a7885 100644 --- a/src/vnet/dhcp/dhcp4_proxy_node.c +++ b/src/vnet/dhcp/dhcp4_proxy_node.c @@ -785,7 +785,8 @@ dhcp4_proxy_set_server (ip46_address_t *addr, return VNET_API_ERROR_INVALID_SRC_ADDRESS; rx_fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, - rx_table_id); + rx_table_id, + FIB_SOURCE_DHCP); if (is_del) { @@ -795,7 +796,7 @@ dhcp4_proxy_set_server (ip46_address_t *addr, fib_table_entry_special_remove(rx_fib_index, &all_1s, FIB_SOURCE_DHCP); - fib_table_unlock (rx_fib_index, FIB_PROTOCOL_IP4); + fib_table_unlock (rx_fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_DHCP); } } else @@ -808,10 +809,10 @@ dhcp4_proxy_set_server (ip46_address_t *addr, &all_1s, FIB_SOURCE_DHCP, FIB_ENTRY_FLAG_LOCAL); - fib_table_lock (rx_fib_index, FIB_PROTOCOL_IP4); + fib_table_lock 
(rx_fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_DHCP); } } - fib_table_unlock (rx_fib_index, FIB_PROTOCOL_IP4); + fib_table_unlock (rx_fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_DHCP); return (rc); } diff --git a/src/vnet/dhcp/dhcp6_proxy_node.c b/src/vnet/dhcp/dhcp6_proxy_node.c index 9c2f5220..ce7a8fca 100644 --- a/src/vnet/dhcp/dhcp6_proxy_node.c +++ b/src/vnet/dhcp/dhcp6_proxy_node.c @@ -841,7 +841,8 @@ dhcp6_proxy_set_server (ip46_address_t *addr, return VNET_API_ERROR_INVALID_SRC_ADDRESS; rx_fib_index = mfib_table_find_or_create_and_lock(FIB_PROTOCOL_IP6, - rx_table_id); + rx_table_id, + MFIB_SOURCE_DHCP); if (is_del) { @@ -851,7 +852,7 @@ dhcp6_proxy_set_server (ip46_address_t *addr, mfib_table_entry_delete(rx_fib_index, &all_dhcp_servers, MFIB_SOURCE_DHCP); - mfib_table_unlock(rx_fib_index, FIB_PROTOCOL_IP6); + mfib_table_unlock(rx_fib_index, FIB_PROTOCOL_IP6, MFIB_SOURCE_DHCP); } } else @@ -885,11 +886,11 @@ dhcp6_proxy_set_server (ip46_address_t *addr, MFIB_SOURCE_DHCP, MFIB_RPF_ID_NONE, MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF); - mfib_table_lock(rx_fib_index, FIB_PROTOCOL_IP6); + mfib_table_lock(rx_fib_index, FIB_PROTOCOL_IP6, MFIB_SOURCE_DHCP); } } - mfib_table_unlock(rx_fib_index, FIB_PROTOCOL_IP6); + mfib_table_unlock(rx_fib_index, FIB_PROTOCOL_IP6, MFIB_SOURCE_DHCP); return (rc); } diff --git a/src/vnet/dhcp/dhcp_proxy.c b/src/vnet/dhcp/dhcp_proxy.c index ba7f354e..1784906b 100644 --- a/src/vnet/dhcp/dhcp_proxy.c +++ b/src/vnet/dhcp/dhcp_proxy.c @@ -29,9 +29,9 @@ dhcp_proxy_rx_table_lock (fib_protocol_t proto, u32 fib_index) { if (FIB_PROTOCOL_IP4 == proto) - fib_table_lock(fib_index, proto); + fib_table_lock(fib_index, proto, FIB_SOURCE_DHCP); else - mfib_table_lock(fib_index, proto); + mfib_table_lock(fib_index, proto, MFIB_SOURCE_DHCP); } static void @@ -39,9 +39,9 @@ dhcp_proxy_rx_table_unlock (fib_protocol_t proto, u32 fib_index) { if (FIB_PROTOCOL_IP4 == proto) - fib_table_unlock(fib_index, proto); + fib_table_unlock(fib_index, proto, FIB_SOURCE_DHCP); else - mfib_table_unlock(fib_index, proto); + mfib_table_unlock(fib_index, proto, MFIB_SOURCE_DHCP); } u32 @@ -169,7 +169,7 @@ dhcp_proxy_server_del (fib_protocol_t proto, if (~0 != index) { server = &proxy->dhcp_servers[index]; - fib_table_unlock (server->server_fib_index, proto); + fib_table_unlock (server->server_fib_index, proto, FIB_SOURCE_DHCP); vec_del1(proxy->dhcp_servers, index); @@ -228,7 +228,8 @@ dhcp_proxy_server_add (fib_protocol_t proto, dhcp_server_t server = { .dhcp_server = *addr, .server_fib_index = fib_table_find_or_create_and_lock(proto, - server_table_id), + server_table_id, + FIB_SOURCE_DHCP), }; vec_add1(proxy->dhcp_servers, server); @@ -297,9 +298,11 @@ int dhcp_proxy_set_vss (fib_protocol_t proto, int rc = 0; if (proto == FIB_PROTOCOL_IP4) - rx_fib_index = fib_table_find_or_create_and_lock(proto, tbl_id); + rx_fib_index = fib_table_find_or_create_and_lock(proto, tbl_id, + FIB_SOURCE_DHCP); else - rx_fib_index = mfib_table_find_or_create_and_lock(proto, tbl_id); + rx_fib_index = mfib_table_find_or_create_and_lock(proto, tbl_id, + MFIB_SOURCE_DHCP); v = dhcp_get_vss_info(dm, rx_fib_index, proto); if (NULL != v) diff --git a/src/vnet/dpo/lookup_dpo.c b/src/vnet/dpo/lookup_dpo.c index 26363a2f..af189eda 100644 --- a/src/vnet/dpo/lookup_dpo.c +++ b/src/vnet/dpo/lookup_dpo.c @@ -135,11 +135,15 @@ lookup_dpo_add_or_lock_w_fib_index (fib_node_index_t fib_index, { if (LOOKUP_UNICAST == cast) { - fib_table_lock(fib_index, dpo_proto_to_fib(proto)); + fib_table_lock(fib_index, + dpo_proto_to_fib(proto), + FIB_SOURCE_RR); } 
else { - mfib_table_lock(fib_index, dpo_proto_to_fib(proto)); + mfib_table_lock(fib_index, + dpo_proto_to_fib(proto), + MFIB_SOURCE_RR); } } lookup_dpo_add_or_lock_i(fib_index, proto, cast, input, table_config, dpo); @@ -161,13 +165,15 @@ lookup_dpo_add_or_lock_w_table_id (u32 table_id, { fib_index = fib_table_find_or_create_and_lock(dpo_proto_to_fib(proto), - table_id); + table_id, + FIB_SOURCE_RR); } else { fib_index = mfib_table_find_or_create_and_lock(dpo_proto_to_fib(proto), - table_id); + table_id, + MFIB_SOURCE_RR); } } @@ -238,12 +244,14 @@ lookup_dpo_unlock (dpo_id_t *dpo) if (LOOKUP_UNICAST == lkd->lkd_cast) { fib_table_unlock(lkd->lkd_fib_index, - dpo_proto_to_fib(lkd->lkd_proto)); + dpo_proto_to_fib(lkd->lkd_proto), + FIB_SOURCE_RR); } else { mfib_table_unlock(lkd->lkd_fib_index, - dpo_proto_to_fib(lkd->lkd_proto)); + dpo_proto_to_fib(lkd->lkd_proto), + MFIB_SOURCE_RR); } } pool_put(lookup_dpo_pool, lkd); diff --git a/src/vnet/dpo/mpls_label_dpo.c b/src/vnet/dpo/mpls_label_dpo.c index b178a902..2a6e7dd5 100644 --- a/src/vnet/dpo/mpls_label_dpo.c +++ b/src/vnet/dpo/mpls_label_dpo.c @@ -105,10 +105,18 @@ format_mpls_label_dpo (u8 *s, va_list *args) mpls_label_dpo_t *mld; u32 ii; - mld = mpls_label_dpo_get(index); - s = format(s, "mpls-label:[%d]:", index); + if (pool_is_free_index(mpls_label_dpo_pool, index)) + { + /* + * the packet trace can be printed after the DPO has been deleted + */ + return (s); + } + + mld = mpls_label_dpo_get(index); + for (ii = 0; ii < mld->mld_n_labels; ii++) { hdr.label_exp_s_ttl = diff --git a/src/vnet/ethernet/arp.c b/src/vnet/ethernet/arp.c index c84ff47b..08e91373 100644 --- a/src/vnet/ethernet/arp.c +++ b/src/vnet/ethernet/arp.c @@ -522,6 +522,24 @@ arp_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai) } } +static void +arp_adj_fib_add (ethernet_arp_ip4_entry_t * e, uint32_t fib_index) +{ + fib_prefix_t pfx = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr.ip4 = e->ip4_address, + }; + + e->fib_entry_index = + fib_table_entry_path_add (fib_index, &pfx, FIB_SOURCE_ADJ, + FIB_ENTRY_FLAG_ATTACHED, + DPO_PROTO_IP4, &pfx.fp_addr, + e->sw_if_index, ~0, 1, NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fib_table_lock (fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_ADJ); +} + int vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm, vnet_arp_set_ip4_over_ethernet_rpc_args_t @@ -576,21 +594,9 @@ vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm, if (!is_no_fib_entry) { - fib_prefix_t pfx = { - .fp_len = 32, - .fp_proto = FIB_PROTOCOL_IP4, - .fp_addr.ip4 = a->ip4, - }; - u32 fib_index; - - fib_index = - ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index); - e->fib_entry_index = - fib_table_entry_path_add (fib_index, &pfx, FIB_SOURCE_ADJ, - FIB_ENTRY_FLAG_ATTACHED, - DPO_PROTO_IP4, &pfx.fp_addr, - e->sw_if_index, ~0, 1, NULL, - FIB_ROUTE_PATH_FLAG_NONE); + arp_adj_fib_add (e, + ip4_fib_table_get_index_for_sw_if_index + (e->sw_if_index)); } else { @@ -1561,6 +1567,65 @@ arp_add_del_interface_address (ip4_main_t * im, } } +void +arp_adj_fib_remove (ethernet_arp_ip4_entry_t * e, uint32_t fib_index) +{ + if (FIB_NODE_INDEX_INVALID != e->fib_entry_index) + { + fib_prefix_t pfx = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr.ip4 = e->ip4_address, + }; + u32 fib_index; + + fib_index = ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index); + + fib_table_entry_path_remove (fib_index, &pfx, + FIB_SOURCE_ADJ, + DPO_PROTO_IP4, + &pfx.fp_addr, + e->sw_if_index, ~0, 1, + FIB_ROUTE_PATH_FLAG_NONE); + fib_table_unlock 
(fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_ADJ); + } +} + +static void +arp_table_bind (ip4_main_t * im, + uword opaque, + u32 sw_if_index, u32 new_fib_index, u32 old_fib_index) +{ + ethernet_arp_main_t *am = ðernet_arp_main; + ethernet_arp_interface_t *eai; + ethernet_arp_ip4_entry_t *e; + hash_pair_t *pair; + + /* + * the IP table that the interface is bound to has changed. + * reinstall all the adj fibs. + */ + + if (vec_len (am->ethernet_arp_by_sw_if_index) <= sw_if_index) + return; + + eai = &am->ethernet_arp_by_sw_if_index[sw_if_index]; + + /* *INDENT-OFF* */ + hash_foreach_pair (pair, eai->arp_entries, + ({ + e = pool_elt_at_index(am->ip4_entry_pool, + pair->value[0]); + /* + * remove the adj-fib from the old table and add to the new + */ + arp_adj_fib_remove(e, old_fib_index); + arp_adj_fib_add(e, new_fib_index); + })); + /* *INDENT-ON* */ + +} + static clib_error_t * ethernet_arp_init (vlib_main_t * vm) { @@ -1606,6 +1671,11 @@ ethernet_arp_init (vlib_main_t * vm) cb.function_opaque = 0; vec_add1 (im->add_del_interface_address_callbacks, cb); + ip4_table_bind_callback_t cbt; + cbt.function = arp_table_bind; + cbt.function_opaque = 0; + vec_add1 (im->table_bind_callbacks, cbt); + return 0; } @@ -1616,24 +1686,9 @@ arp_entry_free (ethernet_arp_interface_t * eai, ethernet_arp_ip4_entry_t * e) { ethernet_arp_main_t *am = ðernet_arp_main; - if (FIB_NODE_INDEX_INVALID != e->fib_entry_index) - { - fib_prefix_t pfx = { - .fp_len = 32, - .fp_proto = FIB_PROTOCOL_IP4, - .fp_addr.ip4 = e->ip4_address, - }; - u32 fib_index; - - fib_index = ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index); - - fib_table_entry_path_remove (fib_index, &pfx, - FIB_SOURCE_ADJ, - DPO_PROTO_IP4, - &pfx.fp_addr, - e->sw_if_index, ~0, 1, - FIB_ROUTE_PATH_FLAG_NONE); - } + arp_adj_fib_remove (e, + ip4_fib_table_get_index_for_sw_if_index + (e->sw_if_index)); hash_unset (eai->arp_entries, e->ip4_address.as_u32); pool_put (am->ip4_entry_pool, e); } @@ -1693,7 +1748,11 @@ vnet_arp_flush_ip4_over_ethernet_internal (vnet_main_t * vnm, * does in response to interface events. unset is only done * by the control plane. 
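The format_mpls_label_dpo hunk earlier in this change guards against a packet trace being printed after the DPO it references has been returned to its pool. The same defensive shape is sketched below with a hypothetical example_dpo_pool; only the pool macros are real:

u8 *
format_example_dpo (u8 * s, va_list * args)
{
  u32 index = va_arg (*args, u32);

  if (pool_is_free_index (example_dpo_pool, index))
    {
      /* the packet trace can outlive the object; print nothing rather
       * than dereference a freed pool element */
      return (s);
    }

  /* only now is it safe to pool_elt_at_index() and print the contents */
  return (format (s, "example-dpo:[%d]", index));
}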
*/ - if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC) + if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC) + { + e->flags &= ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC; + } + else if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC) { arp_entry_free (eai, e); } diff --git a/src/vnet/fib/fib_api.h b/src/vnet/fib/fib_api.h index d07d6cae..f5a107ca 100644 --- a/src/vnet/fib/fib_api.h +++ b/src/vnet/fib/fib_api.h @@ -23,7 +23,6 @@ add_del_route_check (fib_protocol_t table_proto, u32 next_hop_sw_if_index, dpo_proto_t next_hop_table_proto, u32 next_hop_table_id, - u8 create_missing_tables, u8 is_rpf_id, u32 * fib_index, u32 * next_hop_fib_index); diff --git a/src/vnet/fib/fib_entry.c b/src/vnet/fib/fib_entry.c index 2027f2be..4cb6cf60 100644 --- a/src/vnet/fib/fib_entry.c +++ b/src/vnet/fib/fib_entry.c @@ -89,6 +89,17 @@ fib_entry_get_default_chain_type (const fib_entry_t *fib_entry) return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4); } +u8 * +format_fib_source (u8 * s, va_list * args) +{ + fib_source_t source = va_arg (*args, int); + + s = format (s, "\n src:%s ", + fib_source_names[source]); + + return (s); +} + u8 * format_fib_entry (u8 * s, va_list * args) { @@ -114,8 +125,8 @@ format_fib_entry (u8 * s, va_list * args) FOR_EACH_SRC_ADDED(fib_entry, src, source, ({ - s = format (s, "\n src:%s ", - fib_source_names[source]); + s = format (s, "\n src:%U ", + format_fib_source, source); s = fib_entry_src_format(fib_entry, source, s); s = format (s, " refs:%d ", src->fes_ref_count); if (FIB_ENTRY_FLAG_NONE != src->fes_entry_flags) { diff --git a/src/vnet/fib/fib_entry.h b/src/vnet/fib/fib_entry.h index 93b8016d..2f6e37fe 100644 --- a/src/vnet/fib/fib_entry.h +++ b/src/vnet/fib/fib_entry.h @@ -431,6 +431,7 @@ typedef struct fib_entry_t_ { #define FIB_ENTRY_FORMAT_DETAIL2 (0x2) extern u8 *format_fib_entry (u8 * s, va_list * args); +extern u8 *format_fib_source (u8 * s, va_list * args); extern fib_node_index_t fib_entry_create_special(u32 fib_index, const fib_prefix_t *prefix, diff --git a/src/vnet/fib/fib_entry_src_mpls.c b/src/vnet/fib/fib_entry_src_mpls.c index a616458f..6fdd5c0a 100644 --- a/src/vnet/fib/fib_entry_src_mpls.c +++ b/src/vnet/fib/fib_entry_src_mpls.c @@ -94,7 +94,9 @@ fib_entry_src_mpls_set_data (fib_entry_src_t *src, fib_table_entry_delete_index(src->mpls.fesm_lfes[eos], FIB_SOURCE_SPECIAL); } - fib_table_unlock(MPLS_FIB_DEFAULT_TABLE_ID, FIB_PROTOCOL_MPLS); + fib_table_unlock(MPLS_FIB_DEFAULT_TABLE_ID, + FIB_PROTOCOL_MPLS, + FIB_SOURCE_MPLS); src->mpls.fesm_label = label; } else @@ -113,7 +115,8 @@ fib_entry_src_mpls_set_data (fib_entry_src_t *src, { fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_MPLS, - MPLS_FIB_DEFAULT_TABLE_ID); + MPLS_FIB_DEFAULT_TABLE_ID, + FIB_SOURCE_MPLS); } else { diff --git a/src/vnet/fib/fib_table.c b/src/vnet/fib/fib_table.c index 6b6cc5cb..75d15628 100644 --- a/src/vnet/fib/fib_table.c +++ b/src/vnet/fib/fib_table.c @@ -1039,7 +1039,8 @@ fib_table_find (fib_protocol_t proto, u32 fib_table_find_or_create_and_lock (fib_protocol_t proto, - u32 table_id) + u32 table_id, + fib_source_t src) { fib_table_t *fib_table; fib_node_index_t fi; @@ -1047,13 +1048,13 @@ fib_table_find_or_create_and_lock (fib_protocol_t proto, switch (proto) { case FIB_PROTOCOL_IP4: - fi = ip4_fib_table_find_or_create_and_lock(table_id); + fi = ip4_fib_table_find_or_create_and_lock(table_id, src); break; case FIB_PROTOCOL_IP6: - fi = ip6_fib_table_find_or_create_and_lock(table_id); + fi = ip6_fib_table_find_or_create_and_lock(table_id, src); break; case FIB_PROTOCOL_MPLS: - fi = 
mpls_fib_table_find_or_create_and_lock(table_id); + fi = mpls_fib_table_find_or_create_and_lock(table_id, src); break; default: return (~0); @@ -1070,6 +1071,7 @@ fib_table_find_or_create_and_lock (fib_protocol_t proto, u32 fib_table_create_and_lock (fib_protocol_t proto, + fib_source_t src, const char *const fmt, ...) { @@ -1082,13 +1084,13 @@ fib_table_create_and_lock (fib_protocol_t proto, switch (proto) { case FIB_PROTOCOL_IP4: - fi = ip4_fib_table_create_and_lock(); + fi = ip4_fib_table_create_and_lock(src); break; case FIB_PROTOCOL_IP6: - fi = ip6_fib_table_create_and_lock(); + fi = ip6_fib_table_create_and_lock(src); break; case FIB_PROTOCOL_MPLS: - fi = mpls_fib_table_create_and_lock(); + fi = mpls_fib_table_create_and_lock(src); break; default: return (~0); @@ -1143,26 +1145,43 @@ fib_table_walk (u32 fib_index, void fib_table_unlock (u32 fib_index, - fib_protocol_t proto) + fib_protocol_t proto, + fib_source_t source) { fib_table_t *fib_table; fib_table = fib_table_get(fib_index, proto); - fib_table->ft_locks--; + fib_table->ft_locks[source]--; + fib_table->ft_locks[FIB_TABLE_TOTAL_LOCKS]--; - if (0 == fib_table->ft_locks) + if (0 == fib_table->ft_locks[source]) { + /* + * The source no longer needs the table. flush any routes + * from it just in case + */ + fib_table_flush(fib_index, proto, source); + } + + if (0 == fib_table->ft_locks[FIB_TABLE_TOTAL_LOCKS]) + { + /* + * no more locak from any source - kill it + */ fib_table_destroy(fib_table); } } + void fib_table_lock (u32 fib_index, - fib_protocol_t proto) + fib_protocol_t proto, + fib_source_t source) { fib_table_t *fib_table; fib_table = fib_table_get(fib_index, proto); - fib_table->ft_locks++; + fib_table->ft_locks[source]++; + fib_table->ft_locks[FIB_TABLE_TOTAL_LOCKS]++; } u32 diff --git a/src/vnet/fib/fib_table.h b/src/vnet/fib/fib_table.h index 579740e9..6b7011b3 100644 --- a/src/vnet/fib/fib_table.h +++ b/src/vnet/fib/fib_table.h @@ -22,6 +22,12 @@ #include #include +/** + * Keep a lock per-source and a total + */ +#define FIB_TABLE_N_LOCKS (FIB_SOURCE_MAX+1) +#define FIB_TABLE_TOTAL_LOCKS FIB_SOURCE_MAX + /** * @brief * A protocol Independent FIB table @@ -34,9 +40,9 @@ typedef struct fib_table_t_ fib_protocol_t ft_proto; /** - * number of locks on the table + * per-source number of locks on the table */ - u16 ft_locks; + u16 ft_locks[FIB_TABLE_N_LOCKS]; /** * Table ID (hash key) for this FIB. @@ -628,9 +634,13 @@ extern u32 fib_table_find(fib_protocol_t proto, u32 table_id); * * @return fib_index * The index of the FIB + * + * @param source + * The ID of the client/source. */ extern u32 fib_table_find_or_create_and_lock(fib_protocol_t proto, - u32 table_id); + u32 table_id, + fib_source_t source); /** * @brief @@ -643,10 +653,14 @@ extern u32 fib_table_find_or_create_and_lock(fib_protocol_t proto, * @param fmt * A string to describe the table * + * @param source + * The ID of the client/source. + * * @return fib_index * The index of the FIB */ extern u32 fib_table_create_and_lock(fib_protocol_t proto, + fib_source_t source, const char *const fmt, ...); @@ -704,9 +718,13 @@ extern void fib_table_set_flow_hash_config(u32 fib_index, * * @paran proto * The protocol of the FIB (and thus the entries therein) + * + * @param source + * The ID of the client/source. 
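Taken together, the fib_table.c and fib_table.h hunks above give each table one lock counter per fib_source_t plus a running total: when a source's own count drops to zero its routes are flushed, and when the total drops to zero the table is destroyed. A minimal sketch of the resulting lifecycle (table id 11 is arbitrary):

static void
example_table_lifecycle (void)
{
  u32 fib_index;

  /* first reference creates the table */
  fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 11,
                                                 FIB_SOURCE_API);

  /* a second source takes its own reference on the same table */
  fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 11, FIB_SOURCE_CLI);

  fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_API);
  /* API-sourced routes are flushed; the table survives on the CLI's lock */

  fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_CLI);
  /* total lock count is now zero; the table is destroyed */
}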
*/ extern void fib_table_unlock(u32 fib_index, - fib_protocol_t proto); + fib_protocol_t proto, + fib_source_t source); /** * @brief @@ -718,9 +736,13 @@ extern void fib_table_unlock(u32 fib_index, * * @paran proto * The protocol of the FIB (and thus the entries therein) + * + * @param source + * The ID of the client/source. */ extern void fib_table_lock(u32 fib_index, - fib_protocol_t proto); + fib_protocol_t proto, + fib_source_t source); /** * @brief diff --git a/src/vnet/fib/fib_test.c b/src/vnet/fib/fib_test.c index 6867cca8..572d7f0d 100644 --- a/src/vnet/fib/fib_test.c +++ b/src/vnet/fib/fib_test.c @@ -739,7 +739,8 @@ fib_test_v4 (void) lb_count = pool_elts(load_balance_pool); /* Find or create FIB table 11 */ - fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 11); + fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 11, + FIB_SOURCE_API); for (ii = 0; ii < 4; ii++) { @@ -4150,7 +4151,7 @@ fib_test_v4 (void) FIB_SOURCE_INTERFACE)), "NO INterface Source'd prefixes"); - fib_table_unlock(fib_index, FIB_PROTOCOL_IP4); + fib_table_unlock(fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_API); FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); @@ -4201,7 +4202,8 @@ fib_test_v6 (void) dpo_drop = drop_dpo_get(DPO_PROTO_IP6); /* Find or create FIB table 11 */ - fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP6, 11); + fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP6, 11, + FIB_SOURCE_API); for (ii = 0; ii < 4; ii++) { @@ -5025,7 +5027,7 @@ fib_test_v6 (void) /* * now remove the VRF */ - fib_table_unlock(fib_index, FIB_PROTOCOL_IP6); + fib_table_unlock(fib_index, FIB_PROTOCOL_IP6, FIB_SOURCE_API); FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); @@ -5157,7 +5159,9 @@ fib_test_ae (void) */ u32 import_fib_index1; - import_fib_index1 = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 11); + import_fib_index1 = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, + 11, + FIB_SOURCE_CLI); /* * Add an attached route in the import FIB @@ -5233,7 +5237,8 @@ fib_test_ae (void) */ u32 import_fib_index2; - import_fib_index2 = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 12); + import_fib_index2 = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 12, + FIB_SOURCE_CLI); /* * Add an attached route in the import FIB @@ -5595,8 +5600,8 @@ fib_test_ae (void) &local_pfx, FIB_SOURCE_API); - fib_table_unlock(import_fib_index1, FIB_PROTOCOL_IP4); - fib_table_unlock(import_fib_index2, FIB_PROTOCOL_IP4); + fib_table_unlock(import_fib_index1, FIB_PROTOCOL_IP4, FIB_SOURCE_CLI); + fib_table_unlock(import_fib_index2, FIB_PROTOCOL_IP4, FIB_SOURCE_CLI); FIB_TEST((0 == adj_nbr_db_size()), "ADJ DB size is %d", adj_nbr_db_size()); @@ -8168,9 +8173,10 @@ lfib_test (void) /* * MPLS enable an interface so we get the MPLS table created */ + mpls_table_create(MPLS_FIB_DEFAULT_TABLE_ID, FIB_SOURCE_API); mpls_sw_interface_enable_disable(&mpls_main, tm->hw[0]->sw_if_index, - 1); + 1, 1); ip46_address_t nh_10_10_10_1 = { .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a01), @@ -8662,7 +8668,8 @@ lfib_test (void) */ mpls_sw_interface_enable_disable(&mpls_main, tm->hw[0]->sw_if_index, - 0); + 0, 1); + mpls_table_delete(MPLS_FIB_DEFAULT_TABLE_ID, FIB_SOURCE_API); FIB_TEST(lb_count == pool_elts(load_balance_pool), "Load-balance resources freed %d of %d", diff --git a/src/vnet/fib/ip4_fib.c b/src/vnet/fib/ip4_fib.c index d563bafd..865e2dd5 100644 --- a/src/vnet/fib/ip4_fib.c +++ 
b/src/vnet/fib/ip4_fib.c @@ -101,7 +101,8 @@ static const ip4_fib_table_special_prefix_t ip4_specials[] = { static u32 -ip4_create_fib_with_table_id (u32 table_id) +ip4_create_fib_with_table_id (u32 table_id, + fib_source_t src) { fib_table_t *fib_table; ip4_fib_t *v4_fib; @@ -128,7 +129,7 @@ ip4_create_fib_with_table_id (u32 table_id) v4_fib->fwd_classify_table_index = ~0; v4_fib->rev_classify_table_index = ~0; - fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_IP4); + fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_IP4, src); ip4_mtrie_init(&v4_fib->mtrie); @@ -198,23 +199,24 @@ ip4_fib_table_destroy (u32 fib_index) u32 -ip4_fib_table_find_or_create_and_lock (u32 table_id) +ip4_fib_table_find_or_create_and_lock (u32 table_id, + fib_source_t src) { u32 index; index = ip4_fib_index_from_table_id(table_id); if (~0 == index) - return ip4_create_fib_with_table_id(table_id); + return ip4_create_fib_with_table_id(table_id, src); - fib_table_lock(index, FIB_PROTOCOL_IP4); + fib_table_lock(index, FIB_PROTOCOL_IP4, src); return (index); } u32 -ip4_fib_table_create_and_lock (void) +ip4_fib_table_create_and_lock (fib_source_t src) { - return (ip4_create_fib_with_table_id(~0)); + return (ip4_create_fib_with_table_id(~0, src)); } u32 @@ -525,17 +527,32 @@ ip4_show_fib (vlib_main_t * vm, pool_foreach (fib_table, im4->fibs, ({ ip4_fib_t *fib = pool_elt_at_index(im4->v4_fibs, fib_table->ft_index); + fib_source_t source; + u8 *s = NULL; if (table_id >= 0 && table_id != (int)fib->table_id) continue; if (fib_index != ~0 && fib_index != (int)fib->index) continue; - vlib_cli_output (vm, "%U, fib_index:%d, flow hash:[%U] locks:%d", - format_fib_table_name, fib->index, FIB_PROTOCOL_IP4, - fib->index, - format_ip_flow_hash_config, fib_table->ft_flow_hash_config, - fib_table->ft_locks); + s = format(s, "%U, fib_index:%d, flow hash:[%U] locks:[", + format_fib_table_name, fib->index, + FIB_PROTOCOL_IP4, + fib->index, + format_ip_flow_hash_config, + fib_table->ft_flow_hash_config); + FOR_EACH_FIB_SOURCE(source) + { + if (0 != fib_table->ft_locks[source]) + { + s = format(s, "%U:%d, ", + format_fib_source, source, + fib_table->ft_locks[source]); + } + } + s = format (s, "]"); + vlib_cli_output (vm, "%V", s); + vec_free(s); /* Show summary? */ if (! verbose) diff --git a/src/vnet/fib/ip4_fib.h b/src/vnet/fib/ip4_fib.h index 006163b4..495b45cc 100644 --- a/src/vnet/fib/ip4_fib.h +++ b/src/vnet/fib/ip4_fib.h @@ -127,8 +127,9 @@ ip4_fib_lookup (ip4_main_t * im, u32 sw_if_index, ip4_address_t * dst) * @returns A pointer to the retrieved or created fib. 
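Because the counters are now kept per source, the show-FIB output above can attribute every outstanding reference. The same information can be read programmatically; a small sketch under the assumption that fib_table_get() is the usual accessor (example_report_locks is illustrative):

static void
example_report_locks (vlib_main_t * vm, u32 fib_index)
{
  fib_table_t *fib_table = fib_table_get (fib_index, FIB_PROTOCOL_IP4);
  fib_source_t source;

  FOR_EACH_FIB_SOURCE (source)
  {
    if (0 != fib_table->ft_locks[source])
      vlib_cli_output (vm, "%U: %d lock(s)",
                       format_fib_source, source,
                       fib_table->ft_locks[source]);
  }
}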
* */ -extern u32 ip4_fib_table_find_or_create_and_lock(u32 table_id); -extern u32 ip4_fib_table_create_and_lock(void); +extern u32 ip4_fib_table_find_or_create_and_lock(u32 table_id, + fib_source_t src); +extern u32 ip4_fib_table_create_and_lock(fib_source_t src); static inline diff --git a/src/vnet/fib/ip6_fib.c b/src/vnet/fib/ip6_fib.c index 8fde6f9f..3ddb8453 100644 --- a/src/vnet/fib/ip6_fib.c +++ b/src/vnet/fib/ip6_fib.c @@ -50,7 +50,8 @@ vnet_ip6_fib_init (u32 fib_index) } static u32 -create_fib_with_table_id (u32 table_id) +create_fib_with_table_id (u32 table_id, + fib_source_t src) { fib_table_t *fib_table; ip6_fib_t *v6_fib; @@ -77,29 +78,30 @@ create_fib_with_table_id (u32 table_id) fib_table->ft_flow_hash_config = IP_FLOW_HASH_DEFAULT; vnet_ip6_fib_init(fib_table->ft_index); - fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_IP6); + fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_IP6, src); return (fib_table->ft_index); } u32 -ip6_fib_table_find_or_create_and_lock (u32 table_id) +ip6_fib_table_find_or_create_and_lock (u32 table_id, + fib_source_t src) { uword * p; p = hash_get (ip6_main.fib_index_by_table_id, table_id); if (NULL == p) - return create_fib_with_table_id(table_id); + return create_fib_with_table_id(table_id, src); - fib_table_lock(p[0], FIB_PROTOCOL_IP6); + fib_table_lock(p[0], FIB_PROTOCOL_IP6, src); return (p[0]); } u32 -ip6_fib_table_create_and_lock (void) +ip6_fib_table_create_and_lock (fib_source_t src) { - return (create_fib_with_table_id(~0)); + return (create_fib_with_table_id(~0, src)); } void @@ -588,16 +590,33 @@ ip6_show_fib (vlib_main_t * vm, pool_foreach (fib_table, im6->fibs, ({ + fib_source_t source; + u8 *s = NULL; + fib = pool_elt_at_index(im6->v6_fibs, fib_table->ft_index); if (table_id >= 0 && table_id != (int)fib->table_id) continue; if (fib_index != ~0 && fib_index != (int)fib->index) continue; - vlib_cli_output (vm, "%s, fib_index:%d, flow hash:[%U] locks:%d", - fib_table->ft_desc, fib->index, - format_ip_flow_hash_config, fib_table->ft_flow_hash_config, - fib_table->ft_locks); + s = format(s, "%U, fib_index:%d, flow hash:[%U] locks:[", + format_fib_table_name, fib->index, + FIB_PROTOCOL_IP6, + fib->index, + format_ip_flow_hash_config, + fib_table->ft_flow_hash_config); + FOR_EACH_FIB_SOURCE(source) + { + if (0 != fib_table->ft_locks[source]) + { + s = format(s, "%U:%d, ", + format_fib_source, source, + fib_table->ft_locks[source]); + } + } + s = format (s, "]"); + vlib_cli_output (vm, "%V", s); + vec_free(s); /* Show summary? */ if (! verbose) diff --git a/src/vnet/fib/ip6_fib.h b/src/vnet/fib/ip6_fib.h index aad8305c..9728eecc 100644 --- a/src/vnet/fib/ip6_fib.h +++ b/src/vnet/fib/ip6_fib.h @@ -144,8 +144,9 @@ ip6_src_lookup_for_packet (ip6_main_t * im, * \returns A pointer to the retrieved or created fib. 
* */ -extern u32 ip6_fib_table_find_or_create_and_lock(u32 table_id); -extern u32 ip6_fib_table_create_and_lock(void); +extern u32 ip6_fib_table_find_or_create_and_lock(u32 table_id, + fib_source_t src); +extern u32 ip6_fib_table_create_and_lock(fib_source_t src); static inline ip6_fib_t * ip6_fib_get (fib_node_index_t index) diff --git a/src/vnet/fib/mpls_fib.c b/src/vnet/fib/mpls_fib.c index ca6271fe..4eeef7ab 100644 --- a/src/vnet/fib/mpls_fib.c +++ b/src/vnet/fib/mpls_fib.c @@ -83,7 +83,8 @@ mpls_fib_index_from_table_id (u32 table_id) } static u32 -mpls_fib_create_with_table_id (u32 table_id) +mpls_fib_create_with_table_id (u32 table_id, + fib_source_t src) { dpo_id_t dpo = DPO_INVALID; fib_table_t *fib_table; @@ -107,7 +108,7 @@ mpls_fib_create_with_table_id (u32 table_id) fib_table->ft_table_id = table_id; fib_table->ft_flow_hash_config = MPLS_FLOW_HASH_DEFAULT; - fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_MPLS); + fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_MPLS, src); if (INDEX_INVALID == mpls_fib_drop_dpo_index) { @@ -220,22 +221,23 @@ mpls_fib_create_with_table_id (u32 table_id) } u32 -mpls_fib_table_find_or_create_and_lock (u32 table_id) +mpls_fib_table_find_or_create_and_lock (u32 table_id, + fib_source_t src) { u32 index; index = mpls_fib_index_from_table_id(table_id); if (~0 == index) - return mpls_fib_create_with_table_id(table_id); + return mpls_fib_create_with_table_id(table_id, src); - fib_table_lock(index, FIB_PROTOCOL_MPLS); + fib_table_lock(index, FIB_PROTOCOL_MPLS, src); return (index); } u32 -mpls_fib_table_create_and_lock (void) +mpls_fib_table_create_and_lock (fib_source_t src) { - return (mpls_fib_create_with_table_id(~0)); + return (mpls_fib_create_with_table_id(~0, src)); } void diff --git a/src/vnet/fib/mpls_fib.h b/src/vnet/fib/mpls_fib.h index dfb8b7fc..29cd1d20 100644 --- a/src/vnet/fib/mpls_fib.h +++ b/src/vnet/fib/mpls_fib.h @@ -59,8 +59,9 @@ mpls_fib_get (fib_node_index_t index) return (pool_elt_at_index(mpls_main.mpls_fibs, index)); } -extern u32 mpls_fib_table_find_or_create_and_lock(u32 table_id); -extern u32 mpls_fib_table_create_and_lock(void); +extern u32 mpls_fib_table_find_or_create_and_lock(u32 table_id, + fib_source_t src); +extern u32 mpls_fib_table_create_and_lock(fib_source_t src); // extern mpls_fib_t * mpls_fib_find(u32 table_id); extern u32 mpls_fib_index_from_table_id(u32 table_id); diff --git a/src/vnet/interface_api.c b/src/vnet/interface_api.c index 113728cd..419fef94 100644 --- a/src/vnet/interface_api.c +++ b/src/vnet/interface_api.c @@ -320,68 +320,189 @@ stats_dsunlock (void) static void vl_api_sw_interface_set_table_t_handler (vl_api_sw_interface_set_table_t * mp) { - int rv = 0; - u32 table_id = ntohl (mp->vrf_id); - u32 sw_if_index = ntohl (mp->sw_if_index); vl_api_sw_interface_set_table_reply_t *rmp; - CLIB_UNUSED (ip_interface_address_t * ia); - u32 fib_index; + u32 sw_if_index = ntohl (mp->sw_if_index); + u32 table_id = ntohl (mp->vrf_id); + int rv = 0; VALIDATE_SW_IF_INDEX (mp); stats_dslock_with_hint (1 /* release hint */ , 4 /* tag */ ); if (mp->is_ipv6) + rv = ip_table_bind (FIB_PROTOCOL_IP6, sw_if_index, table_id, 1); + else + rv = ip_table_bind (FIB_PROTOCOL_IP4, sw_if_index, table_id, 1); + + stats_dsunlock (); + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_SW_INTERFACE_SET_TABLE_REPLY); +} + +int +ip_table_bind (fib_protocol_t fproto, + uint32_t sw_if_index, uint32_t table_id, u8 is_api) +{ + CLIB_UNUSED (ip_interface_address_t * ia); + u32 fib_index, mfib_index; + fib_source_t src; + mfib_source_t 
msrc; + + if (is_api) + { + src = FIB_SOURCE_API; + msrc = MFIB_SOURCE_API; + } + else + { + src = FIB_SOURCE_CLI; + msrc = MFIB_SOURCE_CLI; + } + + /* + * This is temporary whilst I do the song and dance with the CSIT version + */ + if (0 != table_id) { + fib_index = fib_table_find_or_create_and_lock (fproto, table_id, src); + mfib_index = + mfib_table_find_or_create_and_lock (fproto, table_id, msrc); + } + else + { + fib_index = 0; + mfib_index = 0; + } + + /* + * This if table does not exist = error is what we want in the end. + */ + /* fib_index = fib_table_find (fproto, table_id); */ + /* mfib_index = mfib_table_find (fproto, table_id); */ + + /* if (~0 == fib_index || ~0 == mfib_index) */ + /* { */ + /* return (VNET_API_ERROR_NO_SUCH_FIB); */ + /* } */ + + if (FIB_PROTOCOL_IP6 == fproto) + { + /* + * If the interface already has in IP address, then a change int + * VRF is not allowed. The IP address applied must first be removed. + * We do not do that automatically here, since VPP has no knowledge + * of whether thoses subnets are valid in the destination VRF. + */ /* *INDENT-OFF* */ foreach_ip_interface_address (&ip6_main.lookup_main, ia, sw_if_index, 1 /* honor unnumbered */ , ({ - rv = VNET_API_ERROR_ADDRESS_FOUND_FOR_INTERFACE; - goto done; + return (VNET_API_ERROR_ADDRESS_FOUND_FOR_INTERFACE); })); /* *INDENT-ON* */ - fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, - table_id); vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index); - ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index; - - fib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, - table_id); vec_validate (ip6_main.mfib_index_by_sw_if_index, sw_if_index); - ip6_main.mfib_index_by_sw_if_index[sw_if_index] = fib_index; + + /* + * tell those that are interested that the binding is changing. + */ + ip6_table_bind_callback_t *cb; + vec_foreach (cb, ip6_main.table_bind_callbacks) + cb->function (&ip6_main, cb->function_opaque, + sw_if_index, + fib_index, + ip6_main.fib_index_by_sw_if_index[sw_if_index]); + + if (0 == table_id) + { + /* reset back to default */ + if (0 != ip6_main.fib_index_by_sw_if_index[sw_if_index]) + fib_table_unlock (ip6_main.fib_index_by_sw_if_index[sw_if_index], + FIB_PROTOCOL_IP6, src); + if (0 != ip6_main.mfib_index_by_sw_if_index[sw_if_index]) + mfib_table_unlock (ip6_main.mfib_index_by_sw_if_index + [sw_if_index], FIB_PROTOCOL_IP6, msrc); + + } + else + { + /* we need to lock the table now it's inuse */ + fib_table_lock (fib_index, FIB_PROTOCOL_IP6, src); + mfib_table_lock (mfib_index, FIB_PROTOCOL_IP6, msrc); + } + + ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index; + ip6_main.mfib_index_by_sw_if_index[sw_if_index] = mfib_index; } else { + /* + * If the interface already has in IP address, then a change int + * VRF is not allowed. The IP address applied must first be removed. + * We do not do that automatically here, since VPP has no knowledge + * of whether thoses subnets are valid in the destination VRF. 
+ */ /* *INDENT-OFF* */ foreach_ip_interface_address (&ip4_main.lookup_main, ia, sw_if_index, 1 /* honor unnumbered */ , ({ - rv = VNET_API_ERROR_ADDRESS_FOUND_FOR_INTERFACE; - goto done; + return (VNET_API_ERROR_ADDRESS_FOUND_FOR_INTERFACE); })); /* *INDENT-ON* */ - fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, - table_id); vec_validate (ip4_main.fib_index_by_sw_if_index, sw_if_index); - ip4_main.fib_index_by_sw_if_index[sw_if_index] = fib_index; - - fib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, - table_id); vec_validate (ip4_main.mfib_index_by_sw_if_index, sw_if_index); - ip4_main.mfib_index_by_sw_if_index[sw_if_index] = fib_index; - } -done: - stats_dsunlock (); + /* + * tell those that are interested that the binding is changing. + */ + ip4_table_bind_callback_t *cb; + vec_foreach (cb, ip4_main.table_bind_callbacks) + cb->function (&ip4_main, cb->function_opaque, + sw_if_index, + fib_index, + ip4_main.fib_index_by_sw_if_index[sw_if_index]); + + if (0 == table_id) + { + /* reset back to default */ + if (0 != ip4_main.fib_index_by_sw_if_index[sw_if_index]) + fib_table_unlock (ip4_main.fib_index_by_sw_if_index[sw_if_index], + FIB_PROTOCOL_IP4, src); + if (0 != ip4_main.mfib_index_by_sw_if_index[sw_if_index]) + mfib_table_unlock (ip4_main.mfib_index_by_sw_if_index + [sw_if_index], FIB_PROTOCOL_IP4, msrc); - BAD_SW_IF_INDEX_LABEL; + } + else + { + /* we need to lock the table now it's inuse */ + fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, + table_id, src); - REPLY_MACRO (VL_API_SW_INTERFACE_SET_TABLE_REPLY); + mfib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, + table_id, msrc); + } + + ip4_main.fib_index_by_sw_if_index[sw_if_index] = fib_index; + ip4_main.mfib_index_by_sw_if_index[sw_if_index] = mfib_index; + } + + /* + * Temporary. undo the locks from the find and create at the staart + */ + if (0 != table_id) + { + fib_table_unlock (fib_index, fproto, src); + mfib_table_unlock (mfib_index, fproto, msrc); + } + + return (0); } static void diff --git a/src/vnet/ip/ip.h b/src/vnet/ip/ip.h index 70b4ccd8..7aae73ff 100644 --- a/src/vnet/ip/ip.h +++ b/src/vnet/ip/ip.h @@ -184,6 +184,13 @@ void ip_del_all_interface_addresses (vlib_main_t * vm, u32 sw_if_index); extern vlib_node_registration_t ip4_inacl_node; extern vlib_node_registration_t ip6_inacl_node; +void ip_table_create (fib_protocol_t fproto, uint32_t table_id, u8 is_api); + +void ip_table_delete (fib_protocol_t fproto, uint32_t table_id, u8 is_api); + +int ip_table_bind (fib_protocol_t fproto, + uint32_t sw_if_index, uint32_t table_id, u8 is_api); + #endif /* included_ip_main_h */ /* diff --git a/src/vnet/ip/ip4.h b/src/vnet/ip/ip4.h index 8f9a8e27..decb840b 100644 --- a/src/vnet/ip/ip4.h +++ b/src/vnet/ip/ip4.h @@ -72,6 +72,16 @@ typedef struct uword function_opaque; } ip4_add_del_interface_address_callback_t; +typedef void (ip4_table_bind_function_t) + (struct ip4_main_t * im, + uword opaque, u32 sw_if_index, u32 new_fib_index, u32 old_fib_index); + +typedef struct +{ + ip4_table_bind_function_t *function; + uword function_opaque; +} ip4_table_bind_callback_t; + /** * @brief IPv4 main type. * @@ -117,6 +127,9 @@ typedef struct ip4_main_t ip4_add_del_interface_address_callback_t * add_del_interface_address_callbacks; + /** Functions to call when interface to table biding changes. */ + ip4_table_bind_callback_t *table_bind_callbacks; + /** Template used to generate IP4 ARP packets. 
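ip_table_bind() above is the single entry point the API and CLI handlers now share; the is_api flag only selects whether the table locks are attributed to FIB_SOURCE_API or FIB_SOURCE_CLI. A minimal caller, with the same error handling the CLI path uses:

static int
example_bind_interface (u32 sw_if_index, u32 table_id)
{
  int rv;

  /* is_api == 0: attribute the table locks to the CLI source */
  rv = ip_table_bind (FIB_PROTOCOL_IP4, sw_if_index, table_id, 0);

  if (VNET_API_ERROR_ADDRESS_FOUND_FOR_INTERFACE == rv)
    {
      /* addresses must be removed before the interface can change VRF */
    }
  return (rv);
}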
*/ vlib_packet_template_t ip4_arp_request_packet_template; diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index 2d48e8a9..ec4287bb 100755 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -1198,8 +1198,10 @@ ip4_lookup_init (vlib_main_t * vm) ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0); /* Create FIB with index 0 and table id of 0. */ - fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0); - mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0); + fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0, + FIB_SOURCE_DEFAULT_ROUTE); + mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0, + MFIB_SOURCE_DEFAULT_ROUTE); { pg_node_t *pn; @@ -2794,101 +2796,6 @@ VLIB_REGISTER_NODE (ip4_midchain_node) = { VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain); /* *INDENT-ON */ -static clib_error_t * -add_del_interface_table (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - vnet_main_t *vnm = vnet_get_main (); - ip_interface_address_t *ia; - clib_error_t *error = 0; - u32 sw_if_index, table_id; - - sw_if_index = ~0; - - if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) - { - error = clib_error_return (0, "unknown interface `%U'", - format_unformat_error, input); - goto done; - } - - if (unformat (input, "%d", &table_id)) - ; - else - { - error = clib_error_return (0, "expected table id `%U'", - format_unformat_error, input); - goto done; - } - - /* - * If the interface already has in IP address, then a change int - * VRF is not allowed. The IP address applied must first be removed. - * We do not do that automatically here, since VPP has no knowledge - * of whether thoses subnets are valid in the destination VRF. - */ - /* *INDENT-OFF* */ - foreach_ip_interface_address (&ip4_main.lookup_main, - ia, sw_if_index, - 1 /* honor unnumbered */, - ({ - ip4_address_t * a; - - a = ip_interface_address_get_address (&ip4_main.lookup_main, ia); - error = clib_error_return (0, "interface %U has address %U", - format_vnet_sw_if_index_name, vnm, - sw_if_index, - format_ip4_address, a); - goto done; - })); - /* *INDENT-ON* */ - -{ - ip4_main_t *im = &ip4_main; - u32 fib_index; - - fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id); - - vec_validate (im->fib_index_by_sw_if_index, sw_if_index); - im->fib_index_by_sw_if_index[sw_if_index] = fib_index; - - fib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id); - vec_validate (im->mfib_index_by_sw_if_index, sw_if_index); - im->mfib_index_by_sw_if_index[sw_if_index] = fib_index; -} - -done: -return error; -} - -/*? - * Place the indicated interface into the supplied IPv4 FIB table (also known - * as a VRF). If the FIB table does not exist, this command creates it. To - * display the current IPv4 FIB table, use the command 'show ip fib'. - * FIB table will only be displayed if a route has been added to the table, or - * an IP Address is assigned to an interface in the table (which adds a route - * automatically). - * - * @note IP addresses added after setting the interface IP table are added to - * the indicated FIB table. If an IP address is added prior to changing the - * table then this is an error. The control plane must remove these addresses - * first and then change the table. VPP will not automatically move the - * addresses from the old to the new table as it does not know the validity - * of such a change. 
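The table_bind_callbacks vector added to ip4_main (and its ip6 counterpart) gives components that cache per-interface, per-table state a hook to follow a VRF re-bind; ARP registers one in the hunk further up. A registration sketch, with example_table_bind as a purely illustrative handler:

static void
example_table_bind (ip4_main_t * im, uword opaque,
                    u32 sw_if_index, u32 new_fib_index, u32 old_fib_index)
{
  /* re-home anything keyed on (sw_if_index, old_fib_index) */
}

static clib_error_t *
example_init (vlib_main_t * vm)
{
  ip4_table_bind_callback_t cbt = {
    .function = example_table_bind,
    .function_opaque = 0,
  };
  vec_add1 (ip4_main.table_bind_callbacks, cbt);
  /* hooked up via VLIB_INIT_FUNCTION (example_init) in a real component */
  return (0);
}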
- * - * @cliexpar - * Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id): - * @cliexcmd{set interface ip table GigabitEthernet2/0/0 2} - ?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = -{ - .path = "set interface ip table", - .function = add_del_interface_table, - .short_help = "set interface ip table ", -}; -/* *INDENT-ON* */ - int ip4_lookup_validate (ip4_address_t * a, u32 fib_index0) { diff --git a/src/vnet/ip/ip4_source_and_port_range_check.c b/src/vnet/ip/ip4_source_and_port_range_check.c index ae836a11..9aa880ae 100644 --- a/src/vnet/ip/ip4_source_and_port_range_check.c +++ b/src/vnet/ip/ip4_source_and_port_range_check.c @@ -1126,6 +1126,14 @@ ip6_source_and_port_range_check_add_del (ip6_address_t * address, u16 * low_ports, u16 * high_ports, int is_add) { + uint32_t fib_index; + + fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id); + + ASSERT (~0 != fib_index); + + fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_CLASSIFY); + return 0; } @@ -1138,7 +1146,8 @@ ip4_source_and_port_range_check_add_del (ip4_address_t * address, { u32 fib_index; - fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id); + fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id, + FIB_SOURCE_CLASSIFY); if (is_add == 0) { diff --git a/src/vnet/ip/ip6.h b/src/vnet/ip/ip6.h index fa922725..8aef53a9 100644 --- a/src/vnet/ip/ip6.h +++ b/src/vnet/ip/ip6.h @@ -103,6 +103,16 @@ typedef struct uword function_opaque; } ip6_add_del_interface_address_callback_t; +typedef void (ip6_table_bind_function_t) + (struct ip6_main_t * im, + uword opaque, u32 sw_if_index, u32 new_fib_index, u32 old_fib_index); + +typedef struct +{ + ip6_table_bind_function_t *function; + uword function_opaque; +} ip6_table_bind_callback_t; + /** * Enumeration of the FIB table instance types */ @@ -183,6 +193,9 @@ typedef struct ip6_main_t ip6_add_del_interface_address_callback_t * add_del_interface_address_callbacks; + /** Functions to call when interface to table biding changes. */ + ip6_table_bind_callback_t *table_bind_callbacks; + /* Template used to generate IP6 neighbor solicitation packets. */ vlib_packet_template_t discover_neighbor_packet_template; diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c index 5832bd0b..1002f6b6 100644 --- a/src/vnet/ip/ip6_forward.c +++ b/src/vnet/ip/ip6_forward.c @@ -2999,8 +2999,10 @@ ip6_lookup_init (vlib_main_t * vm) im->lookup_table_nbuckets, im->lookup_table_size); /* Create FIB with index 0 and table id of 0. 
*/ - fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0); - mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0); + fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0, + FIB_SOURCE_DEFAULT_ROUTE); + mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0, + MFIB_SOURCE_DEFAULT_ROUTE); { pg_node_t *pn; @@ -3045,103 +3047,6 @@ ip6_lookup_init (vlib_main_t * vm) VLIB_INIT_FUNCTION (ip6_lookup_init); -static clib_error_t * -add_del_ip6_interface_table (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - vnet_main_t *vnm = vnet_get_main (); - ip_interface_address_t *ia; - clib_error_t *error = 0; - u32 sw_if_index, table_id; - - sw_if_index = ~0; - - if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) - { - error = clib_error_return (0, "unknown interface `%U'", - format_unformat_error, input); - goto done; - } - - if (unformat (input, "%d", &table_id)) - ; - else - { - error = clib_error_return (0, "expected table id `%U'", - format_unformat_error, input); - goto done; - } - - /* - * If the interface already has in IP address, then a change int - * VRF is not allowed. The IP address applied must first be removed. - * We do not do that automatically here, since VPP has no knowledge - * of whether thoses subnets are valid in the destination VRF. - */ - /* *INDENT-OFF* */ - foreach_ip_interface_address (&ip6_main.lookup_main, - ia, sw_if_index, - 1 /* honor unnumbered */, - ({ - ip4_address_t * a; - - a = ip_interface_address_get_address (&ip6_main.lookup_main, ia); - error = clib_error_return (0, "interface %U has address %U", - format_vnet_sw_if_index_name, vnm, - sw_if_index, - format_ip6_address, a); - goto done; - })); - /* *INDENT-ON* */ - - { - u32 fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, - table_id); - - vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index); - ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index; - - fib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, - table_id); - - vec_validate (ip6_main.mfib_index_by_sw_if_index, sw_if_index); - ip6_main.mfib_index_by_sw_if_index[sw_if_index] = fib_index; - } - - -done: - return error; -} - -/*? - * Place the indicated interface into the supplied IPv6 FIB table (also known - * as a VRF). If the FIB table does not exist, this command creates it. To - * display the current IPv6 FIB table, use the command 'show ip6 fib'. - * FIB table will only be displayed if a route has been added to the table, or - * an IP Address is assigned to an interface in the table (which adds a route - * automatically). - * - * @note IP addresses added after setting the interface IP table are added to - * the indicated FIB table. If an IP address is added prior to changing the - * table then this is an error. The control plane must remove these addresses - * first and then change the table. VPP will not automatically move the - * addresses from the old to the new table as it does not know the validity - * of such a change. 
- * - * @cliexpar - * Example of how to add an interface to an IPv6 FIB table (where 2 is the table-id): - * @cliexcmd{set interface ip6 table GigabitEthernet2/0/0 2} - ?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (set_interface_ip6_table_command, static) = -{ - .path = "set interface ip6 table", - .function = add_del_ip6_interface_table, - .short_help = "set interface ip6 table " -}; -/* *INDENT-ON* */ - void ip6_link_local_address_from_ethernet_mac_address (ip6_address_t * ip, u8 * mac) diff --git a/src/vnet/ip/ip6_neighbor.c b/src/vnet/ip/ip6_neighbor.c index 62cf23ac..56f33ac8 100644 --- a/src/vnet/ip/ip6_neighbor.c +++ b/src/vnet/ip/ip6_neighbor.c @@ -250,6 +250,26 @@ format_ip6_neighbor_ip6_entry (u8 * s, va_list * va) return s; } +static void +ip6_neighbor_adj_fib_remove (ip6_neighbor_t * n, uint32_t fib_index) +{ + if (FIB_NODE_INDEX_INVALID != n->fib_entry_index) + { + fib_prefix_t pfx = { + .fp_len = 128, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr.ip6 = n->key.ip6_address, + }; + fib_table_entry_path_remove (fib_index, + &pfx, + FIB_SOURCE_ADJ, + DPO_PROTO_IP6, + &pfx.fp_addr, + n->key.sw_if_index, ~0, + 1, FIB_ROUTE_PATH_FLAG_NONE); + } +} + static clib_error_t * ip6_neighbor_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags) @@ -273,22 +293,10 @@ ip6_neighbor_sw_interface_up_down (vnet_main_t * vnm, { n = pool_elt_at_index (nm->neighbor_pool, to_delete[i]); mhash_unset (&nm->neighbor_index_by_key, &n->key, 0); - if (FIB_NODE_INDEX_INVALID != n->fib_entry_index) - { - fib_prefix_t pfx = { - .fp_len = 128, - .fp_proto = FIB_PROTOCOL_IP6, - .fp_addr.ip6 = n->key.ip6_address, - }; - fib_table_entry_path_remove - (ip6_fib_table_get_index_for_sw_if_index (n->key.sw_if_index), - &pfx, - FIB_SOURCE_ADJ, - DPO_PROTO_IP6, - &pfx.fp_addr, - n->key.sw_if_index, ~0, 1, FIB_ROUTE_PATH_FLAG_NONE); - pool_put (nm->neighbor_pool, n); - } + ip6_neighbor_adj_fib_remove (n, + ip6_fib_table_get_index_for_sw_if_index + (n->key.sw_if_index)); + pool_put (nm->neighbor_pool, n); } vec_free (to_delete); } @@ -579,6 +587,24 @@ ip6_ethernet_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai) } } + +static void +ip6_neighbor_adj_fib_add (ip6_neighbor_t * n, uint32_t fib_index) +{ + fib_prefix_t pfx = { + .fp_len = 128, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr.ip6 = n->key.ip6_address, + }; + + n->fib_entry_index = + fib_table_entry_path_add (fib_index, &pfx, FIB_SOURCE_ADJ, + FIB_ENTRY_FLAG_ATTACHED, + DPO_PROTO_IP6, &pfx.fp_addr, + n->key.sw_if_index, ~0, 1, NULL, + FIB_ROUTE_PATH_FLAG_NONE); +} + int vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, u32 sw_if_index, @@ -633,21 +659,9 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, */ if (!is_no_fib_entry) { - fib_prefix_t pfx = { - .fp_len = 128, - .fp_proto = FIB_PROTOCOL_IP6, - .fp_addr.ip6 = k.ip6_address, - }; - u32 fib_index; - - fib_index = - ip6_fib_table_get_index_for_sw_if_index (n->key.sw_if_index); - n->fib_entry_index = - fib_table_entry_path_add (fib_index, &pfx, FIB_SOURCE_ADJ, - FIB_ENTRY_FLAG_ATTACHED, - DPO_PROTO_IP6, &pfx.fp_addr, - n->key.sw_if_index, ~0, 1, NULL, - FIB_ROUTE_PATH_FLAG_NONE); + ip6_neighbor_adj_fib_add (n, + ip6_fib_table_get_index_for_sw_if_index + (n->key.sw_if_index)); } else { @@ -3843,6 +3857,33 @@ ip6_set_neighbor_limit (u32 neighbor_limit) return 0; } +static void +ip6_neighbor_table_bind (ip6_main_t * im, + uword opaque, + u32 sw_if_index, + u32 new_fib_index, u32 old_fib_index) +{ + ip6_neighbor_main_t *nm = &ip6_neighbor_main; + ip6_neighbor_t *n = NULL; + u32 i, *to_re_add = 
0; + + /* *INDENT-OFF* */ + pool_foreach (n, nm->neighbor_pool, + ({ + if (n->key.sw_if_index == sw_if_index) + vec_add1 (to_re_add, n - nm->neighbor_pool); + })); + /* *INDENT-ON* */ + + for (i = 0; i < vec_len (to_re_add); i++) + { + n = pool_elt_at_index (nm->neighbor_pool, to_re_add[i]); + ip6_neighbor_adj_fib_remove (n, old_fib_index); + ip6_neighbor_adj_fib_add (n, new_fib_index); + } + vec_free (to_re_add); +} + static clib_error_t * ip6_neighbor_init (vlib_main_t * vm) { @@ -3874,6 +3915,11 @@ ip6_neighbor_init (vlib_main_t * vm) cb.function_opaque = 0; vec_add1 (im->add_del_interface_address_callbacks, cb); + ip6_table_bind_callback_t cbt; + cbt.function = ip6_neighbor_table_bind; + cbt.function_opaque = 0; + vec_add1 (im->table_bind_callbacks, cbt); + mhash_init (&nm->pending_resolutions_by_address, /* value size */ sizeof (uword), /* key size */ sizeof (ip6_address_t)); diff --git a/src/vnet/ip/ip_api.c b/src/vnet/ip/ip_api.c index bba65ab4..384ec3e0 100644 --- a/src/vnet/ip/ip_api.c +++ b/src/vnet/ip/ip_api.c @@ -699,12 +699,58 @@ vl_api_ip_neighbor_add_del_t_handler (vl_api_ip_neighbor_add_del_t * mp, REPLY_MACRO (VL_API_IP_NEIGHBOR_ADD_DEL_REPLY); } +void +ip_table_delete (fib_protocol_t fproto, u32 table_id, u8 is_api) +{ + u32 fib_index, mfib_index; + + /* + * ignore action on the default table - this is always present + * and cannot be added nor deleted from the API + */ + if (0 != table_id) + { + /* + * The API holds only one lock on the table. + * i.e. it can be added many times via the API but needs to be + * deleted only once. + * The FIB index for unicast and multicast is not necessarily the + * same, since internal VPP systesm (like LISP and SR) create + * their own unicast tables. + */ + fib_index = fib_table_find (fproto, table_id); + mfib_index = mfib_table_find (fproto, table_id); + + if (~0 != fib_index) + { + fib_table_unlock (fib_index, fproto, + (is_api ? FIB_SOURCE_API : FIB_SOURCE_CLI)); + } + if (~0 != mfib_index) + { + mfib_table_unlock (mfib_index, fproto, + (is_api ? MFIB_SOURCE_API : MFIB_SOURCE_CLI)); + } + } +} + void vl_api_ip_table_add_del_t_handler (vl_api_ip_table_add_del_t * mp) { vl_api_ip_table_add_del_reply_t *rmp; + fib_protocol_t fproto = (mp->is_ipv6 ? 
FIB_PROTOCOL_IP6 : FIB_PROTOCOL_IP4); + u32 table_id = ntohl (mp->table_id); int rv = 0; + if (mp->is_add) + { + ip_table_create (fproto, table_id, 1); + } + else + { + ip_table_delete (fproto, table_id, 1); + } + REPLY_MACRO (VL_API_IP_TABLE_ADD_DEL_REPLY); } @@ -866,18 +912,21 @@ add_del_route_check (fib_protocol_t table_proto, u32 next_hop_sw_if_index, dpo_proto_t next_hop_table_proto, u32 next_hop_table_id, - u8 create_missing_tables, u8 is_rpf_id, u32 * fib_index, u32 * next_hop_fib_index) { vnet_main_t *vnm = vnet_get_main (); + /* Temporaray whilst I do the CSIT dance */ + u8 create_missing_tables = 1; + *fib_index = fib_table_find (table_proto, ntohl (table_id)); if (~0 == *fib_index) { if (create_missing_tables) { *fib_index = fib_table_find_or_create_and_lock (table_proto, - ntohl (table_id)); + ntohl (table_id), + FIB_SOURCE_API); } else { @@ -918,12 +967,14 @@ add_del_route_check (fib_protocol_t table_proto, *next_hop_fib_index = mfib_table_find_or_create_and_lock (fib_nh_proto, ntohl - (next_hop_table_id)); + (next_hop_table_id), + MFIB_SOURCE_API); else *next_hop_fib_index = fib_table_find_or_create_and_lock (fib_nh_proto, ntohl - (next_hop_table_id)); + (next_hop_table_id), + FIB_SOURCE_API); } else { @@ -948,8 +999,7 @@ ip4_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp) mp->next_hop_sw_if_index, DPO_PROTO_IP4, mp->next_hop_table_id, - mp->create_vrf_if_needed, 0, - &fib_index, &next_hop_fib_index); + 0, &fib_index, &next_hop_fib_index); if (0 != rv) return (rv); @@ -1008,8 +1058,7 @@ ip6_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp) mp->next_hop_sw_if_index, DPO_PROTO_IP6, mp->next_hop_table_id, - mp->create_vrf_if_needed, 0, - &fib_index, &next_hop_fib_index); + 0, &fib_index, &next_hop_fib_index); if (0 != rv) return (rv); @@ -1074,27 +1123,57 @@ vl_api_ip_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp) REPLY_MACRO (VL_API_IP_ADD_DEL_ROUTE_REPLY); } +void +ip_table_create (fib_protocol_t fproto, u32 table_id, u8 is_api) +{ + u32 fib_index, mfib_index; + + /* + * ignore action on the default table - this is always present + * and cannot be added nor deleted from the API + */ + if (0 != table_id) + { + /* + * The API holds only one lock on the table. + * i.e. it can be added many times via the API but needs to be + * deleted only once. + * The FIB index for unicast and multicast is not necessarily the + * same, since internal VPP systesm (like LISP and SR) create + * their own unicast tables. + */ + fib_index = fib_table_find (fproto, table_id); + mfib_index = mfib_table_find (fproto, table_id); + + if (~0 == fib_index) + { + fib_table_find_or_create_and_lock (fproto, table_id, + (is_api ? + FIB_SOURCE_API : + FIB_SOURCE_CLI)); + } + if (~0 == mfib_index) + { + mfib_table_find_or_create_and_lock (fproto, table_id, + (is_api ? 
+ MFIB_SOURCE_API : + MFIB_SOURCE_CLI)); + } + } +} + static int add_del_mroute_check (fib_protocol_t table_proto, u32 table_id, - u32 next_hop_sw_if_index, - u8 is_local, u8 create_missing_tables, u32 * fib_index) + u32 next_hop_sw_if_index, u8 is_local, u32 * fib_index) { vnet_main_t *vnm = vnet_get_main (); *fib_index = mfib_table_find (table_proto, ntohl (table_id)); if (~0 == *fib_index) { - if (create_missing_tables) - { - *fib_index = mfib_table_find_or_create_and_lock (table_proto, - ntohl (table_id)); - } - else - { - /* No such VRF, and we weren't asked to create one */ - return VNET_API_ERROR_NO_SUCH_FIB; - } + /* No such table */ + return VNET_API_ERROR_NO_SUCH_FIB; } if (~0 != ntohl (next_hop_sw_if_index)) @@ -1163,8 +1242,7 @@ api_mroute_add_del_t_handler (vl_api_ip_mroute_add_del_t * mp) rv = add_del_mroute_check (fproto, mp->table_id, mp->next_hop_sw_if_index, - mp->is_local, - mp->create_vrf_if_needed, &fib_index); + mp->is_local, &fib_index); if (0 != rv) return (rv); diff --git a/src/vnet/ip/lookup.c b/src/vnet/ip/lookup.c index 5537bb04..667c6791 100755 --- a/src/vnet/ip/lookup.c +++ b/src/vnet/ip/lookup.c @@ -687,6 +687,78 @@ done: return error; } +clib_error_t * +vnet_ip_table_cmd (vlib_main_t * vm, + unformat_input_t * main_input, + vlib_cli_command_t * cmd, fib_protocol_t fproto) +{ + unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *error = NULL; + u32 table_id, is_add; + + is_add = 1; + table_id = ~0; + + /* Get a line of input. */ + if (!unformat_user (main_input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "%d", &table_id)) + ; + else if (unformat (line_input, "del")) + is_add = 0; + else if (unformat (line_input, "add")) + is_add = 1; + else + { + error = unformat_parse_error (line_input); + goto done; + } + } + + if (~0 == table_id) + { + error = clib_error_return (0, "No table id"); + goto done; + } + else if (0 == table_id) + { + error = clib_error_return (0, "Can't change the default table"); + goto done; + } + else + { + if (is_add) + { + ip_table_create (fproto, table_id, 0); + } + else + { + ip_table_delete (fproto, table_id, 0); + } + } + +done: + unformat_free (line_input); + return error; +} + +clib_error_t * +vnet_ip4_table_cmd (vlib_main_t * vm, + unformat_input_t * main_input, vlib_cli_command_t * cmd) +{ + return (vnet_ip_table_cmd (vm, main_input, cmd, FIB_PROTOCOL_IP4)); +} + +clib_error_t * +vnet_ip6_table_cmd (vlib_main_t * vm, + unformat_input_t * main_input, vlib_cli_command_t * cmd) +{ + return (vnet_ip_table_cmd (vm, main_input, cmd, FIB_PROTOCOL_IP6)); +} + /* *INDENT-OFF* */ VLIB_CLI_COMMAND (vlib_cli_ip_command, static) = { .path = "ip", @@ -749,6 +821,159 @@ VLIB_CLI_COMMAND (ip_route_command, static) = { .function = vnet_ip_route_cmd, .is_mp_safe = 1, }; + +/* *INDENT-ON* */ +/*? + * This command is used to add or delete IPv4 Tables. All + * Tables must be explicitly added before that can be used. Creating a + * table will add both unicast and multicast FIBs + * + ?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (ip4_table_command, static) = { + .path = "ip table", + .short_help = "ip table [add|del] ", + .function = vnet_ip4_table_cmd, + .is_mp_safe = 1, +}; +/* *INDENT-ON* */ + +/* *INDENT-ON* */ +/*? + * This command is used to add or delete IPv4 Tables. All + * Tables must be explicitly added before that can be used. 
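With tables now created and deleted explicitly, the expected order of operations is: create the table, bind interfaces or add routes, then delete it once the last user is done. Sketched with the helpers introduced above; the CLI equivalents follow the "ip table [add|del] <table-id>" form:

static void
example_vrf_lifecycle (u32 sw_if_index)
{
  /* equivalent to "ip table add 2" at the CLI (is_api == 0) */
  ip_table_create (FIB_PROTOCOL_IP4, 2, 0);

  /* place an interface in the new VRF; binding to 0 returns it to the
   * default table */
  ip_table_bind (FIB_PROTOCOL_IP4, sw_if_index, 2, 0);
  /* ... add routes, forward traffic ... */
  ip_table_bind (FIB_PROTOCOL_IP4, sw_if_index, 0, 0);

  /* equivalent to "ip table del 2" */
  ip_table_delete (FIB_PROTOCOL_IP4, 2, 0);
}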
Creating a + * table will add both unicast and multicast FIBs + * + ?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (ip6_table_command, static) = { + .path = "ip6 table", + .short_help = "ip6 table [add|del] ", + .function = vnet_ip6_table_cmd, + .is_mp_safe = 1, +}; + +static clib_error_t * +ip_table_bind_cmd (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd, + fib_protocol_t fproto) +{ + vnet_main_t *vnm = vnet_get_main (); + clib_error_t *error = 0; + u32 sw_if_index, table_id; + int rv; + + sw_if_index = ~0; + + if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, input); + goto done; + } + + if (unformat (input, "%d", &table_id)) + ; + else + { + error = clib_error_return (0, "expected table id `%U'", + format_unformat_error, input); + goto done; + } + + rv = ip_table_bind (fproto, sw_if_index, table_id, 0); + + if (VNET_API_ERROR_ADDRESS_FOUND_FOR_INTERFACE == rv) + { + error = clib_error_return (0, "IP addresses are still present on %U", + format_vnet_sw_if_index_name, + vnet_get_main(), + sw_if_index); + } + else if (VNET_API_ERROR_NO_SUCH_FIB == rv) + { + error = clib_error_return (0, "no such table %d", table_id); + } + else if (0 != rv) + { + error = clib_error_return (0, "unknown error"); + } + + done: + return error; +} + +static clib_error_t * +ip4_table_bind_cmd (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + return (ip_table_bind_cmd (vm , input, cmd, FIB_PROTOCOL_IP4)); +} + +static clib_error_t * +ip6_table_bind_cmd (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + return (ip_table_bind_cmd (vm , input, cmd, FIB_PROTOCOL_IP6)); +} + +/*? + * Place the indicated interface into the supplied IPv4 FIB table (also known + * as a VRF). If the FIB table does not exist, this command creates it. To + * display the current IPv4 FIB table, use the command 'show ip fib'. + * FIB table will only be displayed if a route has been added to the table, or + * an IP Address is assigned to an interface in the table (which adds a route + * automatically). + * + * @note IP addresses added after setting the interface IP table are added to + * the indicated FIB table. If an IP address is added prior to changing the + * table then this is an error. The control plane must remove these addresses + * first and then change the table. VPP will not automatically move the + * addresses from the old to the new table as it does not know the validity + * of such a change. + * + * @cliexpar + * Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id): + * @cliexcmd{set interface ip table GigabitEthernet2/0/0 2} + ?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = +{ + .path = "set interface ip table", + .function = ip4_table_bind_cmd, + .short_help = "set interface ip table ", +}; +/* *INDENT-ON* */ + +/*? + * Place the indicated interface into the supplied IPv6 FIB table (also known + * as a VRF). If the FIB table does not exist, this command creates it. To + * display the current IPv6 FIB table, use the command 'show ip6 fib'. + * FIB table will only be displayed if a route has been added to the table, or + * an IP Address is assigned to an interface in the table (which adds a route + * automatically). + * + * @note IP addresses added after setting the interface IP table are added to + * the indicated FIB table. 
If an IP address is added prior to changing the + * table then this is an error. The control plane must remove these addresses + * first and then change the table. VPP will not automatically move the + * addresses from the old to the new table as it does not know the validity + * of such a change. + * + * @cliexpar + * Example of how to add an interface to an IPv6 FIB table (where 2 is the table-id): + * @cliexcmd{set interface ip6 table GigabitEthernet2/0/0 2} + ?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (set_interface_ip6_table_command, static) = +{ + .path = "set interface ip6 table", + .function = ip6_table_bind_cmd, + .short_help = "set interface ip6 table " +}; /* *INDENT-ON* */ clib_error_t * diff --git a/src/vnet/lisp-gpe/interface.c b/src/vnet/lisp-gpe/interface.c index e832c23f..a0c05e85 100644 --- a/src/vnet/lisp-gpe/interface.c +++ b/src/vnet/lisp-gpe/interface.c @@ -505,12 +505,14 @@ lisp_gpe_iface_set_table (u32 sw_if_index, u32 table_id) { fib_node_index_t fib_index; - fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id); + fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id, + FIB_SOURCE_LISP); vec_validate (ip4_main.fib_index_by_sw_if_index, sw_if_index); ip4_main.fib_index_by_sw_if_index[sw_if_index] = fib_index; ip4_sw_interface_enable_disable (sw_if_index, 1); - fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, table_id); + fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, table_id, + FIB_SOURCE_LISP); vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index); ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index; ip6_sw_interface_enable_disable (sw_if_index, 1); @@ -530,7 +532,7 @@ lisp_gpe_tenant_del_default_routes (u32 table_id) fib_index = fib_table_find (prefix.fp_proto, table_id); fib_table_entry_special_remove (fib_index, &prefix, FIB_SOURCE_LISP); - fib_table_unlock (fib_index, prefix.fp_proto); + fib_table_unlock (fib_index, prefix.fp_proto, FIB_SOURCE_LISP); } } @@ -549,7 +551,8 @@ lisp_gpe_tenant_add_default_routes (u32 table_id) /* * Add a deafult route that results in a control plane punt DPO */ - fib_index = fib_table_find_or_create_and_lock (prefix.fp_proto, table_id); + fib_index = fib_table_find_or_create_and_lock (prefix.fp_proto, table_id, + FIB_SOURCE_LISP); fib_table_entry_special_dpo_add (fib_index, &prefix, FIB_SOURCE_LISP, FIB_ENTRY_FLAG_EXCLUSIVE, lisp_cp_dpo_get (fib_proto_to_dpo diff --git a/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c b/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c index d7d3cb86..0a8dc039 100644 --- a/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c +++ b/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c @@ -66,6 +66,7 @@ ip_dst_fib_add_route (u32 dst_fib_index, const ip_prefix_t * dst_prefix) /* create a new src FIB. 
*/ src_fib_index = fib_table_create_and_lock (dst_fib_prefix.fp_proto, + FIB_SOURCE_LISP, "LISP-src for [%d,%U]", dst_fib_index, format_fib_prefix, &dst_fib_prefix); @@ -180,7 +181,8 @@ ip_src_dst_fib_del_route (u32 src_fib_index, */ fib_table_entry_special_remove (dst_fib_index, &dst_fib_prefix, FIB_SOURCE_LISP); - fib_table_unlock (src_fib_index, src_fib_prefix.fp_proto); + fib_table_unlock (src_fib_index, src_fib_prefix.fp_proto, + FIB_SOURCE_LISP); } } @@ -544,7 +546,8 @@ add_ip_fwd_entry (lisp_gpe_main_t * lgm, lfe->tenant = lisp_gpe_tenant_find_or_create (lfe->key->vni); lfe->eid_table_id = a->table_id; lfe->eid_fib_index = fib_table_find_or_create_and_lock (fproto, - lfe->eid_table_id); + lfe->eid_table_id, + FIB_SOURCE_LISP); lfe->is_src_dst = a->is_src_dst; if (LISP_GPE_FWD_ENTRY_TYPE_NEGATIVE != lfe->type) @@ -578,7 +581,7 @@ del_ip_fwd_entry_i (lisp_gpe_main_t * lgm, lisp_gpe_fwd_entry_t * lfe) fproto = (IP4 == ip_prefix_version (&fid_addr_ippref (&lfe->key->rmt)) ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6); - fib_table_unlock (lfe->eid_fib_index, fproto); + fib_table_unlock (lfe->eid_fib_index, fproto, FIB_SOURCE_LISP); hash_unset_mem (lgm->lisp_gpe_fwd_entries, lfe->key); clib_mem_free (lfe->key); diff --git a/src/vnet/lisp-gpe/lisp_gpe_sub_interface.c b/src/vnet/lisp-gpe/lisp_gpe_sub_interface.c index b234d9dc..26664f53 100644 --- a/src/vnet/lisp-gpe/lisp_gpe_sub_interface.c +++ b/src/vnet/lisp-gpe/lisp_gpe_sub_interface.c @@ -89,13 +89,15 @@ lisp_gpe_sub_interface_set_table (u32 sw_if_index, u32 table_id) { fib_node_index_t fib_index; - fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id); + fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id, + FIB_SOURCE_LISP); ASSERT (FIB_NODE_INDEX_INVALID != fib_index); vec_validate (ip4_main.fib_index_by_sw_if_index, sw_if_index); ip4_main.fib_index_by_sw_if_index[sw_if_index] = fib_index; - fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, table_id); + fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, table_id, + FIB_SOURCE_LISP); ASSERT (FIB_NODE_INDEX_INVALID != fib_index); vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index); @@ -105,9 +107,13 @@ lisp_gpe_sub_interface_set_table (u32 sw_if_index, u32 table_id) static void lisp_gpe_sub_interface_unset_table (u32 sw_if_index, u32 table_id) { + fib_table_unlock (ip4_main.fib_index_by_sw_if_index[sw_if_index], + FIB_PROTOCOL_IP4, FIB_SOURCE_LISP); ip4_main.fib_index_by_sw_if_index[sw_if_index] = 0; ip4_sw_interface_enable_disable (sw_if_index, 0); + fib_table_unlock (ip6_main.fib_index_by_sw_if_index[sw_if_index], + FIB_PROTOCOL_IP6, FIB_SOURCE_LISP); ip6_main.fib_index_by_sw_if_index[sw_if_index] = 0; ip6_sw_interface_enable_disable (sw_if_index, 0); } @@ -185,6 +191,7 @@ lisp_gpe_sub_interface_unlock (index_t l3si) l3s = lisp_gpe_sub_interface_get_i (l3si); + ASSERT (0 != l3s->locks); l3s->locks--; if (0 == l3s->locks) diff --git a/src/vnet/mfib/ip4_mfib.c b/src/vnet/mfib/ip4_mfib.c index 1849a3a4..b2482580 100644 --- a/src/vnet/mfib/ip4_mfib.c +++ b/src/vnet/mfib/ip4_mfib.c @@ -33,7 +33,8 @@ static const mfib_prefix_t ip4_specials[] = { }; static u32 -ip4_create_mfib_with_table_id (u32 table_id) +ip4_create_mfib_with_table_id (u32 table_id, + mfib_source_t src) { mfib_table_t *mfib_table; @@ -53,7 +54,7 @@ ip4_create_mfib_with_table_id (u32 table_id) mfib_table->v4.table_id = table_id; - mfib_table_lock(mfib_table->mft_index, FIB_PROTOCOL_IP4); + mfib_table_lock(mfib_table->mft_index, FIB_PROTOCOL_IP4, 
src); /* * add the special entries into the new FIB @@ -113,14 +114,15 @@ ip4_mfib_table_destroy (ip4_mfib_t *mfib) } u32 -ip4_mfib_table_find_or_create_and_lock (u32 table_id) +ip4_mfib_table_find_or_create_and_lock (u32 table_id, + mfib_source_t src) { u32 index; index = ip4_mfib_index_from_table_id(table_id); if (~0 == index) - return ip4_create_mfib_with_table_id(table_id); - mfib_table_lock(index, FIB_PROTOCOL_IP4); + return ip4_create_mfib_with_table_id(table_id, src); + mfib_table_lock(index, FIB_PROTOCOL_IP4, src); return (index); } diff --git a/src/vnet/mfib/ip4_mfib.h b/src/vnet/mfib/ip4_mfib.h index ea682651..e31fb744 100644 --- a/src/vnet/mfib/ip4_mfib.h +++ b/src/vnet/mfib/ip4_mfib.h @@ -72,8 +72,9 @@ ip4_mfib_get (u32 index) * @returns A pointer to the retrieved or created fib. * */ -extern u32 ip4_mfib_table_find_or_create_and_lock(u32 table_id); -extern u32 ip4_mfib_table_create_and_lock(void); +extern u32 ip4_mfib_table_find_or_create_and_lock(u32 table_id, + mfib_source_t src); +extern u32 ip4_mfib_table_create_and_lock(mfib_source_t src); static inline u32 ip4_mfib_index_from_table_id (u32 table_id) diff --git a/src/vnet/mfib/ip6_mfib.c b/src/vnet/mfib/ip6_mfib.c index 5e48e919..e4861330 100644 --- a/src/vnet/mfib/ip6_mfib.c +++ b/src/vnet/mfib/ip6_mfib.c @@ -151,7 +151,8 @@ static const ip6_mfib_special_t ip6_mfib_specials[] = static u32 -ip6_create_mfib_with_table_id (u32 table_id) +ip6_create_mfib_with_table_id (u32 table_id, + mfib_source_t src) { mfib_table_t *mfib_table; mfib_prefix_t pfx = { @@ -182,7 +183,7 @@ ip6_create_mfib_with_table_id (u32 table_id) mfib_table->v6.table_id = table_id; - mfib_table_lock(mfib_table->mft_index, FIB_PROTOCOL_IP6); + mfib_table_lock(mfib_table->mft_index, FIB_PROTOCOL_IP6, src); mfib_table->v6.rhead = clib_mem_alloc_aligned (sizeof(*mfib_table->v6.rhead), @@ -297,14 +298,15 @@ ip6_mfib_interface_enable_disable (u32 sw_if_index, int is_enable) } u32 -ip6_mfib_table_find_or_create_and_lock (u32 table_id) +ip6_mfib_table_find_or_create_and_lock (u32 table_id, + mfib_source_t src) { u32 index; index = ip6_mfib_index_from_table_id(table_id); if (~0 == index) - return ip6_create_mfib_with_table_id(table_id); - mfib_table_lock(index, FIB_PROTOCOL_IP6); + return ip6_create_mfib_with_table_id(table_id, src); + mfib_table_lock(index, FIB_PROTOCOL_IP6, src); return (index); } diff --git a/src/vnet/mfib/ip6_mfib.h b/src/vnet/mfib/ip6_mfib.h index adaa7ec2..ea81b553 100644 --- a/src/vnet/mfib/ip6_mfib.h +++ b/src/vnet/mfib/ip6_mfib.h @@ -79,8 +79,9 @@ ip6_mfib_get (u32 index) * @returns A pointer to the retrieved or created fib. 
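The same lock-by-source convention recurs in every consumer touched by this series: whoever finds-or-creates a table names its source, and names the same source again when releasing it. A minimal sketch of a hypothetical caller, using only the signatures visible in these hunks (table id 100 and MFIB_SOURCE_API are purely illustrative):

    u32 fib_index;

    /* take (or create) IPv4 multicast table 100 on behalf of the API source */
    fib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 100,
                                                    MFIB_SOURCE_API);

    /* ... program entries owned by MFIB_SOURCE_API ... */

    /* release with the same source; the table persists while other sources
     * still hold locks on it */
    mfib_table_unlock (fib_index, FIB_PROTOCOL_IP4, MFIB_SOURCE_API);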
* */ -extern u32 ip6_mfib_table_find_or_create_and_lock(u32 table_id); -extern u32 ip6_mfib_table_create_and_lock(void); +extern u32 ip6_mfib_table_find_or_create_and_lock(u32 table_id, + mfib_source_t src); +extern u32 ip6_mfib_table_create_and_lock(mfib_source_t src); static inline diff --git a/src/vnet/mfib/mfib_entry.c b/src/vnet/mfib/mfib_entry.c index 804e10ab..2302b9a1 100644 --- a/src/vnet/mfib/mfib_entry.c +++ b/src/vnet/mfib/mfib_entry.c @@ -334,6 +334,17 @@ mfib_entry_get_best_src (const mfib_entry_t *mfib_entry) return (bsrc); } +int +mfib_entry_is_sourced (fib_node_index_t mfib_entry_index, + mfib_source_t source) +{ + mfib_entry_t *mfib_entry; + + mfib_entry = mfib_entry_get(mfib_entry_index); + + return (NULL != mfib_entry_src_find(mfib_entry, source, NULL)); +} + static void mfib_entry_src_flush (mfib_entry_src_t *msrc) { diff --git a/src/vnet/mfib/mfib_entry.h b/src/vnet/mfib/mfib_entry.h index d4377878..96ee49f7 100644 --- a/src/vnet/mfib/mfib_entry.h +++ b/src/vnet/mfib/mfib_entry.h @@ -130,6 +130,8 @@ extern void mfib_entry_unlock(fib_node_index_t fib_entry_index); extern void mfib_entry_get_prefix(fib_node_index_t fib_entry_index, mfib_prefix_t *pfx); extern u32 mfib_entry_get_fib_index(fib_node_index_t fib_entry_index); +extern int mfib_entry_is_sourced(fib_node_index_t fib_entry_index, + mfib_source_t source); extern void mfib_entry_contribute_forwarding( fib_node_index_t mfib_entry_index, diff --git a/src/vnet/mfib/mfib_table.c b/src/vnet/mfib/mfib_table.c index 7ffe8941..e5550adc 100644 --- a/src/vnet/mfib/mfib_table.c +++ b/src/vnet/mfib/mfib_table.c @@ -424,7 +424,8 @@ mfib_table_find (fib_protocol_t proto, u32 mfib_table_find_or_create_and_lock (fib_protocol_t proto, - u32 table_id) + u32 table_id, + mfib_source_t src) { mfib_table_t *mfib_table; fib_node_index_t fi; @@ -432,10 +433,10 @@ mfib_table_find_or_create_and_lock (fib_protocol_t proto, switch (proto) { case FIB_PROTOCOL_IP4: - fi = ip4_mfib_table_find_or_create_and_lock(table_id); + fi = ip4_mfib_table_find_or_create_and_lock(table_id, src); break; case FIB_PROTOCOL_IP6: - fi = ip6_mfib_table_find_or_create_and_lock(table_id); + fi = ip6_mfib_table_find_or_create_and_lock(table_id, src); break; case FIB_PROTOCOL_MPLS: default: @@ -451,6 +452,59 @@ mfib_table_find_or_create_and_lock (fib_protocol_t proto, return (fi); } +/** + * @brief Table flush context. Store the indicies of matching FIB entries + * that need to be removed. 
+ */ +typedef struct mfib_table_flush_ctx_t_ +{ + /** + * The list of entries to flush + */ + fib_node_index_t *mftf_entries; + + /** + * The source we are flushing + */ + mfib_source_t mftf_source; +} mfib_table_flush_ctx_t; + +static int +mfib_table_flush_cb (fib_node_index_t mfib_entry_index, + void *arg) +{ + mfib_table_flush_ctx_t *ctx = arg; + + if (mfib_entry_is_sourced(mfib_entry_index, ctx->mftf_source)) + { + vec_add1(ctx->mftf_entries, mfib_entry_index); + } + return (1); +} + +void +mfib_table_flush (u32 mfib_index, + fib_protocol_t proto, + mfib_source_t source) +{ + fib_node_index_t *mfib_entry_index; + mfib_table_flush_ctx_t ctx = { + .mftf_entries = NULL, + .mftf_source = source, + }; + + mfib_table_walk(mfib_index, proto, + mfib_table_flush_cb, + &ctx); + + vec_foreach(mfib_entry_index, ctx.mftf_entries) + { + mfib_table_entry_delete_index(*mfib_entry_index, source); + } + + vec_free(ctx.mftf_entries); +} + static void mfib_table_destroy (mfib_table_t *mfib_table) { @@ -472,27 +526,43 @@ mfib_table_destroy (mfib_table_t *mfib_table) void mfib_table_unlock (u32 fib_index, - fib_protocol_t proto) + fib_protocol_t proto, + mfib_source_t source) { mfib_table_t *mfib_table; mfib_table = mfib_table_get(fib_index, proto); - mfib_table->mft_locks--; + mfib_table->mft_locks[source]--; + mfib_table->mft_locks[MFIB_TABLE_TOTAL_LOCKS]--; + + if (0 == mfib_table->mft_locks[source]) + { + /* + * The source no longer needs the table. flush any routes + * from it just in case + */ + mfib_table_flush(fib_index, proto, source); + } - if (0 == mfib_table->mft_locks) + if (0 == mfib_table->mft_locks[MFIB_TABLE_TOTAL_LOCKS]) { - mfib_table_destroy(mfib_table); + /* + * no more locak from any source - kill it + */ + mfib_table_destroy(mfib_table); } } void mfib_table_lock (u32 fib_index, - fib_protocol_t proto) + fib_protocol_t proto, + mfib_source_t source) { mfib_table_t *mfib_table; mfib_table = mfib_table_get(fib_index, proto); - mfib_table->mft_locks++; + mfib_table->mft_locks[source]++; + mfib_table->mft_locks[MFIB_TABLE_TOTAL_LOCKS]++; } void diff --git a/src/vnet/mfib/mfib_table.h b/src/vnet/mfib/mfib_table.h index 83aa04ef..c6b0b097 100644 --- a/src/vnet/mfib/mfib_table.h +++ b/src/vnet/mfib/mfib_table.h @@ -22,6 +22,12 @@ #include +/** + * Keep a lock per-source and a total + */ +#define MFIB_TABLE_N_LOCKS (MFIB_N_SOURCES+1) +#define MFIB_TABLE_TOTAL_LOCKS MFIB_N_SOURCES + /** * @brief * A protocol Independent IP multicast FIB table @@ -47,7 +53,7 @@ typedef struct mfib_table_t_ /** * number of locks on the table */ - u16 mft_locks; + u16 mft_locks[MFIB_TABLE_N_LOCKS]; /** * Table ID (hash key) for this FIB. @@ -259,7 +265,8 @@ extern fib_node_index_t mfib_table_entry_special_add(u32 fib_index, * the source to flush */ extern void mfib_table_flush(u32 fib_index, - fib_protocol_t proto); + fib_protocol_t proto, + mfib_source_t source); /** * @brief @@ -307,9 +314,13 @@ extern u32 mfib_table_find(fib_protocol_t proto, u32 table_id); * * @return fib_index * The index of the FIB + * + * @param source + * The ID of the client/source. */ extern u32 mfib_table_find_or_create_and_lock(fib_protocol_t proto, - u32 table_id); + u32 table_id, + mfib_source_t source); /** @@ -321,9 +332,13 @@ extern u32 mfib_table_find_or_create_and_lock(fib_protocol_t proto, * * @paran proto * The protocol of the FIB (and thus the entries therein) + * + * @param source + * The ID of the client/source. 
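The unlock logic above is easier to follow as a concrete sequence. Each table now keeps one counter per source plus a running total (mft_locks[MFIB_TABLE_TOTAL_LOCKS]); when a source's own counter reaches zero its routes are flushed, and only when the total reaches zero is the table destroyed. A hedged walk-through with two hypothetical holders of the same table:

    mfib_table_lock (fib_index, FIB_PROTOCOL_IP4, MFIB_SOURCE_API);
    /* locks[API] = 1, total = 1 */
    mfib_table_lock (fib_index, FIB_PROTOCOL_IP4, MFIB_SOURCE_VXLAN);
    /* locks[VXLAN] = 1, total = 2 */

    mfib_table_unlock (fib_index, FIB_PROTOCOL_IP4, MFIB_SOURCE_API);
    /* locks[API] = 0: API-sourced entries are flushed; total = 1, table kept */
    mfib_table_unlock (fib_index, FIB_PROTOCOL_IP4, MFIB_SOURCE_VXLAN);
    /* locks[VXLAN] = 0 and total = 0: remaining VXLAN entries are flushed
     * and the table itself is destroyed */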
*/ extern void mfib_table_unlock(u32 fib_index, - fib_protocol_t proto); + fib_protocol_t proto, + mfib_source_t source); /** * @brief @@ -335,9 +350,13 @@ extern void mfib_table_unlock(u32 fib_index, * * @paran proto * The protocol of the FIB (and thus the entries therein) + * + * @param source + * The ID of the client/source. */ extern void mfib_table_lock(u32 fib_index, - fib_protocol_t proto); + fib_protocol_t proto, + mfib_source_t source); /** * @brief diff --git a/src/vnet/mfib/mfib_test.c b/src/vnet/mfib/mfib_test.c index 57787eca..3055844d 100644 --- a/src/vnet/mfib/mfib_test.c +++ b/src/vnet/mfib/mfib_test.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -366,7 +367,7 @@ mfib_test_i (fib_protocol_t PROTO, MFIB_TEST(3 == adj_mcast_db_size(), "3 MCAST adjs"); /* Find or create FIB table 11 */ - fib_index = mfib_table_find_or_create_and_lock(PROTO, 11); + fib_index = mfib_table_find_or_create_and_lock(PROTO, 11, MFIB_SOURCE_API); mfib_prefix_t pfx_dft = { .fp_len = 0, @@ -1113,9 +1114,10 @@ mfib_test_i (fib_protocol_t PROTO, /* * MPLS enable an interface so we get the MPLS table created */ + mpls_table_create(MPLS_FIB_DEFAULT_TABLE_ID, FIB_SOURCE_API); mpls_sw_interface_enable_disable(&mpls_main, tm->hw[0]->sw_if_index, - 1); + 1, 0); lfei = fib_table_entry_update_one_path(0, // default MPLS Table &pfx_3500, @@ -1192,7 +1194,7 @@ mfib_test_i (fib_protocol_t PROTO, /* * Unlock the table - it's the last lock so should be gone thereafter */ - mfib_table_unlock(fib_index, PROTO); + mfib_table_unlock(fib_index, PROTO, MFIB_SOURCE_API); MFIB_TEST((FIB_NODE_INDEX_INVALID == mfib_table_find(PROTO, fib_index)), @@ -1207,7 +1209,8 @@ mfib_test_i (fib_protocol_t PROTO, */ mpls_sw_interface_enable_disable(&mpls_main, tm->hw[0]->sw_if_index, - 0); + 0, 0); + mpls_table_delete(MPLS_FIB_DEFAULT_TABLE_ID, FIB_SOURCE_API); /* * test we've leaked no resources diff --git a/src/vnet/mfib/mfib_types.h b/src/vnet/mfib/mfib_types.h index 863fad16..50aede04 100644 --- a/src/vnet/mfib/mfib_types.h +++ b/src/vnet/mfib/mfib_types.h @@ -166,9 +166,10 @@ typedef enum mfib_source_t_ MFIB_SOURCE_VXLAN, MFIB_SOURCE_DHCP, MFIB_SOURCE_SRv6, - MFIB_SOURCE_DEFAULT_ROUTE, MFIB_SOURCE_GTPU, MFIB_SOURCE_VXLAN_GPE, + MFIB_SOURCE_RR, + MFIB_SOURCE_DEFAULT_ROUTE, } mfib_source_t; #define MFIB_SOURCE_NAMES { \ @@ -178,11 +179,14 @@ typedef enum mfib_source_t_ [MFIB_SOURCE_DHCP] = "DHCP", \ [MFIB_SOURCE_VXLAN] = "VXLAN", \ [MFIB_SOURCE_SRv6] = "SRv6", \ - [MFIB_SOURCE_DEFAULT_ROUTE] = "Default Route", \ [MFIB_SOURCE_GTPU] = "GTPU", \ [MFIB_SOURCE_VXLAN_GPE] = "VXLAN-GPE", \ + [MFIB_SOURCE_RR] = "Recursive-resolution", \ + [MFIB_SOURCE_DEFAULT_ROUTE] = "Default Route", \ } +#define MFIB_N_SOURCES (MFIB_SOURCE_DEFAULT_ROUTE) + /** * \brief Compare two prefixes for equality */ diff --git a/src/vnet/mpls/interface.c b/src/vnet/mpls/interface.c index a085aaa2..d7c8e7d3 100644 --- a/src/vnet/mpls/interface.c +++ b/src/vnet/mpls/interface.c @@ -35,25 +35,33 @@ mpls_sw_interface_is_enabled (u32 sw_if_index) return (mm->mpls_enabled_by_sw_if_index[sw_if_index]); } -void +int mpls_sw_interface_enable_disable (mpls_main_t * mm, u32 sw_if_index, - u8 is_enable) + u8 is_enable, + u8 is_api) { fib_node_index_t lfib_index; vec_validate_init_empty (mm->mpls_enabled_by_sw_if_index, sw_if_index, 0); + lfib_index = fib_table_find(FIB_PROTOCOL_MPLS, + MPLS_FIB_DEFAULT_TABLE_ID); + + if (~0 == lfib_index) + return VNET_API_ERROR_NO_SUCH_FIB; + /* * enable/disable only on the 1<->0 transition */ if (is_enable) { if (1 
!= ++mm->mpls_enabled_by_sw_if_index[sw_if_index]) - return; + return (0); + + fib_table_lock(lfib_index, FIB_PROTOCOL_MPLS, + (is_api? FIB_SOURCE_API: FIB_SOURCE_CLI)); - lfib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_MPLS, - MPLS_FIB_DEFAULT_TABLE_ID); vec_validate(mm->fib_index_by_sw_if_index, 0); mm->fib_index_by_sw_if_index[sw_if_index] = lfib_index; } @@ -61,15 +69,17 @@ mpls_sw_interface_enable_disable (mpls_main_t * mm, { ASSERT(mm->mpls_enabled_by_sw_if_index[sw_if_index] > 0); if (0 != --mm->mpls_enabled_by_sw_if_index[sw_if_index]) - return; + return (0); fib_table_unlock(mm->fib_index_by_sw_if_index[sw_if_index], - FIB_PROTOCOL_MPLS); + FIB_PROTOCOL_MPLS, + (is_api? FIB_SOURCE_API: FIB_SOURCE_CLI)); } vnet_feature_enable_disable ("mpls-input", "mpls-not-enabled", sw_if_index, !is_enable, 0, 0); + return (0); } static clib_error_t * @@ -101,7 +111,7 @@ mpls_interface_enable_disable (vlib_main_t * vm, goto done; } - mpls_sw_interface_enable_disable(&mpls_main, sw_if_index, enable); + mpls_sw_interface_enable_disable(&mpls_main, sw_if_index, enable, 0); done: return error; diff --git a/src/vnet/mpls/mpls.c b/src/vnet/mpls/mpls.c index 5021ac23..7bdfd8c7 100644 --- a/src/vnet/mpls/mpls.c +++ b/src/vnet/mpls/mpls.c @@ -536,6 +536,78 @@ VLIB_CLI_COMMAND (mpls_local_label_command, static) = { .short_help = "Create/Delete MPL local labels", }; +clib_error_t * +vnet_mpls_table_cmd (vlib_main_t * vm, + unformat_input_t * main_input, + vlib_cli_command_t * cmdo) +{ + unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *error = NULL; + u32 table_id, is_add; + + is_add = 1; + table_id = ~0; + + /* Get a line of input. */ + if (!unformat_user (main_input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "%d", &table_id)) + ; + else if (unformat (line_input, "del")) + is_add = 0; + else if (unformat (line_input, "add")) + is_add = 1; + else + { + error = unformat_parse_error (line_input); + goto done; + } + } + + if (~0 == table_id) + { + error = clib_error_return (0, "No table id"); + goto done; + } + else if (0 == table_id) + { + error = clib_error_return (0, "Can't change the default table"); + goto done; + } + else + { + if (is_add) + { + mpls_table_create (table_id, 0); + } + else + { + mpls_table_delete (table_id, 0); + } + } + + done: + unformat_free (line_input); + return error; +} + +/* *INDENT-ON* */ +/*? + * This command is used to add or delete MPLS Tables. All + * Tables must be explicitly added before that can be used, + * Including the default table. 
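Combined with the change to mpls_sw_interface_enable_disable above, which now fails with VNET_API_ERROR_NO_SUCH_FIB when the default MPLS table has not been created, the operational order appears to be: create the table first, then enable MPLS on interfaces. Note that the registration that follows names its variable ip6_table_command and spells the path "mpla table" (both look like copy/paste slips for an MPLS table command), and that the parser rejects table id 0 with "Can't change the default table", so the default table is presumably created through the mpls_table_add_del API instead, as the updated tests do with VppMplsTable(self, 0). Going by the short_help string, usage for a non-default table would look roughly like:

    mpls table add 100
    ... enable MPLS on interfaces, add routes/bindings in table 100 ...
    mpls table del 100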
+ ?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (ip6_table_command, static) = { + .path = "mpla table", + .short_help = "mpls table [add|del] ", + .function = vnet_mpls_table_cmd, + .is_mp_safe = 1, +}; + int mpls_fib_reset_labels (u32 fib_id) { @@ -546,12 +618,8 @@ mpls_fib_reset_labels (u32 fib_id) static clib_error_t * mpls_init (vlib_main_t * vm) { - mpls_main_t * mm = &mpls_main; clib_error_t * error; - mm->vlib_main = vm; - mm->vnet_main = vnet_get_main(); - if ((error = vlib_call_init_function (vm, ip_main_init))) return error; diff --git a/src/vnet/mpls/mpls.h b/src/vnet/mpls/mpls.h index b0125e60..31cb1746 100644 --- a/src/vnet/mpls/mpls.h +++ b/src/vnet/mpls/mpls.h @@ -56,10 +56,6 @@ typedef struct { /* IP4 enabled count by software interface */ u8 * mpls_enabled_by_sw_if_index; - - /* convenience */ - vlib_main_t * vlib_main; - vnet_main_t * vnet_main; } mpls_main_t; extern mpls_main_t mpls_main; @@ -77,8 +73,6 @@ extern vlib_node_registration_t mpls_midchain_node; /* Parse mpls protocol as 0xXXXX or protocol name. In either host or network byte order. */ -unformat_function_t unformat_mpls_protocol_host_byte_order; -unformat_function_t unformat_mpls_protocol_net_byte_order; unformat_function_t unformat_mpls_label_net_byte_order; unformat_function_t unformat_mpls_unicast_label; @@ -86,9 +80,10 @@ unformat_function_t unformat_mpls_unicast_label; unformat_function_t unformat_mpls_header; unformat_function_t unformat_pg_mpls_header; -void mpls_sw_interface_enable_disable (mpls_main_t * mm, - u32 sw_if_index, - u8 is_enable); +int mpls_sw_interface_enable_disable (mpls_main_t * mm, + u32 sw_if_index, + u8 is_enable, + u8 is_api); u8 mpls_sw_interface_is_enabled (u32 sw_if_index); @@ -103,4 +98,7 @@ mpls_fib_index_cmp(void * a1, void * a2); int mpls_label_cmp(void * a1, void * a2); +void mpls_table_create(uint32_t table_id, u8 is_api); +void mpls_table_delete(uint32_t table_id, u8 is_api); + #endif /* included_vnet_mpls_h */ diff --git a/src/vnet/mpls/mpls_api.c b/src/vnet/mpls/mpls_api.c index a44b1a25..38f5b014 100644 --- a/src/vnet/mpls/mpls_api.c +++ b/src/vnet/mpls/mpls_api.c @@ -58,6 +58,29 @@ _(MPLS_FIB_DUMP, mpls_fib_dump) extern void stats_dslock_with_hint (int hint, int tag); extern void stats_dsunlock (void); +void +mpls_table_delete (u32 table_id, u8 is_api) +{ + u32 fib_index; + + /* + * The MPLS defult table must also be explicitly created via the API. + * So in contrast to IP, it gets no special treatment here. + * + * The API holds only one lock on the table. + * i.e. it can be added many times via the API but needs to be + * deleted only once. + */ + fib_index = fib_table_find (FIB_PROTOCOL_MPLS, table_id); + + if (~0 != fib_index) + { + fib_table_unlock (fib_index, + FIB_PROTOCOL_MPLS, + (is_api ? FIB_SOURCE_API : FIB_SOURCE_CLI)); + } +} + void vl_api_mpls_table_add_del_t_handler (vl_api_mpls_table_add_del_t * mp) { @@ -68,6 +91,13 @@ vl_api_mpls_table_add_del_t_handler (vl_api_mpls_table_add_del_t * mp) vnm = vnet_get_main (); vnm->api_errno = 0; + if (mp->mt_is_add) + mpls_table_create (ntohl (mp->mt_table_id), 1); + else + mpls_table_delete (ntohl (mp->mt_table_id), 1); + + rv = (rv == 0) ? 
vnm->api_errno : rv; + REPLY_MACRO (VL_API_MPLS_TABLE_ADD_DEL_REPLY); } @@ -82,14 +112,7 @@ mpls_ip_bind_unbind_handler (vnet_main_t * vnm, if (~0 == mpls_fib_index) { - if (mp->mb_create_table_if_needed) - { - mpls_fib_index = - fib_table_find_or_create_and_lock (FIB_PROTOCOL_MPLS, - ntohl (mp->mb_mpls_table_id)); - } - else - return VNET_API_ERROR_NO_SUCH_FIB; + return VNET_API_ERROR_NO_SUCH_FIB; } ip_fib_index = fib_table_find ((mp->mb_is_ip4 ? @@ -170,7 +193,6 @@ mpls_route_add_del_t_handler (vnet_main_t * vnm, mp->mr_next_hop_sw_if_index, pfx.fp_payload_proto, mp->mr_next_hop_table_id, - mp->mr_create_table_if_needed, mp->mr_is_rpf_id, &fib_index, &next_hop_fib_index); @@ -235,6 +257,32 @@ vl_api_mpls_route_add_del_t_handler (vl_api_mpls_route_add_del_t * mp) REPLY_MACRO (VL_API_MPLS_ROUTE_ADD_DEL_REPLY); } +void +mpls_table_create (u32 table_id, u8 is_api) +{ + u32 fib_index; + + /* + * The MPLS defult table must also be explicitly created via the API. + * So in contrast to IP, it gets no special treatment here. + */ + + /* + * The API holds only one lock on the table. + * i.e. it can be added many times via the API but needs to be + * deleted only once. + */ + fib_index = fib_table_find (FIB_PROTOCOL_MPLS, table_id); + + if (~0 == fib_index) + { + fib_table_find_or_create_and_lock (FIB_PROTOCOL_MPLS, + table_id, + (is_api ? + FIB_SOURCE_API : FIB_SOURCE_CLI)); + } +} + static void vl_api_mpls_tunnel_add_del_t_handler (vl_api_mpls_tunnel_add_del_t * mp) { diff --git a/src/vnet/srv6/sr_policy_rewrite.c b/src/vnet/srv6/sr_policy_rewrite.c index f427bbf3..2f90993a 100755 --- a/src/vnet/srv6/sr_policy_rewrite.c +++ b/src/vnet/srv6/sr_policy_rewrite.c @@ -595,8 +595,10 @@ sr_policy_add (ip6_address_t * bsid, ip6_address_t * segments, if (sm->fib_table_ip6 == (u32) ~ 0) { sm->fib_table_ip6 = fib_table_create_and_lock (FIB_PROTOCOL_IP6, + FIB_SOURCE_SR, "SRv6 steering of IP6 prefixes through BSIDs"); sm->fib_table_ip4 = fib_table_create_and_lock (FIB_PROTOCOL_IP6, + FIB_SOURCE_SR, "SRv6 steering of IP4 prefixes through BSIDs"); } @@ -684,8 +686,8 @@ sr_policy_del (ip6_address_t * bsid, u32 index) /* If FIB empty unlock it */ if (!pool_elts (sm->sr_policies) && !pool_elts (sm->steer_policies)) { - fib_table_unlock (sm->fib_table_ip6, FIB_PROTOCOL_IP6); - fib_table_unlock (sm->fib_table_ip4, FIB_PROTOCOL_IP6); + fib_table_unlock (sm->fib_table_ip6, FIB_PROTOCOL_IP6, FIB_SOURCE_SR); + fib_table_unlock (sm->fib_table_ip4, FIB_PROTOCOL_IP6, FIB_SOURCE_SR); sm->fib_table_ip6 = (u32) ~ 0; sm->fib_table_ip4 = (u32) ~ 0; } diff --git a/src/vnet/srv6/sr_steering.c b/src/vnet/srv6/sr_steering.c index 57fe21f6..cf4e81ab 100755 --- a/src/vnet/srv6/sr_steering.c +++ b/src/vnet/srv6/sr_steering.c @@ -159,8 +159,10 @@ sr_steering_policy (int is_del, ip6_address_t * bsid, u32 sr_policy_index, /* If no more SR policies or steering policies */ if (!pool_elts (sm->sr_policies) && !pool_elts (sm->steer_policies)) { - fib_table_unlock (sm->fib_table_ip6, FIB_PROTOCOL_IP6); - fib_table_unlock (sm->fib_table_ip4, FIB_PROTOCOL_IP6); + fib_table_unlock (sm->fib_table_ip6, + FIB_PROTOCOL_IP6, FIB_SOURCE_SR); + fib_table_unlock (sm->fib_table_ip4, + FIB_PROTOCOL_IP6, FIB_SOURCE_SR); sm->fib_table_ip6 = (u32) ~ 0; sm->fib_table_ip4 = (u32) ~ 0; } diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index f9c3129c..044ddb5b 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -699,8 +699,9 @@ static void VALIDATE_SW_IF_INDEX (mp); - mpls_sw_interface_enable_disable (&mpls_main, - ntohl (mp->sw_if_index), mp->enable); 
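The comments in mpls_table_create / mpls_table_delete above describe an idempotent-add, single-lock model for tables owned by the API. A sketch of what that implies, with table id 100 purely illustrative (is_api = 1 selects FIB_SOURCE_API, 0 selects FIB_SOURCE_CLI):

    mpls_table_create (100, 1 /* is_api */);  /* no such table yet: create it and take
                                               * the single FIB_SOURCE_API lock */
    mpls_table_create (100, 1);               /* table already exists: no extra lock */
    mpls_table_delete (100, 1);               /* drop that one lock; the table goes away
                                               * once no other source still holds it */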
+ rv = mpls_sw_interface_enable_disable (&mpls_main, + ntohl (mp->sw_if_index), + mp->enable, 1); BAD_SW_IF_INDEX_LABEL; REPLY_MACRO (VL_API_SW_INTERFACE_SET_MPLS_ENABLE_REPLY); diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c index 1353fe28..be74b83a 100644 --- a/src/vpp/api/custom_dump.c +++ b/src/vpp/api/custom_dump.c @@ -560,9 +560,6 @@ static void *vl_api_ip_add_del_route_t_print if (mp->table_id != 0) s = format (s, "vrf %d ", ntohl (mp->table_id)); - if (mp->create_vrf_if_needed) - s = format (s, "create-vrf "); - if (mp->next_hop_weight != 1) s = format (s, "weight %d ", mp->next_hop_weight); diff --git a/test/test_dhcp.py b/test/test_dhcp.py index 6fc29182..fe97f6c9 100644 --- a/test/test_dhcp.py +++ b/test/test_dhcp.py @@ -6,7 +6,7 @@ import struct from framework import VppTestCase, VppTestRunner from vpp_neighbor import VppNeighbor -from vpp_ip_route import find_route +from vpp_ip_route import find_route, VppIpTable from util import mk_ll_addr from scapy.layers.l2 import Ether, getmacbyip, ARP @@ -34,9 +34,19 @@ class TestDHCP(VppTestCase): # create 3 pg interfaces self.create_pg_interfaces(range(4)) + self.tables = [] # pg0 and 1 are IP configured in VRF 0 and 1. # pg2 and 3 are non IP-configured in VRF 0 and 1 + table_id = 0 + for table_id in range(1, 4): + tbl4 = VppIpTable(self, table_id) + tbl4.add_vpp_config() + self.tables.append(tbl4) + tbl6 = VppIpTable(self, table_id, is_ip6=1) + tbl6.add_vpp_config() + self.tables.append(tbl6) + table_id = 0 for i in self.pg_interfaces[:2]: i.admin_up() @@ -56,11 +66,15 @@ class TestDHCP(VppTestCase): table_id += 1 def tearDown(self): - super(TestDHCP, self).tearDown() - for i in self.pg_interfaces: + for i in self.pg_interfaces[:2]: i.unconfig_ip4() i.unconfig_ip6() + + for i in self.pg_interfaces: + i.set_table_ip4(0) + i.set_table_ip6(0) i.admin_down() + super(TestDHCP, self).tearDown() def send_and_assert_no_replies(self, intf, pkts, remark): intf.add_stream(pkts) @@ -667,6 +681,8 @@ class TestDHCP(VppTestCase): "DHCP cleanup VRF 0") self.send_and_assert_no_replies(self.pg3, pkts_disc_vrf1, "DHCP cleanup VRF 1") + self.pg2.unconfig_ip4() + self.pg3.unconfig_ip4() def test_dhcp6_proxy(self): """ DHCPv6 Proxy""" @@ -1045,6 +1061,8 @@ class TestDHCP(VppTestCase): server_table_id=0, is_ipv6=1, is_add=0) + self.pg2.unconfig_ip6() + self.pg3.unconfig_ip6() def test_dhcp_client(self): """ DHCP Client""" diff --git a/test/test_gre.py b/test/test_gre.py index 1afc44fb..9046b05f 100644 --- a/test/test_gre.py +++ b/test/test_gre.py @@ -6,7 +6,7 @@ from logging import * from framework import VppTestCase, VppTestRunner from vpp_sub_interface import VppDot1QSubint from vpp_gre_interface import VppGreInterface, VppGre6Interface -from vpp_ip_route import VppIpRoute, VppRoutePath, DpoProto +from vpp_ip_route import VppIpRoute, VppRoutePath, DpoProto, VppIpTable from vpp_papi_provider import L2_VTR_OP from scapy.packet import Raw @@ -30,6 +30,9 @@ class TestGRE(VppTestCase): # create 3 pg interfaces - set one in a non-default table. 
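With create_vrf_if_needed removed from the route and bind messages, the tests above (and any external client) now have to create a table explicitly before binding interfaces or routes to it. A hedged sketch of the equivalent C-side message, inferring the generated type and field names from the ip_table_add_del call added to the Python provider further below (the exact vl_api_ip_table_add_del_t layout is not part of this diff):

    vl_api_ip_table_add_del_t *mp = vl_msg_api_alloc (sizeof (*mp));
    memset (mp, 0, sizeof (*mp));
    mp->_vl_msg_id = htons (VL_API_IP_TABLE_ADD_DEL);
    mp->table_id = htonl (10);   /* illustrative VRF id */
    mp->is_ipv6 = 0;
    mp->is_add = 1;
    /* send via the client's shared-memory queue, then bind interfaces
     * and routes to table 10 */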
self.create_pg_interfaces(range(3)) + + self.tbl = VppIpTable(self, 1) + self.tbl.add_vpp_config() self.pg1.set_table_ip4(1) for i in self.pg_interfaces: @@ -43,11 +46,12 @@ class TestGRE(VppTestCase): self.pg2.resolve_ndp() def tearDown(self): - super(TestGRE, self).tearDown() for i in self.pg_interfaces: i.unconfig_ip4() i.unconfig_ip6() i.admin_down() + self.pg1.set_table_ip4(0) + super(TestGRE, self).tearDown() def create_stream_ip4(self, src_if, src_ip, dst_ip): pkts = [] diff --git a/test/test_ip4.py b/test/test_ip4.py index 7a7098c3..55d16735 100644 --- a/test/test_ip4.py +++ b/test/test_ip4.py @@ -6,7 +6,8 @@ import unittest from framework import VppTestCase, VppTestRunner from vpp_sub_interface import VppSubInterface, VppDot1QSubint, VppDot1ADSubint from vpp_ip_route import VppIpRoute, VppRoutePath, VppIpMRoute, \ - VppMRoutePath, MRouteItfFlags, MRouteEntryFlags, VppMplsIpBind + VppMRoutePath, MRouteItfFlags, MRouteEntryFlags, VppMplsIpBind, \ + VppMplsTable from scapy.packet import Raw from scapy.layers.l2 import Ether, Dot1Q, ARP @@ -774,6 +775,8 @@ class TestIPLoadBalance(VppTestCase): super(TestIPLoadBalance, self).setUp() self.create_pg_interfaces(range(5)) + mpls_tbl = VppMplsTable(self, 0) + mpls_tbl.add_vpp_config() for i in self.pg_interfaces: i.admin_up() @@ -782,11 +785,11 @@ class TestIPLoadBalance(VppTestCase): i.enable_mpls() def tearDown(self): - super(TestIPLoadBalance, self).tearDown() for i in self.pg_interfaces: i.disable_mpls() i.unconfig_ip4() i.admin_down() + super(TestIPLoadBalance, self).tearDown() def send_and_expect_load_balancing(self, input, pkts, outputs): input.add_stream(pkts) @@ -966,6 +969,8 @@ class TestIPVlan0(VppTestCase): super(TestIPVlan0, self).setUp() self.create_pg_interfaces(range(2)) + mpls_tbl = VppMplsTable(self, 0) + mpls_tbl.add_vpp_config() for i in self.pg_interfaces: i.admin_up() @@ -974,11 +979,11 @@ class TestIPVlan0(VppTestCase): i.enable_mpls() def tearDown(self): - super(TestIPVlan0, self).tearDown() for i in self.pg_interfaces: i.disable_mpls() i.unconfig_ip4() i.admin_down() + super(TestIPVlan0, self).tearDown() def send_and_expect(self, input, pkts, output): input.add_stream(pkts) diff --git a/test/test_ip4_vrf_multi_instance.py b/test/test_ip4_vrf_multi_instance.py index b73ac948..5a8d6760 100644 --- a/test/test_ip4_vrf_multi_instance.py +++ b/test/test_ip4_vrf_multi_instance.py @@ -172,9 +172,10 @@ class TestIp4VrfMultiInst(VppTestCase): pg_if = self.pg_if_by_vrf_id[vrf_id][0] dest_addr = pg_if.remote_hosts[0].ip4n dest_addr_len = 24 + self.vapi.ip_table_add_del(vrf_id, is_add=1) self.vapi.ip_add_del_route( dest_addr, dest_addr_len, pg_if.local_ip4n, - table_id=vrf_id, create_vrf_if_needed=1, is_multipath=1) + table_id=vrf_id, is_multipath=1) self.logger.info("IPv4 VRF ID %d created" % vrf_id) if vrf_id not in self.vrf_list: self.vrf_list.append(vrf_id) @@ -216,6 +217,7 @@ class TestIp4VrfMultiInst(VppTestCase): self.logger.info("IPv4 VRF ID %d reset" % vrf_id) self.logger.debug(self.vapi.ppcli("show ip fib")) self.logger.debug(self.vapi.ppcli("show ip arp")) + self.vapi.ip_table_add_del(vrf_id, is_add=0) def create_stream(self, src_if, packet_sizes): """ diff --git a/test/test_ip6.py b/test/test_ip6.py index 285ce181..aad3713c 100644 --- a/test/test_ip6.py +++ b/test/test_ip6.py @@ -8,7 +8,7 @@ from vpp_sub_interface import VppSubInterface, VppDot1QSubint from vpp_pg_interface import is_ipv6_misc from vpp_ip_route import VppIpRoute, VppRoutePath, find_route, VppIpMRoute, \ VppMRoutePath, MRouteItfFlags, 
MRouteEntryFlags, VppMplsIpBind, \ - VppMplsRoute, DpoProto + VppMplsRoute, DpoProto, VppMplsTable from vpp_neighbor import find_nbr, VppNeighbor from scapy.packet import Raw @@ -1260,6 +1260,9 @@ class TestIP6LoadBalance(VppTestCase): self.create_pg_interfaces(range(5)) + mpls_tbl = VppMplsTable(self, 0) + mpls_tbl.add_vpp_config() + for i in self.pg_interfaces: i.admin_up() i.config_ip6() @@ -1267,11 +1270,11 @@ class TestIP6LoadBalance(VppTestCase): i.enable_mpls() def tearDown(self): - super(TestIP6LoadBalance, self).tearDown() for i in self.pg_interfaces: i.unconfig_ip6() i.admin_down() i.disable_mpls() + super(TestIP6LoadBalance, self).tearDown() def send_and_expect_load_balancing(self, input, pkts, outputs): input.add_stream(pkts) diff --git a/test/test_ip6_vrf_multi_instance.py b/test/test_ip6_vrf_multi_instance.py index af80b5ba..769cb2e5 100644 --- a/test/test_ip6_vrf_multi_instance.py +++ b/test/test_ip6_vrf_multi_instance.py @@ -187,9 +187,10 @@ class TestIP6VrfMultiInst(VppTestCase): pg_if = self.pg_if_by_vrf_id[vrf_id][0] dest_addr = pg_if.remote_hosts[0].ip6n dest_addr_len = 64 + self.vapi.ip_table_add_del(vrf_id, is_add=1, is_ipv6=1) self.vapi.ip_add_del_route( dest_addr, dest_addr_len, pg_if.local_ip6n, is_ipv6=1, - table_id=vrf_id, create_vrf_if_needed=1, is_multipath=1) + table_id=vrf_id, is_multipath=1) self.logger.info("IPv6 VRF ID %d created" % vrf_id) if vrf_id not in self.vrf_list: self.vrf_list.append(vrf_id) @@ -232,6 +233,7 @@ class TestIP6VrfMultiInst(VppTestCase): self.logger.info("IPv6 VRF ID %d reset" % vrf_id) self.logger.debug(self.vapi.ppcli("show ip6 fib")) self.logger.debug(self.vapi.ppcli("show ip6 neighbors")) + self.vapi.ip_table_add_del(vrf_id, is_add=0, is_ipv6=1) def create_stream(self, src_if, packet_sizes): """ diff --git a/test/test_ip_mcast.py b/test/test_ip_mcast.py index 276555d6..7cad683c 100644 --- a/test/test_ip_mcast.py +++ b/test/test_ip_mcast.py @@ -5,7 +5,7 @@ import unittest from framework import VppTestCase, VppTestRunner from vpp_sub_interface import VppSubInterface, VppDot1QSubint, VppDot1ADSubint from vpp_ip_route import VppIpMRoute, VppMRoutePath, VppMFibSignal, \ - MRouteItfFlags, MRouteEntryFlags + MRouteItfFlags, MRouteEntryFlags, VppIpTable from scapy.packet import Raw from scapy.layers.l2 import Ether @@ -44,16 +44,37 @@ class TestIPMcast(VppTestCase): super(TestIPMcast, self).setUp() # create 8 pg interfaces - self.create_pg_interfaces(range(8)) + self.create_pg_interfaces(range(9)) # setup interfaces - for i in self.pg_interfaces: + for i in self.pg_interfaces[:8]: i.admin_up() i.config_ip4() i.config_ip6() i.resolve_arp() i.resolve_ndp() + # one more in a vrf + tbl4 = VppIpTable(self, 10) + tbl4.add_vpp_config() + self.pg8.set_table_ip4(10) + self.pg8.config_ip4() + + tbl6 = VppIpTable(self, 10, is_ip6=1) + tbl6.add_vpp_config() + self.pg8.set_table_ip6(10) + self.pg8.config_ip6() + + def tearDown(self): + for i in self.pg_interfaces: + i.unconfig_ip4() + i.unconfig_ip6() + i.admin_down() + + self.pg8.set_table_ip4(0) + self.pg8.set_table_ip6(0) + super(TestIPMcast, self).tearDown() + def create_stream_ip4(self, src_if, src_ip, dst_ip, payload_size=0): pkts = [] # default to small packet sizes @@ -663,6 +684,77 @@ class TestIPMcast(VppTestCase): # route_232_1_1_1.remove_vpp_config() + def test_ip_mcast_vrf(self): + """ IP Multicast Replication in non-default table""" + + # + # An (S,G). 
+ # one accepting interface, pg0, 2 forwarding interfaces + # + route_1_1_1_1_232_1_1_1 = VppIpMRoute( + self, + "1.1.1.1", + "232.1.1.1", 64, + MRouteEntryFlags.MFIB_ENTRY_FLAG_NONE, + [VppMRoutePath(self.pg8.sw_if_index, + MRouteItfFlags.MFIB_ITF_FLAG_ACCEPT), + VppMRoutePath(self.pg1.sw_if_index, + MRouteItfFlags.MFIB_ITF_FLAG_FORWARD), + VppMRoutePath(self.pg2.sw_if_index, + MRouteItfFlags.MFIB_ITF_FLAG_FORWARD)], + table_id=10) + route_1_1_1_1_232_1_1_1.add_vpp_config() + + # + # a stream that matches the route for (1.1.1.1,232.1.1.1) + # small packets + # + self.vapi.cli("clear trace") + tx = self.create_stream_ip4(self.pg8, "1.1.1.1", "232.1.1.1") + self.pg8.add_stream(tx) + + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + # We expect replications on Pg1 & 2 + self.verify_capture_ip4(self.pg1, tx) + self.verify_capture_ip4(self.pg2, tx) + + def test_ip6_mcast_vrf(self): + """ IPv6 Multicast Replication in non-default table""" + + # + # An (S,G). + # one accepting interface, pg0, 2 forwarding interfaces + # + route_2001_ff01_1 = VppIpMRoute( + self, + "2001::1", + "ff01::1", 256, + MRouteEntryFlags.MFIB_ENTRY_FLAG_NONE, + [VppMRoutePath(self.pg8.sw_if_index, + MRouteItfFlags.MFIB_ITF_FLAG_ACCEPT), + VppMRoutePath(self.pg1.sw_if_index, + MRouteItfFlags.MFIB_ITF_FLAG_FORWARD), + VppMRoutePath(self.pg2.sw_if_index, + MRouteItfFlags.MFIB_ITF_FLAG_FORWARD)], + table_id=10, + is_ip6=1) + route_2001_ff01_1.add_vpp_config() + + # + # a stream that matches the route for (2001::1, ff00::1) + # + self.vapi.cli("clear trace") + tx = self.create_stream_ip6(self.pg8, "2001::1", "ff01::1") + self.pg8.add_stream(tx) + + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + # We expect replications on Pg1, 2, + self.verify_capture_ip6(self.pg1, tx) + self.verify_capture_ip6(self.pg2, tx) if __name__ == '__main__': unittest.main(testRunner=VppTestRunner) diff --git a/test/test_mpls.py b/test/test_mpls.py index b2226a74..460a32d1 100644 --- a/test/test_mpls.py +++ b/test/test_mpls.py @@ -6,7 +6,7 @@ import socket from framework import VppTestCase, VppTestRunner from vpp_ip_route import VppIpRoute, VppRoutePath, VppMplsRoute, \ VppMplsIpBind, VppIpMRoute, VppMRoutePath, \ - MRouteItfFlags, MRouteEntryFlags, DpoProto + MRouteItfFlags, MRouteEntryFlags, DpoProto, VppIpTable, VppMplsTable from vpp_mpls_tunnel_interface import VppMPLSTunnelInterface from scapy.packet import Raw @@ -60,9 +60,23 @@ class TestMPLS(VppTestCase): # setup both interfaces # assign them different tables. 
table_id = 0 + self.tables = [] + + tbl = VppMplsTable(self, 0) + tbl.add_vpp_config() + self.tables.append(tbl) for i in self.pg_interfaces: i.admin_up() + + if table_id != 0: + tbl = VppIpTable(self, table_id) + tbl.add_vpp_config() + self.tables.append(tbl) + tbl = VppIpTable(self, table_id, is_ip6=1) + tbl.add_vpp_config() + self.tables.append(tbl) + i.set_table_ip4(table_id) i.set_table_ip6(table_id) i.config_ip4() @@ -73,12 +87,15 @@ class TestMPLS(VppTestCase): table_id += 1 def tearDown(self): - super(TestMPLS, self).tearDown() for i in self.pg_interfaces: i.unconfig_ip4() i.unconfig_ip6() i.ip6_disable() + i.set_table_ip4(0) + i.set_table_ip6(0) + i.disable_mpls() i.admin_down() + super(TestMPLS, self).tearDown() # the default of 64 matches the IP packet TTL default def create_stream_labelled_ip4( @@ -1092,6 +1109,9 @@ class TestMPLSDisabled(VppTestCase): # create 2 pg interfaces self.create_pg_interfaces(range(2)) + self.tbl = VppMplsTable(self, 0) + self.tbl.add_vpp_config() + # PG0 is MPLS enalbed self.pg0.admin_up() self.pg0.config_ip4() @@ -1102,11 +1122,13 @@ class TestMPLSDisabled(VppTestCase): self.pg1.admin_up() def tearDown(self): - super(TestMPLSDisabled, self).tearDown() for i in self.pg_interfaces: i.unconfig_ip4() i.admin_down() + self.pg0.disable_mpls() + super(TestMPLSDisabled, self).tearDown() + def send_and_assert_no_replies(self, intf, pkts, remark): intf.add_stream(pkts) self.pg_enable_capture(self.pg_interfaces) @@ -1174,6 +1196,13 @@ class TestMPLSPIC(VppTestCase): # create 2 pg interfaces self.create_pg_interfaces(range(4)) + mpls_tbl = VppMplsTable(self, 0) + mpls_tbl.add_vpp_config() + tbl4 = VppIpTable(self, 1) + tbl4.add_vpp_config() + tbl6 = VppIpTable(self, 1, is_ip6=1) + tbl6.add_vpp_config() + # core links self.pg0.admin_up() self.pg0.config_ip4() @@ -1201,14 +1230,15 @@ class TestMPLSPIC(VppTestCase): self.pg3.resolve_ndp() def tearDown(self): - super(TestMPLSPIC, self).tearDown() self.pg0.disable_mpls() + self.pg1.disable_mpls() for i in self.pg_interfaces: i.unconfig_ip4() i.unconfig_ip6() i.set_table_ip4(0) i.set_table_ip6(0) i.admin_down() + super(TestMPLSPIC, self).tearDown() def test_mpls_ibgp_pic(self): """ MPLS iBGP PIC edge convergence @@ -1534,24 +1564,30 @@ class TestMPLSL2(VppTestCase): # create 2 pg interfaces self.create_pg_interfaces(range(2)) + # create the default MPLS table + self.tables = [] + tbl = VppMplsTable(self, 0) + tbl.add_vpp_config() + self.tables.append(tbl) + # use pg0 as the core facing interface self.pg0.admin_up() self.pg0.config_ip4() self.pg0.resolve_arp() self.pg0.enable_mpls() - # use the other 2 for customer facg L2 links + # use the other 2 for customer facing L2 links for i in self.pg_interfaces[1:]: i.admin_up() def tearDown(self): - super(TestMPLSL2, self).tearDown() for i in self.pg_interfaces[1:]: i.admin_down() self.pg0.disable_mpls() self.pg0.unconfig_ip4() self.pg0.admin_down() + super(TestMPLSL2, self).tearDown() def verify_capture_tunneled_ethernet(self, capture, sent, mpls_labels, ttl=255, top=None): diff --git a/test/test_nat.py b/test/test_nat.py index 1f2d17ab..73e9e217 100644 --- a/test/test_nat.py +++ b/test/test_nat.py @@ -549,6 +549,8 @@ class TestNAT44(MethodHolder): cls.pg0.configure_ipv4_neighbors() cls.overlapping_interfaces = list(list(cls.pg_interfaces[4:7])) + cls.vapi.ip_table_add_del(10, is_add=1) + cls.vapi.ip_table_add_del(20, is_add=1) cls.pg4._local_ip4 = "172.16.255.1" cls.pg4._local_ip4n = socket.inet_pton(socket.AF_INET, i.local_ip4) @@ -1797,6 +1799,8 @@ class 
TestNAT44(MethodHolder): self.pg0.unconfig_ip4() self.pg1.unconfig_ip4() + self.vapi.ip_table_add_del(vrf_id1, is_add=1) + self.vapi.ip_table_add_del(vrf_id2, is_add=1) self.pg0.set_table_ip4(vrf_id1) self.pg1.set_table_ip4(vrf_id2) self.pg0.config_ip4() @@ -1825,6 +1829,13 @@ class TestNAT44(MethodHolder): capture = self.pg2.get_capture(len(pkts)) self.verify_capture_out(capture, nat_ip2) + self.pg0.unconfig_ip4() + self.pg1.unconfig_ip4() + self.pg0.set_table_ip4(0) + self.pg1.set_table_ip4(0) + self.vapi.ip_table_add_del(vrf_id1, is_add=0) + self.vapi.ip_table_add_del(vrf_id2, is_add=0) + def test_vrf_feature_independent(self): """ NAT44 tenant VRF independent address pool mode """ @@ -3042,6 +3053,8 @@ class TestNAT64(MethodHolder): cls.ip6_interfaces.append(cls.pg_interfaces[2]) cls.ip4_interfaces = list(cls.pg_interfaces[1:2]) + cls.vapi.ip_table_add_del(cls.vrf1_id, is_add=1, is_ipv6=1) + cls.pg_interfaces[2].set_table_ip6(cls.vrf1_id) cls.pg0.generate_remote_hosts(2) diff --git a/test/test_neighbor.py b/test/test_neighbor.py index 1c7cc267..68dde2fb 100644 --- a/test/test_neighbor.py +++ b/test/test_neighbor.py @@ -5,7 +5,8 @@ from socket import AF_INET, AF_INET6, inet_pton from framework import VppTestCase, VppTestRunner from vpp_neighbor import VppNeighbor, find_nbr -from vpp_ip_route import VppIpRoute, VppRoutePath, find_route +from vpp_ip_route import VppIpRoute, VppRoutePath, find_route, \ + VppIpTable from scapy.packet import Raw from scapy.layers.l2 import Ether, ARP, Dot1Q @@ -39,11 +40,13 @@ class ARPTestCase(VppTestCase): self.pg1.config_ip6() # pg3 in a different VRF + self.tbl = VppIpTable(self, 1) + self.tbl.add_vpp_config() + self.pg3.set_table_ip4(1) self.pg3.config_ip4() def tearDown(self): - super(ARPTestCase, self).tearDown() self.pg0.unconfig_ip4() self.pg0.unconfig_ip6() @@ -51,10 +54,13 @@ class ARPTestCase(VppTestCase): self.pg1.unconfig_ip6() self.pg3.unconfig_ip4() + self.pg3.set_table_ip4(0) for i in self.pg_interfaces: i.admin_down() + super(ARPTestCase, self).tearDown() + def verify_arp_req(self, rx, smac, sip, dip): ether = rx[Ether] self.assertEqual(ether.dst, "ff:ff:ff:ff:ff:ff") @@ -1080,6 +1086,62 @@ class ARPTestCase(VppTestCase): self.pg0.remote_ip4, self.pg1.remote_hosts[1].ip4) + def test_arp_static(self): + """ ARP Static""" + self.pg2.generate_remote_hosts(3) + + # + # Add a static ARP entry + # + static_arp = VppNeighbor(self, + self.pg2.sw_if_index, + self.pg2.remote_hosts[1].mac, + self.pg2.remote_hosts[1].ip4, + is_static=1) + static_arp.add_vpp_config() + + # + # Add the connected prefix to the interface + # + self.pg2.config_ip4() + + # + # We should now find the adj-fib + # + self.assertTrue(find_nbr(self, + self.pg2.sw_if_index, + self.pg2.remote_hosts[1].ip4, + is_static=1)) + self.assertTrue(find_route(self, + self.pg2.remote_hosts[1].ip4, + 32)) + + # + # remove the connected + # + self.pg2.unconfig_ip4() + + # + # put the interface into table 1 + # + self.pg2.set_table_ip4(1) + + # + # configure the same connected and expect to find the + # adj fib in the new table + # + self.pg2.config_ip4() + self.assertTrue(find_route(self, + self.pg2.remote_hosts[1].ip4, + 32, + table_id=1)) + + # + # clean-up + # + self.pg2.unconfig_ip4() + self.pg2.set_table_ip4(0) + if __name__ == '__main__': unittest.main(testRunner=VppTestRunner) diff --git a/test/vpp_ip_route.py b/test/vpp_ip_route.py index 2c489e3c..b7993793 100644 --- a/test/vpp_ip_route.py +++ b/test/vpp_ip_route.py @@ -54,6 +54,46 @@ def find_route(test, ip_addr, len, table_id=0, 
inet=AF_INET): return False +class VppIpTable(VppObject): + + def __init__(self, + test, + table_id, + is_ip6=0): + self._test = test + self.table_id = table_id + self.is_ip6 = is_ip6 + + def add_vpp_config(self): + self._test.vapi.ip_table_add_del( + self.table_id, + is_ipv6=self.is_ip6, + is_add=1) + self._test.registry.register(self, self._test.logger) + + def remove_vpp_config(self): + self._test.vapi.ip_table_add_del( + self.table_id, + is_ipv6=self.is_ip6, + is_add=0) + + def query_vpp_config(self): + # find the default route + return find_route(self._test, + "::" if self.is_ip6 else "0.0.0.0", + 0, + self.table_id, + inet=AF_INET6 if self.is_ip6 == 1 else AF_INET) + + def __str__(self): + return self.object_id() + + def object_id(self): + return ("table-%s-%d" % + ("v6" if self.is_ip6 == 1 else "v4", + self.table_id)) + + class VppRoutePath(object): def __init__( @@ -391,6 +431,39 @@ class VppMplsIpBind(VppObject): self.dest_addr_len)) +class VppMplsTable(VppObject): + + def __init__(self, + test, + table_id): + self._test = test + self.table_id = table_id + + def add_vpp_config(self): + self._test.vapi.mpls_table_add_del( + self.table_id, + is_add=1) + self._test.registry.register(self, self._test.logger) + + def remove_vpp_config(self): + self._test.vapi.mpls_table_add_del( + self.table_id, + is_add=0) + + def query_vpp_config(self): + # find the default route + dump = self._test.vapi.mpls_fib_dump() + if len(dump): + return True + return False + + def __str__(self): + return self.object_id() + + def object_id(self): + return ("table-mpls-%d" % (self.table_id)) + + class VppMplsRoute(VppObject): """ MPLS Route/LSP diff --git a/test/vpp_papi_provider.py b/test/vpp_papi_provider.py index b70da026..519aff80 100644 --- a/test/vpp_papi_provider.py +++ b/test/vpp_papi_provider.py @@ -652,6 +652,24 @@ class VppPapiProvider(object): return self.api(self.papi.delete_loopback, {'sw_if_index': sw_if_index, }) + def ip_table_add_del(self, + table_id, + is_add=1, + is_ipv6=0): + """ + + :param table_id + :param is_add: (Default value = 1) + :param is_ipv6: (Default value = 0) + + """ + + return self.api( + self.papi.ip_table_add_del, + {'table_id': table_id, + 'is_add': is_add, + 'is_ipv6': is_ipv6}) + def ip_add_del_route( self, dst_address, @@ -664,7 +682,6 @@ class VppPapiProvider(object): next_hop_n_out_labels=0, next_hop_out_label_stack=[], next_hop_via_label=MPLS_LABEL_INVALID, - create_vrf_if_needed=0, is_resolve_host=0, is_resolve_attached=0, classify_table_index=0xFFFFFFFF, @@ -687,7 +704,6 @@ class VppPapiProvider(object): :param vrf_id: (Default value = 0) :param lookup_in_vrf: (Default value = 0) :param classify_table_index: (Default value = 0xFFFFFFFF) - :param create_vrf_if_needed: (Default value = 0) :param is_add: (Default value = 1) :param is_drop: (Default value = 0) :param is_ipv6: (Default value = 0) @@ -707,7 +723,6 @@ class VppPapiProvider(object): 'table_id': table_id, 'classify_table_index': classify_table_index, 'next_hop_table_id': next_hop_table_id, - 'create_vrf_if_needed': create_vrf_if_needed, 'is_add': is_add, 'is_drop': is_drop, 'is_unreach': is_unreach, @@ -912,6 +927,22 @@ class VppPapiProvider(object): def mpls_fib_dump(self): return self.api(self.papi.mpls_fib_dump, {}) + def mpls_table_add_del( + self, + table_id, + is_add=1): + """ + + :param table_id + :param is_add: (Default value = 1) + + """ + + return self.api( + self.papi.mpls_table_add_del, + {'mt_table_id': table_id, + 'mt_is_add': is_add}) + def mpls_route_add_del( self, label, @@ -925,7 +956,6 @@ 
class VppPapiProvider(object): next_hop_n_out_labels=0, next_hop_out_label_stack=[], next_hop_via_label=MPLS_LABEL_INVALID, - create_vrf_if_needed=0, is_resolve_host=0, is_resolve_attached=0, is_interface_rx=0, @@ -947,7 +977,6 @@ class VppPapiProvider(object): :param vrf_id: (Default value = 0) :param lookup_in_vrf: (Default value = 0) :param classify_table_index: (Default value = 0xFFFFFFFF) - :param create_vrf_if_needed: (Default value = 0) :param is_add: (Default value = 1) :param is_drop: (Default value = 0) :param is_ipv6: (Default value = 0) @@ -968,7 +997,6 @@ class VppPapiProvider(object): 'mr_eos': eos, 'mr_table_id': table_id, 'mr_classify_table_index': classify_table_index, - 'mr_create_table_if_needed': create_vrf_if_needed, 'mr_is_add': is_add, 'mr_is_classify': is_classify, 'mr_is_multipath': is_multipath, @@ -994,7 +1022,6 @@ class VppPapiProvider(object): table_id=0, ip_table_id=0, is_ip4=1, - create_vrf_if_needed=0, is_bind=1): """ """ @@ -1003,7 +1030,6 @@ class VppPapiProvider(object): {'mb_mpls_table_id': table_id, 'mb_label': label, 'mb_ip_table_id': ip_table_id, - 'mb_create_table_if_needed': create_vrf_if_needed, 'mb_is_bind': is_bind, 'mb_is_ip4': is_ip4, 'mb_address_length': dst_address_length, @@ -1020,7 +1046,6 @@ class VppPapiProvider(object): next_hop_n_out_labels=0, next_hop_out_label_stack=[], next_hop_via_label=MPLS_LABEL_INVALID, - create_vrf_if_needed=0, is_add=1, l2_only=0, is_multicast=0): @@ -1034,7 +1059,6 @@ class VppPapiProvider(object): :param vrf_id: (Default value = 0) :param lookup_in_vrf: (Default value = 0) :param classify_table_index: (Default value = 0xFFFFFFFF) - :param create_vrf_if_needed: (Default value = 0) :param is_add: (Default value = 1) :param is_drop: (Default value = 0) :param is_ipv6: (Default value = 0) @@ -1844,7 +1868,6 @@ class VppPapiProvider(object): i_flags, rpf_id=0, table_id=0, - create_vrf_if_needed=0, is_add=1, is_ipv6=0, is_local=0): @@ -1857,7 +1880,6 @@ class VppPapiProvider(object): 'itf_flags': i_flags, 'table_id': table_id, 'rpf_id': rpf_id, - 'create_vrf_if_needed': create_vrf_if_needed, 'is_add': is_add, 'is_ipv6': is_ipv6, 'is_local': is_local, -- cgit 1.2.3-korg From 50570ecef6d37b0c9d8c002f6dadb4ed0e138aa3 Mon Sep 17 00:00:00 2001 From: Jerome Tollet Date: Thu, 14 Sep 2017 12:53:56 +0100 Subject: Update of free text tag patch for BD Change-Id: Ia886ff2bfa2cf33ffbaa35ec89494d4300ec2769 Signed-off-by: Jerome Tollet --- src/vat/api_format.c | 23 ++++++++++++++++++++--- src/vnet/l2/l2.api | 1 + src/vnet/l2/l2_bd.c | 9 ++++++--- src/vnet/l2/l2_bd.h | 1 - src/vpp/api/custom_dump.c | 2 ++ 5 files changed, 29 insertions(+), 7 deletions(-) (limited to 'src/vpp/api') diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 43d1eb3d..ff3354c9 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -6356,6 +6356,7 @@ api_bridge_domain_add_del (vat_main_t * vam) u32 bd_id = ~0; u8 is_add = 1; u32 flood = 1, forward = 1, learn = 1, uu_flood = 1, arp_term = 0; + u8 *bd_tag = NULL; u32 mac_age = 0; int ret; @@ -6376,6 +6377,8 @@ api_bridge_domain_add_del (vat_main_t * vam) ; else if (unformat (i, "mac-age %d", &mac_age)) ; + else if (unformat (i, "bd-tag %s", &bd_tag)) + ; else if (unformat (i, "del")) { is_add = 0; @@ -6388,13 +6391,22 @@ api_bridge_domain_add_del (vat_main_t * vam) if (bd_id == ~0) { errmsg ("missing bridge domain"); - return -99; + ret = -99; + goto done; } if (mac_age > 255) { errmsg ("mac age must be less than 256 "); - return -99; + ret = -99; + goto done; + } + + if ((bd_tag) && 
(strlen ((char *) bd_tag) > 63)) + { + errmsg ("bd-tag cannot be longer than 63"); + ret = -99; + goto done; } M (BRIDGE_DOMAIN_ADD_DEL, mp); @@ -6407,9 +6419,14 @@ api_bridge_domain_add_del (vat_main_t * vam) mp->arp_term = arp_term; mp->is_add = is_add; mp->mac_age = (u8) mac_age; + if (bd_tag) + strcpy ((char *) mp->bd_tag, (char *) bd_tag); S (mp); W (ret); + +done: + vec_free (bd_tag); return ret; } @@ -20152,7 +20169,7 @@ _(sw_interface_set_l2_bridge, \ "enable | disable") \ _(bridge_domain_set_mac_age, "bd_id mac-age 0-255") \ _(bridge_domain_add_del, \ - "bd_id [flood 1|0] [uu-flood 1|0] [forward 1|0] [learn 1|0] [arp-term 1|0] [mac-age 0-255] [del]\n") \ + "bd_id [flood 1|0] [uu-flood 1|0] [forward 1|0] [learn 1|0] [arp-term 1|0] [mac-age 0-255] [bd-tag ] [del]\n") \ _(bridge_domain_dump, "[bd_id ]\n") \ _(l2fib_add_del, \ "mac bd_id [del] | sw_if | sw_if_index [static] [filter] [bvi] [count ]\n") \ diff --git a/src/vnet/l2/l2.api b/src/vnet/l2/l2.api index baf830fa..ac923de4 100644 --- a/src/vnet/l2/l2.api +++ b/src/vnet/l2/l2.api @@ -285,6 +285,7 @@ typeonly manual_print manual_endian define bridge_domain_sw_if @param learn - learning state on all interfaces in the bd @param arp_term - arp termination state on all interfaces in the bd @param mac_age - mac aging time in min, 0 for disabled + @param bd_tag - optional textual tag for the bridge domain @param n_sw_ifs - number of sw_if_index's in the domain */ manual_print manual_endian define bridge_domain_details diff --git a/src/vnet/l2/l2_bd.c b/src/vnet/l2/l2_bd.c index 3670a4f0..b1abb4c0 100644 --- a/src/vnet/l2/l2_bd.c +++ b/src/vnet/l2/l2_bd.c @@ -291,8 +291,11 @@ bd_set_mac_age (vlib_main_t * vm, u32 bd_index, u8 age) L2_MAC_AGE_PROCESS_EVENT_STOP, 0); } +/** + Set the tag for the bridge domain. +*/ -void +static void bd_set_bd_tag (vlib_main_t * vm, u32 bd_index, u8 * bd_tag) { u8 *old; @@ -1191,8 +1194,8 @@ bd_add_del (l2_bridge_domain_add_del_args_t * a) /** Create or delete bridge-domain. 
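On the VAT side, the updated help string in api_format.c above documents the new argument as "[bd-tag <text>]", and the added check limits the tag to 63 characters. An illustrative invocation (bridge-domain id and tag are made up):

    bridge_domain_add_del bd_id 1 flood 1 uu-flood 1 forward 1 learn 1 arp-term 1 mac-age 5 bd-tag customer-a
    bridge_domain_add_del bd_id 1 del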
The CLI format: - create bridge-domain [learn <0|1>] [forward <0|1>] [uu-flood <0|1>] - [flood <0|1>] [arp-term <0|1>] [mac-age ] [del] + create bridge-domain [learn <0|1>] [forward <0|1>] [uu-flood <0|1>] [flood <0|1>] + [arp-term <0|1>] [mac-age ] [bd-tag ] [del] */ static clib_error_t * diff --git a/src/vnet/l2/l2_bd.h b/src/vnet/l2/l2_bd.h index e60f1ab3..fd34ae67 100644 --- a/src/vnet/l2/l2_bd.h +++ b/src/vnet/l2/l2_bd.h @@ -134,7 +134,6 @@ u32 bd_remove_member (l2_bridge_domain_t * bd_config, u32 sw_if_index); u32 bd_set_flags (vlib_main_t * vm, u32 bd_index, u32 flags, u32 enable); void bd_set_mac_age (vlib_main_t * vm, u32 bd_index, u8 age); -void bd_set_bd_tag (vlib_main_t * vm, u32 bd_index, u8 * bd_tag); int bd_add_del (l2_bridge_domain_add_del_args_t * args); /** diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c index be74b83a..2e1f980e 100644 --- a/src/vpp/api/custom_dump.c +++ b/src/vpp/api/custom_dump.c @@ -279,6 +279,8 @@ static void *vl_api_bridge_domain_add_del_t_print if (mp->is_add) { + if (mp->bd_tag[0]) + s = format (s, "bd_tag %s ", mp->bd_tag); s = format (s, "flood %d uu-flood %d ", mp->flood, mp->uu_flood); s = format (s, "forward %d learn %d ", mp->forward, mp->learn); s = format (s, "arp-term %d mac-age %d", mp->arp_term, mp->mac_age); -- cgit 1.2.3-korg From 2019748a0ef815852281aae0a603f0e970fa9d91 Mon Sep 17 00:00:00 2001 From: Eyal Bari Date: Wed, 13 Sep 2017 12:29:08 +0300 Subject: L2BD,ARP-TERM:fix arp query report mechanism+test previous mechanism was emitting duplicates of last event, when handling multiple arp queries. tests: * arp events sent for graps * duplicate suppression * verify no events when disabled Change-Id: I84adc23980d43b819261eccf02ec056b5cec61df Signed-off-by: Eyal Bari --- src/vlibapi/api_helper_macros.h | 3 +- src/vnet/ethernet/arp.c | 77 ++++++++++++------ src/vnet/ethernet/ethernet.h | 9 +++ src/vpp/api/api.c | 174 +++++++++++++++++++++++++++++++--------- test/test_l2bd_arp_term.py | 82 ++++++++++++++++++- test/vpp_papi_provider.py | 12 ++- 6 files changed, 288 insertions(+), 69 deletions(-) (limited to 'src/vpp/api') diff --git a/src/vlibapi/api_helper_macros.h b/src/vlibapi/api_helper_macros.h index b8a9978a..13734896 100644 --- a/src/vlibapi/api_helper_macros.h +++ b/src/vlibapi/api_helper_macros.h @@ -215,7 +215,8 @@ _(from_netconf_server) \ _(to_netconf_client) \ _(from_netconf_client) \ _(oam_events) \ -_(bfd_events) +_(bfd_events) \ +_(wc_ip4_arp_events) typedef struct { diff --git a/src/vnet/ethernet/arp.c b/src/vnet/ethernet/arp.c index 957706c1..2f3aa6c7 100644 --- a/src/vnet/ethernet/arp.c +++ b/src/vnet/ethernet/arp.c @@ -92,6 +92,8 @@ typedef struct /* Proxy arp vector */ ethernet_proxy_arp_t *proxy_arps; + + uword wc_ip4_arp_publisher_node; } ethernet_arp_main_t; static ethernet_arp_main_t ethernet_arp_main; @@ -106,6 +108,7 @@ typedef struct #define ETHERNET_ARP_ARGS_REMOVE (1<<0) #define ETHERNET_ARP_ARGS_FLUSH (1<<1) #define ETHERNET_ARP_ARGS_POPULATE (1<<2) +#define ETHERNET_ARP_ARGS_WC_PUB (1<<3) } vnet_arp_set_ip4_over_ethernet_rpc_args_t; static const u8 vrrp_prefix[] = { 0x00, 0x00, 0x5E, 0x00, 0x01 }; @@ -540,7 +543,7 @@ arp_adj_fib_add (ethernet_arp_ip4_entry_t * e, u32 fib_index) fib_table_lock (fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_ADJ); } -int +static int vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm, vnet_arp_set_ip4_over_ethernet_rpc_args_t * args) @@ -1507,6 +1510,49 @@ vnet_arp_populate_ip4_over_ethernet (vnet_main_t * vnm, return 0; } +/** + * @brief publish wildcard arp 
event + * @param sw_if_index The interface on which the ARP entires are acted + */ +static int +vnet_arp_wc_publish (u32 sw_if_index, void *a_arg) +{ + ethernet_arp_ip4_over_ethernet_address_t *a = a_arg; + vnet_arp_set_ip4_over_ethernet_rpc_args_t args = { + .flags = ETHERNET_ARP_ARGS_WC_PUB, + .sw_if_index = sw_if_index, + .a = *a + }; + + vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback, + (u8 *) & args, sizeof (args)); + return 0; +} + +static void +vnet_arp_wc_publish_internal (vnet_main_t * vnm, + vnet_arp_set_ip4_over_ethernet_rpc_args_t * + args) +{ + vlib_main_t *vm = vlib_get_main (); + ethernet_arp_main_t *am = ðernet_arp_main; + if (am->wc_ip4_arp_publisher_node == (uword) ~ 0) + return; + wc_arp_report_t *r = + vlib_process_signal_event_data (vm, am->wc_ip4_arp_publisher_node, 1, 1, + sizeof *r); + r->ip4 = args->a.ip4.as_u32; + r->sw_if_index = args->sw_if_index; + memcpy (r->mac, args->a.ethernet, sizeof r->mac); +} + +void +wc_arp_set_publisher_node (uword node_index) +{ + ethernet_arp_main_t *am = ðernet_arp_main; + am->wc_ip4_arp_publisher_node = node_index; +} + /* * arp_add_del_interface_address * @@ -1652,6 +1698,7 @@ ethernet_arp_init (vlib_main_t * vm) am->pending_resolutions_by_address = hash_create (0, sizeof (uword)); am->mac_changes_by_address = hash_create (0, sizeof (uword)); + am->wc_ip4_arp_publisher_node = (uword) ~ 0; /* don't trace ARP error packets */ { @@ -1795,6 +1842,8 @@ set_ip4_over_ethernet_rpc_callback (vnet_arp_set_ip4_over_ethernet_rpc_args_t vnet_arp_flush_ip4_over_ethernet_internal (vm, a); else if (a->flags & ETHERNET_ARP_ARGS_POPULATE) vnet_arp_populate_ip4_over_ethernet_internal (vm, a); + else if (a->flags & ETHERNET_ARP_ARGS_WC_PUB) + vnet_arp_wc_publish_internal (vm, a); else vnet_arp_set_ip4_over_ethernet_internal (vm, a); } @@ -2277,31 +2326,9 @@ arp_term_l2bd (vlib_main_t * vm, /* Check if anyone want ARP request events for L2 BDs */ { - pending_resolution_t *mc; ethernet_arp_main_t *am = ðernet_arp_main; - uword *p = hash_get (am->mac_changes_by_address, 0); - if (p) - { - u32 next_index = p[0]; - while (next_index != (u32) ~ 0) - { - int (*fp) (u32, u8 *, u32, u32); - int rv = 1; - mc = pool_elt_at_index (am->mac_changes, next_index); - fp = mc->data_callback; - /* Call the callback, return 1 to suppress dup events */ - if (fp) - rv = (*fp) (mc->data, - arp0->ip4_over_ethernet[0].ethernet, - sw_if_index0, - arp0->ip4_over_ethernet[0].ip4.as_u32); - /* Signal the resolver process */ - if (rv == 0) - vlib_process_signal_event (vm, mc->node_index, - mc->type_opaque, mc->data); - next_index = mc->next_index; - } - } + if (am->wc_ip4_arp_publisher_node != (uword) ~ 0) + vnet_arp_wc_publish (sw_if_index0, &arp0->ip4_over_ethernet[0]); } /* lookup BD mac_by_ip4 hash table for MAC entry */ diff --git a/src/vnet/ethernet/ethernet.h b/src/vnet/ethernet/ethernet.h index 9ca256c9..1a9e9790 100644 --- a/src/vnet/ethernet/ethernet.h +++ b/src/vnet/ethernet/ethernet.h @@ -543,6 +543,8 @@ int vnet_add_del_ip4_arp_change_event (vnet_main_t * vnm, uword type_opaque, uword data, int is_add); +void wc_arp_set_publisher_node (uword inode_index); + void ethernet_arp_change_mac (u32 sw_if_index); void ethernet_ndp_change_mac (u32 sw_if_index); @@ -557,6 +559,13 @@ const u8 *ethernet_ip6_mcast_dst_addr (void); extern vlib_node_registration_t ethernet_input_node; +typedef struct +{ + u32 sw_if_index; + u32 ip4; + u8 mac[6]; +} wc_arp_report_t; + #endif /* included_ethernet_h */ /* diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index 
044ddb5b..e229a5e3 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -299,17 +299,17 @@ static uword resolver_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) { - uword event_type; - uword *event_data = 0; - f64 timeout = 100.0; - int i; + volatile f64 timeout = 100.0; + volatile uword *event_data = 0; while (1) { vlib_process_wait_for_event_or_clock (vm, timeout); - event_type = vlib_process_get_events (vm, &event_data); + uword event_type = + vlib_process_get_events (vm, (uword **) & event_data); + int i; switch (event_type) { case RESOLUTION_PENDING_EVENT: @@ -1352,26 +1352,15 @@ arp_change_data_callback (u32 pool_index, u8 * new_mac, return 1; event = pool_elt_at_index (am->arp_events, pool_index); - /* *INDENT-OFF* */ - if (memcmp (&event->new_mac, new_mac, sizeof (event->new_mac))) + if (eth_mac_equal (event->new_mac, new_mac) && + sw_if_index == ntohl (event->sw_if_index)) { - clib_memcpy (event->new_mac, new_mac, sizeof (event->new_mac)); + return 1; } - else - { /* same mac */ - if (sw_if_index == ntohl(event->sw_if_index) && - (!event->mac_ip || - /* for BD case, also check IP address with 10 sec timeout */ - (address == event->address && - (now - arp_event_last_time) < 10.0))) - return 1; - } - /* *INDENT-ON* */ - arp_event_last_time = now; + clib_memcpy (event->new_mac, new_mac, sizeof (event->new_mac)); event->sw_if_index = htonl (sw_if_index); - if (event->mac_ip) - event->address = address; + arp_event_last_time = now; return 0; } @@ -1391,7 +1380,7 @@ nd_change_data_callback (u32 pool_index, u8 * new_mac, event = pool_elt_at_index (am->nd_events, pool_index); /* *INDENT-OFF* */ - if (memcmp (&event->new_mac, new_mac, sizeof (event->new_mac))) + if (memcmp (event->new_mac, new_mac, sizeof (event->new_mac))) { clib_memcpy (event->new_mac, new_mac, sizeof (event->new_mac)); } @@ -1438,13 +1427,123 @@ nd_change_delete_callback (u32 pool_index, u8 * notused) return 0; } +static vlib_node_registration_t wc_ip4_arp_process_node; + +static uword +wc_ip4_arp_process (vlib_main_t * vm, + vlib_node_runtime_t * rt, vlib_frame_t * f) +{ + /* These cross the longjmp boundry (vlib_process_wait_for_event) + * and need to be volatile - to prevent them from being optimized into + * a register - which could change during suspension */ + + volatile wc_arp_report_t prev = { 0 }; + volatile f64 last = vlib_time_now (vm); + + while (1) + { + vlib_process_wait_for_event (vm); + uword event_type; + wc_arp_report_t *event_data = + vlib_process_get_event_data (vm, &event_type); + + f64 now = vlib_time_now (vm); + int i; + for (i = 0; i < vec_len (event_data); i++) + { + /* discard dup event */ + if (prev.ip4 == event_data[i].ip4 && + eth_mac_equal ((u8 *) prev.mac, event_data[i].mac) && + prev.sw_if_index == event_data[i].sw_if_index && + (now - last) < 10.0) + { + continue; + } + prev = event_data[i]; + last = now; + vpe_client_registration_t *reg; + /* *INDENT-OFF* */ + pool_foreach(reg, vpe_api_main.wc_ip4_arp_events_registrations, + ({ + unix_shared_memory_queue_t *q; + q = vl_api_client_index_to_input_queue (reg->client_index); + if (q && q->cursize < q->maxsize) + { + vl_api_ip4_arp_event_t * event = vl_msg_api_alloc (sizeof *event); + memset (event, 0, sizeof *event); + event->_vl_msg_id = htons (VL_API_IP4_ARP_EVENT); + event->client_index = reg->client_index; + event->pid = reg->client_pid; + event->mac_ip = 1; + event->address = event_data[i].ip4; + event->sw_if_index = htonl(event_data[i].sw_if_index); + memcpy(event->new_mac, event_data[i].mac, sizeof 
event->new_mac); + vl_msg_api_send_shmem (q, (u8 *) &event); + } + })); + /* *INDENT-ON* */ + } + vlib_process_put_event_data (vm, event_data); + } + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (wc_ip4_arp_process_node,static) = { + .function = wc_ip4_arp_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "wildcard-ip4-arp-publisher-process", +}; +/* *INDENT-ON* */ + static void vl_api_want_ip4_arp_events_t_handler (vl_api_want_ip4_arp_events_t * mp) { vpe_api_main_t *am = &vpe_api_main; vnet_main_t *vnm = vnet_get_main (); vl_api_want_ip4_arp_events_reply_t *rmp; - int rv; + int rv = 0; + + if (mp->address == 0) + { + uword *p = + hash_get (am->wc_ip4_arp_events_registration_hash, mp->client_index); + vpe_client_registration_t *rp; + if (p) + { + if (mp->enable_disable) + { + clib_warning ("pid %d: already enabled...", mp->pid); + rv = VNET_API_ERROR_INVALID_REGISTRATION; + goto reply; + } + else + { + rp = + pool_elt_at_index (am->wc_ip4_arp_events_registrations, p[0]); + pool_put (am->wc_ip4_arp_events_registrations, rp); + hash_unset (am->wc_ip4_arp_events_registration_hash, + mp->client_index); + if (pool_elts (am->wc_ip4_arp_events_registrations) == 0) + wc_arp_set_publisher_node (~0); + goto reply; + } + } + if (mp->enable_disable == 0) + { + clib_warning ("pid %d: already disabled...", mp->pid); + rv = VNET_API_ERROR_INVALID_REGISTRATION; + goto reply; + } + pool_get (am->wc_ip4_arp_events_registrations, rp); + rp->client_index = mp->client_index; + rp->client_pid = mp->pid; + hash_set (am->wc_ip4_arp_events_registration_hash, rp->client_index, + rp - am->wc_ip4_arp_events_registrations); + wc_arp_set_publisher_node (wc_ip4_arp_process_node.index); + goto reply; + } if (mp->enable_disable) { @@ -1459,12 +1558,12 @@ vl_api_want_ip4_arp_events_t_handler (vl_api_want_ip4_arp_events_t * mp) if (rv) { pool_put (am->arp_events, event); - goto out; + goto reply; } memset (event, 0, sizeof (*event)); /* Python API expects events to have no context */ - event->_vl_msg_id = ntohs (VL_API_IP4_ARP_EVENT); + event->_vl_msg_id = htons (VL_API_IP4_ARP_EVENT); event->client_index = mp->client_index; event->address = mp->address; event->pid = mp->pid; @@ -1479,7 +1578,7 @@ vl_api_want_ip4_arp_events_t_handler (vl_api_want_ip4_arp_events_t * mp) vpe_resolver_process_node.index, IP4_ARP_EVENT, ~0 /* pool index */ , 0 /* is_add */ ); } -out: +reply: REPLY_MACRO (VL_API_WANT_IP4_ARP_EVENTS_REPLY); } @@ -2072,13 +2171,10 @@ vpe_api_init (vlib_main_t * vm) am->vlib_main = vm; am->vnet_main = vnet_get_main (); - am->interface_events_registration_hash = hash_create (0, sizeof (uword)); - am->to_netconf_server_registration_hash = hash_create (0, sizeof (uword)); - am->from_netconf_server_registration_hash = hash_create (0, sizeof (uword)); - am->to_netconf_client_registration_hash = hash_create (0, sizeof (uword)); - am->from_netconf_client_registration_hash = hash_create (0, sizeof (uword)); - am->oam_events_registration_hash = hash_create (0, sizeof (uword)); - am->bfd_events_registration_hash = hash_create (0, sizeof (uword)); +#define _(a) \ + am->a##_registration_hash = hash_create (0, sizeof (uword)); + foreach_registration_hash; +#undef _ vl_set_memory_region_name ("/vpe-api"); vl_enable_disable_memory_api (vm, 1 /* enable it */ ); @@ -2219,10 +2315,7 @@ format_arp_event (u8 * s, va_list * args) vl_api_ip4_arp_event_t *event = va_arg (*args, vl_api_ip4_arp_event_t *); s = format (s, "pid %d: ", ntohl (event->pid)); - if (event->mac_ip) - s = format (s, "bd mac/ip4 binding events"); - 
else - s = format (s, "resolution for %U", format_ip4_address, &event->address); + s = format (s, "resolution for %U", format_ip4_address, &event->address); return s; } @@ -2259,6 +2352,13 @@ show_ip_arp_nd_events_fn (vlib_main_t * vm, vlib_cli_output (vm, "%U", format_arp_event, arp_event); })); + vpe_client_registration_t *reg; + pool_foreach(reg, am->wc_ip4_arp_events_registrations, + ({ + vlib_cli_output (vm, "pid %d: bd mac/ip4 binding events", + ntohl (reg->client_pid)); + })); + pool_foreach (nd_event, am->nd_events, ({ vlib_cli_output (vm, "%U", format_nd_event, nd_event); diff --git a/test/test_l2bd_arp_term.py b/test/test_l2bd_arp_term.py index 80a7ff84..20885f88 100644 --- a/test/test_l2bd_arp_term.py +++ b/test/test_l2bd_arp_term.py @@ -5,7 +5,7 @@ import unittest import random import copy -from socket import AF_INET6 +from socket import AF_INET, AF_INET6 from scapy.packet import Raw from scapy.layers.l2 import Ether, ARP @@ -117,7 +117,7 @@ class TestL2bdArpTerm(VppTestCase): self.vapi.bridge_domain_add_del(bd_id=bd_id, is_add=is_add) @classmethod - def arp(cls, src_host, host): + def arp_req(cls, src_host, host): return (Ether(dst="ff:ff:ff:ff:ff:ff", src=src_host.mac) / ARP(op="who-has", hwsrc=src_host.bin_mac, @@ -126,7 +126,15 @@ class TestL2bdArpTerm(VppTestCase): @classmethod def arp_reqs(cls, src_host, entries): - return [cls.arp(src_host, e) for e in entries] + return [cls.arp_req(src_host, e) for e in entries] + + @classmethod + def garp_req(cls, host): + return cls.arp_req(host, host) + + @classmethod + def garp_reqs(cls, entries): + return [cls.garp_req(e) for e in entries] def arp_resp_host(self, src_host, arp_resp): ether = arp_resp[Ether] @@ -146,6 +154,20 @@ class TestL2bdArpTerm(VppTestCase): def arp_resp_hosts(self, src_host, pkts): return {self.arp_resp_host(src_host, p) for p in pkts} + def inttoip4(self, ip): + o1 = int(ip / 16777216) % 256 + o2 = int(ip / 65536) % 256 + o3 = int(ip / 256) % 256 + o4 = int(ip) % 256 + return '%(o1)s.%(o2)s.%(o3)s.%(o4)s' % locals() + + def arp_event_host(self, e): + return Host(mac=':'.join(['%02x' % ord(char) for char in e.new_mac]), + ip4=self.inttoip4(e.address)) + + def arp_event_hosts(self, evs): + return {self.arp_event_host(e) for e in evs} + @classmethod def ns_req(cls, src_host, host): nsma = in6_getnsma(inet_pton(AF_INET6, "fd10::ffff")) @@ -347,6 +369,60 @@ class TestL2bdArpTerm(VppTestCase): self.verify_nd(src_host, hosts, updated) self.bd_add_del(1, is_add=0) + def test_l2bd_arp_term_09(self): + """ L2BD arp term - send garps, verify arp event reports + """ + self.vapi.want_ip4_arp_events() + self.bd_add_del(1, is_add=1) + self.set_bd_flags(1, arp_term=True, flood=False, + uu_flood=False, learn=False) + macs = self.mac_list(range(90, 95)) + hosts = self.ip4_hosts(5, 1, macs) + + garps = self.garp_reqs(hosts) + self.bd_swifs(1)[0].add_stream(garps) + + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + evs = [self.vapi.wait_for_event(1, "ip4_arp_event") + for i in range(len(hosts))] + ev_hosts = self.arp_event_hosts(evs) + self.assertEqual(len(ev_hosts ^ hosts), 0) + + def test_l2bd_arp_term_10(self): + """ L2BD arp term - send duplicate garps, verify suppression + """ + macs = self.mac_list(range(70, 71)) + hosts = self.ip4_hosts(6, 1, macs) + + """ send the packet 5 times expect one event + """ + garps = self.garp_reqs(hosts) * 5 + self.bd_swifs(1)[0].add_stream(garps) + + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + evs = [self.vapi.wait_for_event(1, "ip4_arp_event") + for i in 
range(len(hosts))] + ev_hosts = self.arp_event_hosts(evs) + self.assertEqual(len(ev_hosts ^ hosts), 0) + + def test_l2bd_arp_term_11(self): + """ L2BD arp term - disable ip4 arp events,send garps, verify no events + """ + self.vapi.want_ip4_arp_events(enable_disable=0) + macs = self.mac_list(range(90, 95)) + hosts = self.ip4_hosts(5, 1, macs) + + garps = self.garp_reqs(hosts) + self.bd_swifs(1)[0].add_stream(garps) + + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + self.sleep(1) + self.assertEqual(len(self.vapi.collect_events()), 0) + self.bd_add_del(1, is_add=0) + if __name__ == '__main__': unittest.main(testRunner=VppTestRunner) diff --git a/test/vpp_papi_provider.py b/test/vpp_papi_provider.py index b63a2658..fcc67849 100644 --- a/test/vpp_papi_provider.py +++ b/test/vpp_papi_provider.py @@ -101,11 +101,11 @@ class VppPapiProvider(object): def wait_for_event(self, timeout, name=None): """ Wait for and return next event. """ if name: - self.test_class.logger.debug("Expecting event within %ss", - timeout) - else: self.test_class.logger.debug("Expecting event '%s' within %ss", name, timeout) + else: + self.test_class.logger.debug("Expecting event within %ss", + timeout) if self._events: self.test_class.logger.debug("Not waiting, event already queued") limit = time.time() + timeout @@ -419,6 +419,12 @@ class VppPapiProvider(object): 'ip_address': ip, 'mac_address': mac}) + def want_ip4_arp_events(self, enable_disable=1, address=0): + return self.api(self.papi.want_ip4_arp_events, + {'enable_disable': enable_disable, + 'address': address, + 'pid': os.getpid(), }) + def l2fib_add_del(self, mac, bd_id, sw_if_index, is_add=1, static_mac=0, filter_mac=0, bvi_mac=0): """Create/delete L2 FIB entry. -- cgit 1.2.3-korg From c125eccc10db9c0b9c5d161d3ad20b4fc8c69b26 Mon Sep 17 00:00:00 2001 From: Eyal Bari Date: Wed, 20 Sep 2017 11:29:17 +0300 Subject: IP-MAC,ND:wildcard events,fix sending multiple events wildcard ND events publisher was sending the last event mutiple times Change-Id: I6c30f2de03fa825e79df9005a3cfaaf68ff7ea2f Signed-off-by: Eyal Bari --- src/vlibapi/api_helper_macros.h | 1 + src/vnet/ethernet/arp.c | 12 ++- src/vnet/ethernet/ethernet.h | 2 +- src/vnet/ip/ip6_neighbor.c | 73 +++++++++---- src/vnet/ip/ip6_neighbor.h | 8 ++ src/vpp/api/api.c | 221 ++++++++++++++++++++++++++-------------- test/test_l2bd_arp_term.py | 68 ++++++++++++- test/vpp_papi_provider.py | 6 ++ 8 files changed, 288 insertions(+), 103 deletions(-) (limited to 'src/vpp/api') diff --git a/src/vlibapi/api_helper_macros.h b/src/vlibapi/api_helper_macros.h index 13734896..052cc6e7 100644 --- a/src/vlibapi/api_helper_macros.h +++ b/src/vlibapi/api_helper_macros.h @@ -216,6 +216,7 @@ _(to_netconf_client) \ _(from_netconf_client) \ _(oam_events) \ _(bfd_events) \ +_(wc_ip6_nd_events) \ _(wc_ip4_arp_events) typedef struct diff --git a/src/vnet/ethernet/arp.c b/src/vnet/ethernet/arp.c index 2f3aa6c7..e974d255 100644 --- a/src/vnet/ethernet/arp.c +++ b/src/vnet/ethernet/arp.c @@ -94,6 +94,7 @@ typedef struct ethernet_proxy_arp_t *proxy_arps; uword wc_ip4_arp_publisher_node; + uword wc_ip4_arp_publisher_et; } ethernet_arp_main_t; static ethernet_arp_main_t ethernet_arp_main; @@ -1536,21 +1537,24 @@ vnet_arp_wc_publish_internal (vnet_main_t * vnm, { vlib_main_t *vm = vlib_get_main (); ethernet_arp_main_t *am = ðernet_arp_main; - if (am->wc_ip4_arp_publisher_node == (uword) ~ 0) + uword ni = am->wc_ip4_arp_publisher_node; + uword et = am->wc_ip4_arp_publisher_et; + + if (ni == (uword) ~ 0) return; wc_arp_report_t *r 
= - vlib_process_signal_event_data (vm, am->wc_ip4_arp_publisher_node, 1, 1, - sizeof *r); + vlib_process_signal_event_data (vm, ni, et, 1, sizeof *r); r->ip4 = args->a.ip4.as_u32; r->sw_if_index = args->sw_if_index; memcpy (r->mac, args->a.ethernet, sizeof r->mac); } void -wc_arp_set_publisher_node (uword node_index) +wc_arp_set_publisher_node (uword node_index, uword event_type) { ethernet_arp_main_t *am = ðernet_arp_main; am->wc_ip4_arp_publisher_node = node_index; + am->wc_ip4_arp_publisher_et = event_type; } /* diff --git a/src/vnet/ethernet/ethernet.h b/src/vnet/ethernet/ethernet.h index 1a9e9790..a6846b13 100644 --- a/src/vnet/ethernet/ethernet.h +++ b/src/vnet/ethernet/ethernet.h @@ -543,7 +543,7 @@ int vnet_add_del_ip4_arp_change_event (vnet_main_t * vnm, uword type_opaque, uword data, int is_add); -void wc_arp_set_publisher_node (uword inode_index); +void wc_arp_set_publisher_node (uword inode_index, uword event_type); void ethernet_arp_change_mac (u32 sw_if_index); void ethernet_ndp_change_mac (u32 sw_if_index); diff --git a/src/vnet/ip/ip6_neighbor.c b/src/vnet/ip/ip6_neighbor.c index 345ebd34..1908a679 100644 --- a/src/vnet/ip/ip6_neighbor.c +++ b/src/vnet/ip/ip6_neighbor.c @@ -195,6 +195,9 @@ typedef struct u32 limit_neighbor_cache_size; u32 neighbor_delete_rotor; + /* Wildcard nd report publisher */ + uword wc_ip6_nd_publisher_node; + uword wc_ip6_nd_publisher_et; } ip6_neighbor_main_t; /* ipv6 neighbor discovery - timer/event types */ @@ -216,6 +219,50 @@ typedef union static ip6_neighbor_main_t ip6_neighbor_main; static ip6_address_t ip6a_zero; /* ip6 address 0 */ +static void wc_nd_signal_report (wc_nd_report_t * r); + +/** + * @brief publish wildcard arp event + * @param sw_if_index The interface on which the ARP entires are acted + */ +static int +vnet_nd_wc_publish (u32 sw_if_index, u8 * mac, ip6_address_t * ip6) +{ + wc_nd_report_t r = { + .sw_if_index = sw_if_index, + .ip6 = *ip6, + }; + memcpy (r.mac, mac, sizeof r.mac); + + void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length); + vl_api_rpc_call_main_thread (wc_nd_signal_report, (u8 *) & r, sizeof r); + return 0; +} + +static void +wc_nd_signal_report (wc_nd_report_t * r) +{ + vlib_main_t *vm = vlib_get_main (); + ip6_neighbor_main_t *nm = &ip6_neighbor_main; + uword ni = nm->wc_ip6_nd_publisher_node; + uword et = nm->wc_ip6_nd_publisher_et; + + if (ni == (uword) ~ 0) + return; + wc_nd_report_t *q = + vlib_process_signal_event_data (vm, ni, et, 1, sizeof *q); + + *q = *r; +} + +void +wc_nd_set_publisher_node (uword node_index, uword event_type) +{ + ip6_neighbor_main_t *nm = &ip6_neighbor_main; + nm->wc_ip6_nd_publisher_node = node_index; + nm->wc_ip6_nd_publisher_et = event_type; +} + static u8 * format_ip6_neighbor_ip6_entry (u8 * s, va_list * va) { @@ -3931,6 +3978,8 @@ ip6_neighbor_init (vlib_main_t * vm) /* default, configurable */ nm->limit_neighbor_cache_size = 50000; + nm->wc_ip6_nd_publisher_node = (uword) ~ 0; + #if 0 /* $$$$ Hack fix for today */ vec_validate_init_empty @@ -4047,7 +4096,6 @@ vnet_ip6_nd_term (vlib_main_t * vm, { ip6_neighbor_main_t *nm = &ip6_neighbor_main; icmp6_neighbor_solicitation_or_advertisement_header_t *ndh; - pending_resolution_t *mc; ndh = ip6_next_header (ip); if (ndh->icmp.type != ICMP6_neighbor_solicitation && @@ -4063,26 +4111,11 @@ vnet_ip6_nd_term (vlib_main_t * vm, } /* Check if anyone want ND events for L2 BDs */ - uword *p = mhash_get (&nm->mac_changes_by_address, &ip6a_zero); - if (p && !ip6_address_is_link_local_unicast (&ip->src_address)) + if 
(PREDICT_FALSE + (nm->wc_ip6_nd_publisher_node != (uword) ~ 0 + && !ip6_address_is_link_local_unicast (&ip->src_address))) { - u32 next_index = p[0]; - while (next_index != (u32) ~ 0) - { - int (*fp) (u32, u8 *, u32, ip6_address_t *); - int rv = 1; - mc = pool_elt_at_index (nm->mac_changes, next_index); - fp = mc->data_callback; - /* Call the callback, return 1 to suppress dup events */ - if (fp) - rv = (*fp) (mc->data, - eth->src_address, sw_if_index, &ip->src_address); - /* Signal the resolver process */ - if (rv == 0) - vlib_process_signal_event (vm, mc->node_index, - mc->type_opaque, mc->data); - next_index = mc->next_index; - } + vnet_nd_wc_publish (sw_if_index, eth->src_address, &ip->src_address); } /* Check if MAC entry exsist for solicited target IP */ diff --git a/src/vnet/ip/ip6_neighbor.h b/src/vnet/ip/ip6_neighbor.h index 0e302ed4..ed80381b 100644 --- a/src/vnet/ip/ip6_neighbor.h +++ b/src/vnet/ip/ip6_neighbor.h @@ -89,6 +89,14 @@ extern int ip6_neighbor_proxy_add_del (u32 sw_if_index, u32 ip6_neighbor_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add); +typedef struct +{ + u32 sw_if_index; + ip6_address_t ip6; + u8 mac[6]; +} wc_nd_report_t; + +void wc_nd_set_publisher_node (uword node_index, uword event_type); #endif /* included_ip6_neighbor_h */ diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index e229a5e3..d020314b 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -1345,8 +1345,6 @@ arp_change_data_callback (u32 pool_index, u8 * new_mac, vpe_api_main_t *am = &vpe_api_main; vlib_main_t *vm = am->vlib_main; vl_api_ip4_arp_event_t *event; - static f64 arp_event_last_time; - f64 now = vlib_time_now (vm); if (pool_is_free_index (am->arp_events, pool_index)) return 1; @@ -1360,7 +1358,6 @@ arp_change_data_callback (u32 pool_index, u8 * new_mac, clib_memcpy (event->new_mac, new_mac, sizeof (event->new_mac)); event->sw_if_index = htonl (sw_if_index); - arp_event_last_time = now; return 0; } @@ -1371,35 +1368,19 @@ nd_change_data_callback (u32 pool_index, u8 * new_mac, vpe_api_main_t *am = &vpe_api_main; vlib_main_t *vm = am->vlib_main; vl_api_ip6_nd_event_t *event; - static f64 nd_event_last_time; - f64 now = vlib_time_now (vm); if (pool_is_free_index (am->nd_events, pool_index)) return 1; event = pool_elt_at_index (am->nd_events, pool_index); - - /* *INDENT-OFF* */ - if (memcmp (event->new_mac, new_mac, sizeof (event->new_mac))) + if (eth_mac_equal (event->new_mac, new_mac) && + sw_if_index == ntohl (event->sw_if_index)) { - clib_memcpy (event->new_mac, new_mac, sizeof (event->new_mac)); - } - else - { /* same mac */ - if (sw_if_index == ntohl(event->sw_if_index) && - (!event->mac_ip || - /* for BD case, also check IP address with 10 sec timeout */ - (ip6_address_is_equal (address, - (ip6_address_t *) event->address) && - (now - nd_event_last_time) < 10.0))) - return 1; + return 1; } - /* *INDENT-ON* */ - nd_event_last_time = now; + clib_memcpy (event->new_mac, new_mac, sizeof (event->new_mac)); event->sw_if_index = htonl (sw_if_index); - if (event->mac_ip) - clib_memcpy (event->address, address, sizeof (event->address)); return 0; } @@ -1427,61 +1408,107 @@ nd_change_delete_callback (u32 pool_index, u8 * notused) return 0; } -static vlib_node_registration_t wc_ip4_arp_process_node; +static vlib_node_registration_t wc_arp_process_node; + +enum +{ WC_ARP_REPORT, WC_ND_REPORT }; static uword -wc_ip4_arp_process (vlib_main_t * vm, - vlib_node_runtime_t * rt, vlib_frame_t * f) +wc_arp_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) 
{ /* These cross the longjmp boundry (vlib_process_wait_for_event) * and need to be volatile - to prevent them from being optimized into * a register - which could change during suspension */ - volatile wc_arp_report_t prev = { 0 }; - volatile f64 last = vlib_time_now (vm); + volatile wc_arp_report_t arp_prev = { 0 }; + volatile wc_nd_report_t nd_prev = { 0 }; + volatile f64 last_arp = vlib_time_now (vm); + volatile f64 last_nd = vlib_time_now (vm); while (1) { vlib_process_wait_for_event (vm); uword event_type; - wc_arp_report_t *event_data = - vlib_process_get_event_data (vm, &event_type); + void *event_data = vlib_process_get_event_data (vm, &event_type); f64 now = vlib_time_now (vm); int i; - for (i = 0; i < vec_len (event_data); i++) + if (event_type == WC_ARP_REPORT) + { + wc_arp_report_t *arp_events = event_data; + for (i = 0; i < vec_len (arp_events); i++) + { + /* discard dup event */ + if (arp_prev.ip4 == arp_events[i].ip4 && + eth_mac_equal ((u8 *) arp_prev.mac, arp_events[i].mac) && + arp_prev.sw_if_index == arp_events[i].sw_if_index && + (now - last_arp) < 10.0) + { + continue; + } + arp_prev = arp_events[i]; + last_arp = now; + vpe_client_registration_t *reg; + /* *INDENT-OFF* */ + pool_foreach(reg, vpe_api_main.wc_ip4_arp_events_registrations, + ({ + unix_shared_memory_queue_t *q; + q = vl_api_client_index_to_input_queue (reg->client_index); + if (q && q->cursize < q->maxsize) + { + vl_api_ip4_arp_event_t * event = vl_msg_api_alloc (sizeof *event); + memset (event, 0, sizeof *event); + event->_vl_msg_id = htons (VL_API_IP4_ARP_EVENT); + event->client_index = reg->client_index; + event->pid = reg->client_pid; + event->mac_ip = 1; + event->address = arp_events[i].ip4; + event->sw_if_index = htonl(arp_events[i].sw_if_index); + memcpy(event->new_mac, arp_events[i].mac, sizeof event->new_mac); + vl_msg_api_send_shmem (q, (u8 *) &event); + } + })); + /* *INDENT-ON* */ + } + } + else if (event_type == WC_ND_REPORT) { - /* discard dup event */ - if (prev.ip4 == event_data[i].ip4 && - eth_mac_equal ((u8 *) prev.mac, event_data[i].mac) && - prev.sw_if_index == event_data[i].sw_if_index && - (now - last) < 10.0) + wc_nd_report_t *nd_events = event_data; + for (i = 0; i < vec_len (nd_events); i++) { - continue; + /* discard dup event */ + if (ip6_address_is_equal + ((ip6_address_t *) & nd_prev.ip6, &nd_events[i].ip6) + && eth_mac_equal ((u8 *) nd_prev.mac, nd_events[i].mac) + && nd_prev.sw_if_index == nd_events[i].sw_if_index + && (now - last_nd) < 10.0) + { + continue; + } + nd_prev = nd_events[i]; + last_nd = now; + vpe_client_registration_t *reg; + /* *INDENT-OFF* */ + pool_foreach(reg, vpe_api_main.wc_ip6_nd_events_registrations, + ({ + unix_shared_memory_queue_t *q; + q = vl_api_client_index_to_input_queue (reg->client_index); + if (q && q->cursize < q->maxsize) + { + vl_api_ip6_nd_event_t * event = vl_msg_api_alloc (sizeof *event); + memset (event, 0, sizeof *event); + event->_vl_msg_id = htons (VL_API_IP6_ND_EVENT); + event->client_index = reg->client_index; + event->pid = reg->client_pid; + event->mac_ip = 1; + memcpy(event->address, nd_events[i].ip6.as_u8, sizeof event->address); + event->sw_if_index = htonl(nd_events[i].sw_if_index); + memcpy(event->new_mac, nd_events[i].mac, sizeof event->new_mac); + vl_msg_api_send_shmem (q, (u8 *) &event); + } + })); + /* *INDENT-ON* */ } - prev = event_data[i]; - last = now; - vpe_client_registration_t *reg; - /* *INDENT-OFF* */ - pool_foreach(reg, vpe_api_main.wc_ip4_arp_events_registrations, - ({ - unix_shared_memory_queue_t *q; - q = 
vl_api_client_index_to_input_queue (reg->client_index); - if (q && q->cursize < q->maxsize) - { - vl_api_ip4_arp_event_t * event = vl_msg_api_alloc (sizeof *event); - memset (event, 0, sizeof *event); - event->_vl_msg_id = htons (VL_API_IP4_ARP_EVENT); - event->client_index = reg->client_index; - event->pid = reg->client_pid; - event->mac_ip = 1; - event->address = event_data[i].ip4; - event->sw_if_index = htonl(event_data[i].sw_if_index); - memcpy(event->new_mac, event_data[i].mac, sizeof event->new_mac); - vl_msg_api_send_shmem (q, (u8 *) &event); - } - })); - /* *INDENT-ON* */ } vlib_process_put_event_data (vm, event_data); } @@ -1490,8 +1517,8 @@ wc_ip4_arp_process (vlib_main_t * vm, } /* *INDENT-OFF* */ -VLIB_REGISTER_NODE (wc_ip4_arp_process_node,static) = { - .function = wc_ip4_arp_process, +VLIB_REGISTER_NODE (wc_arp_process_node,static) = { + .function = wc_arp_process, .type = VLIB_NODE_TYPE_PROCESS, .name = "wildcard-ip4-arp-publisher-process", }; @@ -1526,7 +1553,7 @@ vl_api_want_ip4_arp_events_t_handler (vl_api_want_ip4_arp_events_t * mp) hash_unset (am->wc_ip4_arp_events_registration_hash, mp->client_index); if (pool_elts (am->wc_ip4_arp_events_registrations) == 0) - wc_arp_set_publisher_node (~0); + wc_arp_set_publisher_node (~0, WC_ARP_REPORT); goto reply; } } @@ -1541,7 +1568,7 @@ vl_api_want_ip4_arp_events_t_handler (vl_api_want_ip4_arp_events_t * mp) rp->client_pid = mp->pid; hash_set (am->wc_ip4_arp_events_registration_hash, rp->client_index, rp - am->wc_ip4_arp_events_registrations); - wc_arp_set_publisher_node (wc_ip4_arp_process_node.index); + wc_arp_set_publisher_node (wc_arp_process_node.index, WC_ARP_REPORT); goto reply; } @@ -1588,7 +1615,47 @@ vl_api_want_ip6_nd_events_t_handler (vl_api_want_ip6_nd_events_t * mp) vpe_api_main_t *am = &vpe_api_main; vnet_main_t *vnm = vnet_get_main (); vl_api_want_ip6_nd_events_reply_t *rmp; - int rv; + int rv = 0; + + if (ip6_address_is_zero ((ip6_address_t *) mp->address)) + { + uword *p = + hash_get (am->wc_ip6_nd_events_registration_hash, mp->client_index); + vpe_client_registration_t *rp; + if (p) + { + if (mp->enable_disable) + { + clib_warning ("pid %d: already enabled...", mp->pid); + rv = VNET_API_ERROR_INVALID_REGISTRATION; + goto reply; + } + else + { + rp = + pool_elt_at_index (am->wc_ip6_nd_events_registrations, p[0]); + pool_put (am->wc_ip6_nd_events_registrations, rp); + hash_unset (am->wc_ip6_nd_events_registration_hash, + mp->client_index); + if (pool_elts (am->wc_ip6_nd_events_registrations) == 0) + wc_nd_set_publisher_node (~0, 2); + goto reply; + } + } + if (mp->enable_disable == 0) + { + clib_warning ("pid %d: already disabled...", mp->pid); + rv = VNET_API_ERROR_INVALID_REGISTRATION; + goto reply; + } + pool_get (am->wc_ip6_nd_events_registrations, rp); + rp->client_index = mp->client_index; + rp->client_pid = mp->pid; + hash_set (am->wc_ip6_nd_events_registration_hash, rp->client_index, + rp - am->wc_ip6_nd_events_registrations); + wc_nd_set_publisher_node (wc_arp_process_node.index, WC_ND_REPORT); + goto reply; + } if (mp->enable_disable) { @@ -1604,17 +1671,14 @@ vl_api_want_ip6_nd_events_t_handler (vl_api_want_ip6_nd_events_t * mp) if (rv) { pool_put (am->nd_events, event); - goto out; + goto reply; } memset (event, 0, sizeof (*event)); event->_vl_msg_id = ntohs (VL_API_IP6_ND_EVENT); event->client_index = mp->client_index; - clib_memcpy (event->address, mp->address, 16); + clib_memcpy (event->address, mp->address, sizeof event->address); event->pid = mp->pid; - if (ip6_address_is_zero ((ip6_address_t *) 
mp->address)) - event->mac_ip = 1; - } else { @@ -1624,7 +1688,7 @@ vl_api_want_ip6_nd_events_t_handler (vl_api_want_ip6_nd_events_t * mp) vpe_resolver_process_node.index, IP6_ND_EVENT, ~0 /* pool index */ , 0 /* is_add */ ); } -out: +reply: REPLY_MACRO (VL_API_WANT_IP6_ND_EVENTS_REPLY); } @@ -2325,10 +2389,7 @@ format_nd_event (u8 * s, va_list * args) vl_api_ip6_nd_event_t *event = va_arg (*args, vl_api_ip6_nd_event_t *); s = format (s, "pid %d: ", ntohl (event->pid)); - if (event->mac_ip) - s = format (s, "bd mac/ip6 binding events"); - else - s = format (s, "resolution for %U", format_ip6_address, event->address); + s = format (s, "resolution for %U", format_ip6_address, event->address); return s; } @@ -2340,7 +2401,9 @@ show_ip_arp_nd_events_fn (vlib_main_t * vm, vl_api_ip4_arp_event_t *arp_event; vl_api_ip6_nd_event_t *nd_event; - if ((pool_elts (am->arp_events) == 0) && (pool_elts (am->nd_events) == 0)) + if (pool_elts (am->arp_events) == 0 && pool_elts (am->nd_events) == 0 && + pool_elts (am->wc_ip4_arp_events_registrations) == 0 && + pool_elts (am->wc_ip6_nd_events_registrations) == 0) { vlib_cli_output (vm, "No active arp or nd event registrations"); return 0; @@ -2363,6 +2426,12 @@ show_ip_arp_nd_events_fn (vlib_main_t * vm, ({ vlib_cli_output (vm, "%U", format_nd_event, nd_event); })); + + pool_foreach(reg, am->wc_ip6_nd_events_registrations, + ({ + vlib_cli_output (vm, "pid %d: bd mac/ip6 binding events", + ntohl (reg->client_pid)); + })); /* *INDENT-ON* */ return 0; diff --git a/test/test_l2bd_arp_term.py b/test/test_l2bd_arp_term.py index 20885f88..20cc537e 100644 --- a/test/test_l2bd_arp_term.py +++ b/test/test_l2bd_arp_term.py @@ -168,6 +168,13 @@ class TestL2bdArpTerm(VppTestCase): def arp_event_hosts(self, evs): return {self.arp_event_host(e) for e in evs} + def nd_event_host(self, e): + return Host(mac=':'.join(['%02x' % ord(char) for char in e.new_mac]), + ip6=inet_ntop(AF_INET6, e.address)) + + def nd_event_hosts(self, evs): + return {self.nd_event_host(e) for e in evs} + @classmethod def ns_req(cls, src_host, host): nsma = in6_getnsma(inet_pton(AF_INET6, "fd10::ffff")) @@ -178,7 +185,11 @@ class TestL2bdArpTerm(VppTestCase): ICMPv6NDOptSrcLLAddr(lladdr=src_host.mac)) @classmethod - def ns_reqs(cls, src_host, entries): + def ns_reqs_dst(cls, entries, dst_host): + return [cls.ns_req(e, dst_host) for e in entries] + + @classmethod + def ns_reqs_src(cls, src_host, entries): return [cls.ns_req(src_host, e) for e in entries] def na_resp_host(self, src_host, rx): @@ -237,7 +248,7 @@ class TestL2bdArpTerm(VppTestCase): self.assertEqual(len(resps ^ resp_hosts), 0) def verify_nd(self, src_host, req_hosts, resp_hosts, bd_id=1): - reqs = self.ns_reqs(src_host, req_hosts) + reqs = self.ns_reqs_src(src_host, req_hosts) for swif in self.bd_swifs(bd_id): swif.add_stream(reqs) @@ -423,6 +434,59 @@ class TestL2bdArpTerm(VppTestCase): self.assertEqual(len(self.vapi.collect_events()), 0) self.bd_add_del(1, is_add=0) + def test_l2bd_arp_term_12(self): + """ L2BD ND term - send NS packets verify reports + """ + self.vapi.want_ip6_nd_events(address=inet_pton(AF_INET6, "::0")) + dst_host = self.ip6_host(50, 50, "00:00:11:22:33:44") + self.bd_add_del(1, is_add=1) + self.set_bd_flags(1, arp_term=True, flood=False, + uu_flood=False, learn=False) + macs = self.mac_list(range(10, 15)) + hosts = self.ip6_hosts(5, 1, macs) + reqs = self.ns_reqs_dst(hosts, dst_host) + self.bd_swifs(1)[0].add_stream(reqs) + + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + evs = 
[self.vapi.wait_for_event(2, "ip6_nd_event") + for i in range(len(hosts))] + ev_hosts = self.nd_event_hosts(evs) + self.assertEqual(len(ev_hosts ^ hosts), 0) + + def test_l2bd_arp_term_13(self): + """ L2BD ND term - send duplicate ns, verify suppression + """ + dst_host = self.ip6_host(50, 50, "00:00:11:22:33:44") + macs = self.mac_list(range(10, 11)) + hosts = self.ip6_hosts(5, 1, macs) + reqs = self.ns_reqs_dst(hosts, dst_host) * 5 + self.bd_swifs(1)[0].add_stream(reqs) + + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + evs = [self.vapi.wait_for_event(2, "ip6_nd_event") + for i in range(len(hosts))] + ev_hosts = self.nd_event_hosts(evs) + self.assertEqual(len(ev_hosts ^ hosts), 0) + + def test_l2bd_arp_term_14(self): + """ L2BD ND term - disable ip4 arp events,send ns, verify no events + """ + self.vapi.want_ip6_nd_events(enable_disable=0, + address=inet_pton(AF_INET6, "::0")) + dst_host = self.ip6_host(50, 50, "00:00:11:22:33:44") + macs = self.mac_list(range(10, 15)) + hosts = self.ip6_hosts(5, 1, macs) + reqs = self.ns_reqs_dst(hosts, dst_host) + self.bd_swifs(1)[0].add_stream(reqs) + + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + self.sleep(1) + self.assertEqual(len(self.vapi.collect_events()), 0) + self.bd_add_del(1, is_add=0) + if __name__ == '__main__': unittest.main(testRunner=VppTestRunner) diff --git a/test/vpp_papi_provider.py b/test/vpp_papi_provider.py index fcc67849..71e7aea1 100644 --- a/test/vpp_papi_provider.py +++ b/test/vpp_papi_provider.py @@ -425,6 +425,12 @@ class VppPapiProvider(object): 'address': address, 'pid': os.getpid(), }) + def want_ip6_nd_events(self, enable_disable=1, address=0): + return self.api(self.papi.want_ip6_nd_events, + {'enable_disable': enable_disable, + 'address': address, + 'pid': os.getpid(), }) + def l2fib_add_del(self, mac, bd_id, sw_if_index, is_add=1, static_mac=0, filter_mac=0, bvi_mac=0): """Create/delete L2 FIB entry. -- cgit 1.2.3-korg
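
Notes on the mechanisms introduced above. The duplicate suppression added in wc_arp_process() drops a wildcard report when it carries the same IPv4 address, MAC and sw_if_index as the previously published one and arrives within a 10 second window. Below is a minimal standalone restatement of that check in plain C; the demo_* names are illustrative stand-ins, not VPP definitions.

#include <stdio.h>
#include <string.h>

/* Hypothetical stand-in mirroring the fields of wc_arp_report_t. */
typedef struct
{
  unsigned int sw_if_index;
  unsigned int ip4;             /* IPv4 address as a 32-bit value */
  unsigned char mac[6];
} demo_arp_report_t;

/* Returns nonzero when cur repeats prev within the 10 second window --
 * the case wc_arp_process() skips with "continue". */
static int
demo_is_duplicate (const demo_arp_report_t * prev,
                   const demo_arp_report_t * cur,
                   double last_publish_time, double now)
{
  return prev->ip4 == cur->ip4
    && prev->sw_if_index == cur->sw_if_index
    && memcmp (prev->mac, cur->mac, sizeof prev->mac) == 0
    && (now - last_publish_time) < 10.0;
}

int
main (void)
{
  demo_arp_report_t a = { 1, 0x0a000001, { 0x00, 0x00, 0x11, 0x22, 0x33, 0x44 } };
  demo_arp_report_t b = a;      /* identical report */

  printf ("within window: %d\n", demo_is_duplicate (&a, &b, 0.0, 5.0));   /* 1 */
  printf ("after window:  %d\n", demo_is_duplicate (&a, &b, 0.0, 11.0));  /* 0 */
  return 0;
}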
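
The wildcard branch of the want_ip4_arp_events / want_ip6_nd_events handlers (address == 0) keeps one vpe_client_registration_t per client in a pool plus a hash keyed on client_index: a second enable, or a disable without a prior enable, returns VNET_API_ERROR_INVALID_REGISTRATION, and when the last registration is removed the publisher node is cleared so the data plane stops signalling. A rough sketch of that bookkeeping, with a plain array standing in for the VPP pool/hash pair; all demo_* names are hypothetical.

#include <stdio.h>

#define DEMO_MAX_CLIENTS 16
#define DEMO_ERR_INVALID_REGISTRATION (-1)  /* stands in for VNET_API_ERROR_INVALID_REGISTRATION */

static unsigned int demo_clients[DEMO_MAX_CLIENTS];
static int demo_in_use[DEMO_MAX_CLIENTS];
static int demo_publisher_enabled;          /* stands in for wc_arp_set_publisher_node() */

static int
demo_find (unsigned int client_index)
{
  for (int i = 0; i < DEMO_MAX_CLIENTS; i++)
    if (demo_in_use[i] && demo_clients[i] == client_index)
      return i;
  return -1;
}

static int
demo_count (void)
{
  int n = 0;
  for (int i = 0; i < DEMO_MAX_CLIENTS; i++)
    n += demo_in_use[i];
  return n;
}

/* Wildcard branch of the handler: 0 on success, error on a redundant
 * enable or disable; publisher cleared when the last client unregisters. */
static int
demo_want_events (unsigned int client_index, int enable)
{
  int slot = demo_find (client_index);

  if (slot >= 0)                /* already registered */
    {
      if (enable)
        return DEMO_ERR_INVALID_REGISTRATION;   /* "already enabled..." */
      demo_in_use[slot] = 0;
      if (demo_count () == 0)
        demo_publisher_enabled = 0;             /* last client gone */
      return 0;
    }

  if (!enable)
    return DEMO_ERR_INVALID_REGISTRATION;       /* "already disabled..." */

  for (int i = 0; i < DEMO_MAX_CLIENTS; i++)
    if (!demo_in_use[i])
      {
        demo_in_use[i] = 1;
        demo_clients[i] = client_index;
        demo_publisher_enabled = 1;     /* point the publisher at the process node */
        return 0;
      }
  return DEMO_ERR_INVALID_REGISTRATION;         /* demo table full */
}

int
main (void)
{
  printf ("enable:       %d\n", demo_want_events (42, 1));  /*  0 */
  printf ("enable again: %d\n", demo_want_events (42, 1));  /* -1 */
  printf ("disable:      %d\n", demo_want_events (42, 0));  /*  0 */
  return 0;
}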
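
The follow-up commit folds ARP and ND publication into the single wildcard-ip4-arp-publisher-process node and tells the two streams apart by event type (WC_ARP_REPORT vs WC_ND_REPORT), each stream keeping its own previous-report value and timestamp; this separation is what stops the last ND event from being replayed. A compact sketch of that per-stream dispatch, assuming a simplified integer key in place of the full report structs; the demo_* names are hypothetical.

#include <stdio.h>

enum { DEMO_WC_ARP_REPORT = 0, DEMO_WC_ND_REPORT = 1 };

/* Per-stream suppression state, mirroring arp_prev/last_arp and
 * nd_prev/last_nd in wc_arp_process(): each report type keeps its own
 * "previous event", so one stream never resets the other's window. */
typedef struct
{
  unsigned int prev_key;
  double last;
} demo_stream_state_t;

static demo_stream_state_t demo_arp_state, demo_nd_state;

static void
demo_dispatch (unsigned long event_type, unsigned int key, double now)
{
  demo_stream_state_t *s =
    event_type == DEMO_WC_ARP_REPORT ? &demo_arp_state : &demo_nd_state;

  if (s->prev_key == key && (now - s->last) < 10.0)
    return;                     /* duplicate within the window: suppress */

  s->prev_key = key;
  s->last = now;
  printf ("%s event, key %u\n",
          event_type == DEMO_WC_ARP_REPORT ? "arp" : "nd", key);
}

int
main (void)
{
  demo_dispatch (DEMO_WC_ARP_REPORT, 1, 0.0);   /* published */
  demo_dispatch (DEMO_WC_ARP_REPORT, 1, 1.0);   /* suppressed */
  demo_dispatch (DEMO_WC_ND_REPORT, 1, 2.0);    /* published: separate stream */
  return 0;
}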