From 78372a9a55098ad43c4d6d941b640cce4ff24226 Mon Sep 17 00:00:00 2001 From: Shwetha Bhandari Date: Wed, 18 Jan 2017 12:43:54 +0530 Subject: ioam: manycast using iOAM and SR (VPP-628) Change-Id: I6d2afda991d771fb4a89fc3f6544f8e940a9b9f0 Signed-off-by: Shwetha Bhandari --- src/plugins/ioam.am | 29 +- src/plugins/ioam/analyse/ioam_analyse.h | 23 +- src/plugins/ioam/analyse/ip6/ip6_ioam_analyse.h | 59 ++ src/plugins/ioam/encap/ip6_ioam_trace.c | 1 - src/plugins/ioam/encap/ip6_ioam_trace.h | 3 +- src/plugins/ioam/ip6/ioam_cache.api | 37 + src/plugins/ioam/ip6/ioam_cache.c | 386 +++++++++ src/plugins/ioam/ip6/ioam_cache.h | 897 +++++++++++++++++++++ src/plugins/ioam/ip6/ioam_cache_all_api_h.h | 16 + src/plugins/ioam/ip6/ioam_cache_msg_enum.h | 28 + src/plugins/ioam/ip6/ioam_cache_node.c | 424 ++++++++++ .../ioam/ip6/ioam_cache_tunnel_select_node.c | 770 ++++++++++++++++++ src/plugins/ioam/lib-trace/trace_util.h | 2 - src/vnet/fib/fib_entry.h | 9 +- src/vnet/fib/fib_entry_src.c | 5 +- src/vnet/ip/ip6.h | 78 ++ src/vnet/ip/ip6_forward.c | 118 +-- src/vnet/sr/sr.h | 40 +- 18 files changed, 2834 insertions(+), 91 deletions(-) create mode 100644 src/plugins/ioam/ip6/ioam_cache.api create mode 100644 src/plugins/ioam/ip6/ioam_cache.c create mode 100644 src/plugins/ioam/ip6/ioam_cache.h create mode 100644 src/plugins/ioam/ip6/ioam_cache_all_api_h.h create mode 100644 src/plugins/ioam/ip6/ioam_cache_msg_enum.h create mode 100644 src/plugins/ioam/ip6/ioam_cache_node.c create mode 100644 src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c (limited to 'src') diff --git a/src/plugins/ioam.am b/src/plugins/ioam.am index 14d8a9eb6d9..4346e3c0ccb 100644 --- a/src/plugins/ioam.am +++ b/src/plugins/ioam.am @@ -75,7 +75,8 @@ IOAM_TRACE_NOINST_HDR = \ ioam/lib-trace/trace_all_api_h.h \ ioam/lib-trace/trace_msg_enum.h \ ioam/lib-trace/trace.api.h \ - ioam/lib-trace/trace_util.h + ioam/lib-trace/trace_util.h \ + ioam/encap/ip6_ioam_trace.h IOAM_TRACE_API = ioam/lib-trace/trace.api @@ 
-163,6 +164,23 @@ IOAM_ANALYSE_SRC = \ ioam/analyse/ioam_analyse.h \ ioam/analyse/ioam_summary_export.h +######################################## +# iOAM record cache and rewrite +######################################## + +IOAM_IP6_MANYCAST_SRC = \ +ioam/ip6/ioam_cache.c \ +ioam/ip6/ioam_cache_node.c \ +ioam/ip6/ioam_cache_tunnel_select_node.c \ +ioam/ip6/ioam_cache.api.h + +IOAM_IP6_MANYCAST_API = ioam/ip6/ioam_cache.api + +IOAM_IP6_MANYCAST_NOINST_HDR = \ + ioam/ip6/ioam_cache_all_api_h.h \ + ioam/ip6/ioam_cache_msg_enum.h \ + ioam/ip6/ioam_cache.api.h + ######################################## # iOAM plugins ######################################## @@ -174,20 +192,23 @@ ioam_plugin_la_SOURCES = \ $(IOAM_VXLAN_GPE_SRC) \ $(IOAM_E2E_SRC) \ $(IPFIX_COLLECTOR_SRC) \ - $(IOAM_ANALYSE_SRC) + $(IOAM_ANALYSE_SRC) \ + $(IOAM_IP6_MANYCAST_SRC) API_FILES += \ $(IOAM_POT_API) \ $(IOAM_EXPORT_API) \ $(IOAM_TRACE_API) \ - $(IOAM_VXLAN_GPE_API) + $(IOAM_VXLAN_GPE_API) \ + $(IOAM_IP6_MANYCAST_API) noinst_HEADERS += \ $(IOAM_POT_NOINST_HDR) \ $(IOAM_EXPORT_NOINST_HDR) \ $(IOAM_TRACE_NOINST_HDR) \ $(IOAM_VXLAN_GPE_NOINST_HDR) \ - $(IOAM_E2E_NOINST_HDR) + $(IOAM_E2E_NOINST_HDR) \ + $(IOAM_IP6_MANYCAST_NOINST_HDR) vppplugins_LTLIBRARIES += ioam_plugin.la diff --git a/src/plugins/ioam/analyse/ioam_analyse.h b/src/plugins/ioam/analyse/ioam_analyse.h index d5b6fbe52a9..3c69d71f349 100644 --- a/src/plugins/ioam/analyse/ioam_analyse.h +++ b/src/plugins/ioam/analyse/ioam_analyse.h @@ -123,12 +123,14 @@ typedef struct ioam_analyser_data_t_ } ioam_analyser_data_t; always_inline f64 -ip6_ioam_analyse_calc_delay (ioam_trace_hdr_t * trace, u16 trace_len) +ip6_ioam_analyse_calc_delay (ioam_trace_hdr_t * trace, u16 trace_len, + u8 oneway) { u16 size_of_traceopt_per_node, size_of_all_traceopts; u8 num_nodes; - u32 *start_elt, *end_elt; + u32 *start_elt, *end_elt, *uturn_elt;; u32 start_time, end_time; + u8 done = 0; size_of_traceopt_per_node = fetch_trace_data_size (trace->ioam_trace_type); // 
Unknown trace type @@ -145,6 +147,19 @@ ip6_ioam_analyse_calc_delay (ioam_trace_hdr_t * trace, u16 trace_len) trace->elts + (u32) (size_of_traceopt_per_node * (num_nodes - 1) / sizeof (u32)); + if (oneway && (trace->ioam_trace_type & BIT_TTL_NODEID)) + { + done = 0; + do + { + uturn_elt = start_elt - size_of_traceopt_per_node / sizeof (u32); + + if ((clib_net_to_host_u32 (*start_elt) >> 24) <= + (clib_net_to_host_u32 (*uturn_elt) >> 24)) + done = 1; + } + while (!done && (start_elt = uturn_elt) != end_elt); + } if (trace->ioam_trace_type & BIT_TTL_NODEID) { start_elt++; @@ -155,7 +170,6 @@ ip6_ioam_analyse_calc_delay (ioam_trace_hdr_t * trace, u16 trace_len) start_elt++; end_elt++; } - start_time = clib_net_to_host_u32 (*start_elt); end_time = clib_net_to_host_u32 (*end_elt); @@ -273,11 +287,10 @@ ip6_ioam_analyse_hbh_trace (ioam_analyser_data_t * data, found_match: trace_record->pkt_counter++; trace_record->bytes_counter += pak_len; - if (trace->ioam_trace_type & BIT_TIMESTAMP) { /* Calculate time delay */ - u32 delay = (u32) ip6_ioam_analyse_calc_delay (trace, trace_len); + u32 delay = (u32) ip6_ioam_analyse_calc_delay (trace, trace_len, 0); if (delay < trace_record->min_delay) trace_record->min_delay = delay; else if (delay > trace_record->max_delay) diff --git a/src/plugins/ioam/analyse/ip6/ip6_ioam_analyse.h b/src/plugins/ioam/analyse/ip6/ip6_ioam_analyse.h index f6abdce3c9a..5a2a2d70028 100644 --- a/src/plugins/ioam/analyse/ip6/ip6_ioam_analyse.h +++ b/src/plugins/ioam/analyse/ip6/ip6_ioam_analyse.h @@ -18,6 +18,7 @@ #include #include +#include /** @brief IP6-iOAM analyser main structure. @note cache aligned. 
@@ -57,6 +58,64 @@ ioam_analyse_get_data_from_flow_id (u32 flow_id) return (ioam_analyser_main.aggregated_data + flow_id); } +always_inline void * +ip6_ioam_find_hbh_option (ip6_hop_by_hop_header_t * hbh0, u8 option) +{ + ip6_hop_by_hop_option_t *opt0, *limit0; + u8 type0; + /* Options start right after the fixed header; the whole header spans (length + 1) * 8 bytes. */ + opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1); + limit0 = + (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 + ((hbh0->length + 1) << 3)); + + while (opt0 < limit0) + { + type0 = opt0->type; + if (type0 == option) + return ((void *) opt0); + /* Type 0 is Pad1: a single octet with no length field, so advance exactly one byte. */ + if (0 == type0) + { + opt0 = (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + 1); + continue; + } + opt0 = (ip6_hop_by_hop_option_t *) + (((u8 *) opt0) + opt0->length + sizeof (ip6_hop_by_hop_option_t)); + } + + return NULL; +} + +always_inline int +ip6_ioam_analyse_compare_path_delay (ip6_hop_by_hop_header_t * hbh0, + ip6_hop_by_hop_header_t * hbh1, + bool oneway) +{ + ioam_trace_option_t *trace0 = NULL, *trace1 = NULL; + f64 delay0, delay1; + + trace0 = + ip6_ioam_find_hbh_option (hbh0, HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST); + trace1 = + ip6_ioam_find_hbh_option (hbh1, HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST); + + if (PREDICT_FALSE ((trace0 == NULL) && (trace1 == NULL))) + return 0; + + if (PREDICT_FALSE (trace1 == NULL)) + return 1; + + if (PREDICT_FALSE (trace0 == NULL)) + return -1; + + delay0 = ip6_ioam_analyse_calc_delay (&trace0->trace_hdr, + trace0->hdr.length - 2, oneway); + delay1 = ip6_ioam_analyse_calc_delay (&trace1->trace_hdr, + trace1->hdr.length - 2, oneway); + + return (delay0 - delay1); +} + #endif /* PLUGINS_IOAM_PLUGIN_IOAM_ANALYSE_IP6_IOAM_ANALYSE_NODE_H_ */ /* diff --git a/src/plugins/ioam/encap/ip6_ioam_trace.c b/src/plugins/ioam/encap/ip6_ioam_trace.c index 6972ba4be96..f1eb1bf0957 100644 --- a/src/plugins/ioam/encap/ip6_ioam_trace.c +++ b/src/plugins/ioam/encap/ip6_ioam_trace.c @@ -40,7 +40,6 @@ typedef union u32 as_u32[2]; } time_u64_t; - extern ip6_hop_by_hop_ioam_main_t ip6_hop_by_hop_ioam_main; extern ip6_main_t ip6_main; diff --git 
a/src/plugins/ioam/encap/ip6_ioam_trace.h b/src/plugins/ioam/encap/ip6_ioam_trace.h index b332b3197e9..620b70a8c22 100644 --- a/src/plugins/ioam/encap/ip6_ioam_trace.h +++ b/src/plugins/ioam/encap/ip6_ioam_trace.h @@ -1,5 +1,7 @@ /* * Copyright (c) 2017 Cisco and/or its affiliates. + * trace_util.h -- Trace Profile Utility header + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: @@ -26,7 +28,6 @@ typedef CLIB_PACKED(struct { }) ioam_trace_option_t; /* *INDENT-ON* */ - #endif /* PLUGINS_IOAM_PLUGIN_IOAM_ENCAP_IP6_IOAM_TRACE_H_ */ /* diff --git a/src/plugins/ioam/ip6/ioam_cache.api b/src/plugins/ioam/ip6/ioam_cache.api new file mode 100644 index 00000000000..de50d57d4ee --- /dev/null +++ b/src/plugins/ioam/ip6/ioam_cache.api @@ -0,0 +1,37 @@ +/* Hey Emacs use -*- mode: C -*- */ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* API to control ioam caching */ + +define ioam_cache_ip6_enable_disable { + /* Client identifier, set from api_main.my_client_index */ + u32 client_index; + + /* Arbitrary context, so client can match reply to request */ + u32 context; + + /* Enable / disable the feature */ + u8 is_disable; + +}; + +define ioam_cache_ip6_enable_disable_reply { + /* From the request */ + u32 context; + + /* Return value, zero means all OK */ + i32 retval; +}; diff --git a/src/plugins/ioam/ip6/ioam_cache.c b/src/plugins/ioam/ip6/ioam_cache.c new file mode 100644 index 00000000000..9e90ff9a920 --- /dev/null +++ b/src/plugins/ioam/ip6/ioam_cache.c @@ -0,0 +1,386 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + *------------------------------------------------------------------ + * ioam_cache.c - ioam ip6 API / debug CLI handling + *------------------------------------------------------------------ + */ + +#include +#include +#include + +#include +#include +#include +#include + +#include "ioam_cache.h" + +/* define message IDs */ +#include + +/* define message structures */ +#define vl_typedefs +#include +#undef vl_typedefs + +/* define generated endian-swappers */ +#define vl_endianfun +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) 
vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include +#undef vl_printfun + +/* Get the API version number */ +#define vl_api_version(n,v) static u32 api_version=(v); +#include +#undef vl_api_version + +/* + * A handy macro to set up a message reply. + * Assumes that the following variables are available: + * mp - pointer to request message + * rmp - pointer to reply message type + * rv - return value + */ + +#define REPLY_MACRO(t) \ +do { \ + unix_shared_memory_queue_t * q = \ + vl_api_client_index_to_input_queue (mp->client_index); \ + if (!q) \ + return; \ + \ + rmp = vl_msg_api_alloc (sizeof (*rmp)); \ + rmp->_vl_msg_id = ntohs((t)+cm->msg_id_base); \ + rmp->context = mp->context; \ + rmp->retval = ntohl(rv); \ + \ + vl_msg_api_send_shmem (q, (u8 *)&rmp); \ +} while(0); + + +/* List of message types that this plugin understands */ + +#define foreach_ioam_cache_plugin_api_msg \ +_(IOAM_CACHE_IP6_ENABLE_DISABLE, ioam_cache_ip6_enable_disable) + +static u8 * +ioam_e2e_id_trace_handler (u8 * s, ip6_hop_by_hop_option_t * opt) +{ + ioam_e2e_id_option_t *e2e = (ioam_e2e_id_option_t *) opt; + + if (e2e) + { + s = + format (s, "IP6_HOP_BY_HOP E2E ID = %U\n", format_ip6_address, + &(e2e->id)); + } + + + return s; +} + +static u8 * +ioam_e2e_cache_trace_handler (u8 * s, ip6_hop_by_hop_option_t * opt) +{ + ioam_e2e_cache_option_t *e2e = (ioam_e2e_cache_option_t *) opt; + + if (e2e) + { + s = + format (s, "IP6_HOP_BY_HOP E2E CACHE = pool:%d idx:%d\n", + e2e->pool_id, e2e->pool_index); + } + + + return s; +} + +/* Action function shared between message handler and debug CLI */ +int +ioam_cache_ip6_enable_disable (ioam_cache_main_t * em, u8 is_disable) +{ + vlib_main_t *vm = em->vlib_main; + + if (is_disable == 0) + { + ioam_cache_table_init (vm); + ip6_hbh_set_next_override (em->cache_hbh_slot); + ip6_hbh_register_option (HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE_ID, + 0, ioam_e2e_id_trace_handler); + ip6_hbh_register_option (HBH_OPTION_TYPE_IOAM_E2E_CACHE_ID, + 0, 
ioam_e2e_cache_trace_handler); + + } + else + { + ip6_hbh_set_next_override (IP6_LOOKUP_NEXT_POP_HOP_BY_HOP); + ioam_cache_table_destroy (vm); + ip6_hbh_unregister_option (HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE_ID); + ip6_hbh_unregister_option (HBH_OPTION_TYPE_IOAM_E2E_CACHE_ID); + } + + return 0; +} + +/* Action function shared between message handler and debug CLI */ +int +ioam_tunnel_select_ip6_enable_disable (ioam_cache_main_t * em, + u8 criteria, + u8 no_of_responses, u8 is_disable) +{ + vlib_main_t *vm = em->vlib_main; + + if (is_disable == 0) + { + ioam_cache_ts_table_init (vm); + em->criteria_oneway = criteria; + em->wait_for_responses = no_of_responses; + ip6_hbh_set_next_override (em->ts_hbh_slot); + ip6_ioam_ts_cache_set_rewrite (); + ip6_hbh_register_option (HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE_ID, + 0, ioam_e2e_id_trace_handler); + ip6_hbh_register_option (HBH_OPTION_TYPE_IOAM_E2E_CACHE_ID, + 0, ioam_e2e_cache_trace_handler); + + /* Turn on the cleanup process */ + // vlib_process_signal_event (vm, em->cleanup_process_node_index, 1, 0); + } + else + { + ioam_cache_ts_timer_node_enable (vm, 0); + ip6_hbh_set_next_override (IP6_LOOKUP_NEXT_POP_HOP_BY_HOP); + ioam_cache_ts_table_destroy (vm); + ip6_ioam_ts_cache_cleanup_rewrite (); + ip6_hbh_unregister_option (HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE_ID); + ip6_hbh_unregister_option (HBH_OPTION_TYPE_IOAM_E2E_CACHE_ID); + } + + return 0; +} + +/* API message handler */ +static void vl_api_ioam_cache_ip6_enable_disable_t_handler + (vl_api_ioam_cache_ip6_enable_disable_t * mp) +{ + vl_api_ioam_cache_ip6_enable_disable_reply_t *rmp; + ioam_cache_main_t *cm = &ioam_cache_main; + int rv; + + rv = ioam_cache_ip6_enable_disable (cm, (int) (mp->is_disable)); + REPLY_MACRO (VL_API_IOAM_CACHE_IP6_ENABLE_DISABLE_REPLY); +} + +/* Set up the API message handling tables */ +static clib_error_t * +ioam_cache_plugin_api_hookup (vlib_main_t * vm) +{ + ioam_cache_main_t *sm = &ioam_cache_main; +#define _(N,n) \ + 
vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \ + #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_ioam_cache_plugin_api_msg; +#undef _ + + return 0; +} + +static clib_error_t * +set_ioam_cache_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + ioam_cache_main_t *em = &ioam_cache_main; + u8 is_disable = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "disable")) + is_disable = 1; + else + break; + } + ioam_cache_ip6_enable_disable (em, is_disable); + + return 0; +} + +/* *INDENT_OFF* */ +VLIB_CLI_COMMAND (set_ioam_cache_command, static) = +{ +.path = "set ioam ip6 cache",.short_help = + "set ioam ip6 cache [disable]",.function = set_ioam_cache_command_fn}; +/* *INDENT_ON* */ + +#define IOAM_TS_WAIT_FOR_RESPONSES 3 +static clib_error_t * +set_ioam_tunnel_select_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ioam_cache_main_t *em = &ioam_cache_main; + u8 is_disable = 0; + u8 one_way = 0; + u8 no_of_responses = IOAM_TS_WAIT_FOR_RESPONSES; + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "disable")) + is_disable = 1; + else if (unformat (input, "rtt")) + one_way = 0; + else if (unformat (input, "oneway")) + one_way = 1; + else if (unformat (input, "wait_for_responses %d", &no_of_responses)) + ; + else + break; + } + + ioam_tunnel_select_ip6_enable_disable (em, one_way, no_of_responses, + is_disable); + + return 0; +} + +/* *INDENT_OFF* */ +VLIB_CLI_COMMAND (set_ioam_cache_ts_command, static) = +{ +.path = "set ioam ip6 sr-tunnel-select",.short_help = + "set ioam ip6 sr-tunnel-select [disable] [oneway|rtt] [wait_for_responses ]",.function + = set_ioam_tunnel_select_command_fn}; +/* *INDENT_ON* */ + +static void +ioam_cache_table_print (vlib_main_t * vm, u8 verbose) +{ + ioam_cache_main_t *cm = 
&ioam_cache_main; + ioam_cache_entry_t *entry = 0; + ioam_cache_ts_entry_t *ts_entry = 0; + int no_of_threads = vec_len (vlib_worker_threads); + int i; + + pool_foreach (entry, cm->ioam_rewrite_pool, ( + { + vlib_cli_output (vm, "%U", + format_ioam_cache_entry, + entry); + })); + + if (cm->ts_stats) + for (i = 0; i < no_of_threads; i++) + { + vlib_cli_output (vm, "Number of entries in thread-%d selection pool: %lu\n \ + (pool found to be full: %lu times)", i, + cm->ts_stats[i].inuse, cm->ts_stats[i].add_failed); + + if (verbose == 1) + vlib_worker_thread_barrier_sync (vm); + pool_foreach (ts_entry, cm->ioam_ts_pool[i], ( + { + vlib_cli_output (vm, + "%U", + format_ioam_cache_ts_entry, + ts_entry, + (u32) + i); + } + )); + vlib_worker_thread_barrier_release (vm); + } + +} + +static clib_error_t * +show_ioam_cache_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u8 verbose = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "verbose")) + verbose = 1; + } + ioam_cache_table_print (vm, verbose); + + + return 0; +} + +/* *INDENT_OFF* */ +VLIB_CLI_COMMAND (show_ioam_cache_command, static) = +{ +.path = "show ioam ip6 cache",.short_help = + "show ioam ip6 cache [verbose]",.function = show_ioam_cache_command_fn}; +/* *INDENT_ON* */ + +static clib_error_t * +ioam_cache_init (vlib_main_t * vm) +{ + ioam_cache_main_t *em = &ioam_cache_main; + clib_error_t *error = 0; + u8 *name; + u32 cache_node_index = ioam_cache_node.index; + u32 ts_node_index = ioam_cache_ts_node.index; + vlib_node_t *ip6_hbyh_node = NULL, *ip6_hbh_pop_node = NULL, *error_node = + NULL; + + name = format (0, "ioam_cache_%08x%c", api_version, 0); + + memset (&ioam_cache_main, 0, sizeof (ioam_cache_main)); + /* Ask for a correctly-sized block of API message decode slots */ + em->msg_id_base = vl_msg_api_get_msg_ids + ((char *) name, VL_MSG_FIRST_AVAILABLE); + + error = ioam_cache_plugin_api_hookup (vm); + /* Hook this 
node to ip6-hop-by-hop */ + ip6_hbyh_node = vlib_get_node_by_name (vm, (u8 *) "ip6-hop-by-hop"); + em->cache_hbh_slot = + vlib_node_add_next (vm, ip6_hbyh_node->index, cache_node_index); + em->ts_hbh_slot = + vlib_node_add_next (vm, ip6_hbyh_node->index, ts_node_index); + + ip6_hbh_pop_node = vlib_get_node_by_name (vm, (u8 *) "ip6-pop-hop-by-hop"); + em->ip6_hbh_pop_node_index = ip6_hbh_pop_node->index; + + error_node = vlib_get_node_by_name (vm, (u8 *) "error-drop"); + em->error_node_index = error_node->index; + em->vlib_main = vm; + + vec_free (name); + + return error; +} + +VLIB_INIT_FUNCTION (ioam_cache_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/ioam/ip6/ioam_cache.h b/src/plugins/ioam/ip6/ioam_cache.h new file mode 100644 index 00000000000..aa88d58d8a8 --- /dev/null +++ b/src/plugins/ioam/ip6/ioam_cache.h @@ -0,0 +1,897 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __included_ioam_cache_h__ +#define __included_ioam_cache_h__ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +/* + * ioam_cache.h + * This header contains routines for caching of ioam header and + * buffer: + * 1 - On application facing node: to cache ioam header recvd + * in request and reattach in response to provide round + * trip path visibility. Since request response matching + * is needed works with TCP and relies on (5 tuples,seq no) + * 2 - On M-Anycast server node: This node replicates requests + * towards multiple anycast service nodes serving anycast + * IP6 address. It evaluates response and forwards the best + * response towards the client of requesting the service. + * Again since request-response matching is needed, works + * with TCP and relies on (5 tuples,seq no) for matching. + * To do this it caches SYN-ACK responses for a short time to + * evaluate multiple responses received before the selected + * SYN-ACK response is forwared and others dropped. + * + * M-Anycast server cache: + * - There is a pool of cache entries per worker thread. + * - Cache entry is created when SYN is received expected + * number of responses are marked based on number of + * SR tunnels for the anycast destination address + * - The pool/thread id and pool index are attached in the + * message as an ioam option for quick look up. + * - When is received SYN-ACK the ioam option containing + * thread id + pool index of the cache entry is used to + * look up cache entry. 
+ * - Cache synchronization: + * - This is achieved by cache entry add/del/update all handled + * by the same worker/main thread + * - Packets from client to threads - syn packets, can be distributed + * based on incoming interface affinity to the cpu core pinned to + * the thread or a simple sequence number based distribution + * if thread per interface is not scaling + * - Response packets from server towards clients - syn-acks, are + * forced to the same thread that created the cache entry + * using SR and the destination of SR v6 address assigned + * to the core/thread. This address is sent as an ioam option + * in the syn that can then be used on the other side to + * populate v6 dst address in the response + * - Timeout: timer wheel per thread is used to track the syn-ack wait + * time. The timer wheel tick is updated via an input node per thread. + * + * Application facing node/Service side cache: + * - Single pool of cache entries. + * - Cache entry is created when SYN is received. Caches the ioam + * header. Hash table entry is created based on 5 tuple and + * TCP seq no to pool index + * - Response SYN-ACK processed by looking up pool index in hash table + * and cache entry in the pool is used to get the ioam header rewrite + * string. Entry is freed from pool and hash table after use. + * - Locking/Synchronization: Currently this functionality is deployed + * with main/single thread only. Hence no locking is used. + * - Deployment: A VPP node per application server servicing anycast + * address is expected. Locking/synchronization needed when the server + * /application facing node is started with multiple worker threads. 
+ * + */ + +/* + * Application facing server side caching: + * Cache entry for ioam header + * Currently caters to TCP and relies on + * TCP - 5 tuples + seqno to cache and reinsert + * ioam header b/n TCP request response + */ +typedef struct +{ + ip6_address_t src_address; + ip6_address_t dst_address; + u16 src_port; + u16 dst_port; + u8 protocol; + u32 seq_no; + ip6_address_t next_hop; + u16 my_address_offset; + u8 *ioam_rewrite_string; +} ioam_cache_entry_t; + +/* + * Cache entry for anycast server selection + * Works for TCP as 5 tuple + sequence number + * is required for request response matching + * max_responses expected is set based on number + * of SR tunnels for the dst_address + * Timeout or all response_received = max_responses + * will clear the entry + * buffer_index index of the response msg vlib buffer + * that is currently the best response + */ +typedef struct +{ + u32 pool_id; + u32 pool_index; + ip6_address_t src_address; + ip6_address_t dst_address; + u16 src_port; + u16 dst_port; + u8 protocol; + u32 seq_no; + u32 buffer_index; + ip6_hop_by_hop_header_t *hbh; //pointer to hbh header in the buffer + u64 created_at; + u8 response_received; + u8 max_responses; + u32 stop_timer_handle; + /** Handle returned from tw_start_timer */ + u32 timer_handle; + /** entry should expire at this clock tick */ + u32 expected_to_expire; +} ioam_cache_ts_entry_t; + +/* + * Per thread tunnel selection cache stats + */ +typedef struct +{ + u64 inuse; + u64 add_failed; +} ioam_cache_ts_pool_stats_t; + +/* Server side: iOAM header caching */ +#define MAX_CACHE_ENTRIES 4096 +/* M-Anycast: Cache for SR tunnel selection */ +#define MAX_CACHE_TS_ENTRIES 1048576 + +#define IOAM_CACHE_TABLE_DEFAULT_HASH_NUM_BUCKETS (4 * 1024) +#define IOAM_CACHE_TABLE_DEFAULT_HASH_MEMORY_SIZE (2<<20) + +typedef struct +{ + /* API message ID base */ + u16 msg_id_base; + + /* Pool of ioam_cache_buffer_t */ + ioam_cache_entry_t *ioam_rewrite_pool; + + /* For steering packets ioam cache 
entry is followed by + * SR header. This is the SR rewrite template */ + u8 *sr_rewrite_template; + /* The current rewrite string being used */ + u8 *rewrite; + u8 rewrite_pool_index_offset; + + u64 lookup_table_nbuckets; + u64 lookup_table_size; + clib_bihash_8_8_t ioam_rewrite_cache_table; + + /* M-Anycast: Pool of ioam_cache_ts_entry_t per thread */ + ioam_cache_ts_entry_t **ioam_ts_pool; + ioam_cache_ts_pool_stats_t *ts_stats; + /** per thread single-wheel */ + tw_timer_wheel_16t_2w_512sl_t *timer_wheels; + + /* + * Selection criteria: oneway delay: Server to M-Anycast + * or RTT + */ + bool criteria_oneway; + u8 wait_for_responses; + + /* convenience */ + vlib_main_t *vlib_main; + + uword cache_hbh_slot; + uword ts_hbh_slot; + u32 ip6_hbh_pop_node_index; + u32 error_node_index; + u32 cleanup_process_node_index; +} ioam_cache_main_t; + +ioam_cache_main_t ioam_cache_main; + +vlib_node_registration_t ioam_cache_node; +vlib_node_registration_t ioam_cache_ts_node; + +/* Compute flow hash. We'll use it to select which Sponge to use for this + * flow. And other things. + * ip6_compute_flow_hash in ip6.h doesnt locate tcp/udp when + * ext headers are present. While it could be made to it will be a + * performance hit for ECMP flows. + * HEnce this function here, with L4 information directly input + * Useful when tcp/udp headers are already located in presence of + * ext headers + */ +always_inline u32 +ip6_compute_flow_hash_ext (const ip6_header_t * ip, + u8 protocol, + u16 src_port, + u16 dst_port, flow_hash_config_t flow_hash_config) +{ + u64 a, b, c; + u64 t1, t2; + + t1 = (ip->src_address.as_u64[0] ^ ip->src_address.as_u64[1]); + t1 = (flow_hash_config & IP_FLOW_HASH_SRC_ADDR) ? t1 : 0; + + t2 = (ip->dst_address.as_u64[0] ^ ip->dst_address.as_u64[1]); + t2 = (flow_hash_config & IP_FLOW_HASH_DST_ADDR) ? t2 : 0; + + a = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ? t2 : t1; + b = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ? 
t1 : t2; + b ^= (flow_hash_config & IP_FLOW_HASH_PROTO) ? protocol : 0; + + t1 = src_port; + t2 = dst_port; + + t1 = (flow_hash_config & IP_FLOW_HASH_SRC_PORT) ? t1 : 0; + t2 = (flow_hash_config & IP_FLOW_HASH_DST_PORT) ? t2 : 0; + + c = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ? + ((t1 << 16) | t2) : ((t2 << 16) | t1); + + hash_mix64 (a, b, c); + return (u32) c; +} + + +/* 2 new ioam E2E options : + * 1. HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE_ID: IP6 address + * of ioam node that inserted ioam header + * 2. HBH_OPTION_TYPE_IOAM_E2E_CACHE_ID: Pool id and index + * to look up tunnel select cache entry + */ +#define HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE_ID 30 +#define HBH_OPTION_TYPE_IOAM_E2E_CACHE_ID 31 + +typedef CLIB_PACKED (struct + { + ip6_hop_by_hop_option_t hdr; u8 e2e_type; u8 reserved[5]; + ip6_address_t id; + }) ioam_e2e_id_option_t; + +typedef CLIB_PACKED (struct + { + ip6_hop_by_hop_option_t hdr; u8 e2e_type; u8 pool_id; + u32 pool_index; + }) ioam_e2e_cache_option_t; + +#define IOAM_E2E_ID_OPTION_RND ((sizeof(ioam_e2e_id_option_t) + 7) & ~7) +#define IOAM_E2E_ID_HBH_EXT_LEN (IOAM_E2E_ID_OPTION_RND >> 3) +#define IOAM_E2E_CACHE_OPTION_RND ((sizeof(ioam_e2e_cache_option_t) + 7) & ~7) +#define IOAM_E2E_CACHE_HBH_EXT_LEN (IOAM_E2E_CACHE_OPTION_RND >> 3) + +static inline void +ioam_e2e_id_rewrite_handler (ioam_e2e_id_option_t * e2e_option, + vlib_buffer_t * b0) +{ + ip6_main_t *im = &ip6_main; + ip6_address_t *my_address = 0; + my_address = + ip6_interface_first_address (im, vnet_buffer (b0)->sw_if_index[VLIB_RX]); + if (my_address) + { + e2e_option->id.as_u64[0] = my_address->as_u64[0]; + e2e_option->id.as_u64[1] = my_address->as_u64[1]; + } +} + +/* Following functions are for the caching of ioam header + * to enable reattaching it for a complete request-response + * message exchange */ +inline static void +ioam_cache_entry_free (ioam_cache_entry_t * entry) +{ + ioam_cache_main_t *cm = &ioam_cache_main; + if (entry) + { + vec_free 
(entry->ioam_rewrite_string); + memset (entry, 0, sizeof (*entry)); + pool_put (cm->ioam_rewrite_pool, entry); + } +} + +inline static ioam_cache_entry_t * +ioam_cache_entry_cleanup (u32 pool_index) +{ + ioam_cache_main_t *cm = &ioam_cache_main; + ioam_cache_entry_t *entry = 0; + + entry = pool_elt_at_index (cm->ioam_rewrite_pool, pool_index); + ioam_cache_entry_free (entry); + return (0); +} + +inline static ioam_cache_entry_t * +ioam_cache_lookup (ip6_header_t * ip0, u16 src_port, u16 dst_port, u32 seq_no) +{ + ioam_cache_main_t *cm = &ioam_cache_main; + u32 flow_hash = ip6_compute_flow_hash_ext (ip0, ip0->protocol, + src_port, dst_port, + IP_FLOW_HASH_DEFAULT | + IP_FLOW_HASH_REVERSE_SRC_DST); + clib_bihash_kv_8_8_t kv, value; + + kv.key = (u64) flow_hash << 32 | seq_no; + kv.value = 0; + value.key = 0; + value.value = 0; + + if (clib_bihash_search_8_8 (&cm->ioam_rewrite_cache_table, &kv, &value) >= + 0) + { + ioam_cache_entry_t *entry = 0; + + entry = pool_elt_at_index (cm->ioam_rewrite_pool, value.value); + /* match */ + if (ip6_address_compare (&ip0->src_address, &entry->dst_address) == 0 && + ip6_address_compare (&ip0->dst_address, &entry->src_address) == 0 && + entry->src_port == dst_port && + entry->dst_port == src_port && entry->seq_no == seq_no) + { + /* If lookup is successful remove it from the hash */ + clib_bihash_add_del_8_8 (&cm->ioam_rewrite_cache_table, &kv, 0); + return (entry); + } + else + return (0); + + } + return (0); +} + +/* + * Caches ioam hbh header + * Extends the hbh header with option to contain IP6 address of the node + * that caches it + */ +inline static int +ioam_cache_add (vlib_buffer_t * b0, + ip6_header_t * ip0, + u16 src_port, + u16 dst_port, ip6_hop_by_hop_header_t * hbh0, u32 seq_no) +{ + ioam_cache_main_t *cm = &ioam_cache_main; + ioam_cache_entry_t *entry = 0; + u32 rewrite_len = 0, e2e_id_offset = 0; + u32 pool_index = 0; + ioam_e2e_id_option_t *e2e = 0; + + pool_get_aligned (cm->ioam_rewrite_pool, entry, 
CLIB_CACHE_LINE_BYTES); + memset (entry, 0, sizeof (*entry)); + pool_index = entry - cm->ioam_rewrite_pool; + + clib_memcpy (entry->dst_address.as_u64, ip0->dst_address.as_u64, + sizeof (ip6_address_t)); + clib_memcpy (entry->src_address.as_u64, ip0->src_address.as_u64, + sizeof (ip6_address_t)); + entry->src_port = src_port; + entry->dst_port = dst_port; + entry->seq_no = seq_no; + rewrite_len = ((hbh0->length + 1) << 3); + vec_validate (entry->ioam_rewrite_string, rewrite_len - 1); + e2e = ip6_ioam_find_hbh_option (hbh0, HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE_ID); + if (e2e) + { + entry->next_hop.as_u64[0] = e2e->id.as_u64[0]; + entry->next_hop.as_u64[1] = e2e->id.as_u64[1]; + } + else + { + return (-1); + } + e2e_id_offset = (u8 *) e2e - (u8 *) hbh0; + /* setup e2e id option to insert v6 address of the node caching it */ + clib_memcpy (entry->ioam_rewrite_string, hbh0, rewrite_len); + hbh0 = (ip6_hop_by_hop_header_t *) entry->ioam_rewrite_string; + + /* suffix rewrite string with e2e ID option */ + e2e = (ioam_e2e_id_option_t *) (entry->ioam_rewrite_string + e2e_id_offset); + ioam_e2e_id_rewrite_handler (e2e, b0); + entry->my_address_offset = (u8 *) (&e2e->id) - (u8 *) hbh0; + + /* add it to hash, replacing and freeing any collision for now */ + u32 flow_hash = + ip6_compute_flow_hash_ext (ip0, hbh0->protocol, src_port, dst_port, + IP_FLOW_HASH_DEFAULT); + clib_bihash_kv_8_8_t kv, value; + kv.key = (u64) flow_hash << 32 | seq_no; + kv.value = 0; + if (clib_bihash_search_8_8 (&cm->ioam_rewrite_cache_table, &kv, &value) >= + 0) + { + /* replace */ + ioam_cache_entry_cleanup (value.value); + } + kv.value = pool_index; + clib_bihash_add_del_8_8 (&cm->ioam_rewrite_cache_table, &kv, 1); + return (0); +} + +/* Creates SR rewrite string + * This is appended with ioam header on the server facing + * node. + * This SR header is necessary to attract packets towards + * selected Anycast server. 
+ */ +inline static void +ioam_cache_sr_rewrite_template_create (void) +{ + ioam_cache_main_t *cm = &ioam_cache_main; + ip6_address_t *segments = 0; + ip6_address_t *this_seg = 0; + + /* This node's address and the original dest will be + * filled when the packet is processed; until then both segments + * hold the 0xfe placeholder pattern */ + vec_add2 (segments, this_seg, 2); + memset (this_seg, 0xfe, 2 * sizeof (ip6_address_t)); + cm->sr_rewrite_template = ip6_compute_rewrite_string_insert (segments); + vec_free (segments); +} + +/* Allocates the rewrite pool, initialises the bihash lookup table and builds the SR rewrite template. Always returns 1. */ inline static int +ioam_cache_table_init (vlib_main_t * vm) +{ + ioam_cache_main_t *cm = &ioam_cache_main; + + pool_alloc_aligned (cm->ioam_rewrite_pool, + MAX_CACHE_ENTRIES, CLIB_CACHE_LINE_BYTES); + cm->lookup_table_nbuckets = IOAM_CACHE_TABLE_DEFAULT_HASH_NUM_BUCKETS; + /* force the bucket count to a power of two */ cm->lookup_table_nbuckets = 1 << max_log2 (cm->lookup_table_nbuckets); + cm->lookup_table_size = IOAM_CACHE_TABLE_DEFAULT_HASH_MEMORY_SIZE; + + clib_bihash_init_8_8 (&cm->ioam_rewrite_cache_table, + "ioam rewrite cache table", + cm->lookup_table_nbuckets, cm->lookup_table_size); + /* Create SR rewrite template */ + ioam_cache_sr_rewrite_template_create (); + return (1); +} + +/* Frees the bihash table, every pool entry, the pool itself and the SR rewrite template. Always returns 0. */ inline static int +ioam_cache_table_destroy (vlib_main_t * vm) +{ + ioam_cache_main_t *cm = &ioam_cache_main; + ioam_cache_entry_t *entry = 0; + /* free pool and hash table */ + clib_bihash_free_8_8 (&cm->ioam_rewrite_cache_table); + pool_foreach (entry, cm->ioam_rewrite_pool, ( + { + ioam_cache_entry_free (entry); + })); + pool_free (cm->ioam_rewrite_pool); + cm->ioam_rewrite_pool = 0; + vec_free (cm->sr_rewrite_template); + cm->sr_rewrite_template = 0; + return (0); +} + +/* format function: prints one rewrite cache entry (pool index, addresses, ports, seq_no) followed by its cached hop-by-hop header. NOTE(review): seq_no is printed with %lu here while the tunnel-select formatter uses %u; confirm against the field type. */ inline static u8 * +format_ioam_cache_entry (u8 * s, va_list * args) +{ + ioam_cache_entry_t *e = va_arg (*args, ioam_cache_entry_t *); + ioam_cache_main_t *cm = &ioam_cache_main; + + s = format (s, "%d: %U:%d to %U:%d seq_no %lu\n", + (e - cm->ioam_rewrite_pool), + format_ip6_address, &e->src_address, + e->src_port, + format_ip6_address, &e->dst_address, e->dst_port, e->seq_no); + s = 
format (s, " %U", + format_ip6_hop_by_hop_ext_hdr, + (ip6_hop_by_hop_header_t *) e->ioam_rewrite_string, + vec_len (e->ioam_rewrite_string) - 1); + return s; +} + +void ioam_cache_ts_timer_node_enable (vlib_main_t * vm, u8 enable); + +#define IOAM_CACHE_TS_TIMEOUT 1.0 //SYN timeout 1 sec +#define IOAM_CACHE_TS_TICK 100e-3 +/* Timer delays as multiples of 100ms */ +/* parenthesised so the macro expands safely inside larger expressions */ +#define IOAM_CACHE_TS_TIMEOUT_TICKS (IOAM_CACHE_TS_TICK*9) +#define TIMER_HANDLE_INVALID ((u32) ~0) + + +void expired_cache_ts_timer_callback (u32 * expired_timers); + +/* + * Following functions are to manage M-Anycast server selection + * cache + * There is a per worker thread pool to create a cache entry + * for a TCP SYN received. TCP SYN-ACK contains ioam header + * with HBH_OPTION_TYPE_IOAM_E2E_CACHE_ID option to point to the + * entry. + */ +/* + * Allocates one entry pool, one stats record and one timer wheel per + * thread in vlib_worker_threads and enables the timer node. + * Always returns 1. + */ +inline static int +ioam_cache_ts_table_init (vlib_main_t * vm) +{ + ioam_cache_main_t *cm = &ioam_cache_main; + int no_of_threads = vec_len (vlib_worker_threads); + int i; + + vec_validate_aligned (cm->ioam_ts_pool, no_of_threads - 1, + CLIB_CACHE_LINE_BYTES); + vec_validate_aligned (cm->ts_stats, no_of_threads - 1, + CLIB_CACHE_LINE_BYTES); + vec_validate_aligned (cm->timer_wheels, no_of_threads - 1, + CLIB_CACHE_LINE_BYTES); + cm->lookup_table_nbuckets = IOAM_CACHE_TABLE_DEFAULT_HASH_NUM_BUCKETS; + cm->lookup_table_nbuckets = 1 << max_log2 (cm->lookup_table_nbuckets); + cm->lookup_table_size = IOAM_CACHE_TABLE_DEFAULT_HASH_MEMORY_SIZE; + for (i = 0; i < no_of_threads; i++) + { + pool_alloc_aligned (cm->ioam_ts_pool[i], + MAX_CACHE_TS_ENTRIES, CLIB_CACHE_LINE_BYTES); + memset (&cm->ts_stats[i], 0, sizeof (ioam_cache_ts_pool_stats_t)); + tw_timer_wheel_init_16t_2w_512sl (&cm->timer_wheels[i], + expired_cache_ts_timer_callback, + IOAM_CACHE_TS_TICK + /* timer period 100ms */ , + 10e4); + cm->timer_wheels[i].last_run_time = vlib_time_now (vm); + } + ioam_cache_ts_timer_node_enable (vm, 1); + return (1); +} + +/* Starts the expiry timer of entry on the wheel of its owning pool */ +always_inline void +ioam_cache_ts_timer_set 
(ioam_cache_main_t * cm, + ioam_cache_ts_entry_t * entry, u32 interval) +{ + entry->timer_handle + = tw_timer_start_16t_2w_512sl (&cm->timer_wheels[entry->pool_id], + entry->pool_index, 1, interval); +} + +/* Stops the expiry timer of entry and invalidates its handle */ +always_inline void +ioam_cache_ts_timer_reset (ioam_cache_main_t * cm, + ioam_cache_ts_entry_t * entry) +{ + tw_timer_stop_16t_2w_512sl (&cm->timer_wheels[entry->pool_id], + entry->timer_handle); + entry->timer_handle = TIMER_HANDLE_INVALID; +} + +/* + * Releases a tunnel-select cache entry of thread_id's pool. + * When the entry holds a cached header (entry->hbh != 0) its buffer is + * first handed to node_index in a one-buffer frame. + * A null entry is ignored. + */ +inline static void +ioam_cache_ts_entry_free (u32 thread_id, + ioam_cache_ts_entry_t * entry, u32 node_index) +{ + ioam_cache_main_t *cm = &ioam_cache_main; + vlib_main_t *vm = cm->vlib_main; + vlib_frame_t *nf = 0; + u32 *to_next; + + if (entry) + { + if (entry->hbh != 0) + { + nf = vlib_get_frame_to_node (vm, node_index); + nf->n_vectors = 0; + to_next = vlib_frame_vector_args (nf); + nf->n_vectors = 1; + to_next[0] = entry->buffer_index; + vlib_put_frame_to_node (vm, node_index, nf); + } + /* scrub before returning the element to the pool, never after: + * same order as ioam_cache_entry_free */ + memset (entry, 0, sizeof (*entry)); + pool_put (cm->ioam_ts_pool[thread_id], entry); + cm->ts_stats[thread_id].inuse--; + } +} + +/* + * Frees every per-thread entry (cached buffers go to + * cm->error_node_index), each per-thread pool and timer wheel, and + * finally the vector of pools. Always returns 0. + */ +inline static int +ioam_cache_ts_table_destroy (vlib_main_t * vm) +{ + ioam_cache_main_t *cm = &ioam_cache_main; + ioam_cache_ts_entry_t *entry = 0; + int no_of_threads = vec_len (vlib_worker_threads); + int i; + + /* free pool and hash table */ + for (i = 0; i < no_of_threads; i++) + { + pool_foreach (entry, cm->ioam_ts_pool[i], ( + { + ioam_cache_ts_entry_free (i, + entry, + cm->error_node_index); + } + )); + pool_free (cm->ioam_ts_pool[i]); + /* clear only this thread's slot: the vector of pools is still + * needed by the remaining iterations and by vec_free below */ + cm->ioam_ts_pool[i] = 0; + tw_timer_wheel_free_16t_2w_512sl (&cm->timer_wheels[i]); + } + vec_free (cm->ioam_ts_pool); + return (0); +} + +/* Frees the entry at pool_index of thread_id's pool; a cached buffer goes to cm->error_node_index. Always returns 0. */ +inline static int +ioam_cache_ts_entry_cleanup (u32 thread_id, u32 pool_index) +{ + ioam_cache_main_t *cm = &ioam_cache_main; + ioam_cache_ts_entry_t *entry = 0; + + entry = pool_elt_at_index (cm->ioam_ts_pool[thread_id], pool_index); + ioam_cache_ts_entry_free (thread_id, entry, cm->error_node_index); + return (0); +} + +/* + * 
Caches buffer for ioam SR tunnel select for Anycast service + * Creates the per-thread entry for a request and starts its timer. + * Returns 0 and stores the new index in *pool_index, or -1 when the + * thread's pool already holds MAX_CACHE_TS_ENTRIES entries. + */ +inline static int +ioam_cache_ts_add (ip6_header_t * ip0, + u16 src_port, + u16 dst_port, + u32 seq_no, + u8 max_responses, u64 now, u32 thread_id, u32 * pool_index) +{ + ioam_cache_main_t *cm = &ioam_cache_main; + ioam_cache_ts_entry_t *entry = 0; + + if (cm->ts_stats[thread_id].inuse == MAX_CACHE_TS_ENTRIES) + { + cm->ts_stats[thread_id].add_failed++; + return (-1); + } + + pool_get_aligned (cm->ioam_ts_pool[thread_id], entry, + CLIB_CACHE_LINE_BYTES); + memset (entry, 0, sizeof (*entry)); + *pool_index = entry - cm->ioam_ts_pool[thread_id]; + + clib_memcpy (entry->dst_address.as_u64, ip0->dst_address.as_u64, + sizeof (ip6_address_t)); + clib_memcpy (entry->src_address.as_u64, ip0->src_address.as_u64, + sizeof (ip6_address_t)); + entry->src_port = src_port; + entry->dst_port = dst_port; + entry->seq_no = seq_no; + entry->response_received = 0; + entry->max_responses = max_responses; + entry->created_at = now; + entry->hbh = 0; + entry->buffer_index = 0; + entry->pool_id = thread_id; + entry->pool_index = *pool_index; + /* NOTE(review): IOAM_CACHE_TS_TIMEOUT (1.0) is converted to a u32 + * interval of 1 here; confirm whether the wheel expects ticks */ + ioam_cache_ts_timer_set (cm, entry, IOAM_CACHE_TS_TIMEOUT); + cm->ts_stats[thread_id].inuse++; + return (0); +} + +/* Sends the buffer cached in the entry to cm->ip6_hbh_pop_node_index and frees the entry */ +inline static void +ioam_cache_ts_send (u32 thread_id, i32 pool_index) +{ + ioam_cache_main_t *cm = &ioam_cache_main; + ioam_cache_ts_entry_t *entry = 0; + + entry = pool_elt_at_index (cm->ioam_ts_pool[thread_id], pool_index); + if (!pool_is_free (cm->ioam_ts_pool[thread_id], entry) && entry) + { + /* send and free pool entry */ + ioam_cache_ts_entry_free (thread_id, entry, cm->ip6_hbh_pop_node_index); + } +} + +/* + * Sends the cached response once all expected responses were received + * or IOAM_CACHE_TS_TIMEOUT has passed since the entry was created. + * Does nothing while no response is cached (entry->hbh == 0). + */ +inline static void +ioam_cache_ts_check_and_send (u32 thread_id, i32 pool_index) +{ + ioam_cache_main_t *cm = &ioam_cache_main; + ioam_cache_ts_entry_t *entry = 0; + entry = pool_elt_at_index (cm->ioam_ts_pool[thread_id], pool_index); + if (entry && entry->hbh) + { + if (entry->response_received == entry->max_responses || + entry->created_at + 
IOAM_CACHE_TS_TIMEOUT <= + vlib_time_now (cm->vlib_main)) + { + ioam_cache_ts_timer_reset (cm, entry); + ioam_cache_ts_send (thread_id, pool_index); + } + } +} + +/* + * Replaces the response cached in an entry with buffer_index/hbh; a + * previously cached buffer is sent to cm->error_node_index. + * Returns 0 on success, -1 when the entry is not in use. + */ +inline static int +ioam_cache_ts_update (u32 thread_id, + i32 pool_index, + u32 buffer_index, ip6_hop_by_hop_header_t * hbh) +{ + ioam_cache_main_t *cm = &ioam_cache_main; + ioam_cache_ts_entry_t *entry = 0; + vlib_main_t *vm = cm->vlib_main; + vlib_frame_t *nf = 0; + u32 *to_next; + + entry = pool_elt_at_index (cm->ioam_ts_pool[thread_id], pool_index); + if (!pool_is_free (cm->ioam_ts_pool[thread_id], entry) && entry) + { + /* drop existing buffer */ + if (entry->hbh != 0) + { + nf = vlib_get_frame_to_node (vm, cm->error_node_index); + nf->n_vectors = 0; + to_next = vlib_frame_vector_args (nf); + nf->n_vectors = 1; + to_next[0] = entry->buffer_index; + vlib_put_frame_to_node (vm, cm->error_node_index, nf); + } + /* update */ + entry->buffer_index = buffer_index; + entry->hbh = hbh; + /* check and send */ + ioam_cache_ts_check_and_send (thread_id, pool_index); + return (0); + } + return (-1); +} + +/* + * looks up the entry based on the e2e option pool index + * result = 0 found the entry + * result < 0 indicates failure to find an entry + * *pool_index and *thread_id are written whenever the cache option is + * present; *hbh only on a match. + */ +inline static int +ioam_cache_ts_lookup (ip6_header_t * ip0, + u8 protocol, + u16 src_port, + u16 dst_port, + u32 seq_no, + ip6_hop_by_hop_header_t ** hbh, + u32 * pool_index, u8 * thread_id, u8 response_seen) +{ + ioam_cache_main_t *cm = &ioam_cache_main; + ip6_hop_by_hop_header_t *hbh0 = 0; + ioam_e2e_cache_option_t *e2e = 0; + + hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1); + e2e = + (ioam_e2e_cache_option_t *) ((u8 *) hbh0 + cm->rewrite_pool_index_offset); + if ((u8 *) e2e < ((u8 *) hbh0 + ((hbh0->length + 1) << 3)) + && e2e->hdr.type == HBH_OPTION_TYPE_IOAM_E2E_CACHE_ID) + { + ioam_cache_ts_entry_t *entry = 0; + *pool_index = e2e->pool_index; + *thread_id = e2e->pool_id; + /* pool id and index come from the packet: refuse anything that + * does not name an in-use element of an existing pool */ + if (*thread_id >= vec_len (cm->ioam_ts_pool) || + pool_is_free_index (cm->ioam_ts_pool[*thread_id], *pool_index)) + return (-1); + entry = pool_elt_at_index (cm->ioam_ts_pool[*thread_id], *pool_index); + /* match */ + 
if (entry && + ip6_address_compare (&ip0->src_address, &entry->dst_address) == 0 && + ip6_address_compare (&ip0->dst_address, &entry->src_address) == 0 && + entry->src_port == dst_port && + entry->dst_port == src_port && entry->seq_no == seq_no) + { + *hbh = entry->hbh; + entry->response_received += response_seen; + return (0); + } + else if (entry) + { + return (-1); + } + } + return (-1); +} + +/* format function: prints one tunnel-select cache entry; the e2e ID address is included only while a response header is cached */ +inline static u8 * +format_ioam_cache_ts_entry (u8 * s, va_list * args) +{ + ioam_cache_ts_entry_t *e = va_arg (*args, ioam_cache_ts_entry_t *); + u32 thread_id = va_arg (*args, u32); + ioam_cache_main_t *cm = &ioam_cache_main; + ioam_e2e_id_option_t *e2e = 0; + vlib_main_t *vm = cm->vlib_main; + clib_time_t *ct = &vm->clib_time; + + if (e && e->hbh) + { + e2e = + ip6_ioam_find_hbh_option (e->hbh, + HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE_ID); + + s = + format (s, + "%d: %U:%d to %U:%d seq_no %u buffer %u %U \n\t\tCreated at %U Received %d\n", + (e - cm->ioam_ts_pool[thread_id]), format_ip6_address, + &e->src_address, e->src_port, format_ip6_address, + &e->dst_address, e->dst_port, e->seq_no, e->buffer_index, + format_ip6_address, e2e ? &e2e->id : 0, format_time_interval, + "h:m:s:u", + (e->created_at - + vm->cpu_time_main_loop_start) * ct->seconds_per_clock, + e->response_received); + } + else + { + s = + format (s, + "%d: %U:%d to %U:%d seq_no %u Buffer %u \n\t\tCreated at %U Received %d\n", + (e - cm->ioam_ts_pool[thread_id]), format_ip6_address, + &e->src_address, e->src_port, format_ip6_address, + &e->dst_address, e->dst_port, e->seq_no, e->buffer_index, + format_time_interval, "h:m:s:u", + (e->created_at - + vm->cpu_time_main_loop_start) * ct->seconds_per_clock, + e->response_received); + } + return s; +} + +/* + * Get extended rewrite string for iOAM data in v6 + * This makes space for an e2e options to carry cache pool info + * and manycast server address. 
+ * It sets the rewrite string per configs in ioam ip6 + new option + * for cache along with offset to the option to populate cache + * pool id and index + * The two new options are written directly behind the header built by + * ip6_ioam_set_rewrite. Always returns 0. + */ +static inline int +ip6_ioam_ts_cache_set_rewrite (void) +{ + ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main; + ioam_cache_main_t *cm = &ioam_cache_main; + ip6_hop_by_hop_header_t *hbh; + u32 rewrite_len = 0; + ioam_e2e_cache_option_t *e2e = 0; + ioam_e2e_id_option_t *e2e_id = 0; + + vec_free (cm->rewrite); + ip6_ioam_set_rewrite (&(cm->rewrite), hm->has_trace_option, + hm->has_pot_option, hm->has_seqno_option); + hbh = (ip6_hop_by_hop_header_t *) cm->rewrite; + /* byte length of the header as built so far */ rewrite_len = ((hbh->length + 1) << 3); + vec_validate (cm->rewrite, + rewrite_len - 1 + IOAM_E2E_CACHE_OPTION_RND + + IOAM_E2E_ID_OPTION_RND); + /* reload: vec_validate may have moved cm->rewrite */ hbh = (ip6_hop_by_hop_header_t *) cm->rewrite; + /* setup e2e id option to insert pool id and index of the node caching it */ + hbh->length += IOAM_E2E_CACHE_HBH_EXT_LEN + IOAM_E2E_ID_HBH_EXT_LEN; + /* the cache option starts where the original header ended */ cm->rewrite_pool_index_offset = rewrite_len; + e2e = (ioam_e2e_cache_option_t *) (cm->rewrite + rewrite_len); + e2e->hdr.type = HBH_OPTION_TYPE_IOAM_E2E_CACHE_ID + | HBH_OPTION_TYPE_SKIP_UNKNOWN; + e2e->hdr.length = sizeof (ioam_e2e_cache_option_t) - + sizeof (ip6_hop_by_hop_option_t); + e2e->e2e_type = 2; + /* the e2e ID option directly follows the cache option */ e2e_id = + (ioam_e2e_id_option_t *) ((u8 *) e2e + sizeof (ioam_e2e_cache_option_t)); + e2e_id->hdr.type = + HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE_ID | HBH_OPTION_TYPE_SKIP_UNKNOWN; + e2e_id->hdr.length = + sizeof (ioam_e2e_id_option_t) - sizeof (ip6_hop_by_hop_option_t); + e2e_id->e2e_type = 1; + + return (0); +} + +/* Frees the extended rewrite string and resets the stored option offset. Always returns 0. */ static inline int +ip6_ioam_ts_cache_cleanup_rewrite (void) +{ + ioam_cache_main_t *cm = &ioam_cache_main; + + vec_free (cm->rewrite); + cm->rewrite = 0; + cm->rewrite_pool_index_offset = 0; + return (0); +} +#endif /* __included_ioam_cache_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git 
a/src/plugins/ioam/ip6/ioam_cache_all_api_h.h b/src/plugins/ioam/ip6/ioam_cache_all_api_h.h new file mode 100644 index 00000000000..61272a5187e --- /dev/null +++ b/src/plugins/ioam/ip6/ioam_cache_all_api_h.h @@ -0,0 +1,16 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* Include the generated file, see BUILT_SOURCES in Makefile.am */ +#include diff --git a/src/plugins/ioam/ip6/ioam_cache_msg_enum.h b/src/plugins/ioam/ip6/ioam_cache_msg_enum.h new file mode 100644 index 00000000000..8afd067b808 --- /dev/null +++ b/src/plugins/ioam/ip6/ioam_cache_msg_enum.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef included_ioam_cache_msg_enum_h +#define included_ioam_cache_msg_enum_h + +#include + +#define vl_msg_id(n,h) n, +typedef enum { +#include + /* We'll want to know how many messages IDs we need... 
*/ + VL_MSG_FIRST_AVAILABLE, +} vl_msg_id_t; +#undef vl_msg_id + +#endif /* included_ioam_cache_msg_enum_h */ diff --git a/src/plugins/ioam/ip6/ioam_cache_node.c b/src/plugins/ioam/ip6/ioam_cache_node.c new file mode 100644 index 00000000000..6c8a038a4c1 --- /dev/null +++ b/src/plugins/ioam/ip6/ioam_cache_node.c @@ -0,0 +1,424 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * This file implements caching of ioam header and reattaching + * it in response message by performing request-response matching. + * Works for TCP SYN/SYN-ACK. + * This feature is used for anycast server selection. + * ioam data thus cached is used to measure and get complete round trip + * network path to help in server selection. + * There are 2 graph nodes defined to : + * 1. process packets that contain iOAM header and cache it + * 2. process TCP SYN-ACKs and reattach ioam header from the + * cache corresponding to TCP-SYN + * These graph nodes are attached to the vnet graph based on + * ioam cache and classifier configs. + * e.g. 
+ * If db06::06 is the anycast service IP6 address: + * + * set ioam ip6 cache + * + * Apply this classifier on interface where requests for anycast service are received: + * classify session acl-hit-next ip6-node ip6-lookup table-index 0 match l3 ip6 dst db06::06 + * ioam-decap anycast <<< ioam-decap is hooked to cache when set ioam ip6 cache is enabled + * + * Apply this classifier on interface where responses from anycast service are received: + * classify session acl-hit-next ip6-node ip6-add-from-cache-hop-by-hop table-index 0 match l3 + * ip6 src db06::06 ioam-encap anycast-response + * + */ +#include +#include +#include +#include +#include +#include +#include +#include + +typedef struct +{ + u32 next_index; + u32 flow_label; +} cache_trace_t; + +/* packet trace format function */ +static u8 * +format_cache_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + cache_trace_t *t = va_arg (*args, cache_trace_t *); + + s = format (s, "CACHE: flow_label %d, next index %d", + t->flow_label, t->next_index); + return s; +} + +vlib_node_registration_t ioam_cache_node; + +#define foreach_cache_error \ +_(RECORDED, "ip6 iOAM headers cached") + +typedef enum +{ +#define _(sym,str) CACHE_ERROR_##sym, + foreach_cache_error +#undef _ + CACHE_N_ERROR, +} cache_error_t; + +static char *cache_error_strings[] = { +#define _(sym,string) string, + foreach_cache_error +#undef _ +}; + +typedef enum +{ + IOAM_CACHE_NEXT_POP_HBYH, + IOAM_CACHE_N_NEXT, +} cache_next_t; + +/* Graph node function: for every TCP SYN without ACK in the frame, calls ioam_cache_add on the header that directly follows the IP6 header and counts successful adds in CACHE_ERROR_RECORDED. next0 is never changed, so every packet goes to IOAM_CACHE_NEXT_POP_HBYH. NOTE(review): ip0->protocol is not checked before the bytes after the IP6 header are treated as a hop-by-hop header; presumably the classifier guarantees it, confirm. */ static uword +ip6_ioam_cache_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, *from, *to_next; + cache_next_t next_index; + u32 recorded = 0; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, 
next_index, to_next, n_left_to_next); + // TODO: Dual loop + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *p0; + u32 next0 = IOAM_CACHE_NEXT_POP_HBYH; + ip6_header_t *ip0; + ip6_hop_by_hop_header_t *hbh0; + tcp_header_t *tcp0; + u32 tcp_offset0; + + /* speculatively enqueue p0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer (vm, bi0); + ip0 = vlib_buffer_get_current (p0); + if (IP_PROTOCOL_TCP == + ip6_locate_header (p0, ip0, IP_PROTOCOL_TCP, &tcp_offset0)) + { + tcp0 = (tcp_header_t *) ((u8 *) ip0 + tcp_offset0); + if ((tcp0->flags & TCP_FLAG_SYN) == TCP_FLAG_SYN && + (tcp0->flags & TCP_FLAG_ACK) == 0) + { + /* Cache the ioam hbh header; the entry is keyed with the SYN sequence number plus one, which the response node matches against the ack number */ + hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1); + if (0 == ioam_cache_add (p0, + ip0, + clib_net_to_host_u16 + (tcp0->src_port), + clib_net_to_host_u16 + (tcp0->dst_port), hbh0, + clib_net_to_host_u32 + (tcp0->seq_number) + 1)) + { + recorded++; + } + } + } + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (p0->flags & VLIB_BUFFER_IS_TRACED) + { + cache_trace_t *t = + vlib_add_trace (vm, node, p0, sizeof (*t)); + t->flow_label = + clib_net_to_host_u32 + (ip0->ip_version_traffic_class_and_flow_label); + t->next_index = next0; + } + } + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, ioam_cache_node.index, + CACHE_ERROR_RECORDED, recorded); + return frame->n_vectors; +} + +/* + * Node for IP6 iOAM header cache + */ +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ioam_cache_node) = +{ + .function = ip6_ioam_cache_node_fn, + .name = "ip6-ioam-cache", + .vector_size = sizeof (u32), + .format_trace = format_cache_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + 
.n_errors = ARRAY_LEN (cache_error_strings), + .error_strings = cache_error_strings, + .n_next_nodes = IOAM_CACHE_N_NEXT, + /* edit / add dispositions here */ + .next_nodes = + { + [IOAM_CACHE_NEXT_POP_HBYH] = "ip6-pop-hop-by-hop" + }, +}; +/* *INDENT-ON* */ + +typedef struct +{ + u32 next_index; +} ip6_add_from_cache_hbh_trace_t; + +/* packet trace format function */ +static u8 * +format_ip6_add_from_cache_hbh_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + ip6_add_from_cache_hbh_trace_t *t = va_arg (*args, + ip6_add_from_cache_hbh_trace_t + *); + + s = format (s, "IP6_ADD_FROM_CACHE_HBH: next index %d", t->next_index); + return s; +} + +vlib_node_registration_t ip6_add_from_cache_hbh_node; + +#define foreach_ip6_add_from_cache_hbh_error \ +_(PROCESSED, "Pkts w/ added ip6 hop-by-hop options") + +typedef enum +{ +#define _(sym,str) IP6_ADD_FROM_CACHE_HBH_ERROR_##sym, + foreach_ip6_add_from_cache_hbh_error +#undef _ + IP6_ADD_FROM_CACHE_HBH_N_ERROR, +} ip6_add_from_cache_hbh_error_t; + +static char *ip6_add_from_cache_hbh_error_strings[] = { +#define _(sym,string) string, + foreach_ip6_add_from_cache_hbh_error +#undef _ +}; + +#define foreach_ip6_ioam_cache_input_next \ + _(IP6_LOOKUP, "ip6-lookup") \ + _(DROP, "error-drop") + +typedef enum +{ +#define _(s,n) IP6_IOAM_CACHE_INPUT_NEXT_##s, + foreach_ip6_ioam_cache_input_next +#undef _ + IP6_IOAM_CACHE_INPUT_N_NEXT, +} ip6_ioam_cache_input_next_t; + + +/* Graph node function: for TCP SYN-ACK or RST packets, looks up the entry cached for the matching request (keyed on the ack number). On a hit the cached hop-by-hop header followed by the SR rewrite template is inserted after the IP6 header and the cache entry is freed; on a miss the packet goes to error-drop. All other packets continue to ip6-lookup unmodified. */ static uword +ip6_add_from_cache_hbh_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + ioam_cache_main_t *cm = &ioam_cache_main; + u32 n_left_from, *from, *to_next; + ip_lookup_next_t next_index; + u32 processed = 0; + u8 *rewrite = 0; + u32 rewrite_len = 0; + u32 sr_rewrite_len = vec_len (cm->sr_rewrite_template); + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = 
node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + // TODO: Dual loop + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0; + ip6_header_t *ip0; + ip6_hop_by_hop_header_t *hbh0; + ip6_sr_header_t *srh0 = 0; + u64 *copy_src0, *copy_dst0; + u16 new_l0; + tcp_header_t *tcp0; + u32 tcp_offset0; + ioam_cache_entry_t *entry = 0; + + next0 = IP6_IOAM_CACHE_INPUT_NEXT_IP6_LOOKUP; + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + ip0 = vlib_buffer_get_current (b0); + if (IP_PROTOCOL_TCP != + ip6_locate_header (b0, ip0, IP_PROTOCOL_TCP, &tcp_offset0)) + { + goto TRACE0; + } + tcp0 = (tcp_header_t *) ((u8 *) ip0 + tcp_offset0); + if (((tcp0->flags & TCP_FLAG_SYN) == TCP_FLAG_SYN && + (tcp0->flags & TCP_FLAG_ACK) == TCP_FLAG_ACK) || + (tcp0->flags & TCP_FLAG_RST) == TCP_FLAG_RST) + { + if (0 != (entry = ioam_cache_lookup (ip0, + clib_net_to_host_u16 + (tcp0->src_port), + clib_net_to_host_u16 + (tcp0->dst_port), + clib_net_to_host_u32 + (tcp0->ack_number)))) + { + rewrite = entry->ioam_rewrite_string; + rewrite_len = vec_len (rewrite); + } + else + { + next0 = IP6_IOAM_CACHE_INPUT_NEXT_DROP; + goto TRACE0; + } + } + else + goto TRACE0; + + + /* Copy the ip header left by the required amount (five u64 words are moved) */ + copy_dst0 = (u64 *) (((u8 *) ip0) - (rewrite_len + sr_rewrite_len)); + copy_src0 = (u64 *) ip0; + + copy_dst0[0] = copy_src0[0]; + copy_dst0[1] = copy_src0[1]; + copy_dst0[2] = copy_src0[2]; + copy_dst0[3] = copy_src0[3]; + copy_dst0[4] = copy_src0[4]; + vlib_buffer_advance (b0, -(word) (rewrite_len + sr_rewrite_len)); + ip0 = vlib_buffer_get_current (b0); + + hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1); + srh0 = (ip6_sr_header_t *) ((u8 *) hbh0 + rewrite_len); + /* $$$ tune, rewrite_len 
is a multiple of 8 */ + clib_memcpy (hbh0, rewrite, rewrite_len); + clib_memcpy (srh0, cm->sr_rewrite_template, sr_rewrite_len); + /* Copy dst address into the DA slot in the segment list */ + clib_memcpy (srh0->segments, ip0->dst_address.as_u64, + sizeof (ip6_address_t)); + /* Rewrite the ip6 dst address with the first hop */ + clib_memcpy (ip0->dst_address.as_u64, entry->next_hop.as_u64, + sizeof (ip6_address_t)); + /* second segment: the address stored at my_address_offset inside the cached header */ clib_memcpy (&srh0->segments[1], + (u8 *) hbh0 + entry->my_address_offset, + sizeof (ip6_address_t)); + srh0->segments_left--; + /* the cached header has been copied into the packet, the entry is no longer needed */ ioam_cache_entry_free (entry); + + /* Patch the protocol chain, insert the h-b-h (type 0) header */ + srh0->protocol = ip0->protocol; + hbh0->protocol = IPPROTO_IPV6_ROUTE; + ip0->protocol = 0; + new_l0 = + clib_net_to_host_u16 (ip0->payload_length) + rewrite_len + + sr_rewrite_len; + ip0->payload_length = clib_host_to_net_u16 (new_l0); + processed++; + TRACE0: + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + ip6_add_from_cache_hbh_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->next_index = next0; + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, ip6_add_from_cache_hbh_node.index, + IP6_ADD_FROM_CACHE_HBH_ERROR_PROCESSED, + processed); + return frame->n_vectors; +} +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip6_add_from_cache_hbh_node) = +{ + .function = ip6_add_from_cache_hbh_node_fn, + .name = "ip6-add-from-cache-hop-by-hop", + .vector_size = sizeof (u32), + .format_trace = format_ip6_add_from_cache_hbh_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN (ip6_add_from_cache_hbh_error_strings), + .error_strings = ip6_add_from_cache_hbh_error_strings, + /* See ip/lookup.h */ + .n_next_nodes = 
IP6_IOAM_CACHE_INPUT_N_NEXT, + .next_nodes = + { +#define _(s,n) [IP6_IOAM_CACHE_INPUT_NEXT_##s] = n, + foreach_ip6_ioam_cache_input_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (ip6_add_from_cache_hbh_node, + ip6_add_from_cache_hbh_node_fn) +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c b/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c new file mode 100644 index 00000000000..3df9871e5f6 --- /dev/null +++ b/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c @@ -0,0 +1,770 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ioam_cache_tunnel_select_node.c + * This file implements anycast server selection using ioam data + * attached to anycast service selection. + * Anycast service is reachable via multiple servers reachable + * over SR tunnels. + * Works with TCP Anycast application. + * Cache entry is created when TCP SYN is received for anycast destination. + * Response TCP SYN ACKs for anycast service is compared and selected + * response is forwarded. 
+ * The functionality is introduced via graph nodes that are hooked into + * vnet graph via classifier configs like below: + * + * Enable anycast service selection: + * set ioam ip6 sr-tunnel-select oneway + * + * Enable following classifier on the anycast service client facing interface + * e.g. anycast service is db06::06 then: + * classify session acl-hit-next ip6-node ip6-add-syn-hop-by-hop table-index 0 match l3 + * ip6 dst db06::06 ioam-encap anycast + * + * Enable following classifier on the interfaces facing the server of anycast service: + * classify session acl-hit-next ip6-node ip6-lookup table-index 0 match l3 + * ip6 src db06::06 ioam-decap anycast + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef struct +{ + u32 next_index; + u32 flow_label; +} cache_ts_trace_t; + +/* packet trace format function */ +static u8 * +format_cache_ts_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + cache_ts_trace_t *t = va_arg (*args, cache_ts_trace_t *); + + s = format (s, "CACHE: flow_label %d, next index %d", + t->flow_label, t->next_index); + return s; +} + +vlib_node_registration_t ioam_cache_ts_node; + +#define foreach_cache_ts_error \ +_(RECORDED, "ip6 iOAM headers cached") + +typedef enum +{ +#define _(sym,str) CACHE_TS_ERROR_##sym, + foreach_cache_ts_error +#undef _ + CACHE_TS_N_ERROR, +} cache_ts_error_t; + +static char *cache_ts_error_strings[] = { +#define _(sym,string) string, + foreach_cache_ts_error +#undef _ +}; + +typedef enum +{ + IOAM_CACHE_TS_NEXT_POP_HBYH, + IOAM_CACHE_TS_ERROR_NEXT_DROP, + IOAM_CACHE_TS_N_NEXT, +} cache_ts_next_t; + +/* Graph node function for responses coming back from the servers. For a TCP SYN-ACK that matches a cache entry, the packet's hop-by-hop header is compared with the cached one (if any): when ip6_ioam_analyse_compare_path_delay returns a value >= 0 the packet is dropped, otherwise the buffer is stored in the entry and not enqueued (skip). SYN-ACKs without a matching entry are dropped; a matching RST is dropped and may trigger sending of the cached response. All other packets go to ip6-pop-hop-by-hop. NOTE(review): recorded is never incremented in this function, so CACHE_TS_ERROR_RECORDED is always bumped by 0. */ static uword +ip6_ioam_cache_ts_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + ioam_cache_main_t *cm = &ioam_cache_main; + u32 n_left_from, *from, *to_next; + cache_ts_next_t next_index; + u32 recorded = 0; 
+ + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + // TODO: dual loop + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *p0; + u32 next0 = IOAM_CACHE_TS_NEXT_POP_HBYH; + ip6_header_t *ip0; + ip6_hop_by_hop_header_t *hbh0, *hbh_cmp; + tcp_header_t *tcp0; + u32 tcp_offset0; + u32 cache_ts_index = 0; + u8 cache_thread_id = 0; + int result = 0; + int skip = 0; + + /* no speculative enqueue here: the buffer is only placed in to_next further down when skip stays 0 */ bi0 = from[0]; + from += 1; + n_left_from -= 1; + + p0 = vlib_get_buffer (vm, bi0); + ip0 = vlib_buffer_get_current (p0); + if (IP_PROTOCOL_TCP == + ip6_locate_header (p0, ip0, IP_PROTOCOL_TCP, &tcp_offset0)) + { + tcp0 = (tcp_header_t *) ((u8 *) ip0 + tcp_offset0); + if ((tcp0->flags & TCP_FLAG_SYN) == TCP_FLAG_SYN && + (tcp0->flags & TCP_FLAG_ACK) == TCP_FLAG_ACK) + { + /* Look up and compare */ + hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1); + + if (0 == ioam_cache_ts_lookup (ip0, + hbh0->protocol, + clib_net_to_host_u16 + (tcp0->src_port), + clib_net_to_host_u16 + (tcp0->dst_port), + clib_net_to_host_u32 + (tcp0->ack_number), &hbh_cmp, + &cache_ts_index, + &cache_thread_id, 1)) + { + /* response seen; result stays -1 when nothing is cached yet, so the first response is always stored */ + result = -1; + if (hbh_cmp) + result = + ip6_ioam_analyse_compare_path_delay (hbh0, hbh_cmp, + cm->criteria_oneway); + if (result >= 0) + { + /* current syn/ack is worse than the earlier: Drop */ + next0 = IOAM_CACHE_TS_ERROR_NEXT_DROP; + /* Check if all responses are received or time has exceeded + send cached response if yes */ + ioam_cache_ts_check_and_send (cache_thread_id, + cache_ts_index); + } + else + { + /* Update cache with this buffer */ + /* If successfully updated then skip sending it */ + if (0 == + (result = + ioam_cache_ts_update (cache_thread_id, + cache_ts_index, bi0, + hbh0))) + { + skip = 1; + } + else + next0 = IOAM_CACHE_TS_ERROR_NEXT_DROP; + } + } + else + { + 
next0 = IOAM_CACHE_TS_ERROR_NEXT_DROP; + } + } + else if ((tcp0->flags & TCP_FLAG_RST) == TCP_FLAG_RST) + { + /* Look up and compare */ + hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1); + if (0 == ioam_cache_ts_lookup (ip0, hbh0->protocol, clib_net_to_host_u16 (tcp0->src_port), clib_net_to_host_u16 (tcp0->dst_port), clib_net_to_host_u32 (tcp0->ack_number), &hbh_cmp, &cache_ts_index, &cache_thread_id, 1)) //response seen + { + next0 = IOAM_CACHE_TS_ERROR_NEXT_DROP; + if (hbh_cmp) + ioam_cache_ts_check_and_send (cache_thread_id, + cache_ts_index); + } + + } + } + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (p0->flags & VLIB_BUFFER_IS_TRACED) + { + cache_ts_trace_t *t = + vlib_add_trace (vm, node, p0, sizeof (*t)); + t->flow_label = + clib_net_to_host_u32 + (ip0->ip_version_traffic_class_and_flow_label); + t->next_index = next0; + } + } + /* verify speculative enqueue, maybe switch current next frame; buffers now held by the cache (skip) are not enqueued at all */ + if (!skip) + { + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, ioam_cache_ts_node.index, + CACHE_TS_ERROR_RECORDED, recorded); + return frame->n_vectors; +} + +/* + * Node for IP6 iOAM header cache + */ +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ioam_cache_ts_node) = +{ + .function = ip6_ioam_cache_ts_node_fn, + .name = "ip6-ioam-tunnel-select", + .vector_size = sizeof (u32), + .format_trace = format_cache_ts_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN (cache_ts_error_strings), + .error_strings = cache_ts_error_strings, + .n_next_nodes = IOAM_CACHE_TS_N_NEXT, + /* edit / add dispositions here */ + .next_nodes = + { + [IOAM_CACHE_TS_NEXT_POP_HBYH] = "ip6-pop-hop-by-hop", + [IOAM_CACHE_TS_ERROR_NEXT_DROP] = "error-drop", + }, +}; +/* *INDENT-ON* */ + +typedef struct +{ + u32 next_index; +} 
ip6_reset_ts_hbh_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_ip6_reset_ts_hbh_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  ip6_reset_ts_hbh_trace_t *t = va_arg (*args,
+                                        ip6_reset_ts_hbh_trace_t *);
+
+  s =
+    format (s, "IP6_IOAM_RESET_TUNNEL_SELECT_HBH: next index %d",
+            t->next_index);
+  return s;
+}
+
+vlib_node_registration_t ip6_reset_ts_hbh_node;
+
+#define foreach_ip6_reset_ts_hbh_error \
+_(PROCESSED, "iOAM Syn/Ack Pkts processed") \
+_(SAVED, "iOAM Syn Pkts state saved") \
+_(REMOVED, "iOAM Syn/Ack Pkts state removed")
+
+typedef enum
+{
+#define _(sym,str) IP6_RESET_TS_HBH_ERROR_##sym,
+  foreach_ip6_reset_ts_hbh_error
+#undef _
+    IP6_RESET_TS_HBH_N_ERROR,
+} ip6_reset_ts_hbh_error_t;
+
+static char *ip6_reset_ts_hbh_error_strings[] = {
+#define _(sym,string) string,
+  foreach_ip6_reset_ts_hbh_error
+#undef _
+};
+
+#define foreach_ip6_ioam_cache_ts_input_next \
+  _(IP6_LOOKUP, "ip6-lookup") \
+  _(DROP, "error-drop")
+
+typedef enum
+{
+#define _(s,n) IP6_IOAM_CACHE_TS_INPUT_NEXT_##s,
+  foreach_ip6_ioam_cache_ts_input_next
+#undef _
+    IP6_IOAM_CACHE_TS_INPUT_N_NEXT,
+} ip6_ioam_cache_ts_input_next_t;
+
+
+/*
+ * Per-buffer worker for ip6-add-syn-hop-by-hop.
+ *
+ * Does nothing unless the buffer carries a TCP segment with SYN set and
+ * ACK clear.  For such a SYN it:
+ *   1. asks ioam_cache_ts_add() for a tunnel-select cache entry when
+ *      no_of_responses > 0 (bumping *cache_ts_added on success),
+ *   2. slides the 40-byte IPv6 header back by rewrite_length bytes and
+ *      copies the pre-built hop-by-hop rewrite into the gap,
+ *   3. stamps the e2e cache option with this thread's id and the pool
+ *      index, lets ioam_e2e_id_rewrite_handler() fill the option that
+ *      follows it,
+ *   4. patches the next-header chain and the IPv6 payload length,
+ *   5. bumps *processed.
+ *
+ * NOTE(review): assumes the buffer has at least rewrite_length bytes of
+ * headroom in front of the IPv6 header -- nothing here verifies it.
+ */
+always_inline void
+ip6_reset_ts_hbh_rewrite_syn (vlib_main_t * vm, ioam_cache_main_t * cm,
+                              vlib_buffer_t * b0, u8 * rewrite,
+                              u32 rewrite_length, u8 no_of_responses,
+                              u64 now, u32 * processed, u32 * cache_ts_added)
+{
+  ip6_header_t *ip0 = vlib_buffer_get_current (b0);
+  ip6_hop_by_hop_header_t *hbh0;
+  ioam_e2e_cache_option_t *e2e;
+  tcp_header_t *tcp0;
+  u64 *copy_src0, *copy_dst0;
+  u32 tcp_offset0;
+  u32 pool_index0 = 0;
+  u16 new_l0;
+
+  if (IP_PROTOCOL_TCP !=
+      ip6_locate_header (b0, ip0, IP_PROTOCOL_TCP, &tcp_offset0))
+    return;
+
+  tcp0 = (tcp_header_t *) ((u8 *) ip0 + tcp_offset0);
+  /* Only a pure SYN (no ACK) starts a new selection */
+  if ((tcp0->flags & TCP_FLAG_SYN) != TCP_FLAG_SYN ||
+      (tcp0->flags & TCP_FLAG_ACK) != 0)
+    return;
+
+  if (no_of_responses > 0)
+    {
+      /* Create TS select entry */
+      if (0 == ioam_cache_ts_add (ip0,
+                                  clib_net_to_host_u16 (tcp0->src_port),
+                                  clib_net_to_host_u16 (tcp0->dst_port),
+                                  clib_net_to_host_u32 (tcp0->seq_number) + 1,
+                                  no_of_responses, now,
+                                  vm->cpu_index, &pool_index0))
+        {
+          (*cache_ts_added)++;
+        }
+    }
+
+  /* Move the fixed IPv6 header (5 x 8 bytes) back to open a gap */
+  copy_dst0 = (u64 *) (((u8 *) ip0) - rewrite_length);
+  copy_src0 = (u64 *) ip0;
+
+  copy_dst0[0] = copy_src0[0];
+  copy_dst0[1] = copy_src0[1];
+  copy_dst0[2] = copy_src0[2];
+  copy_dst0[3] = copy_src0[3];
+  copy_dst0[4] = copy_src0[4];
+
+  vlib_buffer_advance (b0, -(word) rewrite_length);
+  ip0 = vlib_buffer_get_current (b0);
+
+  hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
+  /* $$$ tune, rewrite_length is a multiple of 8 */
+  clib_memcpy (hbh0, rewrite, rewrite_length);
+  e2e =
+    (ioam_e2e_cache_option_t *) ((u8 *) hbh0 +
+                                 cm->rewrite_pool_index_offset);
+  e2e->pool_id = (u8) vm->cpu_index;
+  e2e->pool_index = pool_index0;
+  ioam_e2e_id_rewrite_handler ((ioam_e2e_id_option_t *)
+                               ((u8 *) e2e +
+                                sizeof (ioam_e2e_cache_option_t)), b0);
+  /* Patch the protocol chain, insert the h-b-h (type 0) header */
+  hbh0->protocol = ip0->protocol;
+  ip0->protocol = 0;
+  new_l0 = clib_net_to_host_u16 (ip0->payload_length) + rewrite_length;
+  ip0->payload_length = clib_host_to_net_u16 (new_l0);
+  (*processed)++;
+}
+
+/*
+ * Node function for "ip6-add-syn-hop-by-hop".
+ *
+ * Every buffer is forwarded to ip6-lookup; TCP SYN packets additionally
+ * get the iOAM hop-by-hop rewrite inserted and (optionally) a tunnel-select
+ * cache entry created, see ip6_reset_ts_hbh_rewrite_syn().  The PROCESSED
+ * and SAVED node counters are updated once per frame.
+ *
+ * Returns the number of buffers in the input frame.
+ */
+static uword
+ip6_reset_ts_hbh_node_fn (vlib_main_t * vm,
+                          vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  ioam_cache_main_t *cm = &ioam_cache_main;
+  u32 n_left_from, *from, *to_next;
+  ip_lookup_next_t next_index;
+  u32 processed = 0, cache_ts_added = 0;
+  u64 now;
+  u8 *rewrite = cm->rewrite;
+  u32 rewrite_length = vec_len (rewrite);
+  u8 no_of_responses = cm->wait_for_responses;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      now = vlib_time_now (vm);
+      while (n_left_from >= 4 && n_left_to_next >= 2)
+        {
+          u32 bi0, bi1;
+          vlib_buffer_t *b0, *b1;
+          u32 next0, next1;
+
+          next0 = next1 = IP6_IOAM_CACHE_TS_INPUT_NEXT_IP6_LOOKUP;
+          /* Prefetch next iteration. */
+          {
+            vlib_buffer_t *p2, *p3;
+
+            p2 = vlib_get_buffer (vm, from[2]);
+            p3 = vlib_get_buffer (vm, from[3]);
+
+            vlib_prefetch_buffer_header (p2, LOAD);
+            vlib_prefetch_buffer_header (p3, LOAD);
+            CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+            CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+          }
+
+
+          /* speculatively enqueue b0 and b1 to the current next frame */
+          to_next[0] = bi0 = from[0];
+          to_next[1] = bi1 = from[1];
+          from += 2;
+          to_next += 2;
+          n_left_from -= 2;
+          n_left_to_next -= 2;
+
+          b0 = vlib_get_buffer (vm, bi0);
+          b1 = vlib_get_buffer (vm, bi1);
+
+          ip6_reset_ts_hbh_rewrite_syn (vm, cm, b0, rewrite, rewrite_length,
+                                        no_of_responses, now,
+                                        &processed, &cache_ts_added);
+          ip6_reset_ts_hbh_rewrite_syn (vm, cm, b1, rewrite, rewrite_length,
+                                        no_of_responses, now,
+                                        &processed, &cache_ts_added);
+
+          if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+            {
+              if (b0->flags & VLIB_BUFFER_IS_TRACED)
+                {
+                  ip6_reset_ts_hbh_trace_t *t =
+                    vlib_add_trace (vm, node, b0, sizeof (*t));
+                  t->next_index = next0;
+                }
+              if (b1->flags & VLIB_BUFFER_IS_TRACED)
+                {
+                  ip6_reset_ts_hbh_trace_t *t =
+                    vlib_add_trace (vm, node, b1, sizeof (*t));
+                  t->next_index = next1;
+                }
+
+            }
+
+          /* verify speculative enqueue, maybe switch current next frame */
+          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           bi0, bi1, next0, next1);
+        }
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          u32 bi0;
+          vlib_buffer_t *b0;
+          u32 next0;
+
+          next0 = IP6_IOAM_CACHE_TS_INPUT_NEXT_IP6_LOOKUP;
+          /* speculatively enqueue b0 to the current next frame */
+          bi0 = from[0];
+          to_next[0] = bi0;
+          from += 1;
+          to_next += 1;
+          n_left_from -= 1;
+          n_left_to_next -= 1;
+
+          b0 = vlib_get_buffer (vm, bi0);
+
+          ip6_reset_ts_hbh_rewrite_syn (vm, cm, b0, rewrite, rewrite_length,
+                                        no_of_responses, now,
+                                        &processed, &cache_ts_added);
+
+          if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+            {
+              ip6_reset_ts_hbh_trace_t *t =
+                vlib_add_trace (vm, node, b0, sizeof (*t));
+              t->next_index = next0;
+            }
+
+          /* verify speculative enqueue, maybe switch current next frame */
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           bi0, next0);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  vlib_node_increment_counter (vm, ip6_reset_ts_hbh_node.index,
+                               IP6_RESET_TS_HBH_ERROR_PROCESSED, processed);
+  vlib_node_increment_counter (vm, ip6_reset_ts_hbh_node.index,
+                               IP6_RESET_TS_HBH_ERROR_SAVED, cache_ts_added);
+
+  return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_reset_ts_hbh_node) =
+{
+  .function = ip6_reset_ts_hbh_node_fn,
+  .name = "ip6-add-syn-hop-by-hop",
+  .vector_size = sizeof (u32),
+  .format_trace = format_ip6_reset_ts_hbh_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = ARRAY_LEN (ip6_reset_ts_hbh_error_strings),
+  .error_strings = ip6_reset_ts_hbh_error_strings,
+  /* See ip/lookup.h */
+  .n_next_nodes = IP6_IOAM_CACHE_TS_INPUT_N_NEXT,
+  .next_nodes =
+  {
+#define _(s,n) [IP6_IOAM_CACHE_TS_INPUT_NEXT_##s] = n,
+    foreach_ip6_ioam_cache_ts_input_next
+#undef _
+  },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_reset_ts_hbh_node, ip6_reset_ts_hbh_node_fn)
+/* *INDENT-ON* */
+
+vlib_node_registration_t ioam_cache_ts_timer_tick_node;
+
+/* Trace record of the timer tick node */
+typedef struct
+{
+  u32 thread_index;
+} ioam_cache_ts_timer_tick_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_ioam_cache_ts_timer_tick_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  ioam_cache_ts_timer_tick_trace_t *t =
+    va_arg (*args, ioam_cache_ts_timer_tick_trace_t *);
+
+  s = format (s, "IOAM_CACHE_TS_TIMER_TICK: thread index %d",
+              t->thread_index);
+  return s;
+}
+
+#define foreach_ioam_cache_ts_timer_tick_error \
+  _(TIMER, "Timer events")
+
+typedef enum
+{
+#define _(sym,str) IOAM_CACHE_TS_TIMER_TICK_ERROR_##sym,
+  foreach_ioam_cache_ts_timer_tick_error
+#undef _
+    IOAM_CACHE_TS_TIMER_TICK_N_ERROR,
+} ioam_cache_ts_timer_tick_error_t;
+
+static char *ioam_cache_ts_timer_tick_error_strings[] = {
+#define _(sym,string) string,
+  foreach_ioam_cache_ts_timer_tick_error
+#undef _
+};
+
+/* Switch the timer tick input node between polling (enable != 0) and
+   disabled (enable == 0). */
+void
+ioam_cache_ts_timer_node_enable (vlib_main_t * vm, u8 enable)
+{
+  vlib_node_set_state (vm, ioam_cache_ts_timer_tick_node.index,
+                       enable ==
+                       0 ?
VLIB_NODE_STATE_DISABLED :
+                       VLIB_NODE_STATE_POLLING);
+}
+
+/*
+ * Timer wheel expiry callback.
+ *
+ * expired_timers is a vector of timer handles; the low 28 bits of each
+ * handle are used as the cache pool index.  Every expired entry is handed
+ * to ioam_cache_ts_send() for the calling thread, and the TIMER counter of
+ * the tick node is bumped by the number of handles processed.
+ */
+void
+expired_cache_ts_timer_callback (u32 * expired_timers)
+{
+  ioam_cache_main_t *cm = &ioam_cache_main;
+  u32 i;
+  u32 pool_index;
+  u32 thread_index = os_get_cpu_number ();
+  u32 count = 0;
+
+  for (i = 0; i < vec_len (expired_timers); i++)
+    {
+      /* Extract the pool index from the timer handle */
+      pool_index = expired_timers[i] & 0x0FFFFFFF;
+
+      /* Handle expiration */
+      ioam_cache_ts_send (thread_index, pool_index);
+      count++;
+    }
+  vlib_node_increment_counter (cm->vlib_main,
+                               ioam_cache_ts_timer_tick_node.index,
+                               IOAM_CACHE_TS_TIMER_TICK_ERROR_TIMER, count);
+}
+
+/*
+ * Input node function driving the per-thread timer wheel.
+ *
+ * Expires due timers on this thread's wheel, then sleeps for
+ * IOAM_CACHE_TS_TICK milliseconds, resuming the sleep with the remaining
+ * time whenever nanosleep() reports failure.  Always returns 0.
+ *
+ * tsrem is zero-initialised: nanosleep() only fills it in when the sleep
+ * was interrupted, so on any other failure the retry would otherwise copy
+ * an indeterminate value into ts.  With the zero default such a failure
+ * degenerates into a zero-length sleep and the loop terminates.
+ *
+ * NOTE(review): this blocks the calling thread inside a graph node for the
+ * whole tick -- confirm that is acceptable on the threads it runs on.
+ */
+static uword
+ioam_cache_ts_timer_tick_node_fn (vlib_main_t * vm,
+                                  vlib_node_runtime_t * node,
+                                  vlib_frame_t * f)
+{
+  ioam_cache_main_t *cm = &ioam_cache_main;
+  u32 my_thread_index = os_get_cpu_number ();
+  struct timespec ts, tsrem = { 0, 0 };
+
+  tw_timer_expire_timers_16t_2w_512sl (&cm->timer_wheels[my_thread_index],
+                                       vlib_time_now (vm));
+  ts.tv_sec = 0;
+  ts.tv_nsec = 1000 * 1000 * IOAM_CACHE_TS_TICK;
+  while (nanosleep (&ts, &tsrem) < 0)
+    {
+      ts = tsrem;
+    }
+
+  return 0;
+}
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ioam_cache_ts_timer_tick_node) = {
+  .function = ioam_cache_ts_timer_tick_node_fn,
+  .name = "ioam-cache-ts-timer-tick",
+  .format_trace = format_ioam_cache_ts_timer_tick_trace,
+  .type = VLIB_NODE_TYPE_INPUT,
+
+  .n_errors = ARRAY_LEN(ioam_cache_ts_timer_tick_error_strings),
+  .error_strings = ioam_cache_ts_timer_tick_error_strings,
+
+  .n_next_nodes = 1,
+
+  .state = VLIB_NODE_STATE_DISABLED,
+
+  /* edit / add dispositions here */
+  .next_nodes = {
+    [0] = "error-drop",
+  },
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/lib-trace/trace_util.h b/src/plugins/ioam/lib-trace/trace_util.h index 60802adea64..7065b41e9c9 100644 --- a/src/plugins/ioam/lib-trace/trace_util.h +++ b/src/plugins/ioam/lib-trace/trace_util.h @@
-213,8 +213,6 @@ typedef struct u32 app_data; } ioam_trace_ts_app_t; - - static inline u8 fetch_trace_data_size (u8 trace_type) { diff --git a/src/vnet/fib/fib_entry.h b/src/vnet/fib/fib_entry.h index f258b755741..9dfb8127124 100644 --- a/src/vnet/fib/fib_entry.h +++ b/src/vnet/fib/fib_entry.h @@ -187,10 +187,15 @@ typedef enum fib_entry_attribute_t_ { * The prefix/address is local to this device */ FIB_ENTRY_ATTRIBUTE_LOCAL, + /** + * The prefix/address exempted from loose uRPF check + * To be used with caution + */ + FIB_ENTRY_ATTRIBUTE_URPF_EXEMPT, /** * Marker. add new entries before this one. */ - FIB_ENTRY_ATTRIBUTE_LAST = FIB_ENTRY_ATTRIBUTE_LOCAL, + FIB_ENTRY_ATTRIBUTE_LAST = FIB_ENTRY_ATTRIBUTE_URPF_EXEMPT, } fib_entry_attribute_t; /** @@ -205,6 +210,7 @@ typedef enum fib_entry_attribute_t_ { [FIB_ENTRY_ATTRIBUTE_DROP] = "drop", \ [FIB_ENTRY_ATTRIBUTE_EXCLUSIVE] = "exclusive", \ [FIB_ENTRY_ATTRIBUTE_LOCAL] = "local", \ + [FIB_ENTRY_ATTRIBUTE_URPF_EXEMPT] = "uRPF-exempt" \ } #define FOR_EACH_FIB_ATTRIBUTE(_item) \ @@ -220,6 +226,7 @@ typedef enum fib_entry_flag_t_ { FIB_ENTRY_FLAG_EXCLUSIVE = (1 << FIB_ENTRY_ATTRIBUTE_EXCLUSIVE), FIB_ENTRY_FLAG_LOCAL = (1 << FIB_ENTRY_ATTRIBUTE_LOCAL), FIB_ENTRY_FLAG_IMPORT = (1 << FIB_ENTRY_ATTRIBUTE_IMPORT), + FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT = (1 << FIB_ENTRY_ATTRIBUTE_URPF_EXEMPT), } __attribute__((packed)) fib_entry_flag_t; /** diff --git a/src/vnet/fib/fib_entry_src.c b/src/vnet/fib/fib_entry_src.c index 6f5b7fee485..feb232df7f1 100644 --- a/src/vnet/fib/fib_entry_src.c +++ b/src/vnet/fib/fib_entry_src.c @@ -446,8 +446,9 @@ fib_entry_src_mk_lb (fib_entry_t *fib_entry, */ index_t ui = fib_path_list_get_urpf(esrc->fes_pl); - if (fib_entry_is_sourced(fib_entry_get_index(fib_entry), - FIB_SOURCE_URPF_EXEMPT) && + if ((fib_entry_is_sourced(fib_entry_get_index(fib_entry), + FIB_SOURCE_URPF_EXEMPT) || + (esrc->fes_entry_flags & FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT))&& (0 == fib_urpf_check_size(ui))) { /* diff --git 
a/src/vnet/ip/ip6.h b/src/vnet/ip/ip6.h index 2615fbfab02..cf40fbb37aa 100644 --- a/src/vnet/ip/ip6.h +++ b/src/vnet/ip/ip6.h @@ -474,6 +474,84 @@ ip6_compute_flow_hash (const ip6_header_t * ip, return (u32) c; }
+/* ip6_locate_header
+ *
+ * Walk the IPv6 next-header chain that starts at ip0 looking for the
+ * header whose protocol number equals find_hdr_type.  Used both to locate
+ * a specific IPv6 extension header and to find the transport layer header.
+ *
+ * 1. If find_hdr_type < 0 the walk stops at the first header that is not
+ *    an extension header (or at "no next header"); its protocol number is
+ *    returned and its offset is stored in *offset (described by the
+ *    original author as "the transport or ESP header").
+ * 2. If a header with protocol number find_hdr_type is found, its offset
+ *    is stored in *offset and its protocol number is returned.
+ * 3. If find_hdr_type is not found, the packet is malformed, or it is a
+ *    non-first fragment, -1 is returned and *offset is left untouched.
+ *
+ * *offset is counted in bytes from ip0.
+ *
+ * The bounds check only verifies that the START of each header lies inside
+ * the current data of buffer p0; it does not verify that the whole header
+ * fits, and it does not follow buffer chains.
+ */
+always_inline int
+ip6_locate_header (vlib_buffer_t * p0,
+                   ip6_header_t * ip0, int find_hdr_type, u32 * offset)
+{
+  u8 next_proto = ip0->protocol;
+  u8 *next_header;
+  u8 done = 0;
+  u32 cur_offset;
+  u8 *temp_nxthdr = 0;
+  u32 exthdr_len = 0;
+
+  next_header = ip6_next_header (ip0);
+  cur_offset = sizeof (ip6_header_t);
+  while (1)
+    {
+      /* next_proto is a u8, so a negative find_hdr_type never matches */
+      done = (next_proto == find_hdr_type);
+      /* Checked before honouring 'done' so that a match whose header
+         starts beyond the buffer is still rejected */
+      if (PREDICT_FALSE
+          (next_header >=
+           (u8 *) vlib_buffer_get_current (p0) + p0->current_length))
+        {
+          /* A malicious packet could set an extension header with a too big size */
+          return (-1);
+        }
+      if (done)
+        break;
+      /* End of the extension header chain */
+      if ((!ip6_ext_hdr (next_proto)) || next_proto == IP_PROTOCOL_IP6_NONXT)
+        {
+          if (find_hdr_type < 0)
+            break;
+          return -1;
+        }
+      /* Work out the length of the current extension header; the rule
+         differs for fragment and AH headers */
+      if (next_proto == IP_PROTOCOL_IPV6_FRAGMENTATION)
+        {
+          ip6_frag_hdr_t *frag_hdr = (ip6_frag_hdr_t *) next_header;
+          u16 frag_off = ip6_frag_hdr_offset (frag_hdr);
+          /* Non first fragment return -1 */
+          if (frag_off)
+            return (-1);
+          exthdr_len = sizeof (ip6_frag_hdr_t);
+          temp_nxthdr = next_header + exthdr_len;
+        }
+      else if (next_proto == IP_PROTOCOL_IPSEC_AH)
+        {
+          exthdr_len =
+            ip6_ext_authhdr_len (((ip6_ext_header_t *) next_header));
+          temp_nxthdr = next_header + exthdr_len;
+        }
+      else
+        {
+          exthdr_len =
+            ip6_ext_header_len (((ip6_ext_header_t *) next_header));
+          temp_nxthdr = next_header + exthdr_len;
+        }
+      /* Read next_hdr from the current header BEFORE stepping past it */
+      next_proto = ((ip6_ext_header_t *) next_header)->next_hdr;
+      next_header = temp_nxthdr;
+      cur_offset += exthdr_len;
+    }
+
+  *offset = cur_offset;
+  return (next_proto);
+}
+
+u8 *format_ip6_hop_by_hop_ext_hdr (u8 * s, va_list * args);
 /* * Hop-by-Hop handling */ diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c index 6f77c6dd69d..2388a30e7ed 100644 --- a/src/vnet/ip/ip6_forward.c +++ b/src/vnet/ip/ip6_forward.c @@ -1236,80 +1236,6 @@ ip6_tcp_udp_icmp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0) return p0->flags; } -/* ip6_locate_header - * - * This function is to search for the header specified by the find_hdr number. - * 1. If the find_hdr < 0 then it finds and returns the protocol number and - * offset stored in *offset of the transport or ESP header in the chain if - * found. - * 2. If a header with find_hdr > 0 protocol number is found then the - * offset is stored in *offset and protocol number of the header is - * returned. - * 3. If find_hdr header is not found or packet is malformed or - * it is a non-first fragment -1 is returned.
- */ -always_inline int -ip6_locate_header (vlib_buffer_t * p0, - ip6_header_t * ip0, int find_hdr, u32 * offset) -{ - u8 next_proto = ip0->protocol; - u8 *next_header; - u8 done = 0; - u32 cur_offset; - u8 *temp_nxthdr = 0; - u32 exthdr_len = 0; - - next_header = ip6_next_header (ip0); - cur_offset = sizeof (ip6_header_t); - while (1) - { - done = (next_proto == find_hdr); - if (PREDICT_FALSE - (next_header >= - (u8 *) vlib_buffer_get_current (p0) + p0->current_length)) - { - //A malicious packet could set an extension header with a too big size - return (-1); - } - if (done) - break; - if ((!ip6_ext_hdr (next_proto)) || next_proto == IP_PROTOCOL_IP6_NONXT) - { - if (find_hdr < 0) - break; - return -1; - } - if (next_proto == IP_PROTOCOL_IPV6_FRAGMENTATION) - { - ip6_frag_hdr_t *frag_hdr = (ip6_frag_hdr_t *) next_header; - u16 frag_off = ip6_frag_hdr_offset (frag_hdr); - /* Non first fragment return -1 */ - if (frag_off) - return (-1); - exthdr_len = sizeof (ip6_frag_hdr_t); - temp_nxthdr = next_header + exthdr_len; - } - else if (next_proto == IP_PROTOCOL_IPSEC_AH) - { - exthdr_len = - ip6_ext_authhdr_len (((ip6_ext_header_t *) next_header)); - temp_nxthdr = next_header + exthdr_len; - } - else - { - exthdr_len = - ip6_ext_header_len (((ip6_ext_header_t *) next_header)); - temp_nxthdr = next_header + exthdr_len; - } - next_proto = ((ip6_ext_header_t *) next_header)->next_hdr; - next_header = temp_nxthdr; - cur_offset += exthdr_len; - } - - *offset = cur_offset; - return (next_proto); -} - /** * @brief returns number of links on which src is reachable. 
*/ @@ -2413,6 +2339,50 @@ static char *ip6_hop_by_hop_error_strings[] = { #undef _ };
+/*
+ * Format function for an IPv6 hop-by-hop extension header.
+ *
+ * Consumes two va_list arguments: a pointer to the hop-by-hop header and
+ * an int giving the number of bytes, counted from the start of that
+ * header, that may be parsed.  Emits a summary line and then one entry
+ * per option, using the per-option-type trace callback registered in
+ * ip6_hop_by_hop_main when there is one.
+ *
+ * Returns the (possibly reallocated) format vector.
+ */
+u8 *
+format_ip6_hop_by_hop_ext_hdr (u8 * s, va_list * args)
+{
+  ip6_hop_by_hop_header_t *hbh0 = va_arg (*args, ip6_hop_by_hop_header_t *);
+  int total_len = va_arg (*args, int);
+  ip6_hop_by_hop_option_t *opt0, *limit0;
+  ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
+  u8 type0;
+
+  s = format (s, "IP6_HOP_BY_HOP: next protocol %d len %d total %d",
+              hbh0->protocol, (hbh0->length + 1) << 3, total_len);
+
+  /* Options start right after the fixed part of the header */
+  opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
+  limit0 = (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 + total_len);
+
+  while (opt0 < limit0)
+    {
+      type0 = opt0->type;
+      switch (type0)
+        {
+        case 0:         /* Pad: step over this single byte and keep parsing */
+          opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0 + 1);
+          break;
+
+        default:
+          if (hm->trace[type0])
+            {
+              s = (*hm->trace[type0]) (s, opt0);
+            }
+          else
+            {
+              s =
+                format (s, "\n unrecognized option %d length %d", type0,
+                        opt0->length);
+            }
+          /* Advance past the option header plus its payload; opt0->length
+             is trusted here, it is not checked against limit0 */
+          opt0 =
+            (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
+                                         sizeof (ip6_hop_by_hop_option_t));
+          break;
+        }
+    }
+  return s;
+}
+
 static u8 * format_ip6_hop_by_hop_trace (u8 * s, va_list * args) { diff --git a/src/vnet/sr/sr.h b/src/vnet/sr/sr.h index eb781e4bcee..1545a84f38b 100755 --- a/src/vnet/sr/sr.h +++ b/src/vnet/sr/sr.h @@ -83,7 +83,6 @@ typedef struct ip6_address_t bsid; /**< BindingSID (key) */ u8 type; /**< Type (default is 0) */ - /* SR Policy specific DPO */ /* IF Type = DEFAULT Then Load Balancer DPO among SID lists */ /* IF Type = SPRAY then Spray DPO with all SID lists */ @@ -290,6 +289,45 @@ sr_steering_policy (int is_del, ip6_address_t * bsid, u32 sr_policy_index, u32 table_id, ip46_address_t * prefix, u32 mask_width, u32 sw_if_index, u8 traffic_type);
+/**
+ * @brief SR rewrite string computation for SRH insertion (inline)
+ *
+ * @param sl is a vector of IPv6 addresses composing the Segment List
+ *
+ * @return precomputed rewrite string for SRH insertion
+ */
+static inline u8 *
+ip6_compute_rewrite_string_insert (ip6_address_t * sl)
+{
+  /*
+   * Layout of the returned vector: one ip6_sr_header_t followed by
+   * vec_len (sl) + 1 address slots.  The segments of sl are written in
+   * reverse order into slots [vec_len (sl) .. 1]; slot 0 is left zeroed
+   * by vec_validate for the caller to fill in.
+   */
+  u32 n_segments = vec_len (sl);
+  u32 total_bytes =
+    sizeof (ip6_sr_header_t) + (n_segments + 1) * sizeof (ip6_address_t);
+  u8 *rewrite = NULL;
+  ip6_sr_header_t *sr_hdr;
+  ip6_address_t *slot, *segment;
+
+  vec_validate (rewrite, total_bytes - 1);
+
+  sr_hdr = (ip6_sr_header_t *) rewrite;
+  sr_hdr->type = ROUTING_HEADER_TYPE_SR;
+  sr_hdr->segments_left = n_segments;
+  sr_hdr->first_segment = n_segments;
+  /* Extension header length is in 8-byte units, not counting the first */
+  sr_hdr->length = (total_bytes / 8) - 1;
+  sr_hdr->flags = 0x00;
+  sr_hdr->reserved = 0x0000;
+
+  slot = sr_hdr->segments + n_segments;
+  vec_foreach (segment, sl)
+  {
+    clib_memcpy (slot->as_u8, segment->as_u8, sizeof (ip6_address_t));
+    slot--;
+  }
+  return rewrite;
+}
+
+
 #endif /* included_vnet_sr_h */ /* -- cgit 1.2.3-korg