From 7cd468a3d7dee7d6c92f69a0bb7061ae208ec727 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Mon, 19 Dec 2016 23:05:39 +0100 Subject: Reorganize source tree to use single autotools instance Change-Id: I7b51f88292e057c6443b12224486f2d0c9f8ae23 Signed-off-by: Damjan Marion --- src/vnet/bfd/bfd.api | 205 ++++++++++ src/vnet/bfd/bfd_api.c | 262 ++++++++++++ src/vnet/bfd/bfd_api.h | 46 +++ src/vnet/bfd/bfd_debug.h | 79 ++++ src/vnet/bfd/bfd_doc.md | 1 + src/vnet/bfd/bfd_main.c | 969 ++++++++++++++++++++++++++++++++++++++++++++ src/vnet/bfd/bfd_main.h | 220 ++++++++++ src/vnet/bfd/bfd_protocol.c | 74 ++++ src/vnet/bfd/bfd_protocol.h | 154 +++++++ src/vnet/bfd/bfd_udp.c | 639 +++++++++++++++++++++++++++++ src/vnet/bfd/bfd_udp.h | 56 +++ src/vnet/bfd/dir.dox | 18 + 12 files changed, 2723 insertions(+) create mode 100644 src/vnet/bfd/bfd.api create mode 100644 src/vnet/bfd/bfd_api.c create mode 100644 src/vnet/bfd/bfd_api.h create mode 100644 src/vnet/bfd/bfd_debug.h create mode 100644 src/vnet/bfd/bfd_doc.md create mode 100644 src/vnet/bfd/bfd_main.c create mode 100644 src/vnet/bfd/bfd_main.h create mode 100644 src/vnet/bfd/bfd_protocol.c create mode 100644 src/vnet/bfd/bfd_protocol.h create mode 100644 src/vnet/bfd/bfd_udp.c create mode 100644 src/vnet/bfd/bfd_udp.h create mode 100644 src/vnet/bfd/dir.dox (limited to 'src/vnet/bfd') diff --git a/src/vnet/bfd/bfd.api b/src/vnet/bfd/bfd.api new file mode 100644 index 00000000..5798ee69 --- /dev/null +++ b/src/vnet/bfd/bfd.api @@ -0,0 +1,205 @@ +/* + * Copyright (c) 2015-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \brief Configure BFD feature
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param slow_timer - slow timer (seconds)
+    @param min_tx - desired min tx interval
+    @param min_rx - desired min rx interval
+    @param detect_mult - desired detection multiplier
+*/
+define bfd_set_config {
+  u32 client_index;
+  u32 context;
+  u32 slow_timer;
+  u32 min_tx;
+  u32 min_rx;
+  u8 detect_mult;
+};
+
+/** \brief Configure BFD feature response
+    @param context - sender context, to match reply w/ request
+    @param retval - return code for the request
+*/
+define bfd_set_config_reply {
+  u32 context;
+  i32 retval;
+};
+
+/** \brief Get BFD configuration
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+*/
+define bfd_get_config {
+  u32 client_index;
+  u32 context;
+};
+
+/** \brief Get BFD configuration response
+    @param client_index - first field of this reply.
+           NOTE(review): every other reply in this file starts with
+           context followed by retval; this message carries a
+           client_index and has no retval field at all - confirm
+           whether that is intentional
+    @param context - sender context, to match reply w/ request
+    @param slow_timer - slow timer (seconds)
+    @param min_tx - desired min tx interval
+    @param min_rx - desired min rx interval
+    @param detect_mult - desired detection multiplier
+*/
+define bfd_get_config_reply {
+  u32 client_index;
+  u32 context;
+  u32 slow_timer;
+  u32 min_tx;
+  u32 min_rx;
+  u8 detect_mult;
+};
+
+/** \brief Add UDP BFD session on interface
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param sw_if_index - sw index of the interface
+    @param desired_min_tx - desired min transmit interval (microseconds)
+    @param required_min_rx - required min receive interval (microseconds)
+    @param detect_mult - detect multiplier (# of packets missed before
+                         the connection goes down)
+    @param local_addr - local address
+    @param peer_addr - peer address
+    @param is_ipv6 - local_addr, peer_addr are IPv6 if non-zero, otherwise IPv4
+*/
+define bfd_udp_add {
+  u32 client_index;
+  u32 context;
+  u32 sw_if_index;
+  u32 desired_min_tx;
+  u32 required_min_rx;
+  u8 local_addr[16];
+  u8 peer_addr[16];
+  u8 is_ipv6;
+  u8 detect_mult;
+};
+
+/** \brief Add UDP BFD session response
+    @param context - sender context, to match reply w/ request
+    @param retval - return code for the request
+    @param bs_index - index of the session created
+*/
+define bfd_udp_add_reply {
+  u32 context;
+  i32 retval;
+  u32 bs_index;
+};
+
+/** \brief Delete UDP BFD session on interface
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param sw_if_index - sw index of the interface
+    @param local_addr - local address
+    @param peer_addr - peer address
+    @param is_ipv6 - local_addr, peer_addr are IPv6 if non-zero, otherwise IPv4
+*/
+define bfd_udp_del {
+  u32 client_index;
+  u32 context;
+  u32 sw_if_index;
+  u8 local_addr[16];
+  u8 peer_addr[16];
+  u8 is_ipv6;
+};
+
+/** \brief Delete UDP BFD session response
+    @param context - sender context, to match reply w/ request
+    @param retval - return code for the request
+*/
+define bfd_udp_del_reply {
+  u32 context;
+  i32 retval;
+};
+
+/** \brief Get all BFD sessions
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+*/
+define bfd_udp_session_dump {
+  u32 client_index;
+  u32 context;
+};
+
+/** \brief BFD session details structure
+    @param context - sender context, to match reply w/ request
+    @param bs_index - index of the session
+    @param sw_if_index - sw index of the interface
+    @param local_addr - local address
+    @param peer_addr - peer address
+    @param is_ipv6 - local_addr, peer_addr are IPv6 if non-zero, otherwise IPv4
+    @param state - session state
+*/
+define bfd_udp_session_details {
+  u32 context;
+  u32 bs_index;
+  u32 sw_if_index;
+  u8 local_addr[16];
+  u8 peer_addr[16];
+  u8 is_ipv6;
+  u8 state;
+};
+
+/** \brief Set flags of BFD session
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param bs_index - index of the bfd session to set flags on
+    @param admin_up_down - set the admin state, 1 = up, 0 = down
+*/
+define bfd_session_set_flags {
+  u32 client_index;
+  u32 context;
+  u32 bs_index;
+  u8 admin_up_down;
+};
+
+/** \brief Reply to bfd_session_set_flags
+    @param context - sender context which was passed in the request
+    @param retval - return code of the set flags request
+*/
+define bfd_session_set_flags_reply
+{
+  u32 context;
+  i32 retval;
+};
+
+/** \brief Register for BFD events
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param enable_disable - 1 => register for events, 0 => cancel registration
+    @param pid - sender's pid
+*/
+define want_bfd_events
+{
+  u32 client_index;
+  u32 context;
+  u32 enable_disable;
+  u32 pid;
+};
+
+/** \brief Reply for BFD events registration
+    @param context - returned sender context, to match reply w/ request
+    @param retval - return code
+*/
+define want_bfd_events_reply
+{
+  u32 context;
+  i32 retval;
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/bfd/bfd_api.c b/src/vnet/bfd/bfd_api.c
new file mode 100644
index 00000000..126cf29a
--- /dev/null
+++ b/src/vnet/bfd/bfd_api.c
@@ -0,0 +1,262 @@
+/*
+ *------------------------------------------------------------------
+ * bfd_api.c - bfd api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+/* NOTE(review): the header names of the #include directives in this file
+ * are missing from this copy of the patch (lost in extraction). */
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+
+#define vl_typedefs /* define message structures */
+#include
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include
+#undef vl_printfun
+
+#include
+
+/* List of (message id suffix, message name) pairs; expanded with a local
+ * definition of _() wherever per-message code is needed. */
+#define foreach_vpe_api_msg                       \
+_(BFD_UDP_ADD, bfd_udp_add)                       \
+_(BFD_UDP_DEL, bfd_udp_del)                       \
+_(BFD_UDP_SESSION_DUMP, bfd_udp_session_dump)     \
+_(BFD_SESSION_SET_FLAGS, bfd_session_set_flags)   \
+_(WANT_BFD_EVENTS, want_bfd_events)
+
+pub_sub_handler (bfd_events, BFD_EVENTS);
+
+/*
+ * Handler for the bfd_udp_add message.
+ *
+ * Copies the local/peer addresses out of the message as IPv6 or IPv4
+ * depending on mp->is_ipv6, converts sw_if_index and the two intervals
+ * from network to host byte order and calls bfd_udp_add_session().
+ * The result is stored in rv for REPLY_MACRO.
+ *
+ * NOTE(review): nothing in this function assigns the bs_index field of the
+ * reply; unless REPLY_MACRO does so, that field is not meaningful - confirm.
+ */
+static void
+vl_api_bfd_udp_add_t_handler (vl_api_bfd_udp_add_t * mp)
+{
+  vl_api_bfd_udp_add_reply_t *rmp;
+  int rv;
+
+  VALIDATE_SW_IF_INDEX (mp);
+
+  /* zero both addresses first so the bytes not covered by an IPv4 copy
+     are well defined */
+  ip46_address_t local_addr;
+  memset (&local_addr, 0, sizeof (local_addr));
+  ip46_address_t peer_addr;
+  memset (&peer_addr, 0, sizeof (peer_addr));
+  if (mp->is_ipv6)
+    {
+      clib_memcpy (&local_addr.ip6, mp->local_addr, sizeof (local_addr.ip6));
+      clib_memcpy (&peer_addr.ip6, mp->peer_addr, sizeof (peer_addr.ip6));
+    }
+  else
+    {
+      clib_memcpy (&local_addr.ip4, mp->local_addr, sizeof (local_addr.ip4));
+      clib_memcpy (&peer_addr.ip4, mp->peer_addr, sizeof (peer_addr.ip4));
+    }
+
+  rv = bfd_udp_add_session (clib_net_to_host_u32 (mp->sw_if_index),
+                            clib_net_to_host_u32 (mp->desired_min_tx),
+                            clib_net_to_host_u32 (mp->required_min_rx),
+                            mp->detect_mult, &local_addr, &peer_addr);
+
+  BAD_SW_IF_INDEX_LABEL;
+  REPLY_MACRO (VL_API_BFD_UDP_ADD_REPLY);
+}
+
+/*
+ * Handler for the bfd_udp_del message.
+ *
+ * Decodes the addresses exactly like the add handler and calls
+ * bfd_udp_del_session() with the host-order sw_if_index; the result is
+ * stored in rv for REPLY_MACRO.
+ */
+static void
+vl_api_bfd_udp_del_t_handler (vl_api_bfd_udp_del_t * mp)
+{
+  vl_api_bfd_udp_del_reply_t *rmp;
+  int rv;
+
+  VALIDATE_SW_IF_INDEX (mp);
+
+  ip46_address_t local_addr;
+  memset (&local_addr, 0, sizeof (local_addr));
+  ip46_address_t peer_addr;
+  memset (&peer_addr, 0, sizeof (peer_addr));
+  if (mp->is_ipv6)
+    {
+      clib_memcpy (&local_addr.ip6, mp->local_addr, sizeof (local_addr.ip6));
+      clib_memcpy (&peer_addr.ip6, mp->peer_addr, sizeof (peer_addr.ip6));
+    }
+  else
+    {
+      clib_memcpy (&local_addr.ip4, mp->local_addr, sizeof (local_addr.ip4));
+      clib_memcpy (&peer_addr.ip4, mp->peer_addr, sizeof (peer_addr.ip4));
+    }
+
+  rv =
+    bfd_udp_del_session (clib_net_to_host_u32 (mp->sw_if_index), &local_addr,
+                         &peer_addr);
+
+  BAD_SW_IF_INDEX_LABEL;
+  REPLY_MACRO (VL_API_BFD_UDP_DEL_REPLY);
+}
+
+/*
+ * Build a bfd_udp_session_details message for session bs and send it to
+ * queue q.
+ *
+ * Sessions whose transport is neither UDP4 nor UDP6 are silently skipped.
+ * context is stored as passed in (no byte swap); bs_index and sw_if_index
+ * are converted to network byte order.  is_ipv6 is derived from the
+ * session's local address.
+ */
+void
+send_bfd_udp_session_details (unix_shared_memory_queue_t * q, u32 context,
+                              bfd_session_t * bs)
+{
+  if (bs->transport != BFD_TRANSPORT_UDP4 &&
+      bs->transport != BFD_TRANSPORT_UDP6)
+    {
+      return;
+    }
+
+  vl_api_bfd_udp_session_details_t *mp = vl_msg_api_alloc (sizeof (*mp));
+  memset (mp, 0, sizeof (*mp));
+  mp->_vl_msg_id = ntohs (VL_API_BFD_UDP_SESSION_DETAILS);
+  mp->context = context;
+  mp->bs_index = clib_host_to_net_u32 (bs->bs_idx);
+  mp->state = bs->local_state;
+  bfd_udp_session_t *bus = &bs->udp;
+  bfd_udp_key_t *key = &bus->key;
+  mp->sw_if_index = clib_host_to_net_u32 (key->sw_if_index);
+  mp->is_ipv6 = !(ip46_address_is_ip4 (&key->local_addr));
+  if (mp->is_ipv6)
+    {
+      /* IPv6: the whole ip46 address is copied into the message field */
+      clib_memcpy (mp->local_addr, &key->local_addr,
+                   sizeof (key->local_addr));
+      clib_memcpy (mp->peer_addr, &key->peer_addr, sizeof (key->peer_addr));
+    }
+  else
+    {
+      /* IPv4: only the ip4 bytes are copied; the rest of the field stays
+         zero from the memset above */
+      clib_memcpy (mp->local_addr, key->local_addr.ip4.data,
+                   sizeof (key->local_addr.ip4.data));
+      clib_memcpy (mp->peer_addr, key->peer_addr.ip4.data,
+                   sizeof (key->peer_addr.ip4.data));
+    }
+
+  /* the address of the message pointer is what gets passed here */
+  vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+/*
+ * Notify registered clients about session bs.
+ *
+ * Walks vam->bfd_events_registrations and, for every registration that
+ * resolves to an input queue, sends the session details with context 0
+ * (UDP4/UDP6 sessions only).  bm is not used here.
+ */
+void
+bfd_event (bfd_main_t * bm, bfd_session_t * bs)
+{
+  vpe_api_main_t *vam = &vpe_api_main;
+  vpe_client_registration_t *reg;
+  unix_shared_memory_queue_t *q;
+  /* *INDENT-OFF* */
+  pool_foreach (reg, vam->bfd_events_registrations, ({
+                  q = vl_api_client_index_to_input_queue (reg->client_index);
+                  if (q)
+                    {
+                      switch (bs->transport)
+                        {
+                        case BFD_TRANSPORT_UDP4:
+                          /* fallthrough */
+                        case BFD_TRANSPORT_UDP6:
+                          send_bfd_udp_session_details (q, 0, bs);
+                        }
+                    }
+                }));
+  /* *INDENT-ON* */
+}
+
+/*
+ * Handler for the bfd_udp_session_dump message.
+ *
+ * Sends one details message per UDP4/UDP6 session in bfd_main.sessions to
+ * the requesting client, echoing mp->context.  Returns without sending
+ * anything if the client has no input queue.
+ */
+static void
+vl_api_bfd_udp_session_dump_t_handler (vl_api_bfd_udp_session_dump_t * mp)
+{
+  unix_shared_memory_queue_t *q;
+
+  q = vl_api_client_index_to_input_queue (mp->client_index);
+
+  if (q == 0)
+    return;
+
+  bfd_session_t *bs = NULL;
+  /* *INDENT-OFF* */
+  pool_foreach (bs, bfd_main.sessions, ({
+                  if (bs->transport == BFD_TRANSPORT_UDP4 ||
+                      bs->transport == BFD_TRANSPORT_UDP6)
+                    send_bfd_udp_session_details (q, mp->context, bs);
+                }));
+  /* *INDENT-ON* */
+}
+
+/*
+ * Handler for the bfd_session_set_flags message: forwards the host-order
+ * bs_index and admin_up_down to bfd_session_set_flags(); the result is
+ * stored in rv for REPLY_MACRO.
+ */
+static void
+vl_api_bfd_session_set_flags_t_handler (vl_api_bfd_session_set_flags_t * mp)
+{
+  vl_api_bfd_session_set_flags_reply_t *rmp;
+  int rv;
+
+  rv =
+    bfd_session_set_flags (clib_net_to_host_u32 (mp->bs_index),
+                           mp->admin_up_down);
+
+  REPLY_MACRO (VL_API_BFD_SESSION_SET_FLAGS_REPLY);
+}
+
+
+/*
+ * bfd_api_hookup
+ * Add vpe's API message handlers to the table.
+ * vlib has already mapped shared memory and
+ * added the client registration handlers.
+ * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process()
+ */
+#define vl_msg_name_crc_list
+/* NOTE(review): the header name of this #include is missing from this copy
+ * of the patch (lost in extraction). */
+#include
+#undef vl_msg_name_crc_list
+
+/*
+ * Register every entry of foreach_vl_msg_name_crc_bfd with the API main
+ * structure under the name "<message name>_<crc>".
+ */
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+  foreach_vl_msg_name_crc_bfd;
+#undef _
+}
+
+/*
+ * API init function: installs the handler, endian and print functions for
+ * every message listed in foreach_vpe_api_msg, then fills the message
+ * name/crc table.  Always returns 0; vm is not used.
+ */
+static clib_error_t *
+bfd_api_hookup (vlib_main_t * vm)
+{
+  api_main_t *am = &api_main;
+
+#define _(N,n)                                                  \
+    vl_msg_api_set_handlers(VL_API_##N, #n,                     \
+                           vl_api_##n##_t_handler,              \
+                           vl_noop_handler,                     \
+                           vl_api_##n##_t_endian,               \
+                           vl_api_##n##_t_print,                \
+                           sizeof(vl_api_##n##_t), 1);
+  foreach_vpe_api_msg;
+#undef _
+
+  /*
+   * Set up the (msg_name, crc, message-id) table
+   */
+  setup_message_id_table (am);
+
+  return 0;
+}
+
+VLIB_API_INIT_FUNCTION (bfd_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/bfd/bfd_api.h b/src/vnet/bfd/bfd_api.h
new file mode 100644
index 00000000..cfcd04f3
--- /dev/null
+++ b/src/vnet/bfd/bfd_api.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2011-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief BFD API declarations
+ */
+#ifndef __included_bfd_api_h__
+#define __included_bfd_api_h__
+
+/* NOTE(review): the header names of the #include directives below are
+ * missing from this copy of the patch (lost in extraction). */
+#include
+#include
+#include
+#include
+
+/**
+ * @brief Add a UDP BFD session.
+ *
+ * @param sw_if_index         sw index of the interface
+ * @param desired_min_tx_us   desired min transmit interval (microseconds)
+ * @param required_min_rx_us  required min receive interval (microseconds)
+ * @param detect_mult         detect multiplier
+ * @param local_addr          local address
+ * @param peer_addr           peer address
+ *
+ * @return vnet API error code
+ */
+vnet_api_error_t bfd_udp_add_session (u32 sw_if_index, u32 desired_min_tx_us,
+                                      u32 required_min_rx_us, u8 detect_mult,
+                                      const ip46_address_t * local_addr,
+                                      const ip46_address_t * peer_addr);
+
+/**
+ * @brief Delete the UDP BFD session identified by interface and addresses.
+ *
+ * @return vnet API error code
+ */
+vnet_api_error_t bfd_udp_del_session (u32 sw_if_index,
+                                      const ip46_address_t * local_addr,
+                                      const ip46_address_t * peer_addr);
+
+/**
+ * @brief Set the admin state of a BFD session.
+ *
+ * @param bs_index       index of the session
+ * @param admin_up_down  non-zero = up, zero = down
+ *
+ * @return vnet API error code
+ */
+vnet_api_error_t bfd_session_set_flags (u32 bs_index, u8 admin_up_down);
+
+#endif /* __included_bfd_api_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/bfd/bfd_debug.h b/src/vnet/bfd/bfd_debug.h
new file mode 100644
index 00000000..707ebab2
--- /dev/null
+++ b/src/vnet/bfd/bfd_debug.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2011-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief BFD debug print macros
+ */
+#ifndef __included_bfd_debug_h__
+#define __included_bfd_debug_h__
+
+/* controls debug prints */
+#define BFD_DEBUG (0)
+
+#if BFD_DEBUG
+/*
+ * Declares __file and points it at the basename of __FILE__ (the part
+ * after the last '/', or the whole string when there is none).
+ */
+#define BFD_DEBUG_FILE_DEF            \
+  static const char *__file = NULL;   \
+  {                                   \
+    __file = strrchr (__FILE__, '/'); \
+    if (__file)                       \
+      {                               \
+        ++__file;                     \
+      }                               \
+    else                              \
+      {                               \
+        __file = __FILE__;            \
+      }                               \
+  }
+
+/*
+ * Print a "DBG" line prefixed with time, file, line and function.
+ *
+ * The expansion deliberately does NOT end in a semicolon: the caller
+ * supplies it, so that "if (x) BFD_DBG (...); else ..." parses as one
+ * statement.  The precision argument of "%.*s" must be an int, hence the
+ * cast of vec_len().
+ */
+#define BFD_DBG(fmt, ...)                                                \
+  do                                                                     \
+    {                                                                    \
+      BFD_DEBUG_FILE_DEF                                                 \
+      static u8 *_s = NULL;                                              \
+      vlib_main_t *vm = vlib_get_main ();                                \
+      _s = format (_s, "%6.02f:DBG:%s:%d:%s():" fmt, vlib_time_now (vm), \
+                   __file, __LINE__, __func__, ##__VA_ARGS__);           \
+      printf ("%.*s\n", (int) vec_len (_s), _s);                         \
+      vec_reset_length (_s);                                             \
+    }                                                                    \
+  while (0)
+
+/* Same as BFD_DBG, but tagged "ERR". */
+#define BFD_ERR(fmt, ...)                                                \
+  do                                                                     \
+    {                                                                    \
+      BFD_DEBUG_FILE_DEF                                                 \
+      static u8 *_s = NULL;                                              \
+      vlib_main_t *vm = vlib_get_main ();                                \
+      _s = format (_s, "%6.02f:ERR:%s:%d:%s():" fmt, vlib_time_now (vm), \
+                   __file, __LINE__, __func__, ##__VA_ARGS__);           \
+      printf ("%.*s\n", (int) vec_len (_s), _s);                         \
+      vec_reset_length (_s);                                             \
+    }                                                                    \
+  while (0)
+
+#else
+/* debug prints disabled: both macros expand to nothing */
+#define BFD_DBG(...)
+#define BFD_ERR(...)
+#endif
+
+#endif /* __included_bfd_debug_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/bfd/bfd_doc.md b/src/vnet/bfd/bfd_doc.md
new file mode 100644
index 00000000..1333ed77
--- /dev/null
+++ b/src/vnet/bfd/bfd_doc.md
@@ -0,0 +1 @@
+TODO
diff --git a/src/vnet/bfd/bfd_main.c b/src/vnet/bfd/bfd_main.c
new file mode 100644
index 00000000..e25eadfc
--- /dev/null
+++ b/src/vnet/bfd/bfd_main.c
@@ -0,0 +1,969 @@
+/*
+ * Copyright (c) 2011-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +/** + * @file + * @brief BFD nodes implementation + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static u64 +bfd_us_to_clocks (bfd_main_t * bm, u64 us) +{ + return bm->cpu_cps * ((f64) us / USEC_PER_SECOND); +} + +static vlib_node_registration_t bfd_process_node; + +typedef enum +{ +#define F(t, n) BFD_OUTPUT_##t, + foreach_bfd_transport (F) +#undef F + BFD_OUTPUT_N_NEXT, +} bfd_output_next_t; + +static u32 bfd_next_index_by_transport[] = { +#define F(t, n) [BFD_TRANSPORT_##t] = BFD_OUTPUT_##t, + foreach_bfd_transport (F) +#undef F +}; + +/* + * We actually send all bfd pkts to the "error" node after scanning + * them, so the graph node has only one next-index. The "error-drop" + * node automatically bumps our per-node packet counters for us. + */ +typedef enum +{ + BFD_INPUT_NEXT_NORMAL, + BFD_INPUT_N_NEXT, +} bfd_input_next_t; + +static void bfd_on_state_change (bfd_main_t * bm, bfd_session_t * bs, u64 now, + int handling_wakeup); + +static void +bfd_set_defaults (bfd_main_t * bm, bfd_session_t * bs) +{ + bs->local_state = BFD_STATE_down; + bs->local_diag = BFD_DIAG_CODE_no_diag; + bs->remote_state = BFD_STATE_down; + bs->local_demand = 0; + bs->remote_discr = 0; + bs->desired_min_tx_us = BFD_DEFAULT_DESIRED_MIN_TX_US; + bs->desired_min_tx_clocks = bfd_us_to_clocks (bm, bs->desired_min_tx_us); + bs->remote_min_rx_us = 1; + bs->remote_demand = 0; +} + +static void +bfd_set_diag (bfd_session_t * bs, bfd_diag_code_e code) +{ + if (bs->local_diag != code) + { + BFD_DBG ("set local_diag, bs_idx=%d: '%d:%s'", bs->bs_idx, code, + bfd_diag_code_string (code)); + bs->local_diag = code; + } +} + +static void +bfd_set_state (bfd_main_t * bm, bfd_session_t * bs, + bfd_state_e new_state, int handling_wakeup) +{ + if (bs->local_state != new_state) + { + BFD_DBG ("Change state, bs_idx=%d: %s->%s", bs->bs_idx, + bfd_state_string (bs->local_state), + bfd_state_string (new_state)); + bs->local_state = new_state; + 
bfd_on_state_change (bm, bs, clib_cpu_time_now (), handling_wakeup); + } +} + +static void +bfd_recalc_tx_interval (bfd_main_t * bm, bfd_session_t * bs) +{ + if (!bs->local_demand) + { + bs->transmit_interval_clocks = + clib_max (bs->desired_min_tx_clocks, bs->remote_min_rx_clocks); + } + else + { + /* TODO */ + } + BFD_DBG ("Recalculated transmit interval %lu clocks/%.2fs", + bs->transmit_interval_clocks, + bs->transmit_interval_clocks / bm->cpu_cps); +} + +static void +bfd_calc_next_tx (bfd_main_t * bm, bfd_session_t * bs, u64 now) +{ + if (!bs->local_demand) + { + if (bs->local_detect_mult > 1) + { + /* common case - 75-100% of transmit interval */ + bs->tx_timeout_clocks = now + + (1 - .25 * (random_f64 (&bm->random_seed))) * + bs->transmit_interval_clocks; + if (bs->tx_timeout_clocks < now) + { + /* huh, we've missed it already, skip the missed events */ + const u64 missed = + (now - bs->tx_timeout_clocks) / bs->transmit_interval_clocks; + BFD_ERR ("Missed %lu transmit events (now is %lu, calc " + "tx_timeout is %lu)!", + missed, now, bs->tx_timeout_clocks); + bs->tx_timeout_clocks += + (missed + 1) * bs->transmit_interval_clocks; + } + } + else + { + /* special case - 75-90% of transmit interval */ + bs->tx_timeout_clocks = + now + + (.9 - .15 * (random_f64 (&bm->random_seed))) * + bs->transmit_interval_clocks; + if (bs->tx_timeout_clocks < now) + { + /* huh, we've missed it already, skip the missed events */ + const u64 missed = + (now - bs->tx_timeout_clocks) / bs->transmit_interval_clocks; + BFD_ERR ("Missed %lu transmit events (now is %lu, calc " + "tx_timeout is %lu)!", + missed, now, bs->tx_timeout_clocks); + bs->tx_timeout_clocks += + (missed + 1) * bs->transmit_interval_clocks; + } + } + } + else + { + /* TODO */ + } + if (bs->tx_timeout_clocks) + { + BFD_DBG ("Next transmit in %lu clocks/%.02fs@%lu", + bs->tx_timeout_clocks - now, + (bs->tx_timeout_clocks - now) / bm->cpu_cps, + bs->tx_timeout_clocks); + } +} + +static void +bfd_recalc_detection_time 
(bfd_main_t * bm, bfd_session_t * bs) +{ + if (!bs->local_demand) + { + bs->detection_time_clocks = + bs->remote_detect_mult * + bfd_us_to_clocks (bm, clib_max (bs->required_min_rx_us, + bs->remote_desired_min_tx_us)); + } + else + { + bs->detection_time_clocks = + bs->local_detect_mult * + bfd_us_to_clocks (bm, + clib_max (bs->desired_min_tx_us, + bs->remote_min_rx_us)); + } + BFD_DBG ("Recalculated detection time %lu clocks/%.2fs", + bs->detection_time_clocks, + bs->detection_time_clocks / bm->cpu_cps); +} + +static void +bfd_set_timer (bfd_main_t * bm, bfd_session_t * bs, u64 now, + int handling_wakeup) +{ + u64 next = 0; + u64 rx_timeout = 0; + if (BFD_STATE_up == bs->local_state) + { + rx_timeout = bs->last_rx_clocks + bs->detection_time_clocks; + } + if (bs->tx_timeout_clocks && rx_timeout) + { + next = clib_min (bs->tx_timeout_clocks, rx_timeout); + } + else if (bs->tx_timeout_clocks) + { + next = bs->tx_timeout_clocks; + } + else if (rx_timeout) + { + next = rx_timeout; + } + BFD_DBG ("bs_idx=%u, tx_timeout=%lu, rx_timeout=%lu, next=%s", bs->bs_idx, + bs->tx_timeout_clocks, rx_timeout, + next == bs->tx_timeout_clocks ? 
"tx" : "rx"); + /* sometimes the wheel expires an event a bit sooner than requested, account + for that here */ + if (next && (now + bm->wheel_inaccuracy > bs->wheel_time_clocks || + next < bs->wheel_time_clocks || !bs->wheel_time_clocks)) + { + bs->wheel_time_clocks = next; + BFD_DBG ("timing_wheel_insert(%p, %lu (%ld clocks/%.2fs in the " + "future), %u);", + &bm->wheel, bs->wheel_time_clocks, + (i64) bs->wheel_time_clocks - clib_cpu_time_now (), + (i64) (bs->wheel_time_clocks - clib_cpu_time_now ()) / + bm->cpu_cps, bs->bs_idx); + timing_wheel_insert (&bm->wheel, bs->wheel_time_clocks, bs->bs_idx); + if (!handling_wakeup) + { + vlib_process_signal_event (bm->vlib_main, + bm->bfd_process_node_index, + BFD_EVENT_RESCHEDULE, bs->bs_idx); + } + } +} + +static void +bfd_set_desired_min_tx (bfd_main_t * bm, bfd_session_t * bs, u64 now, + u32 desired_min_tx_us, int handling_wakeup) +{ + bs->desired_min_tx_us = desired_min_tx_us; + bs->desired_min_tx_clocks = bfd_us_to_clocks (bm, bs->desired_min_tx_us); + BFD_DBG ("Set desired min tx to %uus/%lu clocks/%.2fs", + bs->desired_min_tx_us, bs->desired_min_tx_clocks, + bs->desired_min_tx_clocks / bm->cpu_cps); + bfd_recalc_detection_time (bm, bs); + bfd_recalc_tx_interval (bm, bs); + bfd_calc_next_tx (bm, bs, now); + bfd_set_timer (bm, bs, now, handling_wakeup); +} + +static void +bfd_set_remote_required_min_rx (bfd_main_t * bm, bfd_session_t * bs, + u64 now, + u32 remote_required_min_rx_us, + int handling_wakeup) +{ + bs->remote_min_rx_us = remote_required_min_rx_us; + bs->remote_min_rx_clocks = bfd_us_to_clocks (bm, bs->remote_min_rx_us); + BFD_DBG ("Set remote min rx to %uus/%lu clocks/%.2fs", bs->remote_min_rx_us, + bs->remote_min_rx_clocks, bs->remote_min_rx_clocks / bm->cpu_cps); + bfd_recalc_detection_time (bm, bs); + bfd_recalc_tx_interval (bm, bs); + bfd_calc_next_tx (bm, bs, now); + bfd_set_timer (bm, bs, now, handling_wakeup); +} + +void +bfd_session_start (bfd_main_t * bm, bfd_session_t * bs) +{ + BFD_DBG ("%U", 
format_bfd_session, bs); + bfd_recalc_tx_interval (bm, bs); + vlib_process_signal_event (bm->vlib_main, bm->bfd_process_node_index, + BFD_EVENT_NEW_SESSION, bs->bs_idx); +} + +vnet_api_error_t +bfd_del_session (uword bs_idx) +{ + const bfd_main_t *bm = &bfd_main; + if (!pool_is_free_index (bm->sessions, bs_idx)) + { + bfd_session_t *bs = pool_elt_at_index (bm->sessions, bs_idx); + pool_put (bm->sessions, bs); + return 0; + } + else + { + BFD_ERR ("no such session"); + return VNET_API_ERROR_BFD_NOENT; + } + return 0; +} + +const char * +bfd_diag_code_string (bfd_diag_code_e diag) +{ +#define F(n, t, s) \ + case BFD_DIAG_CODE_NAME (t): \ + return s; + switch (diag) + { + foreach_bfd_diag_code (F)} + return "UNKNOWN"; +#undef F +} + +const char * +bfd_state_string (bfd_state_e state) +{ +#define F(n, t, s) \ + case BFD_STATE_NAME (t): \ + return s; + switch (state) + { + foreach_bfd_state (F)} + return "UNKNOWN"; +#undef F +} + +vnet_api_error_t +bfd_session_set_flags (u32 bs_idx, u8 admin_up_down) +{ + bfd_main_t *bm = &bfd_main; + if (pool_is_free_index (bm->sessions, bs_idx)) + { + BFD_ERR ("invalid bs_idx=%u", bs_idx); + return VNET_API_ERROR_BFD_NOENT; + } + bfd_session_t *bs = pool_elt_at_index (bm->sessions, bs_idx); + if (admin_up_down) + { + bfd_set_state (bm, bs, BFD_STATE_down, 0); + } + else + { + bfd_set_diag (bs, BFD_DIAG_CODE_neighbor_sig_down); + bfd_set_state (bm, bs, BFD_STATE_admin_down, 0); + } + return 0; +} + +u8 * +bfd_input_format_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + const bfd_input_trace_t *t = va_arg (*args, bfd_input_trace_t *); + const bfd_pkt_t *pkt = (bfd_pkt_t *) t->data; + if (t->len > STRUCT_SIZE_OF (bfd_pkt_t, head)) + { + s = format (s, "BFD v%u, diag=%u(%s), state=%u(%s),\n" + " flags=(P:%u, F:%u, C:%u, A:%u, D:%u, M:%u), detect_mult=%u, " + "length=%u\n", + bfd_pkt_get_version (pkt), 
bfd_pkt_get_diag_code (pkt), + bfd_diag_code_string (bfd_pkt_get_diag_code (pkt)), + bfd_pkt_get_state (pkt), + bfd_state_string (bfd_pkt_get_state (pkt)), + bfd_pkt_get_poll (pkt), bfd_pkt_get_final (pkt), + bfd_pkt_get_control_plane_independent (pkt), + bfd_pkt_get_auth_present (pkt), bfd_pkt_get_demand (pkt), + bfd_pkt_get_multipoint (pkt), pkt->head.detect_mult, + pkt->head.length); + if (t->len >= sizeof (bfd_pkt_t) + && pkt->head.length >= sizeof (bfd_pkt_t)) + { + s = format (s, " my discriminator: %u\n", pkt->my_disc); + s = format (s, " your discriminator: %u\n", pkt->your_disc); + s = format (s, " desired min tx interval: %u\n", + clib_net_to_host_u32 (pkt->des_min_tx)); + s = format (s, " required min rx interval: %u\n", + clib_net_to_host_u32 (pkt->req_min_rx)); + s = format (s, " required min echo rx interval: %u\n", + clib_net_to_host_u32 (pkt->req_min_echo_rx)); + } + } + + return s; +} + +static void +bfd_on_state_change (bfd_main_t * bm, bfd_session_t * bs, u64 now, + int handling_wakeup) +{ + BFD_DBG ("State changed: %U", format_bfd_session, bs); + bfd_event (bm, bs); + switch (bs->local_state) + { + case BFD_STATE_admin_down: + bfd_set_desired_min_tx (bm, bs, now, + clib_max (bs->config_desired_min_tx_us, + BFD_DEFAULT_DESIRED_MIN_TX_US), + handling_wakeup); + break; + case BFD_STATE_down: + bfd_set_desired_min_tx (bm, bs, now, + clib_max (bs->config_desired_min_tx_us, + BFD_DEFAULT_DESIRED_MIN_TX_US), + handling_wakeup); + break; + case BFD_STATE_init: + bfd_set_desired_min_tx (bm, bs, now, + clib_max (bs->config_desired_min_tx_us, + BFD_DEFAULT_DESIRED_MIN_TX_US), + handling_wakeup); + break; + case BFD_STATE_up: + bfd_set_desired_min_tx (bm, bs, now, bs->config_desired_min_tx_us, + handling_wakeup); + break; + } +} + +static void +bfd_add_transport_layer (vlib_main_t * vm, vlib_buffer_t * b, + bfd_session_t * bs) +{ + switch (bs->transport) + { + case BFD_TRANSPORT_UDP4: + /* fallthrough */ + case BFD_TRANSPORT_UDP6: + BFD_DBG ("Transport bfd 
via udp, bs_idx=%u", bs->bs_idx); + bfd_add_udp_transport (vm, b, &bs->udp); + break; + } +} + +static vlib_buffer_t * +bfd_create_frame (vlib_main_t * vm, vlib_node_runtime_t * rt, + bfd_session_t * bs) +{ + u32 bi; + if (vlib_buffer_alloc (vm, &bi, 1) != 1) + { + clib_warning ("buffer allocation failure"); + return NULL; + } + + vlib_buffer_t *b = vlib_get_buffer (vm, bi); + ASSERT (b->current_data == 0); + + u32 *to_next; + u32 n_left_to_next; + + vlib_get_next_frame (vm, rt, bfd_next_index_by_transport[bs->transport], + to_next, n_left_to_next); + + to_next[0] = bi; + n_left_to_next -= 1; + + vlib_put_next_frame (vm, rt, bfd_next_index_by_transport[bs->transport], + n_left_to_next); + return b; +} + +static void +bfd_init_control_frame (vlib_buffer_t * b, bfd_session_t * bs) +{ + bfd_pkt_t *pkt = vlib_buffer_get_current (b); + const u32 bfd_length = 24; + memset (pkt, 0, sizeof (*pkt)); + + bfd_pkt_set_version (pkt, 1); + bfd_pkt_set_diag_code (pkt, bs->local_diag); + bfd_pkt_set_state (pkt, bs->local_state); + if (bs->local_demand && BFD_STATE_up == bs->local_state && + BFD_STATE_up == bs->remote_state) + { + bfd_pkt_set_demand (pkt); + } + pkt->head.detect_mult = bs->local_detect_mult; + pkt->head.length = clib_host_to_net_u32 (bfd_length); + pkt->my_disc = bs->local_discr; + pkt->your_disc = bs->remote_discr; + pkt->des_min_tx = clib_host_to_net_u32 (bs->desired_min_tx_us); + pkt->req_min_rx = clib_host_to_net_u32 (bs->required_min_rx_us); + pkt->req_min_echo_rx = clib_host_to_net_u32 (0); /* FIXME */ + b->current_length = bfd_length; +} + +static void +bfd_send_periodic (vlib_main_t * vm, vlib_node_runtime_t * rt, + bfd_main_t * bm, bfd_session_t * bs, u64 now, + int handling_wakeup) +{ + if (!bs->remote_min_rx_us) + { + BFD_DBG + ("bfd.RemoteMinRxInterval is zero, not sending periodic control " + "frame"); + return; + } + /* FIXME + A system MUST NOT periodically transmit BFD Control packets if Demand + mode is active on the remote system 
(bfd.RemoteDemandMode is 1, + bfd.SessionState is Up, and bfd.RemoteSessionState is Up) and a Poll + Sequence is not being transmitted. + */ + /* sometimes the wheel expires an event a bit sooner than requested, account + for that here */ + if (now + bm->wheel_inaccuracy >= bs->tx_timeout_clocks) + { + BFD_DBG ("Send periodic control frame for bs_idx=%lu", bs->bs_idx); + vlib_buffer_t *b = bfd_create_frame (vm, rt, bs); + if (!b) + { + return; + } + bfd_init_control_frame (b, bs); + bfd_add_transport_layer (vm, b, bs); + bfd_calc_next_tx (bm, bs, now); + } + else + { + BFD_DBG + ("No need to send control frame now, now is %lu, tx_timeout is %lu", + now, bs->tx_timeout_clocks); + } + bfd_set_timer (bm, bs, now, handling_wakeup); +} + +void +bfd_send_final (vlib_main_t * vm, vlib_buffer_t * b, bfd_session_t * bs) +{ + BFD_DBG ("Send final control frame for bs_idx=%lu", bs->bs_idx); + bfd_init_control_frame (b, bs); + bfd_pkt_set_final (vlib_buffer_get_current (b)); + bfd_add_transport_layer (vm, b, bs); +} + +static void +bfd_check_rx_timeout (bfd_main_t * bm, bfd_session_t * bs, u64 now, + int handling_wakeup) +{ + /* sometimes the wheel expires an event a bit sooner than requested, account + for that here */ + if (bs->last_rx_clocks + bs->detection_time_clocks <= + now + bm->wheel_inaccuracy) + { + BFD_DBG ("Rx timeout, session goes down"); + bfd_set_diag (bs, BFD_DIAG_CODE_det_time_exp); + bfd_set_state (bm, bs, BFD_STATE_down, handling_wakeup); + } +} + +void +bfd_on_timeout (vlib_main_t * vm, vlib_node_runtime_t * rt, bfd_main_t * bm, + bfd_session_t * bs, u64 now) +{ + BFD_DBG ("Timeout for bs_idx=%lu", bs->bs_idx); + switch (bs->local_state) + { + case BFD_STATE_admin_down: + BFD_ERR ("Unexpected timeout when in %s state", + bfd_state_string (bs->local_state)); + abort (); + break; + case BFD_STATE_down: + bfd_send_periodic (vm, rt, bm, bs, now, 1); + break; + case BFD_STATE_init: + BFD_ERR ("Unexpected timeout when in %s state", + bfd_state_string 
(bs->local_state)); + abort (); + break; + case BFD_STATE_up: + bfd_check_rx_timeout (bm, bs, now, 1); + bfd_send_periodic (vm, rt, bm, bs, now, 1); + break; + } +} + +/* + * bfd process node function + */ +static uword +bfd_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) +{ + bfd_main_t *bm = &bfd_main; + u32 *expired = 0; + uword event_type, *event_data = 0; + + /* So we can send events to the bfd process */ + bm->bfd_process_node_index = bfd_process_node.index; + + while (1) + { + u64 now = clib_cpu_time_now (); + u64 next_expire = timing_wheel_next_expiring_elt_time (&bm->wheel); + BFD_DBG ("timing_wheel_next_expiring_elt_time(%p) returns %lu", + &bm->wheel, next_expire); + if ((i64) next_expire < 0) + { + BFD_DBG ("wait for event without timeout"); + (void) vlib_process_wait_for_event (vm); + event_type = vlib_process_get_events (vm, &event_data); + } + else + { + f64 timeout = ((i64) next_expire - (i64) now) / bm->cpu_cps; + BFD_DBG ("wait for event with timeout %.02f", timeout); + if (timeout < 0) + { + BFD_DBG ("negative timeout, already expired, skipping wait"); + event_type = ~0; + } + else + { + (void) vlib_process_wait_for_event_or_clock (vm, timeout); + event_type = vlib_process_get_events (vm, &event_data); + } + } + now = clib_cpu_time_now (); + switch (event_type) + { + case ~0: /* no events => timeout */ + /* nothing to do here */ + break; + case BFD_EVENT_RESCHEDULE: + /* nothing to do here - reschedule is done automatically after + * each event or timeout */ + break; + case BFD_EVENT_NEW_SESSION: + do + { + bfd_session_t *bs = + pool_elt_at_index (bm->sessions, *event_data); + bfd_send_periodic (vm, rt, bm, bs, now, 1); + } + while (0); + break; + default: + clib_warning ("BUG: event type 0x%wx", event_type); + break; + } + BFD_DBG ("advancing wheel, now is %lu", now); + BFD_DBG ("timing_wheel_advance (%p, %lu, %p, 0);", &bm->wheel, now, + expired); + expired = timing_wheel_advance (&bm->wheel, now, expired, 0); + BFD_DBG 
("Expired %d elements", vec_len (expired)); + u32 *p = NULL; + vec_foreach (p, expired) + { + const u32 bs_idx = *p; + if (!pool_is_free_index (bm->sessions, bs_idx)) + { + bfd_session_t *bs = pool_elt_at_index (bm->sessions, bs_idx); + bfd_on_timeout (vm, rt, bm, bs, now); + } + } + if (expired) + { + _vec_len (expired) = 0; + } + if (event_data) + { + _vec_len (event_data) = 0; + } + } + + return 0; +} + +/* + * bfd process node declaration + */ +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (bfd_process_node, static) = { + .function = bfd_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "bfd-process", + .n_next_nodes = BFD_OUTPUT_N_NEXT, + .next_nodes = + { +#define F(t, n) [BFD_OUTPUT_##t] = n, + foreach_bfd_transport (F) +#undef F + }, +}; +/* *INDENT-ON* */ + +static clib_error_t * +bfd_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags) +{ + // bfd_main_t *bm = &bfd_main; + // vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index); + if (!(flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)) + { + /* TODO */ + } + return 0; +} + +VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (bfd_sw_interface_up_down); + +static clib_error_t * +bfd_hw_interface_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) +{ + // bfd_main_t *bm = &bfd_main; + if (flags & VNET_HW_INTERFACE_FLAG_LINK_UP) + { + /* TODO */ + } + return 0; +} + +VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (bfd_hw_interface_up_down); + +/* + * setup function + */ +static clib_error_t * +bfd_main_init (vlib_main_t * vm) +{ + bfd_main_t *bm = &bfd_main; + bm->random_seed = random_default_seed (); + bm->vlib_main = vm; + bm->vnet_main = vnet_get_main (); + memset (&bm->wheel, 0, sizeof (bm->wheel)); + bm->cpu_cps = vm->clib_time.clocks_per_second; /* host clock rate; the wheel and all us<->clocks conversions scale by it */ + BFD_DBG ("cps is %.2f", bm->cpu_cps); + const u64 now = clib_cpu_time_now (); + timing_wheel_init (&bm->wheel, now, bm->cpu_cps); + bm->wheel_inaccuracy = 2 << bm->wheel.log2_clocks_per_bin; + + return 0; +} + +VLIB_INIT_FUNCTION 
(bfd_main_init); + +bfd_session_t * +bfd_get_session (bfd_main_t * bm, bfd_transport_t t) +{ + bfd_session_t *result; + pool_get (bm->sessions, result); + memset (result, 0, sizeof (*result)); + result->bs_idx = result - bm->sessions; + result->transport = t; + result->local_discr = random_u32 (&bm->random_seed); + bfd_set_defaults (bm, result); + hash_set (bm->session_by_disc, result->local_discr, result->bs_idx); + return result; +} + +void +bfd_put_session (bfd_main_t * bm, bfd_session_t * bs) +{ + hash_unset (bm->session_by_disc, bs->local_discr); + pool_put (bm->sessions, bs); +} + +bfd_session_t * +bfd_find_session_by_idx (bfd_main_t * bm, uword bs_idx) +{ + if (!pool_is_free_index (bm->sessions, bs_idx)) + { + return pool_elt_at_index (bm->sessions, bs_idx); + } + return NULL; +} + +bfd_session_t * +bfd_find_session_by_disc (bfd_main_t * bm, u32 disc) +{ + uword *p = hash_get (bm->session_by_disc, disc); /* look up in the caller's bm, like the other accessors, not the global */ + if (p) + { + return pool_elt_at_index (bm->sessions, *p); + } + return NULL; +} + +/** + * @brief verify bfd packet - common checks + * + * @param pkt + * + * @return 1 if bfd packet is valid + */ +int +bfd_verify_pkt_common (const bfd_pkt_t * pkt) +{ + if (1 != bfd_pkt_get_version (pkt)) + { + BFD_ERR ("BFD verification failed - unexpected version: '%d'", + bfd_pkt_get_version (pkt)); + return 0; + } + if (pkt->head.length < sizeof (bfd_pkt_t) || + (bfd_pkt_get_auth_present (pkt) && + pkt->head.length < sizeof (bfd_pkt_with_auth_t))) + { + BFD_ERR ("BFD verification failed - unexpected length: '%d' (auth " + "present: %d)", + pkt->head.length, bfd_pkt_get_auth_present (pkt)); + return 0; + } + if (!pkt->head.detect_mult) + { + BFD_ERR ("BFD verification failed - unexpected detect-mult: '%d'", + pkt->head.detect_mult); + return 0; + } + if (bfd_pkt_get_multipoint (pkt)) + { + BFD_ERR ("BFD verification failed - unexpected multipoint: '%d'", + bfd_pkt_get_multipoint (pkt)); + return 0; + } + if (!pkt->my_disc) + { + BFD_ERR ("BFD verification failed - unexpected my-disc: '%d'", + pkt->my_disc); + return 0; + } + if (!pkt->your_disc) + { + BFD_ERR ("BFD verification 
failed - unexpected my-disc: '%d'", + pkt->my_disc); + return 0; + } + if (!pkt->your_disc) + { + const u8 pkt_state = bfd_pkt_get_state (pkt); + if (pkt_state != BFD_STATE_down && pkt_state != BFD_STATE_admin_down) + { + BFD_ERR ("BFD verification failed - unexpected state: '%s' " + "(your-disc is zero)", bfd_state_string (pkt_state)); + return 0; + } + } + return 1; +} + +/** + * @brief verify bfd packet - authentication + * + * @param pkt + * + * @return 1 if bfd packet is valid + */ +int +bfd_verify_pkt_session (const bfd_pkt_t * pkt, u16 pkt_size, + const bfd_session_t * bs) +{ + const bfd_pkt_with_auth_t *with_auth = (bfd_pkt_with_auth_t *) pkt; + if (!bfd_pkt_get_auth_present (pkt)) + { + if (pkt_size > sizeof (*pkt)) + { + BFD_ERR ("BFD verification failed - unexpected packet size '%d' " + "(auth not present)", pkt_size); + return 0; + } + } + else + { + if (!with_auth->auth.type) + { + BFD_ERR ("BFD verification failed - unexpected auth type: '%d'", + with_auth->auth.type); + return 0; + } + /* TODO FIXME - implement the actual verification */ + } + return 1; +} + +void +bfd_consume_pkt (bfd_main_t * bm, const bfd_pkt_t * pkt, u32 bs_idx) +{ + bfd_session_t *bs = bfd_find_session_by_idx (bm, bs_idx); + if (!bs) + { + return; + } + BFD_DBG ("Scanning bfd packet, bs_idx=%d", bs->bs_idx); + bs->remote_discr = pkt->my_disc; + bs->remote_state = bfd_pkt_get_state (pkt); + bs->remote_demand = bfd_pkt_get_demand (pkt); + u64 now = clib_cpu_time_now (); + bs->last_rx_clocks = now; + bs->remote_desired_min_tx_us = clib_net_to_host_u32 (pkt->des_min_tx); + bs->remote_detect_mult = pkt->head.detect_mult; + bfd_set_remote_required_min_rx (bm, bs, now, + clib_net_to_host_u32 (pkt->req_min_rx), 0); + /* FIXME + If the Required Min Echo RX Interval field is zero, the + transmission of Echo packets, if any, MUST cease. 
+ + If a Poll Sequence is being transmitted by the local system and + the Final (F) bit in the received packet is set, the Poll Sequence + MUST be terminated. + */ + /* FIXME 6.8.2 */ + /* FIXME 6.8.4 */ + if (BFD_STATE_admin_down == bs->local_state) + return; + if (BFD_STATE_admin_down == bs->remote_state) + { + bfd_set_diag (bs, BFD_DIAG_CODE_neighbor_sig_down); + bfd_set_state (bm, bs, BFD_STATE_down, 0); + } + else if (BFD_STATE_down == bs->local_state) + { + if (BFD_STATE_down == bs->remote_state) + { + bfd_set_state (bm, bs, BFD_STATE_init, 0); + } + else if (BFD_STATE_init == bs->remote_state) + { + bfd_set_state (bm, bs, BFD_STATE_up, 0); + } + } + else if (BFD_STATE_init == bs->local_state) + { + if (BFD_STATE_up == bs->remote_state || + BFD_STATE_init == bs->remote_state) + { + bfd_set_state (bm, bs, BFD_STATE_up, 0); + } + } + else /* BFD_STATE_up == bs->local_state */ + { + if (BFD_STATE_down == bs->remote_state) + { + bfd_set_diag (bs, BFD_DIAG_CODE_neighbor_sig_down); + bfd_set_state (bm, bs, BFD_STATE_down, 0); + } + } +} + +u8 * +format_bfd_session (u8 * s, va_list * args) +{ + const bfd_session_t *bs = va_arg (*args, bfd_session_t *); + return format (s, "BFD(%u): bfd.SessionState=%s, " + "bfd.RemoteSessionState=%s, " + "bfd.LocalDiscr=%u, " + "bfd.RemoteDiscr=%u, " + "bfd.LocalDiag=%s, " + "bfd.DesiredMinTxInterval=%u, " + "bfd.RequiredMinRxInterval=%u, " + "bfd.RemoteMinRxInterval=%u, " + "bfd.DemandMode=%s, " + "bfd.RemoteDemandMode=%s, " + "bfd.DetectMult=%u, ", + bs->bs_idx, bfd_state_string (bs->local_state), + bfd_state_string (bs->remote_state), bs->local_discr, + bs->remote_discr, bfd_diag_code_string (bs->local_diag), + bs->desired_min_tx_us, bs->required_min_rx_us, + bs->remote_min_rx_us, (bs->local_demand ? "yes" : "no"), + (bs->remote_demand ? 
"yes" : "no"), bs->local_detect_mult); +} + +bfd_main_t bfd_main; + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/bfd/bfd_main.h b/src/vnet/bfd/bfd_main.h new file mode 100644 index 00000000..c72ea92a --- /dev/null +++ b/src/vnet/bfd/bfd_main.h @@ -0,0 +1,220 @@ +/* + * Copyright (c) 2011-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @file + * @brief BFD global declarations + */ +#ifndef __included_bfd_main_h__ +#define __included_bfd_main_h__ + +#include +#include +#include +#include + +#define foreach_bfd_transport(F) \ + F (UDP4, "ip4-rewrite") \ + F (UDP6, "ip6-rewrite") + +typedef enum +{ +#define F(t, n) BFD_TRANSPORT_##t, + foreach_bfd_transport (F) +#undef F +} bfd_transport_t; + +#define foreach_bfd_mode(F) \ + F (asynchronous) \ + F (demand) + +typedef enum +{ +#define F(x) BFD_MODE_##x, + foreach_bfd_mode (F) +#undef F +} bfd_mode_e; + +typedef struct +{ + /* index in bfd_main.sessions pool */ + u32 bs_idx; + + /* session state */ + bfd_state_e local_state; + + /* local diagnostics */ + bfd_diag_code_e local_diag; + + /* remote session state */ + bfd_state_e remote_state; + + /* local discriminator */ + u32 local_discr; + + /* remote discriminator */ + u32 remote_discr; + + /* configured desired min tx interval (microseconds) */ + u32 config_desired_min_tx_us; + + /* desired min tx interval (microseconds) 
*/ + u32 desired_min_tx_us; + + /* desired min tx interval (clocks) */ + u64 desired_min_tx_clocks; + + /* required min rx interval */ + u32 required_min_rx_us; + + /* remote min rx interval (microseconds) */ + u32 remote_min_rx_us; + + /* remote min rx interval (clocks) */ + u64 remote_min_rx_clocks; + + /* remote desired min tx interval */ + u32 remote_desired_min_tx_us; + + /* 1 if in demand mode, 0 otherwise */ + u8 local_demand; + + /* 1 if remote system sets demand mode, 0 otherwise */ + u8 remote_demand; + + /* local detect multiplier */ + u8 local_detect_mult; + + /* remote detect multiplier */ + u8 remote_detect_mult; + + /* set to value of timer in timing wheel, 0 if never set */ + u64 wheel_time_clocks; + + /* transmit interval */ + u64 transmit_interval_clocks; + + /* next time at which to transmit a packet */ + u64 tx_timeout_clocks; + + /* timestamp of last packet received */ + u64 last_rx_clocks; + + /* detection time */ + u64 detection_time_clocks; + + /* transport type for this session */ + bfd_transport_t transport; + + union + { + bfd_udp_session_t udp; + }; +} bfd_session_t; + +typedef struct +{ + u32 client_index; + u32 client_pid; +} event_subscriber_t; + +typedef struct +{ + /* pool of bfd sessions context data */ + bfd_session_t *sessions; + + /* timing wheel for scheduling timeouts */ + timing_wheel_t wheel; + + /* timing wheel inaccuracy, in clocks */ + u64 wheel_inaccuracy; + + /* hashmap - bfd session by discriminator */ + u32 *session_by_disc; + + /* background process node index */ + u32 bfd_process_node_index; + + /* convenience variables */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; + + /* cpu clocks per second */ + f64 cpu_cps; + + /* for generating random numbers */ + u32 random_seed; + +} bfd_main_t; + +extern bfd_main_t bfd_main; + +/* Packet counters */ +#define foreach_bfd_error(F) \ + F (NONE, "good bfd packets (processed)") \ + F (BAD, "invalid bfd packets") \ + F (DISABLED, "bfd packets received on disabled 
interfaces") + +typedef enum +{ +#define F(sym, str) BFD_ERROR_##sym, + foreach_bfd_error (F) +#undef F + BFD_N_ERROR, +} bfd_error_t; + +/* bfd packet trace capture */ +typedef struct +{ + u32 len; + u8 data[400]; +} bfd_input_trace_t; + +typedef enum +{ + BFD_EVENT_RESCHEDULE = 1, + BFD_EVENT_NEW_SESSION, +} bfd_process_event_e; /* event types handled by the bfd process node; a type, not a per-includer variable */ + +u8 *bfd_input_format_trace (u8 * s, va_list * args); + +bfd_session_t *bfd_get_session (bfd_main_t * bm, bfd_transport_t t); +void bfd_put_session (bfd_main_t * bm, bfd_session_t * bs); +bfd_session_t *bfd_find_session_by_idx (bfd_main_t * bm, uword bs_idx); +bfd_session_t *bfd_find_session_by_disc (bfd_main_t * bm, u32 disc); +void bfd_session_start (bfd_main_t * bm, bfd_session_t * bs); +void bfd_consume_pkt (bfd_main_t * bm, const bfd_pkt_t * bfd, u32 bs_idx); +int bfd_verify_pkt_common (const bfd_pkt_t * pkt); +int bfd_verify_pkt_session (const bfd_pkt_t * pkt, u16 pkt_size, + const bfd_session_t * bs); +void bfd_event (bfd_main_t * bm, bfd_session_t * bs); +void bfd_send_final (vlib_main_t * vm, vlib_buffer_t * b, bfd_session_t * bs); +u8 *format_bfd_session (u8 * s, va_list * args); + + +#define USEC_PER_MS 1000LL +#define USEC_PER_SECOND (1000 * USEC_PER_MS) + +/* default, slow transmission interval for BFD packets, per spec at least 1s */ +#define BFD_DEFAULT_DESIRED_MIN_TX_US USEC_PER_SECOND + +#endif /* __included_bfd_main_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/bfd/bfd_protocol.c b/src/vnet/bfd/bfd_protocol.c new file mode 100644 index 00000000..ede9536f --- /dev/null +++ b/src/vnet/bfd/bfd_protocol.c @@ -0,0 +1,74 @@ +#include + +u8 bfd_pkt_get_version (const bfd_pkt_t *pkt) +{ + return pkt->head.vers_diag >> 5; +} + +void bfd_pkt_set_version (bfd_pkt_t *pkt, int version) +{ + pkt->head.vers_diag = + (version << 5) | (pkt->head.vers_diag & ((1 << 5) - 1)); +} + +u8 bfd_pkt_get_diag_code (const bfd_pkt_t *pkt) +{ + return 
pkt->head.vers_diag & ((1 << 5) - 1); +} + +void bfd_pkt_set_diag_code (bfd_pkt_t *pkt, int value) +{ + pkt->head.vers_diag = + (pkt->head.vers_diag & ~((1 << 5) - 1)) | (value & ((1 << 5) - 1)); +} + +u8 bfd_pkt_get_state (const bfd_pkt_t *pkt) +{ + return pkt->head.sta_flags >> 6; +} + +void bfd_pkt_set_state (bfd_pkt_t *pkt, int value) +{ + pkt->head.sta_flags = (value << 6) | (pkt->head.sta_flags & ((1 << 6) - 1)); +} + +u8 bfd_pkt_get_poll (const bfd_pkt_t *pkt) +{ + return (pkt->head.sta_flags >> 5) & 1; +} + +void bfd_pkt_set_final (bfd_pkt_t *pkt) { pkt->head.sta_flags |= 1 << 4; } /* F is bit 4, as read by bfd_pkt_get_final; bit 5 is P (poll) */ + +u8 bfd_pkt_get_final (const bfd_pkt_t *pkt) +{ + return (pkt->head.sta_flags >> 4) & 1; +} + +void bfd_pkt_set_poll (bfd_pkt_t *pkt); +u8 bfd_pkt_get_control_plane_independent (const bfd_pkt_t *pkt) +{ + return (pkt->head.sta_flags >> 3) & 1; +} + +void bfd_pkt_set_control_plane_independent (bfd_pkt_t *pkt); + +u8 bfd_pkt_get_auth_present (const bfd_pkt_t *pkt) +{ + return (pkt->head.sta_flags >> 2) & 1; +} + +void bfd_pkt_set_auth_present (bfd_pkt_t *pkt); + +u8 bfd_pkt_get_demand (const bfd_pkt_t *pkt) +{ + return (pkt->head.sta_flags >> 1) & 1; +} + +void bfd_pkt_set_demand (bfd_pkt_t *pkt) { pkt->head.sta_flags |= 1 << 1; } + +u8 bfd_pkt_get_multipoint (const bfd_pkt_t *pkt) +{ + return pkt->head.sta_flags & 1; +} + +void bfd_pkt_set_multipoint (bfd_pkt_t *pkt); diff --git a/src/vnet/bfd/bfd_protocol.h b/src/vnet/bfd/bfd_protocol.h new file mode 100644 index 00000000..cf751b3b --- /dev/null +++ b/src/vnet/bfd/bfd_protocol.h @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2011-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_bfd_protocol_h__ +#define __included_bfd_protocol_h__ +/** + * @file + * @brief BFD protocol declarations + */ + +#include +#include + +/* *INDENT-OFF* */ +typedef CLIB_PACKED (struct { + /* + An optional Authentication Section MAY be present: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Auth Type | Auth Len | Authentication Data... | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ + u8 type; + u8 len; + u8 data[0]; +}) bfd_auth_t; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +typedef CLIB_PACKED (struct { + /* + The Mandatory Section of a BFD Control packet has the following + format: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |Vers | Diag |Sta|P|F|C|A|D|M| Detect Mult | Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | My Discriminator | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Your Discriminator | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Desired Min TX Interval | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Required Min RX Interval | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Required Min Echo RX Interval | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ + struct + { + u8 vers_diag; + u8 sta_flags; + u8 detect_mult; + u8 
length; + } head; + u32 my_disc; + u32 your_disc; + u32 des_min_tx; + u32 req_min_rx; + u32 req_min_echo_rx; +}) bfd_pkt_t; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +typedef CLIB_PACKED (struct { + bfd_pkt_t pkt; + bfd_auth_t auth; +}) bfd_pkt_with_auth_t; +/* *INDENT-ON* */ + +u8 bfd_pkt_get_version (const bfd_pkt_t * pkt); +void bfd_pkt_set_version (bfd_pkt_t * pkt, int version); +u8 bfd_pkt_get_diag_code (const bfd_pkt_t * pkt); +void bfd_pkt_set_diag_code (bfd_pkt_t * pkt, int value); +u8 bfd_pkt_get_state (const bfd_pkt_t * pkt); +void bfd_pkt_set_state (bfd_pkt_t * pkt, int value); +u8 bfd_pkt_get_poll (const bfd_pkt_t * pkt); +void bfd_pkt_set_final (bfd_pkt_t * pkt); +u8 bfd_pkt_get_final (const bfd_pkt_t * pkt); +void bfd_pkt_set_poll (bfd_pkt_t * pkt); +u8 bfd_pkt_get_control_plane_independent (const bfd_pkt_t * pkt); +void bfd_pkt_set_control_plane_independent (bfd_pkt_t * pkt); +u8 bfd_pkt_get_auth_present (const bfd_pkt_t * pkt); +void bfd_pkt_set_auth_present (bfd_pkt_t * pkt); +u8 bfd_pkt_get_demand (const bfd_pkt_t * pkt); +void bfd_pkt_set_demand (bfd_pkt_t * pkt); +u8 bfd_pkt_get_multipoint (const bfd_pkt_t * pkt); +void bfd_pkt_set_multipoint (bfd_pkt_t * pkt); + +/* BFD diagnostic codes */ +#define foreach_bfd_diag_code(F) \ + F (0, no_diag, "No Diagnostic") \ + F (1, det_time_exp, "Control Detection Time Expired") \ + F (2, echo_failed, "Echo Function Failed") \ + F (3, neighbor_sig_down, "Neighbor Signaled Session Down") \ + F (4, fwd_plain_reset, "Forwarding Plane Reset") \ + F (5, path_down, "Path Down") \ + F (6, concat_path_down, "Concatenated Path Down") \ + F (7, admin_down, "Administratively Down") \ + F (8, reverse_concat_path_down, "Reverse Concatenated Path Down") + +#define BFD_DIAG_CODE_NAME(t) BFD_DIAG_CODE_##t + +typedef enum +{ +#define F(n, t, s) BFD_DIAG_CODE_NAME (t) = n, + foreach_bfd_diag_code (F) +#undef F +} bfd_diag_code_e; + +const char *bfd_diag_code_string (bfd_diag_code_e diag); + +/* BFD state values */ +#define 
foreach_bfd_state(F) \ + F (0, admin_down, "AdminDown") \ + F (1, down, "Down") \ + F (2, init, "Init") \ + F (3, up, "Up") + +#define BFD_STATE_NAME(t) BFD_STATE_##t + +typedef enum +{ +#define F(n, t, s) BFD_STATE_NAME (t) = n, + foreach_bfd_state (F) +#undef F +} bfd_state_e; + +const char *bfd_state_string (bfd_state_e state); + +#endif /* __included_bfd_protocol_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/bfd/bfd_udp.c b/src/vnet/bfd/bfd_udp.c new file mode 100644 index 00000000..3c747d86 --- /dev/null +++ b/src/vnet/bfd/bfd_udp.c @@ -0,0 +1,639 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef struct +{ + bfd_main_t *bfd_main; + /* hashmap - bfd session index by bfd key - used for CLI/API lookup, where + * discriminator is unknown */ + mhash_t bfd_session_idx_by_bfd_key; +} bfd_udp_main_t; + +static vlib_node_registration_t bfd_udp4_input_node; +static vlib_node_registration_t bfd_udp6_input_node; + +bfd_udp_main_t bfd_udp_main; + +void bfd_udp_transport_to_buffer (vlib_main_t *vm, vlib_buffer_t *b, + bfd_udp_session_t *bus) +{ + udp_header_t *udp; + u16 udp_length, ip_length; + bfd_udp_key_t *key = &bus->key; + + b->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; + if (ip46_address_is_ip4 (&key->local_addr)) + { + ip4_header_t *ip4; + const size_t data_size = sizeof (*ip4) + sizeof (*udp); + vlib_buffer_advance (b, -data_size); + ip4 = vlib_buffer_get_current (b); + udp = (udp_header_t *)(ip4 + 1); + memset (ip4, 0, data_size); + ip4->ip_version_and_header_length = 0x45; + ip4->ttl = 255; + ip4->protocol = IP_PROTOCOL_UDP; + ip4->src_address.as_u32 = key->local_addr.ip4.as_u32; + ip4->dst_address.as_u32 = key->peer_addr.ip4.as_u32; + + udp->src_port = clib_host_to_net_u16 (50000); /* FIXME */ + udp->dst_port = 
clib_host_to_net_u16 (UDP_DST_PORT_bfd4); + + /* fix ip length, checksum and udp length */ + ip_length = vlib_buffer_length_in_chain (vm, b); + + ip4->length = clib_host_to_net_u16 (ip_length); + ip4->checksum = ip4_header_checksum (ip4); + + udp_length = ip_length - (sizeof (*ip4)); + udp->length = clib_host_to_net_u16 (udp_length); + } + else + { + BFD_ERR ("not implemented"); + abort (); + } +} + +void bfd_add_udp_transport (vlib_main_t *vm, vlib_buffer_t *b, + bfd_udp_session_t *bus) +{ + vnet_buffer (b)->ip.adj_index[VLIB_RX] = bus->adj_index; + vnet_buffer (b)->ip.adj_index[VLIB_TX] = bus->adj_index; + bfd_udp_transport_to_buffer (vm, b, bus); +} + +static bfd_session_t *bfd_lookup_session (bfd_udp_main_t *bum, + const bfd_udp_key_t *key) +{ + uword *p = mhash_get (&bum->bfd_session_idx_by_bfd_key, key); + if (p) + { + return bfd_find_session_by_idx (bum->bfd_main, *p); + } + return 0; +} + +static vnet_api_error_t +bfd_udp_add_session_internal (bfd_udp_main_t *bum, u32 sw_if_index, + u32 desired_min_tx_us, u32 required_min_rx_us, + u8 detect_mult, const ip46_address_t *local_addr, + const ip46_address_t *peer_addr) +{ + vnet_sw_interface_t *sw_if = + vnet_get_sw_interface (vnet_get_main (), sw_if_index); + /* get a pool entry and if we end up not needing it, give it back */ + bfd_transport_t t = BFD_TRANSPORT_UDP4; + if (!ip46_address_is_ip4 (local_addr)) + { + t = BFD_TRANSPORT_UDP6; + } + bfd_session_t *bs = bfd_get_session (bum->bfd_main, t); + bfd_udp_session_t *bus = &bs->udp; + memset (bus, 0, sizeof (*bus)); + bfd_udp_key_t *key = &bus->key; + key->sw_if_index = sw_if->sw_if_index; + key->local_addr.as_u64[0] = local_addr->as_u64[0]; + key->local_addr.as_u64[1] = local_addr->as_u64[1]; + key->peer_addr.as_u64[0] = peer_addr->as_u64[0]; + key->peer_addr.as_u64[1] = peer_addr->as_u64[1]; + const bfd_session_t *tmp = bfd_lookup_session (bum, key); + if (tmp) + { + BFD_ERR ("duplicate bfd-udp session, existing bs_idx=%d", tmp->bs_idx); + bfd_put_session 
(bum->bfd_main, bs); + return VNET_API_ERROR_BFD_EEXIST; + } + key->sw_if_index = sw_if->sw_if_index; + mhash_set (&bum->bfd_session_idx_by_bfd_key, key, bs->bs_idx, NULL); + BFD_DBG ("session created, bs_idx=%u, sw_if_index=%d, local=%U, peer=%U", + bs->bs_idx, key->sw_if_index, format_ip46_address, &key->local_addr, + IP46_TYPE_ANY, format_ip46_address, &key->peer_addr, IP46_TYPE_ANY); + if (BFD_TRANSPORT_UDP4 == t) + { + bus->adj_index = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4, VNET_LINK_IP4, + &key->peer_addr, key->sw_if_index); + BFD_DBG ("adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, VNET_LINK_IP4, %U, %d) " + "returns %d", + format_ip46_address, &key->peer_addr, IP46_TYPE_ANY, + key->sw_if_index, bus->adj_index); + } + else + { + bus->adj_index = adj_nbr_add_or_lock (FIB_PROTOCOL_IP6, VNET_LINK_IP6, + &key->peer_addr, key->sw_if_index); + BFD_DBG ("adj_nbr_add_or_lock(FIB_PROTOCOL_IP6, VNET_LINK_IP6, %U, %d) " + "returns %d", + format_ip46_address, &key->peer_addr, IP46_TYPE_ANY, + key->sw_if_index, bus->adj_index); + } + bs->config_desired_min_tx_us = desired_min_tx_us; + bs->required_min_rx_us = required_min_rx_us; + bs->local_detect_mult = detect_mult; + bfd_session_start (bum->bfd_main, bs); + return 0; +} + +static vnet_api_error_t +bfd_udp_validate_api_input (u32 sw_if_index, const ip46_address_t *local_addr, + const ip46_address_t *peer_addr) +{ + vnet_sw_interface_t *sw_if = + vnet_get_sw_interface (vnet_get_main (), sw_if_index); + u8 local_ip_valid = 0; + ip_interface_address_t *ia = NULL; + if (!sw_if) + { + BFD_ERR ("got NULL sw_if"); + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + } + if (ip46_address_is_ip4 (local_addr)) + { + if (!ip46_address_is_ip4 (peer_addr)) + { + BFD_ERR ("IP family mismatch"); + return VNET_API_ERROR_INVALID_ARGUMENT; + } + ip4_main_t *im = &ip4_main; + + /* *INDENT-OFF* */ + foreach_ip_interface_address ( + &im->lookup_main, ia, sw_if_index, 0 /* honor unnumbered */, ({ + ip4_address_t *x = + ip_interface_address_get_address 
(&im->lookup_main, ia); + if (x->as_u32 == local_addr->ip4.as_u32) + { + /* valid address for this interface */ + local_ip_valid = 1; + break; + } + })); + /* *INDENT-ON* */ + } + else + { + if (ip46_address_is_ip4 (peer_addr)) + { + BFD_ERR ("IP family mismatch"); + return VNET_API_ERROR_INVALID_ARGUMENT; + } + ip6_main_t *im = &ip6_main; + /* *INDENT-OFF* */ + foreach_ip_interface_address ( + &im->lookup_main, ia, sw_if_index, 0 /* honor unnumbered */, ({ + ip6_address_t *x = + ip_interface_address_get_address (&im->lookup_main, ia); + if (local_addr->ip6.as_u64[0] == x->as_u64[0] && + local_addr->ip6.as_u64[1] == x->as_u64[1]) + { + /* valid address for this interface */ + local_ip_valid = 1; + break; + } + })); + /* *INDENT-ON* */ + } + + if (!local_ip_valid) + { + BFD_ERR ("address not found on interface"); + return VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE; + } + + return 0; +} + +vnet_api_error_t bfd_udp_add_session (u32 sw_if_index, u32 desired_min_tx_us, + u32 required_min_rx_us, u8 detect_mult, + const ip46_address_t *local_addr, + const ip46_address_t *peer_addr) +{ + vnet_api_error_t rv = + bfd_udp_validate_api_input (sw_if_index, local_addr, peer_addr); + if (rv) + { + return rv; + } + if (detect_mult < 1) + { + BFD_ERR ("detect_mult < 1"); + return VNET_API_ERROR_INVALID_ARGUMENT; + } + if (desired_min_tx_us < 1) + { + BFD_ERR ("desired_min_tx_us < 1"); + return VNET_API_ERROR_INVALID_ARGUMENT; + } + return bfd_udp_add_session_internal (&bfd_udp_main, sw_if_index, + desired_min_tx_us, required_min_rx_us, + detect_mult, local_addr, peer_addr); +} + +vnet_api_error_t bfd_udp_del_session (u32 sw_if_index, + const ip46_address_t *local_addr, + const ip46_address_t *peer_addr) +{ + vnet_api_error_t rv = + bfd_udp_validate_api_input (sw_if_index, local_addr, peer_addr); + if (rv) + { + return rv; + } + bfd_udp_main_t *bum = &bfd_udp_main; + vnet_sw_interface_t *sw_if = + vnet_get_sw_interface (vnet_get_main (), sw_if_index); + bfd_udp_key_t key; + 
memset (&key, 0, sizeof (key)); + key.sw_if_index = sw_if->sw_if_index; + key.local_addr.as_u64[0] = local_addr->as_u64[0]; + key.local_addr.as_u64[1] = local_addr->as_u64[1]; + key.peer_addr.as_u64[0] = peer_addr->as_u64[0]; + key.peer_addr.as_u64[1] = peer_addr->as_u64[1]; + bfd_session_t *tmp = bfd_lookup_session (bum, &key); + if (tmp) + { + BFD_DBG ("free bfd-udp session, bs_idx=%d", tmp->bs_idx); + mhash_unset (&bum->bfd_session_idx_by_bfd_key, &key, NULL); + adj_unlock (tmp->udp.adj_index); + bfd_put_session (bum->bfd_main, tmp); + } + else + { + BFD_ERR ("no such session"); + return VNET_API_ERROR_BFD_NOENT; + } + return 0; +} + +typedef enum { + BFD_UDP_INPUT_NEXT_NORMAL, + BFD_UDP_INPUT_NEXT_REPLY, + BFD_UDP_INPUT_N_NEXT, +} bfd_udp_input_next_t; + +/* Packet counters */ +#define foreach_bfd_udp_error(F) \ + F (NONE, "good bfd packets (processed)") \ + F (BAD, "invalid bfd packets") \ + F (DISABLED, "bfd packets received on disabled interfaces") + +#define F(sym, string) static char BFD_UDP_ERR_##sym##_STR[] = string; +foreach_bfd_udp_error (F); +#undef F + +static char *bfd_udp_error_strings[] = { +#define F(sym, string) BFD_UDP_ERR_##sym##_STR, + foreach_bfd_udp_error (F) +#undef F +}; + +typedef enum { +#define F(sym, str) BFD_UDP_ERROR_##sym, + foreach_bfd_udp_error (F) +#undef F + BFD_UDP_N_ERROR, +} bfd_udp_error_t; + +static void bfd_udp4_find_headers (vlib_buffer_t *b, const ip4_header_t **ip4, + const udp_header_t **udp) +{ + /* sanity check first: the offset may be negative, but must not reach back past sizeof (pre_data); compare signed so the test can fire */ + const i32 start = vnet_buffer (b)->ip.start_of_ip_header; + if (start < -(i32) sizeof (b->pre_data)) + { + BFD_ERR ("Start of ip header is before pre_data, ignoring"); + *ip4 = NULL; + *udp = NULL; + return; + } + *ip4 = (ip4_header_t *)(b->data + start); + if ((u8 *)*ip4 > (u8 *)vlib_buffer_get_current (b)) + { + BFD_ERR ("Start of ip header is beyond current data, ignoring"); + *ip4 = NULL; + *udp = NULL; + return; + } + *udp = (udp_header_t *)((*ip4) + 1); +} + +static bfd_udp_error_t 
bfd_udp4_verify_transport (const ip4_header_t *ip4, + const udp_header_t *udp, + const bfd_session_t *bs) +{ + const bfd_udp_session_t *bus = &bs->udp; + const bfd_udp_key_t *key = &bus->key; + if (ip4->src_address.as_u32 != key->peer_addr.ip4.as_u32) + { + BFD_ERR ("IP src addr mismatch, got %U, expected %U", format_ip4_address, + ip4->src_address.as_u8, format_ip4_address, + key->peer_addr.ip4.as_u8); /* pass the address bytes by pointer, as bfd_udp4_scan does */ + return BFD_UDP_ERROR_BAD; + } + if (ip4->dst_address.as_u32 != key->local_addr.ip4.as_u32) + { + BFD_ERR ("IP dst addr mismatch, got %U, expected %U", format_ip4_address, + ip4->dst_address.as_u8, format_ip4_address, + key->local_addr.ip4.as_u8); + return BFD_UDP_ERROR_BAD; + } + const u8 expected_ttl = 255; + if (ip4->ttl != expected_ttl) + { + BFD_ERR ("IP unexpected TTL value %d, expected %d", ip4->ttl, + expected_ttl); + return BFD_UDP_ERROR_BAD; + } + if (clib_net_to_host_u16 (udp->src_port) < 49152 || + clib_net_to_host_u16 (udp->src_port) > 65535) + { + BFD_ERR ("Invalid UDP src port %d, out of range <49152,65535>", + clib_net_to_host_u16 (udp->src_port)); /* log in host order; only logged, packet still accepted */ + } + return BFD_UDP_ERROR_NONE; +} + +typedef struct +{ + u32 bs_idx; + bfd_pkt_t pkt; +} bfd_rpc_update_t; + +static void bfd_rpc_update_session_cb (const bfd_rpc_update_t *a) +{ + bfd_consume_pkt (bfd_udp_main.bfd_main, &a->pkt, a->bs_idx); +} + +static void bfd_rpc_update_session (u32 bs_idx, const bfd_pkt_t *pkt) +{ + /* packet length was already verified to be correct by the caller */ + const u32 data_size = sizeof (bfd_rpc_update_t) - + STRUCT_SIZE_OF (bfd_rpc_update_t, pkt) + + pkt->head.length; + u8 data[data_size]; + bfd_rpc_update_t *update = (bfd_rpc_update_t *)data; + update->bs_idx = bs_idx; + clib_memcpy (&update->pkt, pkt, pkt->head.length); + vl_api_rpc_call_main_thread (bfd_rpc_update_session_cb, data, data_size); +} + +static bfd_udp_error_t bfd_udp4_scan (vlib_main_t *vm, vlib_node_runtime_t *rt, + vlib_buffer_t *b, bfd_session_t **bs_out) +{ + const bfd_pkt_t *pkt = vlib_buffer_get_current (b); + if 
(sizeof (*pkt) > b->current_length) + { + BFD_ERR ( + "Payload size %d too small to hold bfd packet of minimum size %d", + b->current_length, sizeof (*pkt)); + return BFD_UDP_ERROR_BAD; + } + const ip4_header_t *ip4; + const udp_header_t *udp; + bfd_udp4_find_headers (b, &ip4, &udp); + if (!ip4 || !udp) + { + BFD_ERR ("Couldn't find ip4 or udp header"); + return BFD_UDP_ERROR_BAD; + } + if (!bfd_verify_pkt_common (pkt)) + { + return BFD_UDP_ERROR_BAD; + } + bfd_session_t *bs = NULL; + if (pkt->your_disc) + { + BFD_DBG ("Looking up BFD session using discriminator %u", + pkt->your_disc); + bs = bfd_find_session_by_disc (bfd_udp_main.bfd_main, pkt->your_disc); + } + else + { + bfd_udp_key_t key; + memset (&key, 0, sizeof (key)); + key.sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX]; + key.local_addr.ip4.as_u32 = ip4->dst_address.as_u32; + key.peer_addr.ip4.as_u32 = ip4->src_address.as_u32; + BFD_DBG ("Looking up BFD session using key (sw_if_index=%u, local=%U, " + "peer=%U)", + key.sw_if_index, format_ip4_address, key.local_addr.ip4.as_u8, + format_ip4_address, key.peer_addr.ip4.as_u8); + bs = bfd_lookup_session (&bfd_udp_main, &key); + } + if (!bs) + { + BFD_ERR ("BFD session lookup failed - no session matches BFD pkt"); + return BFD_UDP_ERROR_BAD; + } + BFD_DBG ("BFD session found, bs_idx=%u", bs->bs_idx); + if (!bfd_verify_pkt_session (pkt, b->current_length, bs)) + { + return BFD_UDP_ERROR_BAD; + } + bfd_udp_error_t err; + if (BFD_UDP_ERROR_NONE != (err = bfd_udp4_verify_transport (ip4, udp, bs))) + { + return err; + } + bfd_rpc_update_session (bs->bs_idx, pkt); + *bs_out = bs; + return BFD_UDP_ERROR_NONE; +} + +static bfd_udp_error_t bfd_udp6_scan (vlib_main_t *vm, vlib_buffer_t *b) +{ + /* TODO */ + return BFD_UDP_ERROR_BAD; +} + +/* + * Process a frame of bfd packets + * Expect 1 packet / frame + */ +static uword bfd_udp_input (vlib_main_t *vm, vlib_node_runtime_t *rt, + vlib_frame_t *f, int is_ipv6) +{ + u32 n_left_from, *from; + bfd_input_trace_t *t0; + + 
from = vlib_frame_vector_args (f); /* array of buffer indices */ + n_left_from = f->n_vectors; /* number of buffer indices */ + + while (n_left_from > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0, error0; + + bi0 = from[0]; + b0 = vlib_get_buffer (vm, bi0); + + bfd_session_t *bs = NULL; + + /* If this pkt is traced, snapshot the data */ + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + int len; + t0 = vlib_add_trace (vm, rt, b0, sizeof (*t0)); + len = (b0->current_length < sizeof (t0->data)) ? b0->current_length + : sizeof (t0->data); + t0->len = len; + clib_memcpy (t0->data, vlib_buffer_get_current (b0), len); + } + + /* scan this bfd pkt. error0 is the counter index to bmp */ + if (is_ipv6) + { + error0 = bfd_udp6_scan (vm, b0); + } + else + { + error0 = bfd_udp4_scan (vm, rt, b0, &bs); + } + b0->error = rt->errors[error0]; + + next0 = BFD_UDP_INPUT_NEXT_NORMAL; + if (BFD_UDP_ERROR_NONE == error0) + { + /* if everything went fine, check for poll bit, if present, re-use + the buffer and based on (now updated) session parameters, send the + final packet back */ + const bfd_pkt_t *pkt = vlib_buffer_get_current (b0); + if (bfd_pkt_get_poll (pkt)) + { + bfd_send_final (vm, b0, bs); + if (is_ipv6) + { + vlib_node_increment_counter (vm, bfd_udp6_input_node.index, + b0->error, 1); + } + else + { + vlib_node_increment_counter (vm, bfd_udp4_input_node.index, + b0->error, 1); + } + next0 = BFD_UDP_INPUT_NEXT_REPLY; + } + } + vlib_set_next_frame_buffer (vm, rt, next0, bi0); + + from += 1; + n_left_from -= 1; + } + + return f->n_vectors; +} + +static uword bfd_udp4_input (vlib_main_t *vm, vlib_node_runtime_t *rt, + vlib_frame_t *f) +{ + return bfd_udp_input (vm, rt, f, 0); +} + +/* + * bfd input graph node declaration + */ +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (bfd_udp4_input_node, static) = { + .function = bfd_udp4_input, + .name = "bfd-udp4-input", + .vector_size = sizeof (u32), + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = BFD_UDP_N_ERROR, + .error_strings = 
bfd_udp_error_strings, + + .format_trace = bfd_input_format_trace, + + .n_next_nodes = BFD_UDP_INPUT_N_NEXT, + .next_nodes = + { + [BFD_UDP_INPUT_NEXT_NORMAL] = "error-drop", + [BFD_UDP_INPUT_NEXT_REPLY] = "ip4-lookup", + }, +}; +/* *INDENT-ON* */ + +static uword bfd_udp6_input (vlib_main_t *vm, vlib_node_runtime_t *rt, + vlib_frame_t *f) +{ + return bfd_udp_input (vm, rt, f, 1); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (bfd_udp6_input_node, static) = { + .function = bfd_udp6_input, + .name = "bfd-udp6-input", + .vector_size = sizeof (u32), + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = BFD_UDP_N_ERROR, + .error_strings = bfd_udp_error_strings, + + .format_trace = bfd_input_format_trace, + + .n_next_nodes = BFD_UDP_INPUT_N_NEXT, + .next_nodes = + { + [BFD_UDP_INPUT_NEXT_NORMAL] = "error-drop", + [BFD_UDP_INPUT_NEXT_REPLY] = "ip6-lookup", + }, +}; +/* *INDENT-ON* */ + +static clib_error_t *bfd_sw_interface_up_down (vnet_main_t *vnm, + u32 sw_if_index, u32 flags) +{ + // vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index); + if (!(flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)) + { + /* TODO */ + } + return 0; +} + +VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (bfd_sw_interface_up_down); + +static clib_error_t *bfd_hw_interface_up_down (vnet_main_t *vnm, + u32 hw_if_index, u32 flags) +{ + if (flags & VNET_HW_INTERFACE_FLAG_LINK_UP) + { + /* TODO */ + } + return 0; +} + +VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (bfd_hw_interface_up_down); + +/* + * setup function + */ +static clib_error_t *bfd_udp_init (vlib_main_t *vm) +{ + mhash_init (&bfd_udp_main.bfd_session_idx_by_bfd_key, sizeof (uword), + sizeof (bfd_udp_key_t)); + bfd_udp_main.bfd_main = &bfd_main; + udp_register_dst_port (vm, UDP_DST_PORT_bfd4, bfd_udp4_input_node.index, 1); + udp_register_dst_port (vm, UDP_DST_PORT_bfd6, bfd_udp6_input_node.index, 0); + return 0; +} + +VLIB_INIT_FUNCTION (bfd_udp_init); diff --git a/src/vnet/bfd/bfd_udp.h b/src/vnet/bfd/bfd_udp.h new file mode 100644 index 
00000000..51f5327b --- /dev/null +++ b/src/vnet/bfd/bfd_udp.h @@ -0,0 +1,56 @@ +/* * Copyright (c) 2011-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @file + * @brief BFD global declarations + */ + +#ifndef __included_bfd_udp_h__ +#define __included_bfd_udp_h__ + +#include +#include +#include + +#define BFD_UDP_KEY_BODY + +/* *INDENT-OFF* */ +typedef CLIB_PACKED (struct { + + u32 sw_if_index; + ip46_address_t local_addr; + ip46_address_t peer_addr; + +}) bfd_udp_key_t; +/* *INDENT-ON* */ + +typedef struct +{ + bfd_udp_key_t key; + + adj_index_t adj_index; +} bfd_udp_session_t; + +void bfd_add_udp_transport (vlib_main_t * vm, vlib_buffer_t * b, + bfd_udp_session_t * bs); + +#endif /* __included_bfd_udp_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/bfd/dir.dox b/src/vnet/bfd/dir.dox new file mode 100644 index 00000000..ed656b52 --- /dev/null +++ b/src/vnet/bfd/dir.dox @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + @dir vnet/vnet/bfd + @brief Bidirectional Forwarding Detection (BFD) implementation +*/ -- cgit 1.2.3-korg From fdc62abdc113ea63dc867375bd49ef3043dcd290 Mon Sep 17 00:00:00 2001 From: Chris Luke Date: Wed, 28 Dec 2016 09:44:47 -0500 Subject: Repair Doxygen build infrastructure After Gerrit 4430 much of the documentation failed to build, but silently so it was easily missed; equally missing that several paths have been missing for a while. - Correct paths after directory tree changes. - Doxygen now bails when input paths don't exist. - Fix up some of the less deranged entries in the documentation index. - Exclude the LUA tree, its documentation is a mess. 
Change-Id: I35e6b433feee5e05bca772d93aa1635c724db734 Signed-off-by: Chris Luke --- README.md | 30 +++++++++++----------- doxygen/Makefile | 38 +++++++++++++++++++-------- doxygen/user_doc.md | 2 ++ src/vnet/bfd/bfd_doc.md | 4 ++- src/vnet/span/span.md | 65 ----------------------------------------------- src/vnet/span/span_doc.md | 65 +++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 112 insertions(+), 92 deletions(-) delete mode 100644 src/vnet/span/span.md create mode 100644 src/vnet/span/span_doc.md (limited to 'src/vnet/bfd') diff --git a/README.md b/README.md index cf43a4df..8520b395 100644 --- a/README.md +++ b/README.md @@ -30,22 +30,22 @@ Directory name | Description build-root | Build output directory doxygen | Documentation generator configuration dpdk | DPDK patches and build infrastructure - g2 | Event log visualization tool - perftool | Performance tool -@ref plugins | VPP bundled plugins directory -@ref svm | Shared virtual memory allocation library +@ref plugins | Not-yet-relocated VPP bundled plugins directory +@ref src | VPP source code +@ref src/plugins | VPP bundled plugins directory +@ref src/svm | Shared virtual memory allocation library + src/tests | Unit tests + src/vat | VPP API test program +@ref src/vlib | VPP application library +@ref src/vlibapi | VPP API library +@ref src/vlibmemory | VPP Memory management +@ref src/vlibsocket | VPP Socket I/O +@ref src/vnet | VPP networking +@ref src/vpp | VPP application +@ref src/vpp-api | VPP application API bindings +@ref src/vppinfra | VPP core library test | Unit tests -@ref vlib | VPP application library source -@ref vlib-api | VPP API library source -@ref vnet | VPP networking source -@ref vpp | VPP application source -@ref vpp-api | VPP application API source - vppapigen | VPP API generator source - vpp-api-test | VPP API test program source -@ref vppinfra | VPP core library source - -(If the page you are viewing is not generated by Doxygen then -ignore any @@ref labels in the 
above table.) +@ref vpp-api | Not-yet-relocated API bindings ## Getting started diff --git a/doxygen/Makefile b/doxygen/Makefile index 92fa3635..ffce3c13 100644 --- a/doxygen/Makefile +++ b/doxygen/Makefile @@ -42,13 +42,17 @@ DOC_MAC_PY_DEPENDS = pyparsing jinja2 DOXY_DIR ?= $(WS_ROOT)/doxygen # Primary source directories +DOXY_SRC ?= src DOXY_SRC_DIRECTORIES = \ - vppinfra \ - svm \ - vlib \ - vlib-api \ - vnet \ - vpp \ + $(DOXY_SRC)/vppinfra \ + $(DOXY_SRC)/svm \ + $(DOXY_SRC)/vlib \ + $(DOXY_SRC)/vlibapi \ + $(DOXY_SRC)/vlibmemory \ + $(DOXY_SRC)/vlibsocket \ + $(DOXY_SRC)/vnet \ + $(DOXY_SRC)/vpp \ + $(DOXY_SRC)/vpp-api \ vpp-api # Input directories and files @@ -56,6 +60,7 @@ DOXY_INPUT ?= \ $(wildcard $(WS_ROOT)/*.md) \ $(wildcard $(DOXY_DIR)/*.md) \ $(DOXY_SRC_DIRECTORIES) \ + $(DOXY_SRC)/plugins \ plugins # Strip leading workspace path from input names @@ -67,9 +72,10 @@ DOXY_INPUT := $(subst $(WS_ROOT)/,,$(DOXY_INPUT)) # there's a DPDK equivalent that conflicts. # These must be left-anchored paths for the regexp below to work. DOXY_EXCLUDE ?= \ - vlib/vlib/buffer.c \ - vlib/example \ - plugins/sample-plugin + $(DOXY_SRC)/vlib/vlib/buffer.c \ + $(DOXY_SRC)/vlib/example \ + plugins/sample-plugin \ + vpp-api/lua # Generate a regexp for filenames to exclude DOXY_EXCLUDE_REGEXP = ($(subst .,\.,$(shell echo '$(strip $(DOXY_EXCLUDE))' | sed -e 's/ /|/g'))) @@ -164,9 +170,11 @@ endif bootstrap-doxygen: $(BR)/.doxygen-bootstrap.ok .DELETE_ON_ERROR: $(BR)/.doxygen-siphon.dep -$(BR)/.doxygen-siphon.dep: Makefile +$(BR)/.doxygen-siphon.dep: Makefile \ + $(addprefix,$(WSROOT),$(DOXY_INPUT)) @echo "Building siphon dependencies..." 
- @set -e; rm -f "$@"; for input in $(DOXY_INPUT); do \ + @rm -f "$@"; for input in $(DOXY_INPUT); do \ + [ -e "$(WS_ROOT)/$$input" ] && \ find "$(WS_ROOT)/$$input" -type f \ \( -name '*.[ch]' -or -name '*.dox' \) -print \ | grep -v -E '^$(WS_ROOT)/$(DOXY_EXCLUDE_REGEXP)' \ @@ -182,7 +190,15 @@ $(BR)/.doxygen-siphon.dep: Makefile .NOTPARALLEL: $(SIPHON_FILES) $(SIPHON_FILES): $(BR)/.doxygen-bootstrap.ok \ $(DOXY_DIR)/siphon-generate \ + $(addprefix,$(WSROOT),$(DOXY_INPUT)) \ $(wildcard $(DOXY_DIR)/siphon/*.py) + @echo "Validating source tree..." + @set -e; for input in $(DOXY_INPUT); do \ + if [ ! -e "$(WS_ROOT)/$$input" ]; then \ + echo "ERROR: Input path '$$input' does not exist." >&2; \ + exit 1; \ + fi; \ + done @rm -rf "$(SIPHON_INPUT)" "$(SIPHON_OUTPUT)" @mkdir -p "$(SIPHON_INPUT)" "$(SIPHON_OUTPUT)" @touch $(SIPHON_INPUT)/files diff --git a/doxygen/user_doc.md b/doxygen/user_doc.md index 2e87c877..40303439 100644 --- a/doxygen/user_doc.md +++ b/doxygen/user_doc.md @@ -12,3 +12,5 @@ Several modules provide operational, dataplane-user focused documentation. - @subpage ioam_plugin_doc - @subpage lb_plugin_doc - @subpage flowperpkt_plugin_doc +- @subpage span_doc +- @subpage bfd_doc diff --git a/src/vnet/bfd/bfd_doc.md b/src/vnet/bfd/bfd_doc.md index 1333ed77..3e86b178 100644 --- a/src/vnet/bfd/bfd_doc.md +++ b/src/vnet/bfd/bfd_doc.md @@ -1 +1,3 @@ -TODO +# BFD Notes {#bfd_doc} + +@todo Someone needs to produce this or remove the stub file. diff --git a/src/vnet/span/span.md b/src/vnet/span/span.md deleted file mode 100644 index ee3f814f..00000000 --- a/src/vnet/span/span.md +++ /dev/null @@ -1,65 +0,0 @@ -# VPP SPAN implementation - -This is a memo intended to contain documentation of the VPP SPAN implementation. -Everything that is not directly obvious should come here. 
- - -## Switched Port Analyzer (SPAN) -Port mirroring is used on a network switch to send a copy of network packets seen on one switch port to a network monitoring connection on another switch port. -Can be used by network engineers or administrators to measure performnce, analyze and debug data or diagnose errors on a network. - -### RX traffic node -There is one static node to mirror incomming packets. -* span-input: Creates a copy of incomming buffer due to incomming buffers can be reused internally. - -Chaining: dpdk-input -> span-input -> -* original buffer is sent to ethernet-input for processing -* buffer copy is sent to interface-output - -### Configuration -SPAN supports the following CLI configuration commands: - -#### Enable/Disable SPAN (CLI) - set interface span [disable | destination ] - -: mirrored interface name -destination : monitoring interface name -disable: delete mirroring - -#### Enable/Disabl SPAN (API) -SPAN supports the following API configuration command: - sw_interface_span_enable_disable src GigabitEthernet0/8/0 dst GigabitEthernet0/9/0 - sw_interface_span_enable_disable src_sw_if_index 1 dst_sw_if_index 2 - -src/src_sw_if_index: mirrored interface name -dst/dst_sw_if_index: monitoring interface name - -#### Remove SPAN entry (API) -SPAN supports the following API configuration command: - sw_interface_span_enable_disable src_sw_if_index 1 dst_sw_if_index 2 disable - -src_sw_if_index: mirrored interface name -dst_sw_if_index: monitoring interface name - -### Configuration example - -Mirror all packets on interface GigabitEthernet0/10/0 to interface GigabitEthernet0/11/0. 
- -Configure IPv4 addresses on mirrored interface: -set interface ip address GigabitEthernet0/10/0 192.168.1.13/24 -set interface state GigabitEthernet0/10/0 up - -Configure IPv4 addresses on monitoring interface: -set interface ip address GigabitEthernet0/11/0 192.168.2.13/24 -set interface state GigabitEthernet0/11/0 up - -Configure SPAN -set span src GigabitEthernet0/10/0 dst GigabitEthernet0/11/0 - -### Operational data - -Active SPAN mirroring CLI show command: - show interfaces span - -Active SPAN mirroring API dump command: - sw_interface_span_dump diff --git a/src/vnet/span/span_doc.md b/src/vnet/span/span_doc.md new file mode 100644 index 00000000..46480b28 --- /dev/null +++ b/src/vnet/span/span_doc.md @@ -0,0 +1,65 @@ +# VPP SPAN implementation {#span_doc} + +This is a memo intended to contain documentation of the VPP SPAN implementation. +Everything that is not directly obvious should come here. + + +## Switched Port Analyzer (SPAN) +Port mirroring is used on a network switch to send a copy of network packets seen on one switch port to a network monitoring connection on another switch port. +Can be used by network engineers or administrators to measure performance, analyze and debug data or diagnose errors on a network. + +### RX traffic node +There is one static node to mirror incoming packets. +* span-input: Creates a copy of the incoming buffer, because incoming buffers can be reused internally. 
+ +Chaining: dpdk-input -> span-input -> +* original buffer is sent to ethernet-input for processing +* buffer copy is sent to interface-output + +### Configuration +SPAN supports the following CLI configuration commands: + +#### Enable/Disable SPAN (CLI) + set interface span [disable | destination ] + +: mirrored interface name +destination : monitoring interface name +disable: delete mirroring + +#### Enable/Disable SPAN (API) +SPAN supports the following API configuration command: + sw_interface_span_enable_disable src GigabitEthernet0/8/0 dst GigabitEthernet0/9/0 + sw_interface_span_enable_disable src_sw_if_index 1 dst_sw_if_index 2 + +src/src_sw_if_index: mirrored interface name +dst/dst_sw_if_index: monitoring interface name + +#### Remove SPAN entry (API) +SPAN supports the following API configuration command: + sw_interface_span_enable_disable src_sw_if_index 1 dst_sw_if_index 2 disable + +src_sw_if_index: mirrored interface name +dst_sw_if_index: monitoring interface name + +### Configuration example + +Mirror all packets on interface GigabitEthernet0/10/0 to interface GigabitEthernet0/11/0. 
+ +Configure IPv4 addresses on mirrored interface: +set interface ip address GigabitEthernet0/10/0 192.168.1.13/24 +set interface state GigabitEthernet0/10/0 up + +Configure IPv4 addresses on monitoring interface: +set interface ip address GigabitEthernet0/11/0 192.168.2.13/24 +set interface state GigabitEthernet0/11/0 up + +Configure SPAN +set span src GigabitEthernet0/10/0 dst GigabitEthernet0/11/0 + +### Operational data + +Active SPAN mirroring CLI show command: + show interfaces span + +Active SPAN mirroring API dump command: + sw_interface_span_dump -- cgit 1.2.3-korg From 3e0a35613602ff7abf7348f7652b1d29b1352d1f Mon Sep 17 00:00:00 2001 From: Klement Sekera Date: Mon, 19 Dec 2016 09:05:21 +0100 Subject: BFD: immediately honor reduced remote_min_rx interval Change-Id: I7f09b45c926557d2ad0e2706b38fa56ff8194a3d Signed-off-by: Klement Sekera --- src/vnet/bfd/bfd_main.c | 42 ++++++++++++------------ src/vnet/bfd/bfd_main.h | 10 ++++-- src/vnet/bfd/bfd_udp.c | 1 + test/test_bfd.py | 86 +++++++++++++++++++++++++++++++++++-------------- 4 files changed, 93 insertions(+), 46 deletions(-) (limited to 'src/vnet/bfd') diff --git a/src/vnet/bfd/bfd_main.c b/src/vnet/bfd/bfd_main.c index e25eadfc..62be1842 100644 --- a/src/vnet/bfd/bfd_main.c +++ b/src/vnet/bfd/bfd_main.c @@ -126,38 +126,36 @@ bfd_calc_next_tx (bfd_main_t * bm, bfd_session_t * bs, u64 now) if (bs->local_detect_mult > 1) { /* common case - 75-100% of transmit interval */ - bs->tx_timeout_clocks = now + + bs->tx_timeout_clocks = bs->last_tx_clocks + (1 - .25 * (random_f64 (&bm->random_seed))) * bs->transmit_interval_clocks; if (bs->tx_timeout_clocks < now) { - /* huh, we've missed it already, skip the missed events */ - const u64 missed = - (now - bs->tx_timeout_clocks) / bs->transmit_interval_clocks; - BFD_ERR ("Missed %lu transmit events (now is %lu, calc " - "tx_timeout is %lu)!", - missed, now, bs->tx_timeout_clocks); - bs->tx_timeout_clocks += - (missed + 1) * bs->transmit_interval_clocks; + /* huh, 
we've missed it already, transmit now */ + BFD_DBG ("Missed %lu transmit events (now is %lu, calc " + "tx_timeout is %lu)", + (now - bs->tx_timeout_clocks) / + bs->transmit_interval_clocks, + now, bs->tx_timeout_clocks); + bs->tx_timeout_clocks = now; } } else { /* special case - 75-90% of transmit interval */ bs->tx_timeout_clocks = - now + + bs->last_tx_clocks + (.9 - .15 * (random_f64 (&bm->random_seed))) * bs->transmit_interval_clocks; if (bs->tx_timeout_clocks < now) { - /* huh, we've missed it already, skip the missed events */ - const u64 missed = - (now - bs->tx_timeout_clocks) / bs->transmit_interval_clocks; - BFD_ERR ("Missed %lu transmit events (now is %lu, calc " - "tx_timeout is %lu)!", - missed, now, bs->tx_timeout_clocks); - bs->tx_timeout_clocks += - (missed + 1) * bs->transmit_interval_clocks; + /* huh, we've missed it already, transmit now */ + BFD_DBG ("Missed %lu transmit events (now is %lu, calc " + "tx_timeout is %lu)", + (now - bs->tx_timeout_clocks) / + bs->transmit_interval_clocks, + now, bs->tx_timeout_clocks); + bs->tx_timeout_clocks = now; } } } @@ -485,7 +483,7 @@ bfd_init_control_frame (vlib_buffer_t * b, bfd_session_t * bs) pkt->your_disc = bs->remote_discr; pkt->des_min_tx = clib_host_to_net_u32 (bs->desired_min_tx_us); pkt->req_min_rx = clib_host_to_net_u32 (bs->required_min_rx_us); - pkt->req_min_echo_rx = clib_host_to_net_u32 (0); /* FIXME */ + pkt->req_min_echo_rx = clib_host_to_net_u32 (bs->required_min_echo_rx_us); b->current_length = bfd_length; } @@ -519,6 +517,7 @@ bfd_send_periodic (vlib_main_t * vm, vlib_node_runtime_t * rt, } bfd_init_control_frame (b, bs); bfd_add_transport_layer (vm, b, bs); + bs->last_tx_clocks = now; bfd_calc_next_tx (bm, bs, now); } else @@ -537,6 +536,7 @@ bfd_send_final (vlib_main_t * vm, vlib_buffer_t * b, bfd_session_t * bs) bfd_init_control_frame (b, bs); bfd_pkt_set_final (vlib_buffer_get_current (b)); bfd_add_transport_layer (vm, b, bs); + bs->last_tx_clocks = clib_cpu_time_now (); } static 
void @@ -946,6 +946,7 @@ format_bfd_session (u8 * s, va_list * args) "bfd.LocalDiag=%s, " "bfd.DesiredMinTxInterval=%u, " "bfd.RequiredMinRxInterval=%u, " + "bfd.RequiredMinEchoRxInterval=%u, " "bfd.RemoteMinRxInterval=%u, " "bfd.DemandMode=%s, " "bfd.RemoteDemandMode=%s, " @@ -954,7 +955,8 @@ format_bfd_session (u8 * s, va_list * args) bfd_state_string (bs->remote_state), bs->local_discr, bs->remote_discr, bfd_diag_code_string (bs->local_diag), bs->desired_min_tx_us, bs->required_min_rx_us, - bs->remote_min_rx_us, (bs->local_demand ? "yes" : "no"), + bs->required_min_echo_rx_us, bs->remote_min_rx_us, + (bs->local_demand ? "yes" : "no"), (bs->remote_demand ? "yes" : "no"), bs->local_detect_mult); } diff --git a/src/vnet/bfd/bfd_main.h b/src/vnet/bfd/bfd_main.h index c72ea92a..cc82c839 100644 --- a/src/vnet/bfd/bfd_main.h +++ b/src/vnet/bfd/bfd_main.h @@ -75,16 +75,19 @@ typedef struct /* desired min tx interval (clocks) */ u64 desired_min_tx_clocks; - /* required min rx interval */ + /* required min rx interval (microseconds) */ u32 required_min_rx_us; + /* required min echo rx interval (microseconds) */ + u32 required_min_echo_rx_us; + /* remote min rx interval (microseconds) */ u32 remote_min_rx_us; /* remote min rx interval (clocks) */ u64 remote_min_rx_clocks; - /* remote desired min tx interval */ + /* remote desired min tx interval (microseconds) */ u32 remote_desired_min_tx_us; /* 1 if in demand mode, 0 otherwise */ @@ -108,6 +111,9 @@ typedef struct /* next time at which to transmit a packet */ u64 tx_timeout_clocks; + /* timestamp of last packet transmitted */ + u64 last_tx_clocks; + /* timestamp of last packet received */ u64 last_rx_clocks; diff --git a/src/vnet/bfd/bfd_udp.c b/src/vnet/bfd/bfd_udp.c index 3c747d86..677f1e22 100644 --- a/src/vnet/bfd/bfd_udp.c +++ b/src/vnet/bfd/bfd_udp.c @@ -146,6 +146,7 @@ bfd_udp_add_session_internal (bfd_udp_main_t *bum, u32 sw_if_index, } bs->config_desired_min_tx_us = desired_min_tx_us; bs->required_min_rx_us = 
required_min_rx_us; + bs->required_min_echo_rx_us = required_min_rx_us; /* FIXME */ bs->local_detect_mult = detect_mult; bfd_session_start (bum->bfd_main, bs); return 0; diff --git a/test/test_bfd.py b/test/test_bfd.py index 1ea69f55..4aa99533 100644 --- a/test/test_bfd.py +++ b/test/test_bfd.py @@ -7,6 +7,8 @@ from bfd import * from framework import * from util import ppp +us_in_sec = 1000000 + class BFDAPITestCase(VppTestCase): """Bidirectional Forwarding Detection (BFD) - API""" @@ -74,6 +76,7 @@ def verify_udp(test, packet): class BFDTestSession(object): + """ BFD session as seen from test framework side """ def __init__(self, test, interface, detect_mult=3): self.test = test @@ -90,7 +93,9 @@ class BFDTestSession(object): def create_packet(self): packet = create_packet(self.interface) + self.test.logger.debug("BFD: Creating packet") for name, value in self.bfd_values.iteritems(): + self.test.logger.debug("BFD: setting packet.%s=%s", name, value) packet[BFD].setfieldval(name, value) return packet @@ -134,11 +139,12 @@ class BFDTestCase(VppTestCase): self.vpp_session.add_vpp_config() self.vpp_session.admin_up() self.test_session = BFDTestSession(self, self.pg0) + self.test_session.update(required_min_rx_interval=100000) def tearDown(self): - self.vapi.want_bfd_events(enable_disable=0) self.vapi.collect_events() # clear the event queue if not self.vpp_dead: + self.vapi.want_bfd_events(enable_disable=0) self.vpp_session.remove_vpp_config() super(BFDTestCase, self).tearDown() @@ -167,8 +173,16 @@ class BFDTestCase(VppTestCase): self.assert_equal(e.state, expected_state, BFDState) def wait_for_bfd_packet(self, timeout=1): + """ wait for BFD packet + + :param timeout: how long to wait max + + :returns: tuple (packet, time spent waiting for packet) + """ self.logger.info("BFD: Waiting for BFD packet") + before = time.time() p = self.pg0.wait_for_packet(timeout=timeout) + after = time.time() bfd = p[BFD] if bfd is None: raise Exception(ppp("Unexpected or invalid BFD 
packet:", p)) @@ -177,7 +191,26 @@ class BFDTestCase(VppTestCase): verify_ip(self, p, self.pg0.local_ip4, self.pg0.remote_ip4) verify_udp(self, p) self.test_session.verify_packet(p) - return p + return p, after - before + + def bfd_session_up(self): + self.pg_enable_capture([self.pg0]) + self.logger.info("BFD: Waiting for slow hello") + p, ttp = self.wait_for_bfd_packet() + self.logger.info("BFD: Sending Init") + self.test_session.update(my_discriminator=randint(0, 40000000), + your_discriminator=p[BFD].my_discriminator, + state=BFDState.init) + self.test_session.send_packet() + self.logger.info("BFD: Waiting for event") + e = self.vapi.wait_for_event(1, "bfd_udp_session_details") + self.verify_event(e, expected_state=BFDState.up) + self.logger.info("BFD: Session is Up") + self.test_session.update(state=BFDState.up) + + def test_session_up(self): + """ bring BFD session up """ + self.bfd_session_up() def test_slow_timer(self): """ verify slow periodic control frames while session down """ @@ -198,7 +231,7 @@ class BFDTestCase(VppTestCase): def test_zero_remote_min_rx(self): """ no packets when zero BFD RemoteMinRxInterval """ self.pg_enable_capture([self.pg0]) - p = self.wait_for_bfd_packet(2) + p, timeout = self.wait_for_bfd_packet(2) self.test_session.update(my_discriminator=randint(0, 40000000), your_discriminator=p[BFD].my_discriminator, state=BFDState.init, @@ -213,26 +246,6 @@ class BFDTestCase(VppTestCase): return raise Exception(ppp("Received unexpected BFD packet:", p)) - def bfd_session_up(self): - self.pg_enable_capture([self.pg0]) - self.logger.info("BFD: Waiting for slow hello") - p = self.wait_for_bfd_packet(2) - self.logger.info("BFD: Sending Init") - self.test_session.update(my_discriminator=randint(0, 40000000), - your_discriminator=p[BFD].my_discriminator, - state=BFDState.init, - required_min_rx_interval=100000) - self.test_session.send_packet() - self.logger.info("BFD: Waiting for event") - e = self.vapi.wait_for_event(1, 
"bfd_udp_session_details") - self.verify_event(e, expected_state=BFDState.up) - self.logger.info("BFD: Session is Up") - self.test_session.update(state=BFDState.up) - - def test_session_up(self): - """ bring BFD session up """ - self.bfd_session_up() - def test_hold_up(self): """ hold BFD session up """ self.bfd_session_up() @@ -260,7 +273,7 @@ class BFDTestCase(VppTestCase): self.test_session.send_packet() now = time.time() count = 0 - while time.time() < now + interval / 1000000: + while time.time() < now + interval / us_in_sec: try: p = self.wait_for_bfd_packet() if count > 1: @@ -270,6 +283,31 @@ class BFDTestCase(VppTestCase): pass self.assert_in_range(count, 0, 1, "number of packets received") + def test_immediate_remote_min_rx_reduce(self): + """ immediately honor remote min rx reduction """ + self.vpp_session.remove_vpp_config() + self.vpp_session = VppBFDUDPSession(self, self.pg0, self.pg0.remote_ip4, + desired_min_tx=10000) + self.vpp_session.add_vpp_config() + self.test_session.update(desired_min_tx_interval=1000000, + required_min_rx_interval=1000000) + self.bfd_session_up() + self.wait_for_bfd_packet() + interval = 100000 + self.test_session.update(required_min_rx_interval=interval) + self.test_session.send_packet() + p, ttp = self.wait_for_bfd_packet() + # allow extra 10% to work around timing issues, first packet is special + self.assert_in_range(ttp, 0, 1.10 * interval / us_in_sec, + "time between BFD packets") + p, ttp = self.wait_for_bfd_packet() + self.assert_in_range(ttp, .9 * .75 * interval / us_in_sec, + 1.10 * interval / us_in_sec, + "time between BFD packets") + p, ttp = self.wait_for_bfd_packet() + self.assert_in_range(ttp, .9 * .75 * interval / us_in_sec, + 1.10 * interval / us_in_sec, + "time between BFD packets") if __name__ == '__main__': unittest.main(testRunner=VppTestRunner) -- cgit 1.2.3-korg From 10db26f7bfed97022734fb808bd56532fdda48c5 Mon Sep 17 00:00:00 2001 From: Klement Sekera Date: Wed, 11 Jan 2017 08:16:53 +0100 Subject: 
BFD: fix bfd_udp_add API Fix reporting of bs_index in the return message. Enhance test suite to cover this case. Change-Id: I37d35b850818bc1a05abe67ca919c22aeac978b6 Signed-off-by: Klement Sekera --- src/vnet/bfd/bfd_api.c | 43 ++++++++++++++++++++----------------------- src/vnet/bfd/bfd_api.h | 3 ++- src/vnet/bfd/bfd_udp.c | 12 +++++++----- test/bfd.py | 10 +++++++--- test/framework.py | 3 +++ test/test_bfd.py | 33 +++++++++++++++++++++++++-------- test/vpp_object.py | 21 ++++++++++++--------- 7 files changed, 76 insertions(+), 49 deletions(-) (limited to 'src/vnet/bfd') diff --git a/src/vnet/bfd/bfd_api.c b/src/vnet/bfd/bfd_api.c index 126cf29a..2e63fe90 100644 --- a/src/vnet/bfd/bfd_api.c +++ b/src/vnet/bfd/bfd_api.c @@ -43,12 +43,12 @@ #include -#define foreach_vpe_api_msg \ -_(BFD_UDP_ADD, bfd_udp_add) \ -_(BFD_UDP_DEL, bfd_udp_del) \ -_(BFD_UDP_SESSION_DUMP, bfd_udp_session_dump) \ -_(BFD_SESSION_SET_FLAGS, bfd_session_set_flags) \ -_(WANT_BFD_EVENTS, want_bfd_events) +#define foreach_vpe_api_msg \ + _ (BFD_UDP_ADD, bfd_udp_add) \ + _ (BFD_UDP_DEL, bfd_udp_del) \ + _ (BFD_UDP_SESSION_DUMP, bfd_udp_session_dump) \ + _ (BFD_SESSION_SET_FLAGS, bfd_session_set_flags) \ + _ (WANT_BFD_EVENTS, want_bfd_events) pub_sub_handler (bfd_events, BFD_EVENTS); @@ -75,13 +75,16 @@ vl_api_bfd_udp_add_t_handler (vl_api_bfd_udp_add_t * mp) clib_memcpy (&peer_addr.ip4, mp->peer_addr, sizeof (peer_addr.ip4)); } + u32 bs_index = 0; rv = bfd_udp_add_session (clib_net_to_host_u32 (mp->sw_if_index), clib_net_to_host_u32 (mp->desired_min_tx), clib_net_to_host_u32 (mp->required_min_rx), - mp->detect_mult, &local_addr, &peer_addr); + mp->detect_mult, &local_addr, &peer_addr, + &bs_index); BAD_SW_IF_INDEX_LABEL; - REPLY_MACRO (VL_API_BFD_UDP_ADD_REPLY); + REPLY_MACRO2 (VL_API_BFD_UDP_ADD_REPLY, + rmp->bs_index = clib_host_to_net_u32 (bs_index)); } static void @@ -107,9 +110,8 @@ vl_api_bfd_udp_del_t_handler (vl_api_bfd_udp_del_t * mp) clib_memcpy (&peer_addr.ip4, mp->peer_addr, sizeof 
(peer_addr.ip4)); } - rv = - bfd_udp_del_session (clib_net_to_host_u32 (mp->sw_if_index), &local_addr, - &peer_addr); + rv = bfd_udp_del_session (clib_net_to_host_u32 (mp->sw_if_index), + &local_addr, &peer_addr); BAD_SW_IF_INDEX_LABEL; REPLY_MACRO (VL_API_BFD_UDP_DEL_REPLY); @@ -201,14 +203,12 @@ vl_api_bfd_session_set_flags_t_handler (vl_api_bfd_session_set_flags_t * mp) vl_api_bfd_session_set_flags_reply_t *rmp; int rv; - rv = - bfd_session_set_flags (clib_net_to_host_u32 (mp->bs_index), - mp->admin_up_down); + rv = bfd_session_set_flags (clib_net_to_host_u32 (mp->bs_index), + mp->admin_up_down); REPLY_MACRO (VL_API_BFD_SESSION_SET_FLAGS_REPLY); } - /* * bfd_api_hookup * Add vpe's API message handlers to the table. @@ -223,7 +223,7 @@ vl_api_bfd_session_set_flags_t_handler (vl_api_bfd_session_set_flags_t * mp) static void setup_message_id_table (api_main_t * am) { -#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id); +#define _(id, n, crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id); foreach_vl_msg_name_crc_bfd; #undef _ } @@ -233,13 +233,10 @@ bfd_api_hookup (vlib_main_t * vm) { api_main_t *am = &api_main; -#define _(N,n) \ - vl_msg_api_set_handlers(VL_API_##N, #n, \ - vl_api_##n##_t_handler, \ - vl_noop_handler, \ - vl_api_##n##_t_endian, \ - vl_api_##n##_t_print, \ - sizeof(vl_api_##n##_t), 1); +#define _(N, n) \ + vl_msg_api_set_handlers (VL_API_##N, #n, vl_api_##n##_t_handler, \ + vl_noop_handler, vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, sizeof (vl_api_##n##_t), 1); foreach_vpe_api_msg; #undef _ diff --git a/src/vnet/bfd/bfd_api.h b/src/vnet/bfd/bfd_api.h index cfcd04f3..a9bc5a1f 100644 --- a/src/vnet/bfd/bfd_api.h +++ b/src/vnet/bfd/bfd_api.h @@ -27,7 +27,8 @@ vnet_api_error_t bfd_udp_add_session (u32 sw_if_index, u32 desired_min_tx_us, u32 required_min_rx_us, u8 detect_mult, const ip46_address_t * local_addr, - const ip46_address_t * peer_addr); + const ip46_address_t * peer_addr, + u32 * bs_index); vnet_api_error_t 
bfd_udp_del_session (u32 sw_if_index, const ip46_address_t * local_addr, diff --git a/src/vnet/bfd/bfd_udp.c b/src/vnet/bfd/bfd_udp.c index 677f1e22..c1596bf6 100644 --- a/src/vnet/bfd/bfd_udp.c +++ b/src/vnet/bfd/bfd_udp.c @@ -95,7 +95,7 @@ static vnet_api_error_t bfd_udp_add_session_internal (bfd_udp_main_t *bum, u32 sw_if_index, u32 desired_min_tx_us, u32 required_min_rx_us, u8 detect_mult, const ip46_address_t *local_addr, - const ip46_address_t *peer_addr) + const ip46_address_t *peer_addr, u32 *bs_index) { vnet_sw_interface_t *sw_if = vnet_get_sw_interface (vnet_get_main (), sw_if_index); @@ -149,6 +149,7 @@ bfd_udp_add_session_internal (bfd_udp_main_t *bum, u32 sw_if_index, bs->required_min_echo_rx_us = required_min_rx_us; /* FIXME */ bs->local_detect_mult = detect_mult; bfd_session_start (bum->bfd_main, bs); + *bs_index = bs->bs_idx; return 0; } @@ -224,7 +225,8 @@ bfd_udp_validate_api_input (u32 sw_if_index, const ip46_address_t *local_addr, vnet_api_error_t bfd_udp_add_session (u32 sw_if_index, u32 desired_min_tx_us, u32 required_min_rx_us, u8 detect_mult, const ip46_address_t *local_addr, - const ip46_address_t *peer_addr) + const ip46_address_t *peer_addr, + u32 *bs_index) { vnet_api_error_t rv = bfd_udp_validate_api_input (sw_if_index, local_addr, peer_addr); @@ -242,9 +244,9 @@ vnet_api_error_t bfd_udp_add_session (u32 sw_if_index, u32 desired_min_tx_us, BFD_ERR ("desired_min_tx_us < 1"); return VNET_API_ERROR_INVALID_ARGUMENT; } - return bfd_udp_add_session_internal (&bfd_udp_main, sw_if_index, - desired_min_tx_us, required_min_rx_us, - detect_mult, local_addr, peer_addr); + return bfd_udp_add_session_internal ( + &bfd_udp_main, sw_if_index, desired_min_tx_us, required_min_rx_us, + detect_mult, local_addr, peer_addr, bs_index); } vnet_api_error_t bfd_udp_del_session (u32 sw_if_index, diff --git a/test/bfd.py b/test/bfd.py index fe63264e..57a5bd86 100644 --- a/test/bfd.py +++ b/test/bfd.py @@ -145,8 +145,8 @@ class VppBFDUDPSession(VppObject): session 
= s break if session is None: - raise Exception( - "Could not find BFD session in VPP response: %s" % repr(result)) + raise Exception("Could not find BFD session in VPP response: %s" % + repr(result)) return session.state @property @@ -185,6 +185,7 @@ class VppBFDUDPSession(VppObject): self.peer_addr_n, is_ipv6=is_ipv6) self._bs_index = result.bs_index + self._test.registry.register(self, self.test.logger) def query_vpp_config(self): result = self.test.vapi.bfd_udp_session_dump() @@ -202,7 +203,7 @@ class VppBFDUDPSession(VppObject): return True def remove_vpp_config(self): - if hasattr(self, '_bs_index'): + if self._bs_index is not None: is_ipv6 = 1 if AF_INET6 == self._af else 0 self.test.vapi.bfd_udp_del( self._interface.sw_if_index, @@ -213,5 +214,8 @@ class VppBFDUDPSession(VppObject): def object_id(self): return "bfd-udp-%d" % self.bs_index + def __str__(self): + return self.object_id() + def admin_up(self): self.test.vapi.bfd_session_set_flags(self.bs_index, 1) diff --git a/test/framework.py b/test/framework.py index e364a8f5..896a1e0d 100644 --- a/test/framework.py +++ b/test/framework.py @@ -16,6 +16,7 @@ from vpp_papi_provider import VppPapiProvider from scapy.packet import Raw from logging import FileHandler, DEBUG from log import * +from vpp_object import VppObjectRegistry """ Test framework module. 
@@ -194,6 +195,7 @@ class VppTestCase(unittest.TestCase): cls._zombie_captures = [] cls.verbose = 0 cls.vpp_dead = False + cls.registry = VppObjectRegistry() print(double_line_delim) print(colorize(getdoc(cls).splitlines()[0], YELLOW)) print(double_line_delim) @@ -290,6 +292,7 @@ class VppTestCase(unittest.TestCase): self.logger.info(self.vapi.ppcli("show hardware")) self.logger.info(self.vapi.ppcli("show error")) self.logger.info(self.vapi.ppcli("show run")) + self.registry.remove_vpp_config(self.logger) def setUp(self): """ Clear trace before running each test""" diff --git a/test/test_bfd.py b/test/test_bfd.py index 4aa99533..b6222524 100644 --- a/test/test_bfd.py +++ b/test/test_bfd.py @@ -18,14 +18,17 @@ class BFDAPITestCase(VppTestCase): super(BFDAPITestCase, cls).setUpClass() try: - cls.create_pg_interfaces([0]) - cls.pg0.config_ip4() - cls.pg0.resolve_arp() + cls.create_pg_interfaces(range(2)) + for i in cls.pg_interfaces: + i.config_ip4() + i.resolve_arp() except Exception: super(BFDAPITestCase, cls).tearDownClass() raise + + def test_add_bfd(self): """ create a BFD session """ session = VppBFDUDPSession(self, self.pg0, self.pg0.remote_ip4) @@ -50,6 +53,16 @@ class BFDAPITestCase(VppTestCase): raise Exception("Expected failure while adding duplicate " "configuration") + def test_add_two(self): + """ create two BFD sessions """ + session1 = VppBFDUDPSession(self, self.pg0, self.pg0.remote_ip4) + session1.add_vpp_config() + session2 = VppBFDUDPSession(self, self.pg1, self.pg1.remote_ip4) + session2.add_vpp_config() + self.assertNotEqual(session1.bs_index, session2.bs_index, + "Different BFD sessions share bs_index (%s)" % + session1.bs_index) + def create_packet(interface, ttl=255, src_port=50000, **kwargs): p = (Ether(src=interface.remote_mac, dst=interface.local_mac) / @@ -114,6 +127,7 @@ class BFDTestSession(object): "BFD - your discriminator") +@unittest.skip("") class BFDTestCase(VppTestCase): """Bidirectional Forwarding Detection (BFD)""" @@ -135,7 
+149,8 @@ class BFDTestCase(VppTestCase): def setUp(self): super(BFDTestCase, self).setUp() self.vapi.want_bfd_events() - self.vpp_session = VppBFDUDPSession(self, self.pg0, self.pg0.remote_ip4) + self.vpp_session = VppBFDUDPSession( + self, self.pg0, self.pg0.remote_ip4) self.vpp_session.add_vpp_config() self.vpp_session.admin_up() self.test_session = BFDTestSession(self, self.pg0) @@ -154,8 +169,10 @@ class BFDTestCase(VppTestCase): self.logger.debug("BFD: Event: %s" % repr(e)) self.assert_equal(e.bs_index, self.vpp_session.bs_index, "BFD session index") - self.assert_equal(e.sw_if_index, self.vpp_session.interface.sw_if_index, - "BFD interface index") + self.assert_equal( + e.sw_if_index, + self.vpp_session.interface.sw_if_index, + "BFD interface index") is_ipv6 = 0 if self.vpp_session.af == AF_INET6: is_ipv6 = 1 @@ -286,8 +303,8 @@ class BFDTestCase(VppTestCase): def test_immediate_remote_min_rx_reduce(self): """ immediately honor remote min rx reduction """ self.vpp_session.remove_vpp_config() - self.vpp_session = VppBFDUDPSession(self, self.pg0, self.pg0.remote_ip4, - desired_min_tx=10000) + self.vpp_session = VppBFDUDPSession( + self, self.pg0, self.pg0.remote_ip4, desired_min_tx=10000) self.vpp_session.add_vpp_config() self.test_session.update(desired_min_tx_interval=1000000, required_min_rx_interval=1000000) diff --git a/test/vpp_object.py b/test/vpp_object.py index 2b71fc1f..1997bf55 100644 --- a/test/vpp_object.py +++ b/test/vpp_object.py @@ -42,13 +42,13 @@ class VppObjectRegistry(object): if not hasattr(self, "_object_dict"): self._object_dict = dict() - def register(self, o): + def register(self, o, logger): """ Register an object in the registry. 
""" - if not o.unique_id() in self._object_dict: + if not o.object_id() in self._object_dict: self._object_registry.append(o) - self._object_dict[o.unique_id()] = o + self._object_dict[o.object_id()] = o else: - print "not adding duplicate %s" % o + logger.debug("REG: duplicate add, ignoring (%s)" % o) def remove_vpp_config(self, logger): """ @@ -56,15 +56,18 @@ class VppObjectRegistry(object): from the registry. """ if not self._object_registry: - logger.info("No objects registered for auto-cleanup.") + logger.info("REG: No objects registered for auto-cleanup.") return - logger.info("Removing VPP configuration for registered objects") + logger.info("REG: Removing VPP configuration for registered objects") + # remove the config in reverse order as there might be dependencies for o in reversed(self._object_registry): if o.query_vpp_config(): - logger.info("Removing %s", o) + logger.info("REG: Removing configuration for %s" % o) o.remove_vpp_config() else: - logger.info("Skipping %s, configuration not present", o) + logger.info( + "REG: Skipping removal for %s, configuration not present" % + o) failed = [] for o in self._object_registry: if o.query_vpp_config(): @@ -72,7 +75,7 @@ class VppObjectRegistry(object): self._object_registry = [] self._object_dict = dict() if failed: - logger.error("Couldn't remove configuration for object(s):") + logger.error("REG: Couldn't remove configuration for object(s):") for x in failed: logger.error(repr(x)) raise Exception("Couldn't remove configuration for object(s): %s" % -- cgit 1.2.3-korg From 46a87adf10d41af4b1b14f06bdab33228cbaae95 Mon Sep 17 00:00:00 2001 From: Klement Sekera Date: Mon, 2 Jan 2017 08:22:23 +0100 Subject: BFD: IPv6 support Change-Id: Iaa9538c7cca500c04cf2704e5bf87480543cfcdf Signed-off-by: Klement Sekera --- src/vnet/bfd/bfd_main.c | 52 +++++----- src/vnet/bfd/bfd_main.h | 2 +- src/vnet/bfd/bfd_udp.c | 245 ++++++++++++++++++++++++++++++++++++++---------- src/vnet/bfd/bfd_udp.h | 6 +- test/bfd.py | 14 ++- 
test/test_bfd.py | 221 +++++++++++++++++++++++++++---------------- test/util.py | 11 ++- test/vpp_interface.py | 11 ++- 8 files changed, 394 insertions(+), 168 deletions(-) (limited to 'src/vnet/bfd') diff --git a/src/vnet/bfd/bfd_main.c b/src/vnet/bfd/bfd_main.c index 62be1842..7e1a2ef2 100644 --- a/src/vnet/bfd/bfd_main.c +++ b/src/vnet/bfd/bfd_main.c @@ -34,16 +34,9 @@ bfd_us_to_clocks (bfd_main_t * bm, u64 us) static vlib_node_registration_t bfd_process_node; -typedef enum -{ -#define F(t, n) BFD_OUTPUT_##t, - foreach_bfd_transport (F) -#undef F - BFD_OUTPUT_N_NEXT, -} bfd_output_next_t; - -static u32 bfd_next_index_by_transport[] = { -#define F(t, n) [BFD_TRANSPORT_##t] = BFD_OUTPUT_##t, +/* set to 0 here, real values filled at startup */ +static u32 bfd_node_index_by_transport[] = { +#define F(t, n) [BFD_TRANSPORT_##t] = 0, foreach_bfd_transport (F) #undef F }; @@ -378,7 +371,7 @@ bfd_input_format_trace (u8 * s, va_list * args) clib_net_to_host_u32 (pkt->des_min_tx)); s = format (s, " required min rx interval: %u\n", clib_net_to_host_u32 (pkt->req_min_rx)); - s = format (s, " required min echo rx interval: %u\n", + s = format (s, " required min echo rx interval: %u", clib_net_to_host_u32 (pkt->req_min_echo_rx)); } } @@ -426,10 +419,12 @@ bfd_add_transport_layer (vlib_main_t * vm, vlib_buffer_t * b, switch (bs->transport) { case BFD_TRANSPORT_UDP4: - /* fallthrough */ + BFD_DBG ("Transport bfd via udp4, bs_idx=%u", bs->bs_idx); + bfd_add_udp4_transport (vm, b, &bs->udp); + break; case BFD_TRANSPORT_UDP6: - BFD_DBG ("Transport bfd via udp, bs_idx=%u", bs->bs_idx); - bfd_add_udp_transport (vm, b, &bs->udp); + BFD_DBG ("Transport bfd via udp6, bs_idx=%u", bs->bs_idx); + bfd_add_udp6_transport (vm, b, &bs->udp); break; } } @@ -448,17 +443,14 @@ bfd_create_frame (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_buffer_t *b = vlib_get_buffer (vm, bi); ASSERT (b->current_data == 0); - u32 *to_next; - u32 n_left_to_next; - - vlib_get_next_frame (vm, rt, 
bfd_next_index_by_transport[bs->transport], - to_next, n_left_to_next); + vlib_frame_t *f = + vlib_get_frame_to_node (vm, bfd_node_index_by_transport[bs->transport]); + u32 *to_next = vlib_frame_vector_args (f); to_next[0] = bi; - n_left_to_next -= 1; + f->n_vectors = 1; - vlib_put_next_frame (vm, rt, bfd_next_index_by_transport[bs->transport], - n_left_to_next); + vlib_put_frame_to_node (vm, bfd_node_index_by_transport[bs->transport], f); return b; } @@ -680,13 +672,8 @@ VLIB_REGISTER_NODE (bfd_process_node, static) = { .function = bfd_process, .type = VLIB_NODE_TYPE_PROCESS, .name = "bfd-process", - .n_next_nodes = BFD_OUTPUT_N_NEXT, - .next_nodes = - { -#define F(t, n) [BFD_OUTPUT_##t] = n, - foreach_bfd_transport (F) -#undef F - }, + .n_next_nodes = 0, + .next_nodes = {}, }; /* *INDENT-ON* */ @@ -734,6 +721,13 @@ bfd_main_init (vlib_main_t * vm) timing_wheel_init (&bm->wheel, now, bm->cpu_cps); bm->wheel_inaccuracy = 2 << bm->wheel.log2_clocks_per_bin; + vlib_node_t *node = NULL; +#define F(t, n) \ + node = vlib_get_node_by_name (vm, (u8 *)n); \ + bfd_node_index_by_transport[BFD_TRANSPORT_##t] = node->index;\ + BFD_DBG("node '%s' has index %u", n, node->index); + foreach_bfd_transport (F); +#undef F return 0; } diff --git a/src/vnet/bfd/bfd_main.h b/src/vnet/bfd/bfd_main.h index cc82c839..20da381a 100644 --- a/src/vnet/bfd/bfd_main.h +++ b/src/vnet/bfd/bfd_main.h @@ -25,7 +25,7 @@ #include #define foreach_bfd_transport(F) \ - F (UDP4, "ip4-rewrite") \ + F (UDP4, "ip4-rewrite") \ F (UDP6, "ip6-rewrite") typedef enum diff --git a/src/vnet/bfd/bfd_udp.c b/src/vnet/bfd/bfd_udp.c index c1596bf6..fe348404 100644 --- a/src/vnet/bfd/bfd_udp.c +++ b/src/vnet/bfd/bfd_udp.c @@ -31,53 +31,80 @@ static vlib_node_registration_t bfd_udp6_input_node; bfd_udp_main_t bfd_udp_main; -void bfd_udp_transport_to_buffer (vlib_main_t *vm, vlib_buffer_t *b, - bfd_udp_session_t *bus) +void bfd_add_udp4_transport (vlib_main_t *vm, vlib_buffer_t *b, + bfd_udp_session_t *bus) { udp_header_t 
*udp; - u16 udp_length, ip_length; - bfd_udp_key_t *key = &bus->key; + const bfd_udp_key_t *key = &bus->key; b->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; - if (ip46_address_is_ip4 (&key->local_addr)) - { - ip4_header_t *ip4; - const size_t data_size = sizeof (*ip4) + sizeof (*udp); - vlib_buffer_advance (b, -data_size); - ip4 = vlib_buffer_get_current (b); - udp = (udp_header_t *)(ip4 + 1); - memset (ip4, 0, data_size); - ip4->ip_version_and_header_length = 0x45; - ip4->ttl = 255; - ip4->protocol = IP_PROTOCOL_UDP; - ip4->src_address.as_u32 = key->local_addr.ip4.as_u32; - ip4->dst_address.as_u32 = key->peer_addr.ip4.as_u32; - - udp->src_port = clib_host_to_net_u16 (50000); /* FIXME */ - udp->dst_port = clib_host_to_net_u16 (UDP_DST_PORT_bfd4); - - /* fix ip length, checksum and udp length */ - ip_length = vlib_buffer_length_in_chain (vm, b); - - ip4->length = clib_host_to_net_u16 (ip_length); - ip4->checksum = ip4_header_checksum (ip4); - - udp_length = ip_length - (sizeof (*ip4)); - udp->length = clib_host_to_net_u16 (udp_length); - } - else - { - BFD_ERR ("not implemented"); - abort (); - } + vnet_buffer (b)->ip.adj_index[VLIB_RX] = bus->adj_index; + vnet_buffer (b)->ip.adj_index[VLIB_TX] = bus->adj_index; + ip4_header_t *ip4; + const size_t headers_size = sizeof (*ip4) + sizeof (*udp); + vlib_buffer_advance (b, -headers_size); + ip4 = vlib_buffer_get_current (b); + udp = (udp_header_t *)(ip4 + 1); + memset (ip4, 0, headers_size); + ip4->ip_version_and_header_length = 0x45; + ip4->ttl = 255; + ip4->protocol = IP_PROTOCOL_UDP; + ip4->src_address.as_u32 = key->local_addr.ip4.as_u32; + ip4->dst_address.as_u32 = key->peer_addr.ip4.as_u32; + + udp->src_port = clib_host_to_net_u16 (50000); /* FIXME */ + udp->dst_port = clib_host_to_net_u16 (UDP_DST_PORT_bfd4); + + /* fix ip length, checksum and udp length */ + const u16 ip_length = vlib_buffer_length_in_chain (vm, b); + + ip4->length = clib_host_to_net_u16 (ip_length); + ip4->checksum = ip4_header_checksum (ip4); + + 
const u16 udp_length = ip_length - (sizeof (*ip4)); + udp->length = clib_host_to_net_u16 (udp_length); } -void bfd_add_udp_transport (vlib_main_t *vm, vlib_buffer_t *b, - bfd_udp_session_t *bus) +void bfd_add_udp6_transport (vlib_main_t *vm, vlib_buffer_t *b, + bfd_udp_session_t *bus) { + udp_header_t *udp; + const bfd_udp_key_t *key = &bus->key; + + b->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; vnet_buffer (b)->ip.adj_index[VLIB_RX] = bus->adj_index; vnet_buffer (b)->ip.adj_index[VLIB_TX] = bus->adj_index; - bfd_udp_transport_to_buffer (vm, b, bus); + ip6_header_t *ip6; + const size_t headers_size = sizeof (*ip6) + sizeof (*udp); + vlib_buffer_advance (b, -headers_size); + ip6 = vlib_buffer_get_current (b); + udp = (udp_header_t *)(ip6 + 1); + memset (ip6, 0, headers_size); + ip6->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 (0x6 << 28); + ip6->hop_limit = 255; + ip6->protocol = IP_PROTOCOL_UDP; + clib_memcpy (&ip6->src_address, &key->local_addr.ip6, + sizeof (ip6->src_address)); + clib_memcpy (&ip6->dst_address, &key->peer_addr.ip6, + sizeof (ip6->dst_address)); + + udp->src_port = clib_host_to_net_u16 (50000); /* FIXME */ + udp->dst_port = clib_host_to_net_u16 (UDP_DST_PORT_bfd6); + + /* fix ip payload length and udp length */ + const u16 udp_length = vlib_buffer_length_in_chain (vm, b) - (sizeof (*ip6)); + udp->length = clib_host_to_net_u16 (udp_length); + ip6->payload_length = udp->length; + + /* IPv6 UDP checksum is mandatory */ + int bogus = 0; + udp->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ip6, &bogus); + ASSERT (bogus == 0); + if (udp->checksum == 0) + { + udp->checksum = 0xffff; + } } static bfd_session_t *bfd_lookup_session (bfd_udp_main_t *bum, @@ -345,29 +372,29 @@ static bfd_udp_error_t bfd_udp4_verify_transport (const ip4_header_t *ip4, const bfd_udp_key_t *key = &bus->key; if (ip4->src_address.as_u32 != key->peer_addr.ip4.as_u32) { - BFD_ERR ("IP src addr mismatch, got %U, expected %U", format_ip4_address, - 
ip4->src_address.as_u32, format_ip4_address, - key->peer_addr.ip4.as_u32); + BFD_ERR ("IPv4 src addr mismatch, got %U, expected %U", + format_ip4_address, ip4->src_address.as_u8, format_ip4_address, + key->peer_addr.ip4.as_u8); return BFD_UDP_ERROR_BAD; } if (ip4->dst_address.as_u32 != key->local_addr.ip4.as_u32) { - BFD_ERR ("IP dst addr mismatch, got %U, expected %U", format_ip4_address, - ip4->dst_address.as_u32, format_ip4_address, - key->local_addr.ip4.as_u32); + BFD_ERR ("IPv4 dst addr mismatch, got %U, expected %U", + format_ip4_address, ip4->dst_address.as_u8, format_ip4_address, + key->local_addr.ip4.as_u8); return BFD_UDP_ERROR_BAD; } const u8 expected_ttl = 255; if (ip4->ttl != expected_ttl) { - BFD_ERR ("IP unexpected TTL value %d, expected %d", ip4->ttl, + BFD_ERR ("IPv4 unexpected TTL value %u, expected %u", ip4->ttl, expected_ttl); return BFD_UDP_ERROR_BAD; } if (clib_net_to_host_u16 (udp->src_port) < 49152 || clib_net_to_host_u16 (udp->src_port) > 65535) { - BFD_ERR ("Invalid UDP src port %d, out of range <49152,65535>", + BFD_ERR ("Invalid UDP src port %u, out of range <49152,65535>", udp->src_port); } return BFD_UDP_ERROR_NONE; @@ -460,10 +487,128 @@ static bfd_udp_error_t bfd_udp4_scan (vlib_main_t *vm, vlib_node_runtime_t *rt, return BFD_UDP_ERROR_NONE; } -static bfd_udp_error_t bfd_udp6_scan (vlib_main_t *vm, vlib_buffer_t *b) +static void bfd_udp6_find_headers (vlib_buffer_t *b, const ip6_header_t **ip6, + const udp_header_t **udp) +{ + /* sanity check first */ + const i32 start = vnet_buffer (b)->ip.start_of_ip_header; + if (start < 0 && start < sizeof (b->pre_data)) + { + BFD_ERR ("Start of ip header is before pre_data, ignoring"); + *ip6 = NULL; + *udp = NULL; + return; + } + *ip6 = (ip6_header_t *)(b->data + start); + if ((u8 *)*ip6 > (u8 *)vlib_buffer_get_current (b)) + { + BFD_ERR ("Start of ip header is beyond current data, ignoring"); + *ip6 = NULL; + *udp = NULL; + return; + } + *udp = (udp_header_t *)((*ip6) + 1); +} + +static 
bfd_udp_error_t bfd_udp6_verify_transport (const ip6_header_t *ip6, + const udp_header_t *udp, + const bfd_session_t *bs) { - /* TODO */ - return BFD_UDP_ERROR_BAD; + const bfd_udp_session_t *bus = &bs->udp; + const bfd_udp_key_t *key = &bus->key; + if (ip6->src_address.as_u64[0] != key->peer_addr.ip6.as_u64[0] && + ip6->src_address.as_u64[1] != key->peer_addr.ip6.as_u64[1]) + { + BFD_ERR ("IP src addr mismatch, got %U, expected %U", format_ip6_address, + ip6, format_ip6_address, &key->peer_addr.ip6); + return BFD_UDP_ERROR_BAD; + } + if (ip6->dst_address.as_u64[0] != key->local_addr.ip6.as_u64[0] && + ip6->dst_address.as_u64[1] != key->local_addr.ip6.as_u64[1]) + { + BFD_ERR ("IP dst addr mismatch, got %U, expected %U", format_ip6_address, + ip6, format_ip6_address, &key->local_addr.ip6); + return BFD_UDP_ERROR_BAD; + } + const u8 expected_hop_limit = 255; + if (ip6->hop_limit != expected_hop_limit) + { + BFD_ERR ("IPv6 unexpected hop-limit value %u, expected %u", + ip6->hop_limit, expected_hop_limit); + return BFD_UDP_ERROR_BAD; + } + if (clib_net_to_host_u16 (udp->src_port) < 49152 || + clib_net_to_host_u16 (udp->src_port) > 65535) + { + BFD_ERR ("Invalid UDP src port %u, out of range <49152,65535>", + udp->src_port); + } + return BFD_UDP_ERROR_NONE; +} + +static bfd_udp_error_t bfd_udp6_scan (vlib_main_t *vm, vlib_node_runtime_t *rt, + vlib_buffer_t *b, bfd_session_t **bs_out) +{ + const bfd_pkt_t *pkt = vlib_buffer_get_current (b); + if (sizeof (*pkt) > b->current_length) + { + BFD_ERR ( + "Payload size %d too small to hold bfd packet of minimum size %d", + b->current_length, sizeof (*pkt)); + return BFD_UDP_ERROR_BAD; + } + const ip6_header_t *ip6; + const udp_header_t *udp; + bfd_udp6_find_headers (b, &ip6, &udp); + if (!ip6 || !udp) + { + BFD_ERR ("Couldn't find ip6 or udp header"); + return BFD_UDP_ERROR_BAD; + } + if (!bfd_verify_pkt_common (pkt)) + { + return BFD_UDP_ERROR_BAD; + } + bfd_session_t *bs = NULL; + if (pkt->your_disc) + { + BFD_DBG ("Looking 
up BFD session using discriminator %u", + pkt->your_disc); + bs = bfd_find_session_by_disc (bfd_udp_main.bfd_main, pkt->your_disc); + } + else + { + bfd_udp_key_t key; + memset (&key, 0, sizeof (key)); + key.sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX]; + key.local_addr.ip6.as_u64[0] = ip6->dst_address.as_u64[0]; + key.local_addr.ip6.as_u64[1] = ip6->dst_address.as_u64[1]; + key.peer_addr.ip6.as_u64[0] = ip6->src_address.as_u64[0]; + key.peer_addr.ip6.as_u64[1] = ip6->src_address.as_u64[1]; + BFD_DBG ("Looking up BFD session using key (sw_if_index=%u, local=%U, " + "peer=%U)", + key.sw_if_index, format_ip6_address, &key.local_addr, + format_ip6_address, &key.peer_addr); + bs = bfd_lookup_session (&bfd_udp_main, &key); + } + if (!bs) + { + BFD_ERR ("BFD session lookup failed - no session matches BFD pkt"); + return BFD_UDP_ERROR_BAD; + } + BFD_DBG ("BFD session found, bs_idx=%u", bs->bs_idx); + if (!bfd_verify_pkt_session (pkt, b->current_length, bs)) + { + return BFD_UDP_ERROR_BAD; + } + bfd_udp_error_t err; + if (BFD_UDP_ERROR_NONE != (err = bfd_udp6_verify_transport (ip6, udp, bs))) + { + return err; + } + bfd_rpc_update_session (bs->bs_idx, pkt); + *bs_out = bs; + return BFD_UDP_ERROR_NONE; } /* @@ -504,7 +649,7 @@ static uword bfd_udp_input (vlib_main_t *vm, vlib_node_runtime_t *rt, /* scan this bfd pkt. 
error0 is the counter index to bmp */ if (is_ipv6) { - error0 = bfd_udp6_scan (vm, b0); + error0 = bfd_udp6_scan (vm, rt, b0, &bs); } else { diff --git a/src/vnet/bfd/bfd_udp.h b/src/vnet/bfd/bfd_udp.h index 51f5327b..2cd89ca2 100644 --- a/src/vnet/bfd/bfd_udp.h +++ b/src/vnet/bfd/bfd_udp.h @@ -42,8 +42,10 @@ typedef struct adj_index_t adj_index; } bfd_udp_session_t; -void bfd_add_udp_transport (vlib_main_t * vm, vlib_buffer_t * b, - bfd_udp_session_t * bs); +void bfd_add_udp4_transport (vlib_main_t * vm, vlib_buffer_t * b, + bfd_udp_session_t * bs); +void bfd_add_udp6_transport (vlib_main_t * vm, vlib_buffer_t * b, + bfd_udp_session_t * bs); #endif /* __included_bfd_udp_h__ */ diff --git a/test/bfd.py b/test/bfd.py index 57a5bd86..51716813 100644 --- a/test/bfd.py +++ b/test/bfd.py @@ -111,14 +111,24 @@ class VppBFDUDPSession(VppObject): def local_addr(self): """ BFD session local address (VPP address) """ if self._local_addr is None: - return self._interface.local_ip4 + if self.af == AF_INET: + return self._interface.local_ip4 + elif self.af == AF_INET6: + return self._interface.local_ip6 + else: + raise Exception("Unexpected af %s' % af" % self.af) return self._local_addr @property def local_addr_n(self): """ BFD session local address (VPP address) - raw, suitable for API """ if self._local_addr is None: - return self._interface.local_ip4n + if self.af == AF_INET: + return self._interface.local_ip4n + elif self.af == AF_INET6: + return self._interface.local_ip6n + else: + raise Exception("Unexpected af %s' % af" % self.af) return self._local_addr_n @property diff --git a/test/test_bfd.py b/test/test_bfd.py index b7832247..b56df339 100644 --- a/test/test_bfd.py +++ b/test/test_bfd.py @@ -62,36 +62,14 @@ class BFDAPITestCase(VppTestCase): session1.bs_index) -def create_packet(interface, ttl=255, src_port=50000, **kwargs): - p = (Ether(src=interface.remote_mac, dst=interface.local_mac) / - IP(src=interface.remote_ip4, dst=interface.local_ip4, ttl=ttl) / - 
UDP(sport=src_port, dport=BFD.udp_dport) / - BFD(*kwargs)) - return p - - -def verify_ip(test, packet, local_ip, remote_ip): - """ Verify correctness of IP layer. """ - ip = packet[IP] - test.assert_equal(ip.src, local_ip, "IP source address") - test.assert_equal(ip.dst, remote_ip, "IP destination address") - test.assert_equal(ip.ttl, 255, "IP TTL") - - -def verify_udp(test, packet): - """ Verify correctness of UDP layer. """ - udp = packet[UDP] - test.assert_equal(udp.dport, BFD.udp_dport, "UDP destination port") - test.assert_in_range(udp.sport, BFD.udp_sport_min, BFD.udp_sport_max, - "UDP source port") - - class BFDTestSession(object): """ BFD session as seen from test framework side """ - def __init__(self, test, interface, detect_mult=3): + def __init__(self, test, interface, af, detect_mult=3): self.test = test + self.af = af self.interface = interface + self.udp_sport = 50000 self.bfd_values = { 'my_discriminator': 0, 'desired_min_tx_interval': 100000, @@ -103,7 +81,22 @@ class BFDTestSession(object): self.bfd_values.update(kwargs) def create_packet(self): - packet = create_packet(self.interface) + if self.af == AF_INET6: + packet = (Ether(src=self.interface.remote_mac, + dst=self.interface.local_mac) / + IPv6(src=self.interface.remote_ip6, + dst=self.interface.local_ip6, + hlim=255) / + UDP(sport=self.udp_sport, dport=BFD.udp_dport) / + BFD()) + else: + packet = (Ether(src=self.interface.remote_mac, + dst=self.interface.local_mac) / + IP(src=self.interface.remote_ip4, + dst=self.interface.local_ip4, + ttl=255) / + UDP(sport=self.udp_sport, dport=BFD.udp_dport) / + BFD()) self.test.logger.debug("BFD: Creating packet") for name, value in self.bfd_values.iteritems(): self.test.logger.debug("BFD: setting packet.%s=%s", name, value) @@ -125,41 +118,52 @@ class BFDTestSession(object): "BFD - your discriminator") -@unittest.skip("") -class BFDTestCase(VppTestCase): - """Bidirectional Forwarding Detection (BFD)""" - - @classmethod - def setUpClass(cls): - 
super(BFDTestCase, cls).setUpClass() - try: - cls.create_pg_interfaces([0]) - cls.pg0.config_ip4() - cls.pg0.generate_remote_hosts() - cls.pg0.configure_ipv4_neighbors() - cls.pg0.admin_up() - cls.pg0.resolve_arp() - - except Exception: - super(BFDTestCase, cls).tearDownClass() - raise - - def setUp(self): - super(BFDTestCase, self).setUp() - self.vapi.want_bfd_events() - self.vpp_session = VppBFDUDPSession( - self, self.pg0, self.pg0.remote_ip4) - self.vpp_session.add_vpp_config() - self.vpp_session.admin_up() - self.test_session = BFDTestSession(self, self.pg0) - self.test_session.update(required_min_rx_interval=100000) +class BFDCommonCode: + """Common code used by both IPv4 and IPv6 Test Cases""" def tearDown(self): self.vapi.collect_events() # clear the event queue if not self.vpp_dead: self.vapi.want_bfd_events(enable_disable=0) self.vpp_session.remove_vpp_config() - super(BFDTestCase, self).tearDown() + + def bfd_session_up(self): + self.pg_enable_capture([self.pg0]) + self.logger.info("BFD: Waiting for slow hello") + p, timeout = self.wait_for_bfd_packet(2) + self.logger.info("BFD: Sending Init") + self.test_session.update(my_discriminator=randint(0, 40000000), + your_discriminator=p[BFD].my_discriminator, + state=BFDState.init, + required_min_rx_interval=100000) + self.test_session.send_packet() + self.logger.info("BFD: Waiting for event") + e = self.vapi.wait_for_event(1, "bfd_udp_session_details") + self.verify_event(e, expected_state=BFDState.up) + self.logger.info("BFD: Session is Up") + self.test_session.update(state=BFDState.up) + + def verify_ip(self, packet): + """ Verify correctness of IP layer. 
""" + if self.vpp_session.af == AF_INET6: + ip = packet[IPv6] + local_ip = self.pg0.local_ip6 + remote_ip = self.pg0.remote_ip6 + self.assert_equal(ip.hlim, 255, "IPv6 hop limit") + else: + ip = packet[IP] + local_ip = self.pg0.local_ip4 + remote_ip = self.pg0.remote_ip4 + self.assert_equal(ip.ttl, 255, "IPv4 TTL") + self.assert_equal(ip.src, local_ip, "IP source address") + self.assert_equal(ip.dst, remote_ip, "IP destination address") + + def verify_udp(self, packet): + """ Verify correctness of UDP layer. """ + udp = packet[UDP] + self.assert_equal(udp.dport, BFD.udp_dport, "UDP destination port") + self.assert_in_range(udp.sport, BFD.udp_sport_min, BFD.udp_sport_max, + "UDP source port") def verify_event(self, event, expected_state): """ Verify correctness of event values. """ @@ -198,35 +202,64 @@ class BFDTestCase(VppTestCase): before = time.time() p = self.pg0.wait_for_packet(timeout=timeout) after = time.time() + self.logger.debug(ppp("Got packet:", p)) bfd = p[BFD] if bfd is None: raise Exception(ppp("Unexpected or invalid BFD packet:", p)) if bfd.payload: raise Exception(ppp("Unexpected payload in BFD packet:", bfd)) - verify_ip(self, p, self.pg0.local_ip4, self.pg0.remote_ip4) - verify_udp(self, p) + self.verify_ip(p) + self.verify_udp(p) self.test_session.verify_packet(p) return p, after - before - def bfd_session_up(self): - self.pg_enable_capture([self.pg0]) - self.logger.info("BFD: Waiting for slow hello") - p, ttp = self.wait_for_bfd_packet() - self.logger.info("BFD: Sending Init") - self.test_session.update(my_discriminator=randint(0, 40000000), - your_discriminator=p[BFD].my_discriminator, - state=BFDState.init) - self.test_session.send_packet() - self.logger.info("BFD: Waiting for event") - e = self.vapi.wait_for_event(1, "bfd_udp_session_details") - self.verify_event(e, expected_state=BFDState.up) - self.logger.info("BFD: Session is Up") - self.test_session.update(state=BFDState.up) - def test_session_up(self): """ bring BFD session up """ 
self.bfd_session_up() + def test_hold_up(self): + """ hold BFD session up """ + self.bfd_session_up() + for i in range(5): + self.wait_for_bfd_packet() + self.test_session.send_packet() + + +class BFD4TestCase(VppTestCase, BFDCommonCode): + """Bidirectional Forwarding Detection (BFD)""" + + @classmethod + def setUpClass(cls): + super(BFD4TestCase, cls).setUpClass() + try: + cls.create_pg_interfaces([0]) + cls.pg0.config_ip4() + cls.pg0.generate_remote_hosts() + cls.pg0.configure_ipv4_neighbors() + cls.pg0.admin_up() + cls.pg0.resolve_arp() + + except Exception: + super(BFD4TestCase, cls).tearDownClass() + raise + + def setUp(self): + super(BFD4TestCase, self).setUp() + self.vapi.want_bfd_events() + try: + self.vpp_session = VppBFDUDPSession(self, self.pg0, + self.pg0.remote_ip4) + self.vpp_session.add_vpp_config() + self.vpp_session.admin_up() + self.test_session = BFDTestSession(self, self.pg0, AF_INET) + except: + self.vapi.want_bfd_events(enable_disable=0) + raise + + def tearDown(self): + BFDCommonCode.tearDown(self) + super(BFD4TestCase, self).tearDown() + def test_slow_timer(self): """ verify slow periodic control frames while session down """ self.pg_enable_capture([self.pg0]) @@ -261,13 +294,6 @@ class BFDTestCase(VppTestCase): return raise Exception(ppp("Received unexpected BFD packet:", p)) - def test_hold_up(self): - """ hold BFD session up """ - self.bfd_session_up() - for i in range(5): - self.wait_for_bfd_packet() - self.test_session.send_packet() - def test_conn_down(self): """ verify session goes down after inactivity """ self.bfd_session_up() @@ -324,5 +350,42 @@ class BFDTestCase(VppTestCase): 1.10 * interval / us_in_sec, "time between BFD packets") + +class BFD6TestCase(VppTestCase, BFDCommonCode): + """Bidirectional Forwarding Detection (BFD) (IPv6) """ + + @classmethod + def setUpClass(cls): + super(BFD6TestCase, cls).setUpClass() + try: + cls.create_pg_interfaces([0]) + cls.pg0.config_ip6() + cls.pg0.configure_ipv6_neighbors() + 
cls.pg0.admin_up() + cls.pg0.resolve_ndp() + + except Exception: + super(BFD6TestCase, cls).tearDownClass() + raise + + def setUp(self): + super(BFD6TestCase, self).setUp() + self.vapi.want_bfd_events() + try: + self.vpp_session = VppBFDUDPSession(self, self.pg0, + self.pg0.remote_ip6, + af=AF_INET6) + self.vpp_session.add_vpp_config() + self.vpp_session.admin_up() + self.test_session = BFDTestSession(self, self.pg0, AF_INET6) + self.logger.debug(self.vapi.cli("show adj nbr")) + except: + self.vapi.want_bfd_events(enable_disable=0) + raise + + def tearDown(self): + BFDCommonCode.tearDown(self) + super(BFD6TestCase, self).tearDown() + if __name__ == '__main__': unittest.main(testRunner=VppTestRunner) diff --git a/test/util.py b/test/util.py index 79893602..24e9af44 100644 --- a/test/util.py +++ b/test/util.py @@ -76,19 +76,24 @@ class Host(object): @property def ip4(self): - """ IPv4 address """ + """ IPv4 address - string """ return self._ip4 @property def ip4n(self): - """ IPv4 address """ + """ IPv4 address of remote host - raw, suitable as API parameter.""" return socket.inet_pton(socket.AF_INET, self._ip4) @property def ip6(self): - """ IPv6 address """ + """ IPv6 address - string """ return self._ip6 + @property + def ip6n(self): + """ IPv6 address of remote host - raw, suitable as API parameter.""" + return socket.inet_pton(socket.AF_INET6, self._ip6) + def __init__(self, mac=None, ip4=None, ip6=None): self._mac = mac self._ip4 = ip4 diff --git a/test/vpp_interface.py b/test/vpp_interface.py index e0a29f94..ee4a9ef6 100644 --- a/test/vpp_interface.py +++ b/test/vpp_interface.py @@ -131,7 +131,7 @@ class VppInterface(object): 2, count + 2): # 0: network address, 1: local vpp address mac = "02:%02x:00:00:ff:%02x" % (self.sw_if_index, i) ip4 = "172.16.%u.%u" % (self.sw_if_index, i) - ip6 = "fd01:%04x::%04x" % (self.sw_if_index, i) + ip6 = "fd01:%x::%x" % (self.sw_if_index, i) host = Host(mac, ip4, ip6) self._remote_hosts.append(host) self._hosts_by_mac[mac] = 
host @@ -155,7 +155,7 @@ class VppInterface(object): self.has_ip4_config = False self.ip4_table_id = 0 - self._local_ip6 = "fd01:%04x::1" % self.sw_if_index + self._local_ip6 = "fd01:%x::1" % self.sw_if_index self._local_ip6n = socket.inet_pton(socket.AF_INET6, self.local_ip6) self.local_ip6_prefix_len = 64 self.has_ip6_config = False @@ -226,6 +226,13 @@ class VppInterface(object): self.has_ip6_config = False self.has_ip6_config = False + def configure_ipv6_neighbors(self): + """For every remote host assign neighbor's MAC to IPv6 address.""" + for host in self._remote_hosts: + macn = host.mac.replace(":", "").decode('hex') + self.test.vapi.ip_neighbor_add_del( + self.sw_if_index, macn, host.ip6n, is_ipv6=1) + def unconfig(self): """Unconfigure IPv6 and IPv4 address on the VPP interface.""" self.unconfig_ip4() -- cgit 1.2.3-korg From c5fccc0c65a441700dc92add4880127331a37f33 Mon Sep 17 00:00:00 2001 From: Klement Sekera Date: Wed, 18 Jan 2017 09:56:00 +0100 Subject: BFD: reformat code to match vpp code style Change-Id: I40deb8b40f5d3a96d2c0dcb400f489cd05a64348 Signed-off-by: Klement Sekera --- src/vnet/bfd/bfd_udp.c | 344 ++++++++++++++++++++++++++----------------------- 1 file changed, 186 insertions(+), 158 deletions(-) (limited to 'src/vnet/bfd') diff --git a/src/vnet/bfd/bfd_udp.c b/src/vnet/bfd/bfd_udp.c index fe348404..1b3c20b8 100644 --- a/src/vnet/bfd/bfd_udp.c +++ b/src/vnet/bfd/bfd_udp.c @@ -31,8 +31,9 @@ static vlib_node_registration_t bfd_udp6_input_node; bfd_udp_main_t bfd_udp_main; -void bfd_add_udp4_transport (vlib_main_t *vm, vlib_buffer_t *b, - bfd_udp_session_t *bus) +void +bfd_add_udp4_transport (vlib_main_t * vm, vlib_buffer_t * b, + bfd_udp_session_t * bus) { udp_header_t *udp; const bfd_udp_key_t *key = &bus->key; @@ -44,7 +45,7 @@ void bfd_add_udp4_transport (vlib_main_t *vm, vlib_buffer_t *b, const size_t headers_size = sizeof (*ip4) + sizeof (*udp); vlib_buffer_advance (b, -headers_size); ip4 = vlib_buffer_get_current (b); - udp = 
(udp_header_t *)(ip4 + 1); + udp = (udp_header_t *) (ip4 + 1); memset (ip4, 0, headers_size); ip4->ip_version_and_header_length = 0x45; ip4->ttl = 255; @@ -52,7 +53,7 @@ void bfd_add_udp4_transport (vlib_main_t *vm, vlib_buffer_t *b, ip4->src_address.as_u32 = key->local_addr.ip4.as_u32; ip4->dst_address.as_u32 = key->peer_addr.ip4.as_u32; - udp->src_port = clib_host_to_net_u16 (50000); /* FIXME */ + udp->src_port = clib_host_to_net_u16 (50000); /* FIXME */ udp->dst_port = clib_host_to_net_u16 (UDP_DST_PORT_bfd4); /* fix ip length, checksum and udp length */ @@ -65,8 +66,9 @@ void bfd_add_udp4_transport (vlib_main_t *vm, vlib_buffer_t *b, udp->length = clib_host_to_net_u16 (udp_length); } -void bfd_add_udp6_transport (vlib_main_t *vm, vlib_buffer_t *b, - bfd_udp_session_t *bus) +void +bfd_add_udp6_transport (vlib_main_t * vm, vlib_buffer_t * b, + bfd_udp_session_t * bus) { udp_header_t *udp; const bfd_udp_key_t *key = &bus->key; @@ -78,22 +80,23 @@ void bfd_add_udp6_transport (vlib_main_t *vm, vlib_buffer_t *b, const size_t headers_size = sizeof (*ip6) + sizeof (*udp); vlib_buffer_advance (b, -headers_size); ip6 = vlib_buffer_get_current (b); - udp = (udp_header_t *)(ip6 + 1); + udp = (udp_header_t *) (ip6 + 1); memset (ip6, 0, headers_size); ip6->ip_version_traffic_class_and_flow_label = - clib_host_to_net_u32 (0x6 << 28); + clib_host_to_net_u32 (0x6 << 28); ip6->hop_limit = 255; ip6->protocol = IP_PROTOCOL_UDP; clib_memcpy (&ip6->src_address, &key->local_addr.ip6, - sizeof (ip6->src_address)); + sizeof (ip6->src_address)); clib_memcpy (&ip6->dst_address, &key->peer_addr.ip6, - sizeof (ip6->dst_address)); + sizeof (ip6->dst_address)); - udp->src_port = clib_host_to_net_u16 (50000); /* FIXME */ + udp->src_port = clib_host_to_net_u16 (50000); /* FIXME */ udp->dst_port = clib_host_to_net_u16 (UDP_DST_PORT_bfd6); /* fix ip payload length and udp length */ - const u16 udp_length = vlib_buffer_length_in_chain (vm, b) - (sizeof (*ip6)); + const u16 udp_length = + 
vlib_buffer_length_in_chain (vm, b) - (sizeof (*ip6)); udp->length = clib_host_to_net_u16 (udp_length); ip6->payload_length = udp->length; @@ -107,8 +110,8 @@ void bfd_add_udp6_transport (vlib_main_t *vm, vlib_buffer_t *b, } } -static bfd_session_t *bfd_lookup_session (bfd_udp_main_t *bum, - const bfd_udp_key_t *key) +static bfd_session_t * +bfd_lookup_session (bfd_udp_main_t * bum, const bfd_udp_key_t * key) { uword *p = mhash_get (&bum->bfd_session_idx_by_bfd_key, key); if (p) @@ -119,13 +122,15 @@ static bfd_session_t *bfd_lookup_session (bfd_udp_main_t *bum, } static vnet_api_error_t -bfd_udp_add_session_internal (bfd_udp_main_t *bum, u32 sw_if_index, - u32 desired_min_tx_us, u32 required_min_rx_us, - u8 detect_mult, const ip46_address_t *local_addr, - const ip46_address_t *peer_addr, u32 *bs_index) +bfd_udp_add_session_internal (bfd_udp_main_t * bum, u32 sw_if_index, + u32 desired_min_tx_us, u32 required_min_rx_us, + u8 detect_mult, + const ip46_address_t * local_addr, + const ip46_address_t * peer_addr, + u32 * bs_index) { vnet_sw_interface_t *sw_if = - vnet_get_sw_interface (vnet_get_main (), sw_if_index); + vnet_get_sw_interface (vnet_get_main (), sw_if_index); /* get a pool entry and if we end up not needing it, give it back */ bfd_transport_t t = BFD_TRANSPORT_UDP4; if (!ip46_address_is_ip4 (local_addr)) @@ -151,29 +156,30 @@ bfd_udp_add_session_internal (bfd_udp_main_t *bum, u32 sw_if_index, key->sw_if_index = sw_if->sw_if_index; mhash_set (&bum->bfd_session_idx_by_bfd_key, key, bs->bs_idx, NULL); BFD_DBG ("session created, bs_idx=%u, sw_if_index=%d, local=%U, peer=%U", - bs->bs_idx, key->sw_if_index, format_ip46_address, &key->local_addr, - IP46_TYPE_ANY, format_ip46_address, &key->peer_addr, IP46_TYPE_ANY); + bs->bs_idx, key->sw_if_index, format_ip46_address, + &key->local_addr, IP46_TYPE_ANY, format_ip46_address, + &key->peer_addr, IP46_TYPE_ANY); if (BFD_TRANSPORT_UDP4 == t) { bus->adj_index = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4, VNET_LINK_IP4, - 
&key->peer_addr, key->sw_if_index); + &key->peer_addr, + key->sw_if_index); BFD_DBG ("adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, VNET_LINK_IP4, %U, %d) " - "returns %d", - format_ip46_address, &key->peer_addr, IP46_TYPE_ANY, - key->sw_if_index, bus->adj_index); + "returns %d", format_ip46_address, &key->peer_addr, + IP46_TYPE_ANY, key->sw_if_index, bus->adj_index); } else { bus->adj_index = adj_nbr_add_or_lock (FIB_PROTOCOL_IP6, VNET_LINK_IP6, - &key->peer_addr, key->sw_if_index); + &key->peer_addr, + key->sw_if_index); BFD_DBG ("adj_nbr_add_or_lock(FIB_PROTOCOL_IP6, VNET_LINK_IP6, %U, %d) " - "returns %d", - format_ip46_address, &key->peer_addr, IP46_TYPE_ANY, - key->sw_if_index, bus->adj_index); + "returns %d", format_ip46_address, &key->peer_addr, + IP46_TYPE_ANY, key->sw_if_index, bus->adj_index); } bs->config_desired_min_tx_us = desired_min_tx_us; bs->required_min_rx_us = required_min_rx_us; - bs->required_min_echo_rx_us = required_min_rx_us; /* FIXME */ + bs->required_min_echo_rx_us = required_min_rx_us; /* FIXME */ bs->local_detect_mult = detect_mult; bfd_session_start (bum->bfd_main, bs); *bs_index = bs->bs_idx; @@ -181,11 +187,12 @@ bfd_udp_add_session_internal (bfd_udp_main_t *bum, u32 sw_if_index, } static vnet_api_error_t -bfd_udp_validate_api_input (u32 sw_if_index, const ip46_address_t *local_addr, - const ip46_address_t *peer_addr) +bfd_udp_validate_api_input (u32 sw_if_index, + const ip46_address_t * local_addr, + const ip46_address_t * peer_addr) { vnet_sw_interface_t *sw_if = - vnet_get_sw_interface (vnet_get_main (), sw_if_index); + vnet_get_sw_interface (vnet_get_main (), sw_if_index); u8 local_ip_valid = 0; ip_interface_address_t *ia = NULL; if (!sw_if) @@ -196,10 +203,10 @@ bfd_udp_validate_api_input (u32 sw_if_index, const ip46_address_t *local_addr, if (ip46_address_is_ip4 (local_addr)) { if (!ip46_address_is_ip4 (peer_addr)) - { - BFD_ERR ("IP family mismatch"); - return VNET_API_ERROR_INVALID_ARGUMENT; - } + { + BFD_ERR ("IP family mismatch"); 
+ return VNET_API_ERROR_INVALID_ARGUMENT; + } ip4_main_t *im = &ip4_main; /* *INDENT-OFF* */ @@ -219,10 +226,10 @@ bfd_udp_validate_api_input (u32 sw_if_index, const ip46_address_t *local_addr, else { if (ip46_address_is_ip4 (peer_addr)) - { - BFD_ERR ("IP family mismatch"); - return VNET_API_ERROR_INVALID_ARGUMENT; - } + { + BFD_ERR ("IP family mismatch"); + return VNET_API_ERROR_INVALID_ARGUMENT; + } ip6_main_t *im = &ip6_main; /* *INDENT-OFF* */ foreach_ip_interface_address ( @@ -249,14 +256,14 @@ bfd_udp_validate_api_input (u32 sw_if_index, const ip46_address_t *local_addr, return 0; } -vnet_api_error_t bfd_udp_add_session (u32 sw_if_index, u32 desired_min_tx_us, - u32 required_min_rx_us, u8 detect_mult, - const ip46_address_t *local_addr, - const ip46_address_t *peer_addr, - u32 *bs_index) +vnet_api_error_t +bfd_udp_add_session (u32 sw_if_index, u32 desired_min_tx_us, + u32 required_min_rx_us, u8 detect_mult, + const ip46_address_t * local_addr, + const ip46_address_t * peer_addr, u32 * bs_index) { vnet_api_error_t rv = - bfd_udp_validate_api_input (sw_if_index, local_addr, peer_addr); + bfd_udp_validate_api_input (sw_if_index, local_addr, peer_addr); if (rv) { return rv; @@ -271,24 +278,26 @@ vnet_api_error_t bfd_udp_add_session (u32 sw_if_index, u32 desired_min_tx_us, BFD_ERR ("desired_min_tx_us < 1"); return VNET_API_ERROR_INVALID_ARGUMENT; } - return bfd_udp_add_session_internal ( - &bfd_udp_main, sw_if_index, desired_min_tx_us, required_min_rx_us, - detect_mult, local_addr, peer_addr, bs_index); + return bfd_udp_add_session_internal (&bfd_udp_main, sw_if_index, + desired_min_tx_us, required_min_rx_us, + detect_mult, local_addr, peer_addr, + bs_index); } -vnet_api_error_t bfd_udp_del_session (u32 sw_if_index, - const ip46_address_t *local_addr, - const ip46_address_t *peer_addr) +vnet_api_error_t +bfd_udp_del_session (u32 sw_if_index, + const ip46_address_t * local_addr, + const ip46_address_t * peer_addr) { vnet_api_error_t rv = - 
bfd_udp_validate_api_input (sw_if_index, local_addr, peer_addr); + bfd_udp_validate_api_input (sw_if_index, local_addr, peer_addr); if (rv) { return rv; } bfd_udp_main_t *bum = &bfd_udp_main; vnet_sw_interface_t *sw_if = - vnet_get_sw_interface (vnet_get_main (), sw_if_index); + vnet_get_sw_interface (vnet_get_main (), sw_if_index); bfd_udp_key_t key; memset (&key, 0, sizeof (key)); key.sw_if_index = sw_if->sw_if_index; @@ -312,7 +321,8 @@ vnet_api_error_t bfd_udp_del_session (u32 sw_if_index, return 0; } -typedef enum { +typedef enum +{ BFD_UDP_INPUT_NEXT_NORMAL, BFD_UDP_INPUT_NEXT_REPLY, BFD_UDP_INPUT_N_NEXT, @@ -334,15 +344,17 @@ static char *bfd_udp_error_strings[] = { #undef F }; -typedef enum { +typedef enum +{ #define F(sym, str) BFD_UDP_ERROR_##sym, foreach_bfd_udp_error (F) #undef F - BFD_UDP_N_ERROR, + BFD_UDP_N_ERROR, } bfd_udp_error_t; -static void bfd_udp4_find_headers (vlib_buffer_t *b, const ip4_header_t **ip4, - const udp_header_t **udp) +static void +bfd_udp4_find_headers (vlib_buffer_t * b, const ip4_header_t ** ip4, + const udp_header_t ** udp) { /* sanity check first */ const i32 start = vnet_buffer (b)->ip.start_of_ip_header; @@ -353,49 +365,49 @@ static void bfd_udp4_find_headers (vlib_buffer_t *b, const ip4_header_t **ip4, *udp = NULL; return; } - *ip4 = (ip4_header_t *)(b->data + start); - if ((u8 *)*ip4 > (u8 *)vlib_buffer_get_current (b)) + *ip4 = (ip4_header_t *) (b->data + start); + if ((u8 *) * ip4 > (u8 *) vlib_buffer_get_current (b)) { BFD_ERR ("Start of ip header is beyond current data, ignoring"); *ip4 = NULL; *udp = NULL; return; } - *udp = (udp_header_t *)((*ip4) + 1); + *udp = (udp_header_t *) ((*ip4) + 1); } -static bfd_udp_error_t bfd_udp4_verify_transport (const ip4_header_t *ip4, - const udp_header_t *udp, - const bfd_session_t *bs) +static bfd_udp_error_t +bfd_udp4_verify_transport (const ip4_header_t * ip4, + const udp_header_t * udp, const bfd_session_t * bs) { const bfd_udp_session_t *bus = &bs->udp; const bfd_udp_key_t 
*key = &bus->key; if (ip4->src_address.as_u32 != key->peer_addr.ip4.as_u32) { BFD_ERR ("IPv4 src addr mismatch, got %U, expected %U", - format_ip4_address, ip4->src_address.as_u8, format_ip4_address, - key->peer_addr.ip4.as_u8); + format_ip4_address, ip4->src_address.as_u8, format_ip4_address, + key->peer_addr.ip4.as_u8); return BFD_UDP_ERROR_BAD; } if (ip4->dst_address.as_u32 != key->local_addr.ip4.as_u32) { BFD_ERR ("IPv4 dst addr mismatch, got %U, expected %U", - format_ip4_address, ip4->dst_address.as_u8, format_ip4_address, - key->local_addr.ip4.as_u8); + format_ip4_address, ip4->dst_address.as_u8, format_ip4_address, + key->local_addr.ip4.as_u8); return BFD_UDP_ERROR_BAD; } const u8 expected_ttl = 255; if (ip4->ttl != expected_ttl) { BFD_ERR ("IPv4 unexpected TTL value %u, expected %u", ip4->ttl, - expected_ttl); + expected_ttl); return BFD_UDP_ERROR_BAD; } if (clib_net_to_host_u16 (udp->src_port) < 49152 || clib_net_to_host_u16 (udp->src_port) > 65535) { BFD_ERR ("Invalid UDP src port %u, out of range <49152,65535>", - udp->src_port); + udp->src_port); } return BFD_UDP_ERROR_NONE; } @@ -406,33 +418,35 @@ typedef struct bfd_pkt_t pkt; } bfd_rpc_update_t; -static void bfd_rpc_update_session_cb (const bfd_rpc_update_t *a) +static void +bfd_rpc_update_session_cb (const bfd_rpc_update_t * a) { bfd_consume_pkt (bfd_udp_main.bfd_main, &a->pkt, a->bs_idx); } -static void bfd_rpc_update_session (u32 bs_idx, const bfd_pkt_t *pkt) +static void +bfd_rpc_update_session (u32 bs_idx, const bfd_pkt_t * pkt) { /* packet length was already verified to be correct by the caller */ const u32 data_size = sizeof (bfd_rpc_update_t) - - STRUCT_SIZE_OF (bfd_rpc_update_t, pkt) + - pkt->head.length; + STRUCT_SIZE_OF (bfd_rpc_update_t, pkt) + pkt->head.length; u8 data[data_size]; - bfd_rpc_update_t *update = (bfd_rpc_update_t *)data; + bfd_rpc_update_t *update = (bfd_rpc_update_t *) data; update->bs_idx = bs_idx; clib_memcpy (&update->pkt, pkt, pkt->head.length); 
vl_api_rpc_call_main_thread (bfd_rpc_update_session_cb, data, data_size); } -static bfd_udp_error_t bfd_udp4_scan (vlib_main_t *vm, vlib_node_runtime_t *rt, - vlib_buffer_t *b, bfd_session_t **bs_out) +static bfd_udp_error_t +bfd_udp4_scan (vlib_main_t * vm, vlib_node_runtime_t * rt, + vlib_buffer_t * b, bfd_session_t ** bs_out) { const bfd_pkt_t *pkt = vlib_buffer_get_current (b); if (sizeof (*pkt) > b->current_length) { - BFD_ERR ( - "Payload size %d too small to hold bfd packet of minimum size %d", - b->current_length, sizeof (*pkt)); + BFD_ERR + ("Payload size %d too small to hold bfd packet of minimum size %d", + b->current_length, sizeof (*pkt)); return BFD_UDP_ERROR_BAD; } const ip4_header_t *ip4; @@ -451,7 +465,7 @@ static bfd_udp_error_t bfd_udp4_scan (vlib_main_t *vm, vlib_node_runtime_t *rt, if (pkt->your_disc) { BFD_DBG ("Looking up BFD session using discriminator %u", - pkt->your_disc); + pkt->your_disc); bs = bfd_find_session_by_disc (bfd_udp_main.bfd_main, pkt->your_disc); } else @@ -462,9 +476,9 @@ static bfd_udp_error_t bfd_udp4_scan (vlib_main_t *vm, vlib_node_runtime_t *rt, key.local_addr.ip4.as_u32 = ip4->dst_address.as_u32; key.peer_addr.ip4.as_u32 = ip4->src_address.as_u32; BFD_DBG ("Looking up BFD session using key (sw_if_index=%u, local=%U, " - "peer=%U)", - key.sw_if_index, format_ip4_address, key.local_addr.ip4.as_u8, - format_ip4_address, key.peer_addr.ip4.as_u8); + "peer=%U)", + key.sw_if_index, format_ip4_address, key.local_addr.ip4.as_u8, + format_ip4_address, key.peer_addr.ip4.as_u8); bs = bfd_lookup_session (&bfd_udp_main, &key); } if (!bs) @@ -487,8 +501,9 @@ static bfd_udp_error_t bfd_udp4_scan (vlib_main_t *vm, vlib_node_runtime_t *rt, return BFD_UDP_ERROR_NONE; } -static void bfd_udp6_find_headers (vlib_buffer_t *b, const ip6_header_t **ip6, - const udp_header_t **udp) +static void +bfd_udp6_find_headers (vlib_buffer_t * b, const ip6_header_t ** ip6, + const udp_header_t ** udp) { /* sanity check first */ const i32 start = 
vnet_buffer (b)->ip.start_of_ip_header; @@ -499,62 +514,65 @@ static void bfd_udp6_find_headers (vlib_buffer_t *b, const ip6_header_t **ip6, *udp = NULL; return; } - *ip6 = (ip6_header_t *)(b->data + start); - if ((u8 *)*ip6 > (u8 *)vlib_buffer_get_current (b)) + *ip6 = (ip6_header_t *) (b->data + start); + if ((u8 *) * ip6 > (u8 *) vlib_buffer_get_current (b)) { BFD_ERR ("Start of ip header is beyond current data, ignoring"); *ip6 = NULL; *udp = NULL; return; } - *udp = (udp_header_t *)((*ip6) + 1); + *udp = (udp_header_t *) ((*ip6) + 1); } -static bfd_udp_error_t bfd_udp6_verify_transport (const ip6_header_t *ip6, - const udp_header_t *udp, - const bfd_session_t *bs) +static bfd_udp_error_t +bfd_udp6_verify_transport (const ip6_header_t * ip6, + const udp_header_t * udp, const bfd_session_t * bs) { const bfd_udp_session_t *bus = &bs->udp; const bfd_udp_key_t *key = &bus->key; if (ip6->src_address.as_u64[0] != key->peer_addr.ip6.as_u64[0] && ip6->src_address.as_u64[1] != key->peer_addr.ip6.as_u64[1]) { - BFD_ERR ("IP src addr mismatch, got %U, expected %U", format_ip6_address, - ip6, format_ip6_address, &key->peer_addr.ip6); + BFD_ERR ("IP src addr mismatch, got %U, expected %U", + format_ip6_address, ip6, format_ip6_address, + &key->peer_addr.ip6); return BFD_UDP_ERROR_BAD; } if (ip6->dst_address.as_u64[0] != key->local_addr.ip6.as_u64[0] && ip6->dst_address.as_u64[1] != key->local_addr.ip6.as_u64[1]) { - BFD_ERR ("IP dst addr mismatch, got %U, expected %U", format_ip6_address, - ip6, format_ip6_address, &key->local_addr.ip6); + BFD_ERR ("IP dst addr mismatch, got %U, expected %U", + format_ip6_address, ip6, format_ip6_address, + &key->local_addr.ip6); return BFD_UDP_ERROR_BAD; } const u8 expected_hop_limit = 255; if (ip6->hop_limit != expected_hop_limit) { BFD_ERR ("IPv6 unexpected hop-limit value %u, expected %u", - ip6->hop_limit, expected_hop_limit); + ip6->hop_limit, expected_hop_limit); return BFD_UDP_ERROR_BAD; } if (clib_net_to_host_u16 (udp->src_port) < 
49152 || clib_net_to_host_u16 (udp->src_port) > 65535) { BFD_ERR ("Invalid UDP src port %u, out of range <49152,65535>", - udp->src_port); + udp->src_port); } return BFD_UDP_ERROR_NONE; } -static bfd_udp_error_t bfd_udp6_scan (vlib_main_t *vm, vlib_node_runtime_t *rt, - vlib_buffer_t *b, bfd_session_t **bs_out) +static bfd_udp_error_t +bfd_udp6_scan (vlib_main_t * vm, vlib_node_runtime_t * rt, + vlib_buffer_t * b, bfd_session_t ** bs_out) { const bfd_pkt_t *pkt = vlib_buffer_get_current (b); if (sizeof (*pkt) > b->current_length) { - BFD_ERR ( - "Payload size %d too small to hold bfd packet of minimum size %d", - b->current_length, sizeof (*pkt)); + BFD_ERR + ("Payload size %d too small to hold bfd packet of minimum size %d", + b->current_length, sizeof (*pkt)); return BFD_UDP_ERROR_BAD; } const ip6_header_t *ip6; @@ -573,7 +591,7 @@ static bfd_udp_error_t bfd_udp6_scan (vlib_main_t *vm, vlib_node_runtime_t *rt, if (pkt->your_disc) { BFD_DBG ("Looking up BFD session using discriminator %u", - pkt->your_disc); + pkt->your_disc); bs = bfd_find_session_by_disc (bfd_udp_main.bfd_main, pkt->your_disc); } else @@ -586,9 +604,9 @@ static bfd_udp_error_t bfd_udp6_scan (vlib_main_t *vm, vlib_node_runtime_t *rt, key.peer_addr.ip6.as_u64[0] = ip6->src_address.as_u64[0]; key.peer_addr.ip6.as_u64[1] = ip6->src_address.as_u64[1]; BFD_DBG ("Looking up BFD session using key (sw_if_index=%u, local=%U, " - "peer=%U)", - key.sw_if_index, format_ip6_address, &key.local_addr, - format_ip6_address, &key.peer_addr); + "peer=%U)", + key.sw_if_index, format_ip6_address, &key.local_addr, + format_ip6_address, &key.peer_addr); bs = bfd_lookup_session (&bfd_udp_main, &key); } if (!bs) @@ -615,14 +633,15 @@ static bfd_udp_error_t bfd_udp6_scan (vlib_main_t *vm, vlib_node_runtime_t *rt, * Process a frame of bfd packets * Expect 1 packet / frame */ -static uword bfd_udp_input (vlib_main_t *vm, vlib_node_runtime_t *rt, - vlib_frame_t *f, int is_ipv6) +static uword +bfd_udp_input (vlib_main_t * 
vm, vlib_node_runtime_t * rt, + vlib_frame_t * f, int is_ipv6) { u32 n_left_from, *from; bfd_input_trace_t *t0; - from = vlib_frame_vector_args (f); /* array of buffer indices */ - n_left_from = f->n_vectors; /* number of buffer indices */ + from = vlib_frame_vector_args (f); /* array of buffer indices */ + n_left_from = f->n_vectors; /* number of buffer indices */ while (n_left_from > 0) { @@ -637,49 +656,49 @@ static uword bfd_udp_input (vlib_main_t *vm, vlib_node_runtime_t *rt, /* If this pkt is traced, snapshot the data */ if (b0->flags & VLIB_BUFFER_IS_TRACED) - { - int len; - t0 = vlib_add_trace (vm, rt, b0, sizeof (*t0)); - len = (b0->current_length < sizeof (t0->data)) ? b0->current_length - : sizeof (t0->data); - t0->len = len; - clib_memcpy (t0->data, vlib_buffer_get_current (b0), len); - } + { + int len; + t0 = vlib_add_trace (vm, rt, b0, sizeof (*t0)); + len = (b0->current_length < sizeof (t0->data)) ? b0->current_length + : sizeof (t0->data); + t0->len = len; + clib_memcpy (t0->data, vlib_buffer_get_current (b0), len); + } /* scan this bfd pkt. 
error0 is the counter index to bmp */ if (is_ipv6) - { - error0 = bfd_udp6_scan (vm, rt, b0, &bs); - } + { + error0 = bfd_udp6_scan (vm, rt, b0, &bs); + } else - { - error0 = bfd_udp4_scan (vm, rt, b0, &bs); - } + { + error0 = bfd_udp4_scan (vm, rt, b0, &bs); + } b0->error = rt->errors[error0]; next0 = BFD_UDP_INPUT_NEXT_NORMAL; if (BFD_UDP_ERROR_NONE == error0) - { - /* if everything went fine, check for poll bit, if present, re-use - the buffer and based on (now updated) session parameters, send the - final packet back */ - const bfd_pkt_t *pkt = vlib_buffer_get_current (b0); - if (bfd_pkt_get_poll (pkt)) - { - bfd_send_final (vm, b0, bs); - if (is_ipv6) - { - vlib_node_increment_counter (vm, bfd_udp6_input_node.index, - b0->error, 1); - } - else - { - vlib_node_increment_counter (vm, bfd_udp4_input_node.index, - b0->error, 1); - } - next0 = BFD_UDP_INPUT_NEXT_REPLY; - } - } + { + /* if everything went fine, check for poll bit, if present, re-use + the buffer and based on (now updated) session parameters, send the + final packet back */ + const bfd_pkt_t *pkt = vlib_buffer_get_current (b0); + if (bfd_pkt_get_poll (pkt)) + { + bfd_send_final (vm, b0, bs); + if (is_ipv6) + { + vlib_node_increment_counter (vm, bfd_udp6_input_node.index, + b0->error, 1); + } + else + { + vlib_node_increment_counter (vm, bfd_udp4_input_node.index, + b0->error, 1); + } + next0 = BFD_UDP_INPUT_NEXT_REPLY; + } + } vlib_set_next_frame_buffer (vm, rt, next0, bi0); from += 1; @@ -689,8 +708,8 @@ static uword bfd_udp_input (vlib_main_t *vm, vlib_node_runtime_t *rt, return f->n_vectors; } -static uword bfd_udp4_input (vlib_main_t *vm, vlib_node_runtime_t *rt, - vlib_frame_t *f) +static uword +bfd_udp4_input (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) { return bfd_udp_input (vm, rt, f, 0); } @@ -719,8 +738,8 @@ VLIB_REGISTER_NODE (bfd_udp4_input_node, static) = { }; /* *INDENT-ON* */ -static uword bfd_udp6_input (vlib_main_t *vm, vlib_node_runtime_t *rt, - vlib_frame_t *f) 
+static uword +bfd_udp6_input (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) { return bfd_udp_input (vm, rt, f, 1); } @@ -746,8 +765,8 @@ VLIB_REGISTER_NODE (bfd_udp6_input_node, static) = { }; /* *INDENT-ON* */ -static clib_error_t *bfd_sw_interface_up_down (vnet_main_t *vnm, - u32 sw_if_index, u32 flags) +static clib_error_t * +bfd_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags) { // vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index); if (!(flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)) @@ -759,8 +778,8 @@ static clib_error_t *bfd_sw_interface_up_down (vnet_main_t *vnm, VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (bfd_sw_interface_up_down); -static clib_error_t *bfd_hw_interface_up_down (vnet_main_t *vnm, - u32 hw_if_index, u32 flags) +static clib_error_t * +bfd_hw_interface_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) { if (flags & VNET_HW_INTERFACE_FLAG_LINK_UP) { @@ -774,10 +793,11 @@ VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (bfd_hw_interface_up_down); /* * setup function */ -static clib_error_t *bfd_udp_init (vlib_main_t *vm) +static clib_error_t * +bfd_udp_init (vlib_main_t * vm) { mhash_init (&bfd_udp_main.bfd_session_idx_by_bfd_key, sizeof (uword), - sizeof (bfd_udp_key_t)); + sizeof (bfd_udp_key_t)); bfd_udp_main.bfd_main = &bfd_main; udp_register_dst_port (vm, UDP_DST_PORT_bfd4, bfd_udp4_input_node.index, 1); udp_register_dst_port (vm, UDP_DST_PORT_bfd6, bfd_udp6_input_node.index, 0); @@ -785,3 +805,11 @@ static clib_error_t *bfd_udp_init (vlib_main_t *vm) } VLIB_INIT_FUNCTION (bfd_udp_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ -- cgit 1.2.3-korg From 402ed3128512efc091a560729ce1e772a86e9f74 Mon Sep 17 00:00:00 2001 From: Klement Sekera Date: Wed, 18 Jan 2017 09:44:36 +0100 Subject: BFD: improve finding of ipv4/ipv6 headers Avoid coverity warning and improve safety by declaring a helper structure and working with 
it when searching for ip headers. Make sure the content following IPv6 header is actually UDP before parsing it. Bail out if unexpected IPv6 header found ... Change-Id: I1c6b9fd42d6fdae226f12c91c53c07a932b29522 Signed-off-by: Klement Sekera --- src/vnet/bfd/bfd_udp.c | 110 ++++++++++++++++++++++++++++--------------------- 1 file changed, 62 insertions(+), 48 deletions(-) (limited to 'src/vnet/bfd') diff --git a/src/vnet/bfd/bfd_udp.c b/src/vnet/bfd/bfd_udp.c index 1b3c20b8..dfd030ae 100644 --- a/src/vnet/bfd/bfd_udp.c +++ b/src/vnet/bfd/bfd_udp.c @@ -35,78 +35,82 @@ void bfd_add_udp4_transport (vlib_main_t * vm, vlib_buffer_t * b, bfd_udp_session_t * bus) { - udp_header_t *udp; const bfd_udp_key_t *key = &bus->key; b->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; vnet_buffer (b)->ip.adj_index[VLIB_RX] = bus->adj_index; vnet_buffer (b)->ip.adj_index[VLIB_TX] = bus->adj_index; - ip4_header_t *ip4; - const size_t headers_size = sizeof (*ip4) + sizeof (*udp); - vlib_buffer_advance (b, -headers_size); - ip4 = vlib_buffer_get_current (b); - udp = (udp_header_t *) (ip4 + 1); - memset (ip4, 0, headers_size); - ip4->ip_version_and_header_length = 0x45; - ip4->ttl = 255; - ip4->protocol = IP_PROTOCOL_UDP; - ip4->src_address.as_u32 = key->local_addr.ip4.as_u32; - ip4->dst_address.as_u32 = key->peer_addr.ip4.as_u32; - - udp->src_port = clib_host_to_net_u16 (50000); /* FIXME */ - udp->dst_port = clib_host_to_net_u16 (UDP_DST_PORT_bfd4); + typedef struct + { + ip4_header_t ip4; + udp_header_t udp; + } ip4_udp_headers; + ip4_udp_headers *headers = vlib_buffer_get_current (b); + vlib_buffer_advance (b, -sizeof (*headers)); + headers = vlib_buffer_get_current (b); + memset (headers, 0, sizeof (*headers)); + headers->ip4.ip_version_and_header_length = 0x45; + headers->ip4.ttl = 255; + headers->ip4.protocol = IP_PROTOCOL_UDP; + headers->ip4.src_address.as_u32 = key->local_addr.ip4.as_u32; + headers->ip4.dst_address.as_u32 = key->peer_addr.ip4.as_u32; + + headers->udp.src_port = 
clib_host_to_net_u16 (50000); /* FIXME */ + headers->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_bfd4); /* fix ip length, checksum and udp length */ const u16 ip_length = vlib_buffer_length_in_chain (vm, b); - ip4->length = clib_host_to_net_u16 (ip_length); - ip4->checksum = ip4_header_checksum (ip4); + headers->ip4.length = clib_host_to_net_u16 (ip_length); + headers->ip4.checksum = ip4_header_checksum (&headers->ip4); - const u16 udp_length = ip_length - (sizeof (*ip4)); - udp->length = clib_host_to_net_u16 (udp_length); + const u16 udp_length = ip_length - (sizeof (headers->ip4)); + headers->udp.length = clib_host_to_net_u16 (udp_length); } void bfd_add_udp6_transport (vlib_main_t * vm, vlib_buffer_t * b, bfd_udp_session_t * bus) { - udp_header_t *udp; const bfd_udp_key_t *key = &bus->key; b->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; vnet_buffer (b)->ip.adj_index[VLIB_RX] = bus->adj_index; vnet_buffer (b)->ip.adj_index[VLIB_TX] = bus->adj_index; - ip6_header_t *ip6; - const size_t headers_size = sizeof (*ip6) + sizeof (*udp); - vlib_buffer_advance (b, -headers_size); - ip6 = vlib_buffer_get_current (b); - udp = (udp_header_t *) (ip6 + 1); - memset (ip6, 0, headers_size); - ip6->ip_version_traffic_class_and_flow_label = + typedef struct + { + ip6_header_t ip6; + udp_header_t udp; + } ip6_udp_headers; + vlib_buffer_advance (b, -sizeof (ip6_udp_headers)); + ip6_udp_headers *headers = vlib_buffer_get_current (b); + memset (headers, 0, sizeof (*headers)); + headers->ip6.ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (0x6 << 28); - ip6->hop_limit = 255; - ip6->protocol = IP_PROTOCOL_UDP; - clib_memcpy (&ip6->src_address, &key->local_addr.ip6, - sizeof (ip6->src_address)); - clib_memcpy (&ip6->dst_address, &key->peer_addr.ip6, - sizeof (ip6->dst_address)); + headers->ip6.hop_limit = 255; + headers->ip6.protocol = IP_PROTOCOL_UDP; + clib_memcpy (&headers->ip6.src_address, &key->local_addr.ip6, + sizeof (headers->ip6.src_address)); + clib_memcpy 
(&headers->ip6.dst_address, &key->peer_addr.ip6, + sizeof (headers->ip6.dst_address)); - udp->src_port = clib_host_to_net_u16 (50000); /* FIXME */ - udp->dst_port = clib_host_to_net_u16 (UDP_DST_PORT_bfd6); + headers->udp.src_port = clib_host_to_net_u16 (50000); /* FIXME */ + headers->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_bfd6); /* fix ip payload length and udp length */ const u16 udp_length = - vlib_buffer_length_in_chain (vm, b) - (sizeof (*ip6)); - udp->length = clib_host_to_net_u16 (udp_length); - ip6->payload_length = udp->length; + vlib_buffer_length_in_chain (vm, b) - (sizeof (headers->ip6)); + headers->udp.length = clib_host_to_net_u16 (udp_length); + headers->ip6.payload_length = headers->udp.length; /* IPv6 UDP checksum is mandatory */ int bogus = 0; - udp->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ip6, &bogus); + headers->udp.checksum = + ip6_tcp_udp_icmp_compute_checksum (vm, b, &headers->ip6, &bogus); ASSERT (bogus == 0); - if (udp->checksum == 0) + if (headers->udp.checksum == 0) { - udp->checksum = 0xffff; + headers->udp.checksum = 0xffff; } } @@ -237,7 +241,7 @@ bfd_udp_validate_api_input (u32 sw_if_index, ip6_address_t *x = ip_interface_address_get_address (&im->lookup_main, ia); if (local_addr->ip6.as_u64[0] == x->as_u64[0] && - local_addr->ip6.as_u64[1] == x->as_u64[1]) + local_addr->ip6.as_u64[1] == x->as_u64[1]) { /* valid address for this interface */ local_ip_valid = 1; @@ -522,6 +526,15 @@ bfd_udp6_find_headers (vlib_buffer_t * b, const ip6_header_t ** ip6, *udp = NULL; return; } + /* FIXME skip extra headers when searching for UDP ? 
*/ + if ((*ip6)->protocol != IP_PROTOCOL_UDP) + { + BFD_ERR ("Unexpected protocol in IPv6 header '%u', expected '%u' (== " + "IP_PROTOCOL_UDP)" (*ip6)->protocol, IP_PROTOCOL_UDP); + *ip6 = NULL; + *udp = NULL; + return; + } *udp = (udp_header_t *) ((*ip6) + 1); } @@ -604,9 +617,8 @@ bfd_udp6_scan (vlib_main_t * vm, vlib_node_runtime_t * rt, key.peer_addr.ip6.as_u64[0] = ip6->src_address.as_u64[0]; key.peer_addr.ip6.as_u64[1] = ip6->src_address.as_u64[1]; BFD_DBG ("Looking up BFD session using key (sw_if_index=%u, local=%U, " - "peer=%U)", - key.sw_if_index, format_ip6_address, &key.local_addr, - format_ip6_address, &key.peer_addr); + "peer=%U)", key.sw_if_index, format_ip6_address, + &key.local_addr, format_ip6_address, &key.peer_addr); bs = bfd_lookup_session (&bfd_udp_main, &key); } if (!bs) @@ -679,9 +691,11 @@ bfd_udp_input (vlib_main_t * vm, vlib_node_runtime_t * rt, next0 = BFD_UDP_INPUT_NEXT_NORMAL; if (BFD_UDP_ERROR_NONE == error0) { - /* if everything went fine, check for poll bit, if present, re-use - the buffer and based on (now updated) session parameters, send the - final packet back */ + /* + * if everything went fine, check for poll bit, if present, re-use + * the buffer and based on (now updated) session parameters, send + * the final packet back + */ const bfd_pkt_t *pkt = vlib_buffer_get_current (b0); if (bfd_pkt_get_poll (pkt)) { -- cgit 1.2.3-korg From b17dd9607ee8ecba5ae3ef69c7b4915b57de292a Mon Sep 17 00:00:00 2001 From: Klement Sekera Date: Mon, 9 Jan 2017 07:43:48 +0100 Subject: BFD: SHA1 authentication Add authentication support to BFD feature. Out of three existing authentication types, implement SHA1 (sole RFC requirement). Simple password is insecure and MD5 is discouraged by the RFC, so ignore those. Add/change APIs to allow configuring BFD authentication keys and their usage with BFD sessions. 
Change-Id: Ifb0fb5b19c2e72196d84c1cde919bd4c074ea415 Signed-off-by: Klement Sekera --- src/vnet/api_errno.h | 6 +- src/vnet/bfd/bfd.api | 207 +++++++++++-- src/vnet/bfd/bfd_api.c | 177 ++++++++--- src/vnet/bfd/bfd_api.h | 31 +- src/vnet/bfd/bfd_main.c | 628 +++++++++++++++++++++++++++++++++---- src/vnet/bfd/bfd_main.h | 79 ++++- src/vnet/bfd/bfd_protocol.c | 127 ++++++-- src/vnet/bfd/bfd_protocol.h | 119 +++++-- src/vnet/bfd/bfd_udp.c | 343 +++++++++++++++++---- test/bfd.py | 265 ++++++++++++---- test/framework.py | 15 +- test/test_bfd.py | 733 +++++++++++++++++++++++++++++++++++++++++--- test/vpp_papi_provider.py | 77 ++++- 13 files changed, 2437 insertions(+), 370 deletions(-) (limited to 'src/vnet/bfd') diff --git a/src/vnet/api_errno.h b/src/vnet/api_errno.h index 7166da67..192bfaa4 100644 --- a/src/vnet/api_errno.h +++ b/src/vnet/api_errno.h @@ -91,8 +91,10 @@ _(INVALID_ADDRESS_FAMILY, -97, "Invalid address family") \ _(INVALID_SUB_SW_IF_INDEX, -98, "Invalid sub-interface sw_if_index") \ _(TABLE_TOO_BIG, -99, "Table too big") \ _(CANNOT_ENABLE_DISABLE_FEATURE, -100, "Cannot enable/disable feature") \ -_(BFD_EEXIST, -101, "Duplicate BFD session") \ -_(BFD_NOENT, -102, "No such BFD session") +_(BFD_EEXIST, -101, "Duplicate BFD object") \ +_(BFD_ENOENT, -102, "No such BFD object") \ +_(BFD_EINUSE, -103, "BFD object in use") \ +_(BFD_NOTSUPP, -104, "BFD feature not supported") typedef enum { diff --git a/src/vnet/bfd/bfd.api b/src/vnet/bfd/bfd.api index 5798ee69..17ca35b6 100644 --- a/src/vnet/bfd/bfd.api +++ b/src/vnet/bfd/bfd.api @@ -21,7 +21,8 @@ @param min_rx - desired min rx interval @param detect_mult - desired detection multiplier */ -define bfd_set_config { +define bfd_set_config +{ u32 client_index; u32 context; u32 slow_timer; @@ -34,14 +35,16 @@ define bfd_set_config { @param context - sender context, to match reply w/ request @param retval - return code for the request */ -define bfd_set_config_reply { +define bfd_set_config_reply +{ u32 context; i32 
retval; }; /** \brief Get BFD configuration */ -define bfd_get_config { +define bfd_get_config +{ u32 client_index; u32 context; }; @@ -54,7 +57,8 @@ define bfd_get_config { @param min_rx - desired min rx interval @param detect_mult - desired detection multiplier */ -define bfd_get_config_reply { +define bfd_get_config_reply +{ u32 client_index; u32 context; u32 slow_timer; @@ -69,12 +73,16 @@ define bfd_get_config_reply { @param sw_if_index - sw index of the interface @param desired_min_tx - desired min transmit interval (microseconds) @param required_min_rx - required min receive interval (microseconds) - @param detect_mult - detect multiplier (# of packets missed between connection goes down) @param local_addr - local address @param peer_addr - peer address @param is_ipv6 - local_addr, peer_addr are IPv6 if non-zero, otherwise IPv4 + @param detect_mult - detect multiplier (# of packets missed before connection goes down) + @param is_authenticated - non-zero if authentication is required + @param bfd_key_id - key id sent out in BFD packets (if is_authenticated) + @param conf_key_id - id of already configured key (if is_authenticated) */ -define bfd_udp_add { +define bfd_udp_add +{ u32 client_index; u32 context; u32 sw_if_index; @@ -84,17 +92,19 @@ define bfd_udp_add { u8 peer_addr[16]; u8 is_ipv6; u8 detect_mult; + u8 is_authenticated; + u8 bfd_key_id; + u32 conf_key_id; }; /** \brief Add UDP BFD session response @param context - sender context, to match reply w/ request @param retval - return code for the request - @param bs_index - index of the session created */ -define bfd_udp_add_reply { +define bfd_udp_add_reply +{ u32 context; i32 retval; - u32 bs_index; }; /** \brief Delete UDP BFD session on interface @@ -105,7 +115,8 @@ define bfd_udp_add_reply { @param peer_addr - peer address @param is_ipv6 - local_addr, peer_addr are IPv6 if non-zero, otherwise IPv4 */ -define bfd_udp_del { +define bfd_udp_del +{ u32 client_index; u32 context; u32 sw_if_index; @@ 
-118,7 +129,8 @@ define bfd_udp_del { @param context - sender context, to match reply w/ request @param retval - return code for the request */ -define bfd_udp_del_reply { +define bfd_udp_del_reply +{ u32 context; i32 retval; }; @@ -127,48 +139,61 @@ define bfd_udp_del_reply { @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request */ -define bfd_udp_session_dump { +define bfd_udp_session_dump +{ u32 client_index; u32 context; }; /** \brief BFD session details structure @param context - sender context, to match reply w/ request - @param bs_index - index of the session @param sw_if_index - sw index of the interface @param local_addr - local address @param peer_addr - peer address @param is_ipv6 - local_addr, peer_addr are IPv6 if non-zero, otherwise IPv4 @param state - session state + @param is_authenticated - non-zero if authentication in-use, zero otherwise + @param bfd_key_id - ID of key currently in-use if auth is on + @param conf_key_id - configured key ID for this session */ -define bfd_udp_session_details { +define bfd_udp_session_details +{ u32 context; - u32 bs_index; u32 sw_if_index; u8 local_addr[16]; u8 peer_addr[16]; u8 is_ipv6; u8 state; + u8 is_authenticated; + u8 bfd_key_id; + u32 conf_key_id; }; -/** \brief Set flags of BFD session +/** \brief Set flags of BFD UDP session @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request - @param bs_index - index of the bfd session to set flags on + @param sw_if_index - sw index of the interface + @param local_addr - local address + @param peer_addr - peer address + @param is_ipv6 - local_addr, peer_addr are IPv6 if non-zero, otherwise IPv4 @param admin_up_down - set the admin state, 1 = up, 0 = down */ -define bfd_session_set_flags { +define bfd_udp_session_set_flags +{ u32 client_index; u32 context; - u32 bs_index; + u32 sw_if_index; + u8 local_addr[16]; + u8 peer_addr[16]; + u8 
is_ipv6; u8 admin_up_down; }; -/** \brief Reply to bfd_session_set_flags +/** \brief Reply to bfd_udp_session_set_flags @param context - sender context which was passed in the request @param retval - return code of the set flags request */ -define bfd_session_set_flags_reply +define bfd_udp_session_set_flags_reply { u32 context; i32 retval; @@ -198,6 +223,146 @@ define want_bfd_events_reply i32 retval; }; +/** \brief BFD UDP - add/replace key to configuration + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param conf_key_id - key ID to add/replace/delete + @param key_len - length of key (must be non-zero) + @param auth_type - authentication type (RFC 5880/4.1/Auth Type) + @param key - key data +*/ +define bfd_auth_set_key +{ + u32 client_index; + u32 context; + u32 conf_key_id; + u8 key_len; + u8 auth_type; + u8 key[20]; +}; + +/** \brief BFD UDP - add/replace key reply + @param context - returned sender context, to match reply w/ request + @param retval - return code +*/ +define bfd_auth_set_key_reply +{ + u32 context; + i32 retval; +}; + +/** \brief BFD UDP - delete key from configuration + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param conf_key_id - key ID to add/replace/delete + @param key_len - length of key (must be non-zero) + @param key - key data +*/ +define bfd_auth_del_key +{ + u32 client_index; + u32 context; + u32 conf_key_id; +}; + +/** \brief BFD UDP - delete key reply + @param context - returned sender context, to match reply w/ request + @param retval - return code +*/ +define bfd_auth_del_key_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Get a list of configured authentication keys + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define bfd_auth_keys_dump +{ + u32 client_index; + u32 context; +}; + +/** 
\brief BFD authentication key details + @param context - sender context, to match reply w/ request + @param conf_key_id - configured key ID + @param use_count - how many BFD sessions currently use this key + @param auth_type - authentication type (RFC 5880/4.1/Auth Type) +*/ +define bfd_auth_keys_details +{ + u32 context; + u32 conf_key_id; + u32 use_count; + u8 auth_type; +}; + +/** \brief BFD UDP - activate/change authentication + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - sw index of the interface + @param local_addr - local address + @param peer_addr - peer address + @param is_ipv6 - local_addr, peer_addr are IPv6 if non-zero, otherwise IPv4 + @param is_delayed - change is applied once peer applies the change (on first received packet with this auth) + @param bfd_key_id - key id sent out in BFD packets + @param conf_key_id - id of already configured key +*/ +define bfd_udp_auth_activate +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u8 local_addr[16]; + u8 peer_addr[16]; + u8 is_ipv6; + u8 is_delayed; + u8 bfd_key_id; + u32 conf_key_id; +}; + +/** \brief BFD UDP - activate/change authentication reply + @param context - returned sender context, to match reply w/ request + @param retval - return code +*/ +define bfd_udp_auth_activate_reply +{ + u32 context; + i32 retval; +}; + +/** \brief BFD UDP - deactivate authentication + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - sw index of the interface + @param local_addr - local address + @param peer_addr - peer address + @param is_ipv6 - local_addr, peer_addr are IPv6 if non-zero, otherwise IPv4 + @param is_delayed - change is applied once peer applies the change (on first received non-authenticated packet) +*/ +define bfd_udp_auth_deactivate +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u8 
local_addr[16]; + u8 peer_addr[16]; + u8 is_ipv6; + u8 is_delayed; +}; + +/** \brief BFD UDP - deactivate authentication reply + @param context - returned sender context, to match reply w/ request + @param retval - return code +*/ +define bfd_udp_auth_deactivate_reply +{ + u32 context; + i32 retval; +}; + /* * Local Variables: * eval: (c-set-style "gnu") diff --git a/src/vnet/bfd/bfd_api.c b/src/vnet/bfd/bfd_api.c index 2e63fe90..cfc3a38d 100644 --- a/src/vnet/bfd/bfd_api.c +++ b/src/vnet/bfd/bfd_api.c @@ -43,15 +43,39 @@ #include -#define foreach_vpe_api_msg \ - _ (BFD_UDP_ADD, bfd_udp_add) \ - _ (BFD_UDP_DEL, bfd_udp_del) \ - _ (BFD_UDP_SESSION_DUMP, bfd_udp_session_dump) \ - _ (BFD_SESSION_SET_FLAGS, bfd_session_set_flags) \ - _ (WANT_BFD_EVENTS, want_bfd_events) +#define foreach_vpe_api_msg \ + _ (BFD_UDP_ADD, bfd_udp_add) \ + _ (BFD_UDP_DEL, bfd_udp_del) \ + _ (BFD_UDP_SESSION_DUMP, bfd_udp_session_dump) \ + _ (BFD_UDP_SESSION_SET_FLAGS, bfd_udp_session_set_flags) \ + _ (WANT_BFD_EVENTS, want_bfd_events) \ + _ (BFD_AUTH_SET_KEY, bfd_auth_set_key) \ + _ (BFD_AUTH_DEL_KEY, bfd_auth_del_key) \ + _ (BFD_AUTH_KEYS_DUMP, bfd_auth_keys_dump) \ + _ (BFD_UDP_AUTH_ACTIVATE, bfd_udp_auth_activate) \ + _ (BFD_UDP_AUTH_DEACTIVATE, bfd_udp_auth_deactivate) pub_sub_handler (bfd_events, BFD_EVENTS); +#define BFD_UDP_API_PARAM_COMMON_CODE \ + ip46_address_t local_addr; \ + memset (&local_addr, 0, sizeof (local_addr)); \ + ip46_address_t peer_addr; \ + memset (&peer_addr, 0, sizeof (peer_addr)); \ + if (mp->is_ipv6) \ + { \ + clib_memcpy (&local_addr.ip6, mp->local_addr, sizeof (local_addr.ip6)); \ + clib_memcpy (&peer_addr.ip6, mp->peer_addr, sizeof (peer_addr.ip6)); \ + } \ + else \ + { \ + clib_memcpy (&local_addr.ip4, mp->local_addr, sizeof (local_addr.ip4)); \ + clib_memcpy (&peer_addr.ip4, mp->peer_addr, sizeof (peer_addr.ip4)); \ + } + +#define BFD_UDP_API_PARAM_FROM_MP(mp) \ + clib_net_to_host_u32 (mp->sw_if_index), &local_addr, &peer_addr + static void 
vl_api_bfd_udp_add_t_handler (vl_api_bfd_udp_add_t * mp) { @@ -60,31 +84,17 @@ vl_api_bfd_udp_add_t_handler (vl_api_bfd_udp_add_t * mp) VALIDATE_SW_IF_INDEX (mp); - ip46_address_t local_addr; - memset (&local_addr, 0, sizeof (local_addr)); - ip46_address_t peer_addr; - memset (&peer_addr, 0, sizeof (peer_addr)); - if (mp->is_ipv6) - { - clib_memcpy (&local_addr.ip6, mp->local_addr, sizeof (local_addr.ip6)); - clib_memcpy (&peer_addr.ip6, mp->peer_addr, sizeof (peer_addr.ip6)); - } - else - { - clib_memcpy (&local_addr.ip4, mp->local_addr, sizeof (local_addr.ip4)); - clib_memcpy (&peer_addr.ip4, mp->peer_addr, sizeof (peer_addr.ip4)); - } + BFD_UDP_API_PARAM_COMMON_CODE; - u32 bs_index = 0; - rv = bfd_udp_add_session (clib_net_to_host_u32 (mp->sw_if_index), + rv = bfd_udp_add_session (BFD_UDP_API_PARAM_FROM_MP (mp), clib_net_to_host_u32 (mp->desired_min_tx), clib_net_to_host_u32 (mp->required_min_rx), - mp->detect_mult, &local_addr, &peer_addr, - &bs_index); + mp->detect_mult, mp->is_authenticated, + clib_net_to_host_u32 (mp->conf_key_id), + mp->bfd_key_id); BAD_SW_IF_INDEX_LABEL; - REPLY_MACRO2 (VL_API_BFD_UDP_ADD_REPLY, - rmp->bs_index = clib_host_to_net_u32 (bs_index)); + REPLY_MACRO (VL_API_BFD_UDP_ADD_REPLY); } static void @@ -95,23 +105,9 @@ vl_api_bfd_udp_del_t_handler (vl_api_bfd_udp_del_t * mp) VALIDATE_SW_IF_INDEX (mp); - ip46_address_t local_addr; - memset (&local_addr, 0, sizeof (local_addr)); - ip46_address_t peer_addr; - memset (&peer_addr, 0, sizeof (peer_addr)); - if (mp->is_ipv6) - { - clib_memcpy (&local_addr.ip6, mp->local_addr, sizeof (local_addr.ip6)); - clib_memcpy (&peer_addr.ip6, mp->peer_addr, sizeof (peer_addr.ip6)); - } - else - { - clib_memcpy (&local_addr.ip4, mp->local_addr, sizeof (local_addr.ip4)); - clib_memcpy (&peer_addr.ip4, mp->peer_addr, sizeof (peer_addr.ip4)); - } + BFD_UDP_API_PARAM_COMMON_CODE; - rv = bfd_udp_del_session (clib_net_to_host_u32 (mp->sw_if_index), - &local_addr, &peer_addr); + rv = bfd_udp_del_session 
(BFD_UDP_API_PARAM_FROM_MP (mp)); BAD_SW_IF_INDEX_LABEL; REPLY_MACRO (VL_API_BFD_UDP_DEL_REPLY); @@ -131,7 +127,6 @@ send_bfd_udp_session_details (unix_shared_memory_queue_t * q, u32 context, memset (mp, 0, sizeof (*mp)); mp->_vl_msg_id = ntohs (VL_API_BFD_UDP_SESSION_DETAILS); mp->context = context; - mp->bs_index = clib_host_to_net_u32 (bs->bs_idx); mp->state = bs->local_state; bfd_udp_session_t *bus = &bs->udp; bfd_udp_key_t *key = &bus->key; @@ -198,15 +193,101 @@ vl_api_bfd_udp_session_dump_t_handler (vl_api_bfd_udp_session_dump_t * mp) } static void -vl_api_bfd_session_set_flags_t_handler (vl_api_bfd_session_set_flags_t * mp) +vl_api_bfd_udp_session_set_flags_t_handler (vl_api_bfd_udp_session_set_flags_t + * mp) { - vl_api_bfd_session_set_flags_reply_t *rmp; + vl_api_bfd_udp_session_set_flags_reply_t *rmp; int rv; - rv = bfd_session_set_flags (clib_net_to_host_u32 (mp->bs_index), - mp->admin_up_down); + BFD_UDP_API_PARAM_COMMON_CODE; + + rv = bfd_udp_session_set_flags (BFD_UDP_API_PARAM_FROM_MP (mp), + mp->admin_up_down); + + REPLY_MACRO (VL_API_BFD_UDP_SESSION_SET_FLAGS_REPLY); +} + +static void +vl_api_bfd_auth_set_key_t_handler (vl_api_bfd_auth_set_key_t * mp) +{ + vl_api_bfd_auth_set_key_reply_t *rmp; + int rv = bfd_auth_set_key (clib_net_to_host_u32 (mp->conf_key_id), + mp->auth_type, mp->key_len, mp->key); + + REPLY_MACRO (VL_API_BFD_AUTH_SET_KEY_REPLY); +} + +static void +vl_api_bfd_auth_del_key_t_handler (vl_api_bfd_auth_del_key_t * mp) +{ + vl_api_bfd_auth_del_key_reply_t *rmp; + int rv = bfd_auth_del_key (clib_net_to_host_u32 (mp->conf_key_id)); + + REPLY_MACRO (VL_API_BFD_AUTH_DEL_KEY_REPLY); +} + +static void +vl_api_bfd_auth_keys_dump_t_handler (vl_api_bfd_auth_keys_dump_t * mp) +{ + unix_shared_memory_queue_t *q; - REPLY_MACRO (VL_API_BFD_SESSION_SET_FLAGS_REPLY); + q = vl_api_client_index_to_input_queue (mp->client_index); + + if (q == 0) + return; + + bfd_auth_key_t *key = NULL; + vl_api_bfd_auth_keys_details_t *rmp = NULL; + + /* *INDENT-OFF* 
*/ + pool_foreach (key, bfd_main.auth_keys, ({ + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_BFD_AUTH_KEYS_DETAILS); + rmp->context = mp->context; + rmp->conf_key_id = clib_host_to_net_u32 (key->conf_key_id); + rmp->auth_type = key->auth_type; + rmp->use_count = clib_host_to_net_u32 (key->use_count); + vl_msg_api_send_shmem (q, (u8 *)&rmp); + })); + /* *INDENT-ON* */ +} + +static void +vl_api_bfd_udp_auth_activate_t_handler (vl_api_bfd_udp_auth_activate_t * mp) +{ + vl_api_bfd_udp_auth_activate_reply_t *rmp; + int rv; + + VALIDATE_SW_IF_INDEX (mp); + + BFD_UDP_API_PARAM_COMMON_CODE; + + rv = + bfd_udp_auth_activate (BFD_UDP_API_PARAM_FROM_MP (mp), + clib_net_to_host_u32 (mp->conf_key_id), + mp->bfd_key_id, mp->is_delayed); + + BAD_SW_IF_INDEX_LABEL; + REPLY_MACRO (VL_API_BFD_UDP_AUTH_ACTIVATE_REPLY); +} + +static void +vl_api_bfd_udp_auth_deactivate_t_handler (vl_api_bfd_udp_auth_deactivate_t * + mp) +{ + vl_api_bfd_udp_auth_deactivate_reply_t *rmp; + int rv; + + VALIDATE_SW_IF_INDEX (mp); + + BFD_UDP_API_PARAM_COMMON_CODE; + + rv = + bfd_udp_auth_deactivate (BFD_UDP_API_PARAM_FROM_MP (mp), mp->is_delayed); + + BAD_SW_IF_INDEX_LABEL; + REPLY_MACRO (VL_API_BFD_UDP_AUTH_DEACTIVATE_REPLY); } /* diff --git a/src/vnet/bfd/bfd_api.h b/src/vnet/bfd/bfd_api.h index a9bc5a1f..128a3dc4 100644 --- a/src/vnet/bfd/bfd_api.h +++ b/src/vnet/bfd/bfd_api.h @@ -24,17 +24,36 @@ #include #include -vnet_api_error_t bfd_udp_add_session (u32 sw_if_index, u32 desired_min_tx_us, - u32 required_min_rx_us, u8 detect_mult, - const ip46_address_t * local_addr, - const ip46_address_t * peer_addr, - u32 * bs_index); +vnet_api_error_t +bfd_udp_add_session (u32 sw_if_index, const ip46_address_t * local_addr, + const ip46_address_t * peer_addr, u32 desired_min_tx_us, + u32 required_min_rx_us, u8 detect_mult, + u8 is_authenticated, u32 conf_key_id, u8 bfd_key_id); vnet_api_error_t bfd_udp_del_session (u32 sw_if_index, const 
ip46_address_t * local_addr, const ip46_address_t * peer_addr); -vnet_api_error_t bfd_session_set_flags (u32 bs_index, u8 admin_up_down); +vnet_api_error_t bfd_udp_session_set_flags (u32 sw_if_index, + const ip46_address_t * local_addr, + const ip46_address_t * peer_addr, + u8 admin_up_down); + +vnet_api_error_t bfd_auth_set_key (u32 conf_key_id, u8 auth_type, u8 key_len, + const u8 * key); + +vnet_api_error_t bfd_auth_del_key (u32 conf_key_id); + +vnet_api_error_t bfd_udp_auth_activate (u32 sw_if_index, + const ip46_address_t * local_addr, + const ip46_address_t * peer_addr, + u32 conf_key_id, u8 bfd_key_id, + u8 is_delayed); + +vnet_api_error_t bfd_udp_auth_deactivate (u32 sw_if_index, + const ip46_address_t * local_addr, + const ip46_address_t * peer_addr, + u8 is_delayed); #endif /* __included_bfd_api_h__ */ diff --git a/src/vnet/bfd/bfd_main.c b/src/vnet/bfd/bfd_main.c index 7e1a2ef2..8f2fae2b 100644 --- a/src/vnet/bfd/bfd_main.c +++ b/src/vnet/bfd/bfd_main.c @@ -25,6 +25,9 @@ #include #include #include +#if WITH_LIBSSL > 0 +#include +#endif static u64 bfd_us_to_clocks (bfd_main_t * bm, u64 us) @@ -41,6 +44,23 @@ static u32 bfd_node_index_by_transport[] = { #undef F }; +static u8 * +format_bfd_auth_key (u8 * s, va_list * args) +{ + const bfd_auth_key_t *key = va_arg (*args, bfd_auth_key_t *); + if (key) + { + s = format (s, "{auth-type=%u:%s, conf-key-id=%u, use-count=%u}, ", + key->auth_type, bfd_auth_type_str (key->auth_type), + key->conf_key_id, key->use_count); + } + else + { + s = format (s, "{none}"); + } + return s; +} + /* * We actually send all bfd pkts to the "error" node after scanning * them, so the graph node has only one next-index. 
The "error-drop" @@ -67,6 +87,9 @@ bfd_set_defaults (bfd_main_t * bm, bfd_session_t * bs) bs->desired_min_tx_clocks = bfd_us_to_clocks (bm, bs->desired_min_tx_us); bs->remote_min_rx_us = 1; bs->remote_demand = 0; + bs->auth.remote_seq_number = 0; + bs->auth.remote_seq_number_known = 0; + bs->auth.local_seq_number = random_u32 (&bm->random_seed); } static void @@ -288,7 +311,7 @@ bfd_del_session (uword bs_idx) else { BFD_ERR ("no such session"); - return VNET_API_ERROR_BFD_NOENT; + return VNET_API_ERROR_BFD_ENOENT; } return 0; } @@ -319,16 +342,10 @@ bfd_state_string (bfd_state_e state) #undef F } -vnet_api_error_t -bfd_session_set_flags (u32 bs_idx, u8 admin_up_down) +void +bfd_session_set_flags (bfd_session_t * bs, u8 admin_up_down) { bfd_main_t *bm = &bfd_main; - if (pool_is_free_index (bm->sessions, bs_idx)) - { - BFD_ERR ("invalid bs_idx=%u", bs_idx); - return VNET_API_ERROR_BFD_NOENT; - } - bfd_session_t *bs = pool_elt_at_index (bm->sessions, bs_idx); if (admin_up_down) { bfd_set_state (bm, bs, BFD_STATE_down, 0); @@ -338,7 +355,6 @@ bfd_session_set_flags (u32 bs_idx, u8 admin_up_down) bfd_set_diag (bs, BFD_DIAG_CODE_neighbor_sig_down); bfd_set_state (bm, bs, BFD_STATE_admin_down, 0); } - return 0; } u8 * @@ -351,8 +367,8 @@ bfd_input_format_trace (u8 * s, va_list * args) if (t->len > STRUCT_SIZE_OF (bfd_pkt_t, head)) { s = format (s, "BFD v%u, diag=%u(%s), state=%u(%s),\n" - " flags=(P:%u, F:%u, C:%u, A:%u, D:%u, M:%u), detect_mult=%u, " - "length=%u\n", + " flags=(P:%u, F:%u, C:%u, A:%u, D:%u, M:%u), " + "detect_mult=%u, length=%u\n", bfd_pkt_get_version (pkt), bfd_pkt_get_diag_code (pkt), bfd_diag_code_string (bfd_pkt_get_diag_code (pkt)), bfd_pkt_get_state (pkt), @@ -362,8 +378,8 @@ bfd_input_format_trace (u8 * s, va_list * args) bfd_pkt_get_auth_present (pkt), bfd_pkt_get_demand (pkt), bfd_pkt_get_multipoint (pkt), pkt->head.detect_mult, pkt->head.length); - if (t->len >= sizeof (bfd_pkt_t) - && pkt->head.length >= sizeof (bfd_pkt_t)) + if (t->len >= 
sizeof (bfd_pkt_t) && + pkt->head.length >= sizeof (bfd_pkt_t)) { s = format (s, " my discriminator: %u\n", pkt->my_disc); s = format (s, " your discriminator: %u\n", pkt->your_disc); @@ -430,8 +446,7 @@ bfd_add_transport_layer (vlib_main_t * vm, vlib_buffer_t * b, } static vlib_buffer_t * -bfd_create_frame (vlib_main_t * vm, vlib_node_runtime_t * rt, - bfd_session_t * bs) +bfd_create_frame_to_next_node (vlib_main_t * vm, bfd_session_t * bs) { u32 bi; if (vlib_buffer_alloc (vm, &bi, 1) != 1) @@ -454,13 +469,82 @@ bfd_create_frame (vlib_main_t * vm, vlib_node_runtime_t * rt, return b; } +#if WITH_LIBSSL > 0 +static void +bfd_add_sha1_auth_section (vlib_buffer_t * b, bfd_session_t * bs) +{ + bfd_pkt_with_sha1_auth_t *pkt = vlib_buffer_get_current (b); + bfd_auth_sha1_t *auth = &pkt->sha1_auth; + b->current_length += sizeof (*auth); + pkt->pkt.head.length += sizeof (*auth); + bfd_pkt_set_auth_present (&pkt->pkt); + memset (auth, 0, sizeof (*auth)); + auth->type_len.type = bs->auth.curr_key->auth_type; + /* + * only meticulous authentication types require incrementing seq number + * for every message, but doing so doesn't violate the RFC + */ + ++bs->auth.local_seq_number; + auth->type_len.len = sizeof (bfd_auth_sha1_t); + auth->key_id = bs->auth.curr_bfd_key_id; + auth->seq_num = clib_host_to_net_u32 (bs->auth.local_seq_number); + /* + * first copy the password into the packet, then calculate the hash + * and finally replace the password with the calculated hash + */ + clib_memcpy (auth->hash, bs->auth.curr_key->key, + sizeof (bs->auth.curr_key->key)); + unsigned char hash[sizeof (auth->hash)]; + SHA1 ((unsigned char *) pkt, sizeof (*pkt), hash); + BFD_DBG ("hashing: %U", format_hex_bytes, pkt, sizeof (*pkt)); + clib_memcpy (auth->hash, hash, sizeof (hash)); +#endif +} + +static void +bfd_add_auth_section (vlib_buffer_t * b, bfd_session_t * bs) +{ + if (bs->auth.curr_key) + { + const bfd_auth_type_e auth_type = bs->auth.curr_key->auth_type; + switch (auth_type) + { + 
case BFD_AUTH_TYPE_reserved: + /* fallthrough */ + case BFD_AUTH_TYPE_simple_password: + /* fallthrough */ + case BFD_AUTH_TYPE_keyed_md5: + /* fallthrough */ + case BFD_AUTH_TYPE_meticulous_keyed_md5: + clib_warning ("Internal error, unexpected BFD auth type '%d'", + auth_type); + break; +#if WITH_LIBSSL > 0 + case BFD_AUTH_TYPE_keyed_sha1: + /* fallthrough */ + case BFD_AUTH_TYPE_meticulous_keyed_sha1: + bfd_add_sha1_auth_section (b, bs); + break; +#else + case BFD_AUTH_TYPE_keyed_sha1: + /* fallthrough */ + case BFD_AUTH_TYPE_meticulous_keyed_sha1: + clib_warning ("Internal error, unexpected BFD auth type '%d'", + auth_type); + break; +#endif + } + } +} + static void bfd_init_control_frame (vlib_buffer_t * b, bfd_session_t * bs) { bfd_pkt_t *pkt = vlib_buffer_get_current (b); - const u32 bfd_length = 24; - memset (pkt, 0, sizeof (*pkt)); + u32 bfd_length = 0; + bfd_length = sizeof (bfd_pkt_t); + memset (pkt, 0, sizeof (*pkt)); bfd_pkt_set_version (pkt, 1); bfd_pkt_set_diag_code (pkt, bs->local_diag); bfd_pkt_set_state (pkt, bs->local_state); @@ -477,6 +561,7 @@ bfd_init_control_frame (vlib_buffer_t * b, bfd_session_t * bs) pkt->req_min_rx = clib_host_to_net_u32 (bs->required_min_rx_us); pkt->req_min_echo_rx = clib_host_to_net_u32 (bs->required_min_echo_rx_us); b->current_length = bfd_length; + bfd_add_auth_section (b, bs); } static void @@ -502,7 +587,7 @@ bfd_send_periodic (vlib_main_t * vm, vlib_node_runtime_t * rt, if (now + bm->wheel_inaccuracy >= bs->tx_timeout_clocks) { BFD_DBG ("Send periodic control frame for bs_idx=%lu", bs->bs_idx); - vlib_buffer_t *b = bfd_create_frame (vm, rt, bs); + vlib_buffer_t *b = bfd_create_frame_to_next_node (vm, bs); if (!b) { return; @@ -522,7 +607,8 @@ bfd_send_periodic (vlib_main_t * vm, vlib_node_runtime_t * rt, } void -bfd_send_final (vlib_main_t * vm, vlib_buffer_t * b, bfd_session_t * bs) +bfd_init_final_control_frame (vlib_main_t * vm, vlib_buffer_t * b, + bfd_session_t * bs) { BFD_DBG ("Send final control frame for 
bs_idx=%lu", bs->bs_idx); bfd_init_control_frame (b, bs); @@ -624,13 +710,17 @@ bfd_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) * each event or timeout */ break; case BFD_EVENT_NEW_SESSION: - do + if (!pool_is_free_index (bm->sessions, *event_data)) { bfd_session_t *bs = pool_elt_at_index (bm->sessions, *event_data); bfd_send_periodic (vm, rt, bm, bs, now, 1); } - while (0); + else + { + BFD_DBG ("Ignoring event for non-existent session index %u", + (u32) * event_data); + } break; default: clib_warning ("BUG: event type 0x%wx", event_type); @@ -710,22 +800,25 @@ VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (bfd_hw_interface_up_down); static clib_error_t * bfd_main_init (vlib_main_t * vm) { +#if BFD_DEBUG + setbuf (stdout, NULL); +#endif bfd_main_t *bm = &bfd_main; bm->random_seed = random_default_seed (); bm->vlib_main = vm; bm->vnet_main = vnet_get_main (); memset (&bm->wheel, 0, sizeof (bm->wheel)); - bm->cpu_cps = 2590000000; // vm->clib_time.clocks_per_second; + bm->cpu_cps = vm->clib_time.clocks_per_second; BFD_DBG ("cps is %.2f", bm->cpu_cps); const u64 now = clib_cpu_time_now (); timing_wheel_init (&bm->wheel, now, bm->cpu_cps); bm->wheel_inaccuracy = 2 << bm->wheel.log2_clocks_per_bin; vlib_node_t *node = NULL; -#define F(t, n) \ - node = vlib_get_node_by_name (vm, (u8 *)n); \ - bfd_node_index_by_transport[BFD_TRANSPORT_##t] = node->index;\ - BFD_DBG("node '%s' has index %u", n, node->index); +#define F(t, n) \ + node = vlib_get_node_by_name (vm, (u8 *)n); \ + bfd_node_index_by_transport[BFD_TRANSPORT_##t] = node->index; \ + BFD_DBG ("node '%s' has index %u", n, node->index); foreach_bfd_transport (F); #undef F return 0; @@ -750,6 +843,14 @@ bfd_get_session (bfd_main_t * bm, bfd_transport_t t) void bfd_put_session (bfd_main_t * bm, bfd_session_t * bs) { + if (bs->auth.curr_key) + { + --bs->auth.curr_key->use_count; + } + if (bs->auth.next_key) + { + --bs->auth.next_key->use_count; + } hash_unset (bm->session_by_disc, bs->local_discr); 
pool_put (bm->sessions, bs); } @@ -793,7 +894,7 @@ bfd_verify_pkt_common (const bfd_pkt_t * pkt) } if (pkt->head.length < sizeof (bfd_pkt_t) || (bfd_pkt_get_auth_present (pkt) && - pkt->head.length < sizeof (bfd_pkt_with_auth_t))) + pkt->head.length < sizeof (bfd_pkt_with_common_auth_t))) { BFD_ERR ("BFD verification failed - unexpected length: '%d' (auth " "present: %d)", @@ -831,6 +932,226 @@ bfd_verify_pkt_common (const bfd_pkt_t * pkt) return 1; } +static void +bfd_session_switch_auth_to_next (bfd_session_t * bs) +{ + BFD_DBG ("Switching authentication key from %U to %U for bs_idx=%u", + format_bfd_auth_key, bs->auth.curr_key, format_bfd_auth_key, + bs->auth.next_key, bs->bs_idx); + bs->auth.is_delayed = 0; + if (bs->auth.curr_key) + { + --bs->auth.curr_key->use_count; + } + bs->auth.curr_key = bs->auth.next_key; + bs->auth.next_key = NULL; + bs->auth.curr_bfd_key_id = bs->auth.next_bfd_key_id; +} + +static int +bfd_auth_type_is_meticulous (bfd_auth_type_e auth_type) +{ + if (BFD_AUTH_TYPE_meticulous_keyed_md5 == auth_type || + BFD_AUTH_TYPE_meticulous_keyed_sha1 == auth_type) + { + return 1; + } + return 0; +} + +static int +bfd_verify_pkt_auth_seq_num (bfd_session_t * bs, + u32 received_seq_num, int is_meticulous) +{ + /* + * RFC 5880 6.8.1: + * + * This variable MUST be set to zero after no packets have been + * received on this session for at least twice the Detection Time. + */ + u64 now = clib_cpu_time_now (); + if (now - bs->last_rx_clocks > bs->detection_time_clocks * 2) + { + BFD_DBG ("BFD peer unresponsive for %lu clocks, which is > 2 * " + "detection_time=%u clocks, resetting remote_seq_number_known " + "flag", + now - bs->last_rx_clocks, bs->detection_time_clocks * 2); + bs->auth.remote_seq_number_known = 0; + } + if (bs->auth.remote_seq_number_known) + { + /* remote sequence number is known, verify its validity */ + const u32 max_u32 = 0xffffffff; + /* the calculation might wrap, account for the special case... 
*/ + if (bs->auth.remote_seq_number > max_u32 - 3 * bs->local_detect_mult) + { + /* + * special case + * + * x y z + * |----------+----------------------------+-----------| + * 0 ^ ^ 0xffffffff + * | remote_seq_num------+ + * | + * +-----(remote_seq_num + 3*detect_mult) % * 0xffffffff + * + * x + y + z = 0xffffffff + * x + z = 3 * detect_mult + */ + const u32 z = max_u32 - bs->auth.remote_seq_number; + const u32 x = 3 * bs->local_detect_mult - z; + if (received_seq_num > x && + received_seq_num < bs->auth.remote_seq_number + is_meticulous) + { + BFD_ERR + ("Recvd sequence number=%u out of ranges <0, %u>, <%u, %u>", + received_seq_num, x, + bs->auth.remote_seq_number + is_meticulous, max_u32); + return 0; + } + } + else + { + /* regular case */ + const u32 min = bs->auth.remote_seq_number + is_meticulous; + const u32 max = + bs->auth.remote_seq_number + 3 * bs->local_detect_mult; + if (received_seq_num < min || received_seq_num > max) + { + BFD_ERR ("Recvd sequence number=%u out of range <%u, %u>", + received_seq_num, min, max); + return 0; + } + } + } + return 1; +} + +static int +bfd_verify_pkt_auth_key_sha1 (const bfd_pkt_t * pkt, u32 pkt_size, + bfd_session_t * bs, u8 bfd_key_id, + bfd_auth_key_t * auth_key) +{ + ASSERT (auth_key->auth_type == BFD_AUTH_TYPE_keyed_sha1 || + auth_key->auth_type == BFD_AUTH_TYPE_meticulous_keyed_sha1); + + u8 result[SHA_DIGEST_LENGTH]; + bfd_pkt_with_common_auth_t *with_common = (void *) pkt; + if (pkt_size < sizeof (*with_common)) + { + BFD_ERR ("Packet size too small to hold authentication common header"); + return 0; + } + if (with_common->common_auth.type != auth_key->auth_type) + { + BFD_ERR ("BFD auth type mismatch, packet auth=%d:%s doesn't match " + "in-use auth=%d:%s", + with_common->common_auth.type, + bfd_auth_type_str (with_common->common_auth.type), + auth_key->auth_type, bfd_auth_type_str (auth_key->auth_type)); + return 0; + } + bfd_pkt_with_sha1_auth_t *with_sha1 = (void *) pkt; + if (pkt_size < sizeof (*with_sha1) 
|| + with_sha1->sha1_auth.type_len.len < sizeof (with_sha1->sha1_auth)) + { + BFD_ERR + ("BFD size mismatch, payload size=%u, expected=%u, auth_len=%u, " + "expected=%u", pkt_size, sizeof (*with_sha1), + with_sha1->sha1_auth.type_len.len, sizeof (with_sha1->sha1_auth)); + return 0; + } + if (with_sha1->sha1_auth.key_id != bfd_key_id) + { + BFD_ERR + ("BFD key ID mismatch, packet key ID=%u doesn't match key ID=%u%s", + with_sha1->sha1_auth.key_id, bfd_key_id, + bs-> + auth.is_delayed ? " (but a delayed auth change is scheduled)" : ""); + return 0; + } + SHA_CTX ctx; + if (!SHA1_Init (&ctx)) + { + BFD_ERR ("SHA1_Init failed"); + return 0; + } + /* ignore last 20 bytes - use the actual key data instead pkt data */ + if (!SHA1_Update (&ctx, with_sha1, + sizeof (*with_sha1) - sizeof (with_sha1->sha1_auth.hash))) + { + BFD_ERR ("SHA1_Update failed"); + return 0; + } + if (!SHA1_Update (&ctx, auth_key->key, sizeof (auth_key->key))) + { + BFD_ERR ("SHA1_Update failed"); + return 0; + } + if (!SHA1_Final (result, &ctx)) + { + BFD_ERR ("SHA1_Final failed"); + return 0; + } + if (0 == memcmp (result, with_sha1->sha1_auth.hash, SHA_DIGEST_LENGTH)) + { + return 1; + } + BFD_ERR ("SHA1 hash: %U doesn't match the expected value: %U", + format_hex_bytes, with_sha1->sha1_auth.hash, SHA_DIGEST_LENGTH, + format_hex_bytes, result, SHA_DIGEST_LENGTH); + return 0; +} + +static int +bfd_verify_pkt_auth_key (const bfd_pkt_t * pkt, u32 pkt_size, + bfd_session_t * bs, u8 bfd_key_id, + bfd_auth_key_t * auth_key) +{ + switch (auth_key->auth_type) + { + case BFD_AUTH_TYPE_reserved: + clib_warning ("Internal error, unexpected auth_type=%d:%s", + auth_key->auth_type, + bfd_auth_type_str (auth_key->auth_type)); + return 0; + case BFD_AUTH_TYPE_simple_password: + clib_warning + ("Internal error, not implemented, unexpected auth_type=%d:%s", + auth_key->auth_type, bfd_auth_type_str (auth_key->auth_type)); + return 0; + case BFD_AUTH_TYPE_keyed_md5: + /* fallthrough */ + case 
BFD_AUTH_TYPE_meticulous_keyed_md5: + clib_warning + ("Internal error, not implemented, unexpected auth_type=%d:%s", + auth_key->auth_type, bfd_auth_type_str (auth_key->auth_type)); + return 0; + case BFD_AUTH_TYPE_keyed_sha1: + /* fallthrough */ + case BFD_AUTH_TYPE_meticulous_keyed_sha1: +#if WITH_LIBSSL > 0 + do + { + const u32 seq_num = clib_net_to_host_u32 (((bfd_pkt_with_sha1_auth_t + *) pkt)-> + sha1_auth.seq_num); + return bfd_verify_pkt_auth_seq_num (bs, seq_num, + bfd_auth_type_is_meticulous + (auth_key->auth_type)) + && bfd_verify_pkt_auth_key_sha1 (pkt, pkt_size, bs, bfd_key_id, + auth_key); + } + while (0); +#else + clib_warning + ("Internal error, attempt to use SHA1 without SSL support"); + return 0; +#endif + } + return 0; +} + /** * @brief verify bfd packet - authentication * @@ -839,30 +1160,81 @@ bfd_verify_pkt_common (const bfd_pkt_t * pkt) * @return 1 if bfd packet is valid */ int -bfd_verify_pkt_session (const bfd_pkt_t * pkt, u16 pkt_size, - const bfd_session_t * bs) +bfd_verify_pkt_auth (const bfd_pkt_t * pkt, u16 pkt_size, bfd_session_t * bs) { - const bfd_pkt_with_auth_t *with_auth = (bfd_pkt_with_auth_t *) pkt; - if (!bfd_pkt_get_auth_present (pkt)) + if (bfd_pkt_get_auth_present (pkt)) { - if (pkt_size > sizeof (*pkt)) + /* authentication present in packet */ + if (!bs->auth.curr_key) { - BFD_ERR ("BFD verification failed - unexpected packet size '%d' " - "(auth not present)", pkt_size); - return 0; + /* currently not using authentication - can we turn it on? 
*/ + if (bs->auth.is_delayed && bs->auth.next_key) + { + /* yes, switch is scheduled - make sure the auth is valid */ + if (bfd_verify_pkt_auth_key (pkt, pkt_size, bs, + bs->auth.next_bfd_key_id, + bs->auth.next_key)) + { + /* auth matches next key, do the switch, packet is valid */ + bfd_session_switch_auth_to_next (bs); + return 1; + } + } + } + else + { + /* yes, using authentication, verify the key */ + if (bfd_verify_pkt_auth_key (pkt, pkt_size, bs, + bs->auth.curr_bfd_key_id, + bs->auth.curr_key)) + { + /* verification passed, packet is valid */ + return 1; + } + else + { + /* verification failed - but maybe we need to switch key */ + if (bs->auth.is_delayed && bs->auth.next_key) + { + /* delayed switch present, verify if that key works */ + if (bfd_verify_pkt_auth_key (pkt, pkt_size, bs, + bs->auth.next_bfd_key_id, + bs->auth.next_key)) + { + /* auth matches next key, switch key, packet is valid */ + bfd_session_switch_auth_to_next (bs); + return 1; + } + } + } } } else { - if (!with_auth->auth.type) + /* authentication in packet not present */ + if (pkt_size > sizeof (*pkt)) { - BFD_ERR ("BFD verification failed - unexpected auth type: '%d'", - with_auth->auth.type); + BFD_ERR ("BFD verification failed - unexpected packet size '%d' " + "(auth not present)", pkt_size); return 0; } - /* TODO FIXME - implement the actual verification */ + if (bs->auth.curr_key) + { + /* currently authenticating - could we turn it off? 
*/ + if (bs->auth.is_delayed && !bs->auth.next_key) + { + /* yes, delayed switch to NULL key is scheduled */ + bfd_session_switch_auth_to_next (bs); + return 1; + } + } + else + { + /* no auth in packet, no auth in use - packet is valid */ + return 1; + } } - return 1; + return 0; } void @@ -879,6 +1251,38 @@ bfd_consume_pkt (bfd_main_t * bm, const bfd_pkt_t * pkt, u32 bs_idx) bs->remote_demand = bfd_pkt_get_demand (pkt); u64 now = clib_cpu_time_now (); bs->last_rx_clocks = now; + if (bfd_pkt_get_auth_present (pkt)) + { + bfd_auth_type_e auth_type = + ((bfd_pkt_with_common_auth_t *) (pkt))->common_auth.type; + switch (auth_type) + { + case BFD_AUTH_TYPE_reserved: + /* fallthrough */ + case BFD_AUTH_TYPE_simple_password: + /* fallthrough */ + case BFD_AUTH_TYPE_keyed_md5: + /* fallthrough */ + case BFD_AUTH_TYPE_meticulous_keyed_md5: + clib_warning ("Internal error, unexpected auth_type=%d:%s", + auth_type, bfd_auth_type_str (auth_type)); + break; + case BFD_AUTH_TYPE_keyed_sha1: + /* fallthrough */ + case BFD_AUTH_TYPE_meticulous_keyed_sha1: + do + { + bfd_pkt_with_sha1_auth_t *with_sha1 = + (bfd_pkt_with_sha1_auth_t *) pkt; + bs->auth.remote_seq_number = + clib_net_to_host_u32 (with_sha1->sha1_auth.seq_num); + bs->auth.remote_seq_number_known = 1; + BFD_DBG ("Received sequence number %u", + bs->auth.remote_seq_number); + } + while (0); + } + } bs->remote_desired_min_tx_us = clib_net_to_host_u32 (pkt->des_min_tx); bs->remote_detect_mult = pkt->head.detect_mult; bfd_set_remote_required_min_rx (bm, bs, now, @@ -933,25 +1337,129 @@ u8 * format_bfd_session (u8 * s, va_list * args) { const bfd_session_t *bs = va_arg (*args, bfd_session_t *); - return format (s, "BFD(%u): bfd.SessionState=%s, " - "bfd.RemoteSessionState=%s, " - "bfd.LocalDiscr=%u, " - "bfd.RemoteDiscr=%u, " - "bfd.LocalDiag=%s, " - "bfd.DesiredMinTxInterval=%u, " - "bfd.RequiredMinRxInterval=%u, " - "bfd.RequiredMinEchoRxInterval=%u, " - "bfd.RemoteMinRxInterval=%u, " - "bfd.DemandMode=%s, " - 
"bfd.RemoteDemandMode=%s, " - "bfd.DetectMult=%u, ", - bs->bs_idx, bfd_state_string (bs->local_state), - bfd_state_string (bs->remote_state), bs->local_discr, - bs->remote_discr, bfd_diag_code_string (bs->local_diag), - bs->desired_min_tx_us, bs->required_min_rx_us, - bs->required_min_echo_rx_us, bs->remote_min_rx_us, - (bs->local_demand ? "yes" : "no"), - (bs->remote_demand ? "yes" : "no"), bs->local_detect_mult); + s = format (s, "BFD(%u): bfd.SessionState=%s, " + "bfd.RemoteSessionState=%s, " + "bfd.LocalDiscr=%u, " + "bfd.RemoteDiscr=%u, " + "bfd.LocalDiag=%s, " + "bfd.DesiredMinTxInterval=%u, " + "bfd.RequiredMinRxInterval=%u, " + "bfd.RequiredMinEchoRxInterval=%u, " + "bfd.RemoteMinRxInterval=%u, " + "bfd.DemandMode=%s, " + "bfd.RemoteDemandMode=%s, " + "bfd.DetectMult=%u, " + "Auth: {local-seq-num=%u, " + "remote-seq-num=%u, " + "is-delayed=%s, " + "curr-key=%U, " + "next-key=%U}", + bs->bs_idx, bfd_state_string (bs->local_state), + bfd_state_string (bs->remote_state), bs->local_discr, + bs->remote_discr, bfd_diag_code_string (bs->local_diag), + bs->desired_min_tx_us, bs->required_min_rx_us, + bs->required_min_echo_rx_us, bs->remote_min_rx_us, + (bs->local_demand ? "yes" : "no"), + (bs->remote_demand ? "yes" : "no"), bs->local_detect_mult, + bs->auth.local_seq_number, bs->auth.remote_seq_number, + (bs->auth.is_delayed ? 
"yes" : "no"), format_bfd_auth_key, + bs->auth.curr_key, format_bfd_auth_key, bs->auth.next_key); + return s; +} + +unsigned +bfd_auth_type_supported (bfd_auth_type_e auth_type) +{ + if (auth_type == BFD_AUTH_TYPE_keyed_sha1 || + auth_type == BFD_AUTH_TYPE_meticulous_keyed_sha1) + { + return 1; + } + return 0; +} + +vnet_api_error_t +bfd_auth_activate (bfd_session_t * bs, u32 conf_key_id, + u8 bfd_key_id, u8 is_delayed) +{ + bfd_main_t *bm = &bfd_main; + const uword *key_idx_p = + hash_get (bm->auth_key_by_conf_key_id, conf_key_id); + if (!key_idx_p) + { + clib_warning ("Authentication key with config ID %u doesn't exist)", + conf_key_id); + return VNET_API_ERROR_BFD_ENOENT; + } + const uword key_idx = *key_idx_p; + bfd_auth_key_t *key = pool_elt_at_index (bm->auth_keys, key_idx); + if (is_delayed) + { + if (bs->auth.next_key == key) + { + /* already using this key, no changes required */ + return 0; + } + bs->auth.next_key = key; + bs->auth.next_bfd_key_id = bfd_key_id; + bs->auth.is_delayed = 1; + } + else + { + if (bs->auth.curr_key == key) + { + /* already using this key, no changes required */ + return 0; + } + if (bs->auth.curr_key) + { + --bs->auth.curr_key->use_count; + } + bs->auth.curr_key = key; + bs->auth.curr_bfd_key_id = bfd_key_id; + bs->auth.is_delayed = 0; + } + ++key->use_count; + BFD_DBG ("Session auth modified: %U", format_bfd_session, bs); + return 0; +} + +vnet_api_error_t +bfd_auth_deactivate (bfd_session_t * bs, u8 is_delayed) +{ +#if WITH_LIBSSL > 0 + if (!is_delayed) + { + /* not delayed - deactivate the current key right now */ + if (bs->auth.curr_key) + { + --bs->auth.curr_key->use_count; + bs->auth.curr_key = NULL; + } + bs->auth.is_delayed = 0; + } + else + { + /* delayed - mark as so */ + bs->auth.is_delayed = 1; + } + /* + * clear the next key unconditionally - either the auth change is not delayed + * in which case the caller expects the session to not use authentication + * from this point forward, or it is delayed, in which case 
the next_key + * needs to be set to NULL to make it so in the future + */ + if (bs->auth.next_key) + { + --bs->auth.next_key->use_count; + bs->auth.next_key = NULL; + } + BFD_DBG ("Session auth modified: %U", format_bfd_session, bs); + return 0; +#else + clib_warning ("SSL missing, cannot deactivate BFD authentication"); + return VNET_API_ERROR_BFD_NOTSUPP; +#endif } bfd_main_t bfd_main; diff --git a/src/vnet/bfd/bfd_main.h b/src/vnet/bfd/bfd_main.h index 20da381a..b66b79e7 100644 --- a/src/vnet/bfd/bfd_main.h +++ b/src/vnet/bfd/bfd_main.h @@ -25,7 +25,7 @@ #include #define foreach_bfd_transport(F) \ - F (UDP4, "ip4-rewrite") \ + F (UDP4, "ip4-rewrite") \ F (UDP6, "ip6-rewrite") typedef enum @@ -46,6 +46,24 @@ typedef enum #undef F } bfd_mode_e; +typedef struct +{ + /* global configuration key ID */ + u32 conf_key_id; + + /* keeps track of how many sessions reference this key */ + u32 use_count; + + /* + * key data directly usable for bfd purposes - already padded with zeroes + * (so we don't need the actual length) + */ + u8 key[20]; + + /* authentication type for this key */ + bfd_auth_type_e auth_type; +} bfd_auth_key_t; + typedef struct { /* index in bfd_main.sessions pool */ @@ -120,6 +138,40 @@ typedef struct /* detection time */ u64 detection_time_clocks; + /* authentication information */ + struct + { + /* current key in use */ + bfd_auth_key_t *curr_key; + + /* + * set to next key to use if delayed switch is enabled - in that case + * the key is switched when first incoming packet is signed with next_key + */ + bfd_auth_key_t *next_key; + + /* sequence number incremented occasionally or always (if meticulous) */ + u32 local_seq_number; + + /* remote sequence number */ + u32 remote_seq_number; + + /* set to 1 if remote sequence number is known */ + u8 remote_seq_number_known; + + /* current key ID sent out in bfd packet */ + u8 curr_bfd_key_id; + + /* key ID to use when switched to next_key */ + u8 next_bfd_key_id; + + /* + * set to 1 if delayed action is 
pending, which might be activation + * of authentication, change of key or deactivation + */ + u8 is_delayed; + } auth; + /* transport type for this session */ bfd_transport_t transport; @@ -129,12 +181,6 @@ typedef struct }; } bfd_session_t; -typedef struct -{ - u32 client_index; - u32 client_pid; -} event_subscriber_t; - typedef struct { /* pool of bfd sessions context data */ @@ -162,6 +208,12 @@ typedef struct /* for generating random numbers */ u32 random_seed; + /* pool of authentication keys */ + bfd_auth_key_t *auth_keys; + + /* hashmap - index in pool auth_keys by conf_key_id */ + u32 *auth_key_by_conf_key_id; + } bfd_main_t; extern bfd_main_t bfd_main; @@ -202,12 +254,17 @@ bfd_session_t *bfd_find_session_by_disc (bfd_main_t * bm, u32 disc); void bfd_session_start (bfd_main_t * bm, bfd_session_t * bs); void bfd_consume_pkt (bfd_main_t * bm, const bfd_pkt_t * bfd, u32 bs_idx); int bfd_verify_pkt_common (const bfd_pkt_t * pkt); -int bfd_verify_pkt_session (const bfd_pkt_t * pkt, u16 pkt_size, - const bfd_session_t * bs); +int bfd_verify_pkt_auth (const bfd_pkt_t * pkt, u16 pkt_size, + bfd_session_t * bs); void bfd_event (bfd_main_t * bm, bfd_session_t * bs); -void bfd_send_final (vlib_main_t * vm, vlib_buffer_t * b, bfd_session_t * bs); +void bfd_init_final_control_frame (vlib_main_t * vm, vlib_buffer_t * b, + bfd_session_t * bs); u8 *format_bfd_session (u8 * s, va_list * args); - +void bfd_session_set_flags (bfd_session_t * bs, u8 admin_up_down); +unsigned bfd_auth_type_supported (bfd_auth_type_e auth_type); +vnet_api_error_t bfd_auth_activate (bfd_session_t * bs, u32 conf_key_id, + u8 bfd_key_id, u8 is_delayed); +vnet_api_error_t bfd_auth_deactivate (bfd_session_t * bs, u8 is_delayed); #define USEC_PER_MS 1000LL #define USEC_PER_SECOND (1000 * USEC_PER_MS) diff --git a/src/vnet/bfd/bfd_protocol.c b/src/vnet/bfd/bfd_protocol.c index ede9536f..180fc6df 100644 --- a/src/vnet/bfd/bfd_protocol.c +++ b/src/vnet/bfd/bfd_protocol.c @@ -1,74 +1,159 @@ +/* + * 
Copyright (c) 2011-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #include -u8 bfd_pkt_get_version (const bfd_pkt_t *pkt) +u8 +bfd_pkt_get_version (const bfd_pkt_t * pkt) { return pkt->head.vers_diag >> 5; } -void bfd_pkt_set_version (bfd_pkt_t *pkt, int version) +void +bfd_pkt_set_version (bfd_pkt_t * pkt, int version) { pkt->head.vers_diag = - (version << 5) | (pkt->head.vers_diag & ((1 << 5) - 1)); + (version << 5) | (pkt->head.vers_diag & ((1 << 5) - 1)); } -u8 bfd_pkt_get_diag_code (const bfd_pkt_t *pkt) +u8 +bfd_pkt_get_diag_code (const bfd_pkt_t * pkt) { return pkt->head.vers_diag & ((1 << 5) - 1); } -void bfd_pkt_set_diag_code (bfd_pkt_t *pkt, int value) +void +bfd_pkt_set_diag_code (bfd_pkt_t * pkt, int value) { pkt->head.vers_diag = - (pkt->head.vers_diag & ~((1 << 5) - 1)) | (value & ((1 << 5) - 1)); + (pkt->head.vers_diag & ~((1 << 5) - 1)) | (value & ((1 << 5) - 1)); } -u8 bfd_pkt_get_state (const bfd_pkt_t *pkt) +u8 +bfd_pkt_get_state (const bfd_pkt_t * pkt) { return pkt->head.sta_flags >> 6; } -void bfd_pkt_set_state (bfd_pkt_t *pkt, int value) +void +bfd_pkt_set_state (bfd_pkt_t * pkt, int value) { pkt->head.sta_flags = (value << 6) | (pkt->head.sta_flags & ((1 << 6) - 1)); } -u8 bfd_pkt_get_poll (const bfd_pkt_t *pkt) +u8 +bfd_pkt_get_poll (const bfd_pkt_t * pkt) { return (pkt->head.sta_flags >> 5) & 1; } -void bfd_pkt_set_final (bfd_pkt_t *pkt) { pkt->head.sta_flags |= 1 << 5; } +void +bfd_pkt_set_poll 
(bfd_pkt_t * pkt) +{ + pkt->head.sta_flags |= 1 << 5; +} -u8 bfd_pkt_get_final (const bfd_pkt_t *pkt) +u8 +bfd_pkt_get_final (const bfd_pkt_t * pkt) { return (pkt->head.sta_flags >> 4) & 1; } -void bfd_pkt_set_poll (bfd_pkt_t *pkt); -u8 bfd_pkt_get_control_plane_independent (const bfd_pkt_t *pkt) +void +bfd_pkt_set_final (bfd_pkt_t * pkt) +{ + pkt->head.sta_flags |= 1 << 4; +} + +u8 +bfd_pkt_get_control_plane_independent (const bfd_pkt_t * pkt) { return (pkt->head.sta_flags >> 3) & 1; } -void bfd_pkt_set_control_plane_independent (bfd_pkt_t *pkt); +void +bfd_pkt_set_control_plane_independent (bfd_pkt_t * pkt) +{ + pkt->head.sta_flags |= 1 << 3; +} -u8 bfd_pkt_get_auth_present (const bfd_pkt_t *pkt) +u8 +bfd_pkt_get_auth_present (const bfd_pkt_t * pkt) { return (pkt->head.sta_flags >> 2) & 1; } -void bfd_pkt_set_auth_present (bfd_pkt_t *pkt); +void +bfd_pkt_set_auth_present (bfd_pkt_t * pkt) +{ + pkt->head.sta_flags |= 1 << 2; +} -u8 bfd_pkt_get_demand (const bfd_pkt_t *pkt) +u8 +bfd_pkt_get_demand (const bfd_pkt_t * pkt) { return (pkt->head.sta_flags >> 1) & 1; } -void bfd_pkt_set_demand (bfd_pkt_t *pkt) { pkt->head.sta_flags |= 1 << 1; } +void +bfd_pkt_set_demand (bfd_pkt_t * pkt) +{ + pkt->head.sta_flags |= 1 << 1; +} + +u8 +bfd_pkt_get_multipoint (const bfd_pkt_t * pkt) +{ + return (pkt->head.sta_flags >> 0) & 1; +} + +void +bfd_pkt_set_multipoint (bfd_pkt_t * pkt) +{ + pkt->head.sta_flags |= 1 << 0; +} + +u32 +bfd_max_len_for_auth_type (bfd_auth_type_e auth_type) +{ +#define F(t, l, n, s) \ + if (auth_type == t) \ + { \ + return l; \ + } + foreach_bfd_auth_type (F); +#undef F + return 0; +} -u8 bfd_pkt_get_multipoint (const bfd_pkt_t *pkt) +const char * +bfd_auth_type_str (bfd_auth_type_e auth_type) { - return pkt->head.sta_flags & 1; +#define F(t, l, n, s) \ + if (auth_type == t) \ + { \ + return s; \ + } + foreach_bfd_auth_type (F); +#undef F + return "UNKNOWN"; } -void bfd_pkt_set_multipoint (bfd_pkt_t *pkt); +/* + * fd.io coding-style-patch-verification: ON 
+ * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/bfd/bfd_protocol.h b/src/vnet/bfd/bfd_protocol.h index cf751b3b..cdbb8fa7 100644 --- a/src/vnet/bfd/bfd_protocol.h +++ b/src/vnet/bfd/bfd_protocol.h @@ -22,45 +22,93 @@ #include #include +/* auth type value, max key length, name, description */ +#define foreach_bfd_auth_type(F) \ + F (0, 0, reserved, "Reserved") \ + F (1, 16, simple_password, "Simple Password") \ + F (2, 16, keyed_md5, "Keyed MD5") \ + F (3, 16, meticulous_keyed_md5, "Meticulous Keyed MD5") \ + F (4, 20, keyed_sha1, "Keyed SHA1") \ + F (5, 20, meticulous_keyed_sha1, "Meticulous Keyed SHA1") + +#define BFD_AUTH_TYPE_NAME(t) BFD_AUTH_TYPE_##t + +typedef enum +{ +#define F(n, l, t, s) BFD_AUTH_TYPE_NAME (t) = n, + foreach_bfd_auth_type (F) +#undef F +} bfd_auth_type_e; + +u32 bfd_max_len_for_auth_type (bfd_auth_type_e auth_type); +const char *bfd_auth_type_str (bfd_auth_type_e auth_type); + /* *INDENT-OFF* */ typedef CLIB_PACKED (struct { - /* - An optional Authentication Section MAY be present: - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Auth Type | Auth Len | Authentication Data... 
| - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - */ u8 type; u8 len; - u8 data[0]; -}) bfd_auth_t; +}) bfd_auth_common_t; /* *INDENT-ON* */ /* *INDENT-OFF* */ typedef CLIB_PACKED (struct { /* - The Mandatory Section of a BFD Control packet has the following - format: - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - |Vers | Diag |Sta|P|F|C|A|D|M| Detect Mult | Length | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | My Discriminator | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Your Discriminator | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Desired Min TX Interval | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Required Min RX Interval | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Required Min Echo RX Interval | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - */ + * 4.4. Keyed SHA1 and Meticulous Keyed SHA1 Authentication Section Format + + * If the Authentication Present (A) bit is set in the header, and the + * Authentication Type field contains 4 (Keyed SHA1) or 5 (Meticulous + * Keyed SHA1), the Authentication Section has the following format: + + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Auth Type | Auth Len | Auth Key ID | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Sequence Number | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Auth Key/Hash... | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ... 
| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ + bfd_auth_common_t type_len; + u8 key_id; + u8 reserved; + u32 seq_num; + /* + * Auth Key/Hash + + * This field carries the 20-byte SHA1 hash for the packet. When the + * hash is calculated, the shared SHA1 key is stored in this field, + * padded to a length of 20 bytes with trailing zero bytes if needed. + * The shared key MUST be encoded and configured to section 6.7.4. + */ + u8 hash[20]; +}) bfd_auth_sha1_t; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +typedef CLIB_PACKED (struct { + /* + * The Mandatory Section of a BFD Control packet has the following + * format: + + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |Vers | Diag |Sta|P|F|C|A|D|M| Detect Mult | Length | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | My Discriminator | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Your Discriminator | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Desired Min TX Interval | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Required Min RX Interval | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Required Min Echo RX Interval | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ struct { u8 vers_diag; @@ -79,8 +127,15 @@ typedef CLIB_PACKED (struct { /* *INDENT-OFF* */ typedef CLIB_PACKED (struct { bfd_pkt_t pkt; - bfd_auth_t auth; -}) bfd_pkt_with_auth_t; + bfd_auth_common_t common_auth; +}) bfd_pkt_with_common_auth_t; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +typedef CLIB_PACKED (struct { + bfd_pkt_t pkt; + bfd_auth_sha1_t sha1_auth; +}) bfd_pkt_with_sha1_auth_t; /* *INDENT-ON* */ u8 bfd_pkt_get_version (const bfd_pkt_t * pkt); diff --git a/src/vnet/bfd/bfd_udp.c b/src/vnet/bfd/bfd_udp.c index dfd030ae..443f4253 100644 --- 
a/src/vnet/bfd/bfd_udp.c +++ b/src/vnet/bfd/bfd_udp.c @@ -45,7 +45,7 @@ bfd_add_udp4_transport (vlib_main_t * vm, vlib_buffer_t * b, ip4_header_t ip4; udp_header_t udp; } ip4_udp_headers; - ip4_udp_headers *headers = vlib_buffer_get_current (b); + ip4_udp_headers *headers = NULL; vlib_buffer_advance (b, -sizeof (*headers)); headers = vlib_buffer_get_current (b); memset (headers, 0, sizeof (*headers)); @@ -82,8 +82,9 @@ bfd_add_udp6_transport (vlib_main_t * vm, vlib_buffer_t * b, ip6_header_t ip6; udp_header_t udp; } ip6_udp_headers; - vlib_buffer_advance (b, -sizeof (ip6_udp_headers)); - ip6_udp_headers *headers = vlib_buffer_get_current (b); + ip6_udp_headers *headers = NULL; + vlib_buffer_advance (b, -sizeof (*headers)); + headers = vlib_buffer_get_current (b); memset (headers, 0, sizeof (*headers)); headers->ip6.ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (0x6 << 28); @@ -125,16 +126,27 @@ bfd_lookup_session (bfd_udp_main_t * bum, const bfd_udp_key_t * key) return 0; } +static void +bfd_udp_key_init (bfd_udp_key_t * key, u32 sw_if_index, + const ip46_address_t * local_addr, + const ip46_address_t * peer_addr) +{ + memset (key, 0, sizeof (*key)); + key->sw_if_index = sw_if_index; + key->local_addr.as_u64[0] = local_addr->as_u64[0]; + key->local_addr.as_u64[1] = local_addr->as_u64[1]; + key->peer_addr.as_u64[0] = peer_addr->as_u64[0]; + key->peer_addr.as_u64[1] = peer_addr->as_u64[1]; +} + static vnet_api_error_t bfd_udp_add_session_internal (bfd_udp_main_t * bum, u32 sw_if_index, u32 desired_min_tx_us, u32 required_min_rx_us, u8 detect_mult, const ip46_address_t * local_addr, const ip46_address_t * peer_addr, - u32 * bs_index) + bfd_session_t ** bs_out) { - vnet_sw_interface_t *sw_if = - vnet_get_sw_interface (vnet_get_main (), sw_if_index); /* get a pool entry and if we end up not needing it, give it back */ bfd_transport_t t = BFD_TRANSPORT_UDP4; if (!ip46_address_is_ip4 (local_addr)) @@ -145,19 +157,15 @@ bfd_udp_add_session_internal 
(bfd_udp_main_t * bum, u32 sw_if_index, bfd_udp_session_t *bus = &bs->udp; memset (bus, 0, sizeof (*bus)); bfd_udp_key_t *key = &bus->key; - key->sw_if_index = sw_if->sw_if_index; - key->local_addr.as_u64[0] = local_addr->as_u64[0]; - key->local_addr.as_u64[1] = local_addr->as_u64[1]; - key->peer_addr.as_u64[0] = peer_addr->as_u64[0]; - key->peer_addr.as_u64[1] = peer_addr->as_u64[1]; + bfd_udp_key_init (key, sw_if_index, local_addr, peer_addr); const bfd_session_t *tmp = bfd_lookup_session (bum, key); if (tmp) { - BFD_ERR ("duplicate bfd-udp session, existing bs_idx=%d", tmp->bs_idx); + clib_warning ("duplicate bfd-udp session, existing bs_idx=%d", + tmp->bs_idx); bfd_put_session (bum->bfd_main, bs); return VNET_API_ERROR_BFD_EEXIST; } - key->sw_if_index = sw_if->sw_if_index; mhash_set (&bum->bfd_session_idx_by_bfd_key, key, bs->bs_idx, NULL); BFD_DBG ("session created, bs_idx=%u, sw_if_index=%d, local=%U, peer=%U", bs->bs_idx, key->sw_if_index, format_ip46_address, @@ -185,8 +193,7 @@ bfd_udp_add_session_internal (bfd_udp_main_t * bum, u32 sw_if_index, bs->required_min_rx_us = required_min_rx_us; bs->required_min_echo_rx_us = required_min_rx_us; /* FIXME */ bs->local_detect_mult = detect_mult; - bfd_session_start (bum->bfd_main, bs); - *bs_index = bs->bs_idx; + *bs_out = bs; return 0; } @@ -201,14 +208,14 @@ bfd_udp_validate_api_input (u32 sw_if_index, ip_interface_address_t *ia = NULL; if (!sw_if) { - BFD_ERR ("got NULL sw_if"); + clib_warning ("got NULL sw_if"); return VNET_API_ERROR_INVALID_SW_IF_INDEX; } if (ip46_address_is_ip4 (local_addr)) { if (!ip46_address_is_ip4 (peer_addr)) { - BFD_ERR ("IP family mismatch"); + clib_warning ("IP family mismatch"); return VNET_API_ERROR_INVALID_ARGUMENT; } ip4_main_t *im = &ip4_main; @@ -231,7 +238,7 @@ bfd_udp_validate_api_input (u32 sw_if_index, { if (ip46_address_is_ip4 (peer_addr)) { - BFD_ERR ("IP family mismatch"); + clib_warning ("IP family mismatch"); return VNET_API_ERROR_INVALID_ARGUMENT; } ip6_main_t *im = 
&ip6_main; @@ -241,7 +248,7 @@ bfd_udp_validate_api_input (u32 sw_if_index, ip6_address_t *x = ip_interface_address_get_address (&im->lookup_main, ia); if (local_addr->ip6.as_u64[0] == x->as_u64[0] && - local_addr->ip6.as_u64[1] == x->as_u64[1]) + local_addr->ip6.as_u64[1] == x->as_u64[1]) { /* valid address for this interface */ local_ip_valid = 1; @@ -253,18 +260,48 @@ bfd_udp_validate_api_input (u32 sw_if_index, if (!local_ip_valid) { - BFD_ERR ("address not found on interface"); + clib_warning ("address not found on interface"); return VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE; } return 0; } -vnet_api_error_t -bfd_udp_add_session (u32 sw_if_index, u32 desired_min_tx_us, - u32 required_min_rx_us, u8 detect_mult, - const ip46_address_t * local_addr, - const ip46_address_t * peer_addr, u32 * bs_index) +static vnet_api_error_t +bfd_udp_find_session_by_api_input (u32 sw_if_index, + const ip46_address_t * local_addr, + const ip46_address_t * peer_addr, + bfd_session_t ** bs_out) +{ + vnet_api_error_t rv = + bfd_udp_validate_api_input (sw_if_index, local_addr, peer_addr); + if (!rv) + { + bfd_udp_main_t *bum = &bfd_udp_main; + bfd_udp_key_t key; + bfd_udp_key_init (&key, sw_if_index, local_addr, peer_addr); + bfd_session_t *bs = bfd_lookup_session (bum, &key); + if (bs) + { + *bs_out = bs; + } + else + { + clib_warning + ("BFD session not found (sw_if_index=%u, local=%U, peer=%U", + sw_if_index, format_ip46_address, local_addr, IP46_TYPE_ANY, + format_ip46_address, peer_addr, IP46_TYPE_ANY); + return VNET_API_ERROR_BFD_ENOENT; + } + } + return rv; +} + +static vnet_api_error_t +bfd_api_verify_common (u32 sw_if_index, u32 desired_min_tx_us, + u32 required_min_rx_us, u8 detect_mult, + const ip46_address_t * local_addr, + const ip46_address_t * peer_addr) { vnet_api_error_t rv = bfd_udp_validate_api_input (sw_if_index, local_addr, peer_addr); @@ -274,18 +311,66 @@ bfd_udp_add_session (u32 sw_if_index, u32 desired_min_tx_us, } if (detect_mult < 1) { - BFD_ERR 
("detect_mult < 1"); + clib_warning ("detect_mult < 1"); return VNET_API_ERROR_INVALID_ARGUMENT; } if (desired_min_tx_us < 1) { - BFD_ERR ("desired_min_tx_us < 1"); + clib_warning ("desired_min_tx_us < 1"); return VNET_API_ERROR_INVALID_ARGUMENT; } - return bfd_udp_add_session_internal (&bfd_udp_main, sw_if_index, - desired_min_tx_us, required_min_rx_us, - detect_mult, local_addr, peer_addr, - bs_index); + return 0; +} + +static void +bfd_udp_del_session_internal (bfd_session_t * bs) +{ + bfd_udp_main_t *bum = &bfd_udp_main; + BFD_DBG ("free bfd-udp session, bs_idx=%d", bs->bs_idx); + mhash_unset (&bum->bfd_session_idx_by_bfd_key, &bs->udp.key, NULL); + adj_unlock (bs->udp.adj_index); + bfd_put_session (bum->bfd_main, bs); +} + +vnet_api_error_t +bfd_udp_add_session (u32 sw_if_index, const ip46_address_t * local_addr, + const ip46_address_t * peer_addr, u32 desired_min_tx_us, + u32 required_min_rx_us, u8 detect_mult, + u8 is_authenticated, u32 conf_key_id, u8 bfd_key_id) +{ + vnet_api_error_t rv = bfd_api_verify_common (sw_if_index, desired_min_tx_us, + required_min_rx_us, + detect_mult, + local_addr, peer_addr); + bfd_session_t *bs = NULL; + if (!rv) + { + rv = + bfd_udp_add_session_internal (&bfd_udp_main, sw_if_index, + desired_min_tx_us, required_min_rx_us, + detect_mult, local_addr, peer_addr, + &bs); + } + if (!rv && is_authenticated) + { +#if WITH_LIBSSL > 0 + rv = bfd_auth_activate (bs, conf_key_id, bfd_key_id, + 0 /* is not delayed */ ); +#else + clib_warning ("SSL missing, cannot add authenticated BFD session"); + rv = VNET_API_ERROR_BFD_NOTSUPP; +#endif + if (rv) + { + bfd_udp_del_session_internal (bs); + } + } + if (!rv) + { + bfd_session_start (bfd_udp_main.bfd_main, bs); + } + + return rv; } vnet_api_error_t @@ -293,36 +378,162 @@ bfd_udp_del_session (u32 sw_if_index, const ip46_address_t * local_addr, const ip46_address_t * peer_addr) { + bfd_session_t *bs = NULL; vnet_api_error_t rv = - bfd_udp_validate_api_input (sw_if_index, local_addr, 
peer_addr); + bfd_udp_find_session_by_api_input (sw_if_index, local_addr, peer_addr, + &bs); if (rv) { return rv; } - bfd_udp_main_t *bum = &bfd_udp_main; - vnet_sw_interface_t *sw_if = - vnet_get_sw_interface (vnet_get_main (), sw_if_index); - bfd_udp_key_t key; - memset (&key, 0, sizeof (key)); - key.sw_if_index = sw_if->sw_if_index; - key.local_addr.as_u64[0] = local_addr->as_u64[0]; - key.local_addr.as_u64[1] = local_addr->as_u64[1]; - key.peer_addr.as_u64[0] = peer_addr->as_u64[0]; - key.peer_addr.as_u64[1] = peer_addr->as_u64[1]; - bfd_session_t *tmp = bfd_lookup_session (bum, &key); - if (tmp) + bfd_udp_del_session_internal (bs); + return 0; +} + +vnet_api_error_t +bfd_udp_session_set_flags (u32 sw_if_index, + const ip46_address_t * local_addr, + const ip46_address_t * peer_addr, u8 admin_up_down) +{ + bfd_session_t *bs = NULL; + vnet_api_error_t rv = + bfd_udp_find_session_by_api_input (sw_if_index, local_addr, peer_addr, + &bs); + if (rv) + { + return rv; + } + bfd_session_set_flags (bs, admin_up_down); + return 0; +} + +vnet_api_error_t +bfd_auth_set_key (u32 conf_key_id, u8 auth_type, u8 key_len, + const u8 * key_data) +{ +#if WITH_LIBSSL > 0 + bfd_auth_key_t *auth_key = NULL; + if (!key_len || key_len > bfd_max_len_for_auth_type (auth_type)) + { + clib_warning ("Invalid authentication key length for auth_type=%d:%s " + "(key_len=%u, must be " + "non-zero, expected max=%u)", + auth_type, bfd_auth_type_str (auth_type), key_len, + (u32) bfd_max_len_for_auth_type (auth_type)); + return VNET_API_ERROR_INVALID_VALUE; + } + if (!bfd_auth_type_supported (auth_type)) + { + clib_warning ("Unsupported auth type=%d:%s", auth_type, + bfd_auth_type_str (auth_type)); + return VNET_API_ERROR_BFD_NOTSUPP; + } + bfd_main_t *bm = bfd_udp_main.bfd_main; + uword *key_idx_p = hash_get (bm->auth_key_by_conf_key_id, conf_key_id); + if (key_idx_p) + { + /* modifying existing key - must not be used */ + const uword key_idx = *key_idx_p; + auth_key = pool_elt_at_index 
(bm->auth_keys, key_idx); + if (auth_key->use_count > 0) + { + clib_warning ("Authentication key with conf ID %u in use by %u BFD " + "sessions - cannot modify", + conf_key_id, auth_key->use_count); + return VNET_API_ERROR_BFD_EINUSE; + } + } + else + { + /* adding new key */ + pool_get (bm->auth_keys, auth_key); + auth_key->conf_key_id = conf_key_id; + hash_set (bm->auth_key_by_conf_key_id, conf_key_id, + auth_key - bm->auth_keys); + } + auth_key->auth_type = auth_type; + memset (auth_key->key, 0, sizeof (auth_key->key)); + clib_memcpy (auth_key->key, key_data, key_len); + return 0; +#else + clib_warning ("SSL missing, cannot manipulate authentication keys"); + return VNET_API_ERROR_BFD_NOTSUPP; +#endif +} + +vnet_api_error_t +bfd_auth_del_key (u32 conf_key_id) +{ +#if WITH_LIBSSL > 0 + bfd_auth_key_t *auth_key = NULL; + bfd_main_t *bm = bfd_udp_main.bfd_main; + uword *key_idx_p = hash_get (bm->auth_key_by_conf_key_id, conf_key_id); + if (key_idx_p) { - BFD_DBG ("free bfd-udp session, bs_idx=%d", tmp->bs_idx); - mhash_unset (&bum->bfd_session_idx_by_bfd_key, &key, NULL); - adj_unlock (tmp->udp.adj_index); - bfd_put_session (bum->bfd_main, tmp); + /* deleting existing key - must not be used */ + const uword key_idx = *key_idx_p; + auth_key = pool_elt_at_index (bm->auth_keys, key_idx); + if (auth_key->use_count > 0) + { + clib_warning ("Authentication key with conf ID %u in use by %u BFD " + "sessions - cannot delete", + conf_key_id, auth_key->use_count); + return VNET_API_ERROR_BFD_EINUSE; + } + hash_unset (bm->auth_key_by_conf_key_id, conf_key_id); + memset (auth_key, 0, sizeof (*auth_key)); + pool_put (bm->auth_keys, auth_key); } else { - BFD_ERR ("no such session"); - return VNET_API_ERROR_BFD_NOENT; + /* no such key */ + clib_warning ("Authentication key with conf ID %u does not exist", + conf_key_id); + return VNET_API_ERROR_BFD_ENOENT; } return 0; +#else + clib_warning ("SSL missing, cannot manipulate authentication keys"); + return 
VNET_API_ERROR_BFD_NOTSUPP; +#endif +} + +vnet_api_error_t +bfd_udp_auth_activate (u32 sw_if_index, + const ip46_address_t * local_addr, + const ip46_address_t * peer_addr, + u32 conf_key_id, u8 key_id, u8 is_delayed) +{ +#if WITH_LIBSSL > 0 + bfd_session_t *bs = NULL; + vnet_api_error_t rv = + bfd_udp_find_session_by_api_input (sw_if_index, local_addr, peer_addr, + &bs); + if (rv) + { + return rv; + } + return bfd_auth_activate (bs, conf_key_id, key_id, is_delayed); +#else + clib_warning ("SSL missing, cannot activate BFD authentication"); + return VNET_API_ERROR_BFD_NOTSUPP; +#endif +} + +vnet_api_error_t +bfd_udp_auth_deactivate (u32 sw_if_index, + const ip46_address_t * local_addr, + const ip46_address_t * peer_addr, u8 is_delayed) +{ + bfd_session_t *bs = NULL; + vnet_api_error_t rv = + bfd_udp_find_session_by_api_input (sw_if_index, local_addr, peer_addr, + &bs); + if (rv) + { + return rv; + } + return bfd_auth_deactivate (bs, is_delayed); } typedef enum @@ -461,6 +672,14 @@ bfd_udp4_scan (vlib_main_t * vm, vlib_node_runtime_t * rt, BFD_ERR ("Couldn't find ip4 or udp header"); return BFD_UDP_ERROR_BAD; } + const u32 udp_payload_length = udp->length - sizeof (*udp); + if (pkt->head.length > udp_payload_length) + { + BFD_ERR + ("BFD packet length is larger than udp payload length (%u > %u)", + pkt->head.length, udp_payload_length); + return BFD_UDP_ERROR_BAD; + } if (!bfd_verify_pkt_common (pkt)) { return BFD_UDP_ERROR_BAD; @@ -491,8 +710,9 @@ bfd_udp4_scan (vlib_main_t * vm, vlib_node_runtime_t * rt, return BFD_UDP_ERROR_BAD; } BFD_DBG ("BFD session found, bs_idx=%u", bs->bs_idx); - if (!bfd_verify_pkt_session (pkt, b->current_length, bs)) + if (!bfd_verify_pkt_auth (pkt, b->current_length, bs)) { + BFD_ERR ("Packet verification failed, dropping packet"); return BFD_UDP_ERROR_BAD; } bfd_udp_error_t err; @@ -526,11 +746,10 @@ bfd_udp6_find_headers (vlib_buffer_t * b, const ip6_header_t ** ip6, *udp = NULL; return; } - /* FIXME skip extra headers when searching 
for UDP ? */ if ((*ip6)->protocol != IP_PROTOCOL_UDP) { BFD_ERR ("Unexpected protocol in IPv6 header '%u', expected '%u' (== " - "IP_PROTOCOL_UDP)" (*ip6)->protocol, IP_PROTOCOL_UDP); + "IP_PROTOCOL_UDP)", (*ip6)->protocol, IP_PROTOCOL_UDP); *ip6 = NULL; *udp = NULL; return; @@ -596,6 +815,14 @@ bfd_udp6_scan (vlib_main_t * vm, vlib_node_runtime_t * rt, BFD_ERR ("Couldn't find ip6 or udp header"); return BFD_UDP_ERROR_BAD; } + const u32 udp_payload_length = udp->length - sizeof (*udp); + if (pkt->head.length > udp_payload_length) + { + BFD_ERR + ("BFD packet length is larger than udp payload length (%u > %u)", + pkt->head.length, udp_payload_length); + return BFD_UDP_ERROR_BAD; + } if (!bfd_verify_pkt_common (pkt)) { return BFD_UDP_ERROR_BAD; @@ -617,8 +844,9 @@ bfd_udp6_scan (vlib_main_t * vm, vlib_node_runtime_t * rt, key.peer_addr.ip6.as_u64[0] = ip6->src_address.as_u64[0]; key.peer_addr.ip6.as_u64[1] = ip6->src_address.as_u64[1]; BFD_DBG ("Looking up BFD session using key (sw_if_index=%u, local=%U, " - "peer=%U)", key.sw_if_index, format_ip6_address, - &key.local_addr, format_ip6_address, &key.peer_addr); + "peer=%U)", + key.sw_if_index, format_ip6_address, &key.local_addr, + format_ip6_address, &key.peer_addr); bs = bfd_lookup_session (&bfd_udp_main, &key); } if (!bs) @@ -627,8 +855,9 @@ bfd_udp6_scan (vlib_main_t * vm, vlib_node_runtime_t * rt, return BFD_UDP_ERROR_BAD; } BFD_DBG ("BFD session found, bs_idx=%u", bs->bs_idx); - if (!bfd_verify_pkt_session (pkt, b->current_length, bs)) + if (!bfd_verify_pkt_auth (pkt, b->current_length, bs)) { + BFD_ERR ("Packet verification failed, dropping packet"); return BFD_UDP_ERROR_BAD; } bfd_udp_error_t err; @@ -699,7 +928,7 @@ bfd_udp_input (vlib_main_t * vm, vlib_node_runtime_t * rt, const bfd_pkt_t *pkt = vlib_buffer_get_current (b0); if (bfd_pkt_get_poll (pkt)) { - bfd_send_final (vm, b0, bs); + bfd_init_final_control_frame (vm, b0, bs); if (is_ipv6) { vlib_node_increment_counter (vm, bfd_udp6_input_node.index, diff 
--git a/test/bfd.py b/test/bfd.py index 51716813..475a1707 100644 --- a/test/bfd.py +++ b/test/bfd.py @@ -1,3 +1,4 @@ +from random import randint from socket import AF_INET, AF_INET6 from scapy.all import * from scapy.packet import * @@ -53,11 +54,57 @@ class BFDState(NumericConstant): NumericConstant.__init__(self, value) +class BFDAuthType(NumericConstant): + """ BFD Authentication Type """ + no_auth = 0 + simple_pwd = 1 + keyed_md5 = 2 + meticulous_keyed_md5 = 3 + keyed_sha1 = 4 + meticulous_keyed_sha1 = 5 + + desc_dict = { + no_auth: "No authentication", + simple_pwd: "Simple Password", + keyed_md5: "Keyed MD5", + meticulous_keyed_md5: "Meticulous Keyed MD5", + keyed_sha1: "Keyed SHA1", + meticulous_keyed_sha1: "Meticulous Keyed SHA1", + } + + def __init__(self, value): + NumericConstant.__init__(self, value) + + +def bfd_is_auth_used(pkt): + return "A" in pkt.sprintf("%BFD.flags%") + + +def bfd_is_simple_pwd_used(pkt): + return bfd_is_auth_used(pkt) and pkt.auth_type == BFDAuthType.simple_pwd + + +def bfd_is_sha1_used(pkt): + return bfd_is_auth_used(pkt) and pkt.auth_type in \ + (BFDAuthType.keyed_sha1, BFDAuthType.meticulous_keyed_sha1) + + +def bfd_is_md5_used(pkt): + return bfd_is_auth_used(pkt) and pkt.auth_type in \ + (BFDAuthType.keyed_md5, BFDAuthType.meticulous_keyed_md5) + + +def bfd_is_md5_or_sha1_used(pkt): + return bfd_is_md5_used(pkt) or bfd_is_sha1_used(pkt) + + class BFD(Packet): udp_dport = 3784 #: BFD destination port per RFC 5881 udp_sport_min = 49152 #: BFD source port min value per RFC 5881 udp_sport_max = 65535 #: BFD source port max value per RFC 5881 + bfd_pkt_len = 24 # : length of BFD pkt without authentication section + sha1_auth_len = 28 # : length of authentication section if SHA1 used name = "BFD" @@ -65,14 +112,27 @@ class BFD(Packet): BitField("version", 1, 3), BitEnumField("diag", 0, 5, BFDDiagCode.desc_dict), BitEnumField("state", 0, 2, BFDState.desc_dict), - FlagsField("flags", 0, 6, ['P', 'F', 'C', 'A', 'D', 'M']), + 
FlagsField("flags", 0, 6, ['M', 'D', 'A', 'C', 'F', 'P']), XByteField("detect_mult", 0), - XByteField("length", 24), + BitField("length", bfd_pkt_len, 8), BitField("my_discriminator", 0, 32), BitField("your_discriminator", 0, 32), BitField("desired_min_tx_interval", 0, 32), BitField("required_min_rx_interval", 0, 32), - BitField("required_min_echo_rx_interval", 0, 32)] + BitField("required_min_echo_rx_interval", 0, 32), + ConditionalField( + BitEnumField("auth_type", 0, 8, BFDAuthType.desc_dict), + bfd_is_auth_used), + ConditionalField(BitField("auth_len", 0, 8), bfd_is_auth_used), + ConditionalField(BitField("auth_key_id", 0, 8), bfd_is_auth_used), + ConditionalField(BitField("auth_reserved", 0, 8), + bfd_is_md5_or_sha1_used), + ConditionalField( + BitField("auth_seq_num", 0, 32), bfd_is_md5_or_sha1_used), + ConditionalField(StrField("auth_key_hash", "0" * 16), bfd_is_md5_used), + ConditionalField( + StrField("auth_key_hash", "0" * 20), bfd_is_sha1_used), + ] def mysummary(self): return self.sprintf("BFD(my_disc=%BFD.my_discriminator%," @@ -82,9 +142,78 @@ class BFD(Packet): bind_layers(UDP, BFD, dport=BFD.udp_dport) +class VppBFDAuthKey(VppObject): + """ Represents BFD authentication key in VPP """ + + def __init__(self, test, conf_key_id, auth_type, key): + self._test = test + self._key = key + self._auth_type = auth_type + test.assertIn(auth_type, BFDAuthType.desc_dict) + self._conf_key_id = conf_key_id + + @property + def test(self): + """ Test which created this key """ + return self._test + + @property + def auth_type(self): + """ Authentication type for this key """ + return self._auth_type + + @property + def key(self): + return self._key + + @property + def conf_key_id(self): + return self._conf_key_id + + def add_vpp_config(self): + self.test.vapi.bfd_auth_set_key( + self._conf_key_id, self._auth_type, self._key) + self._test.registry.register(self, self.test.logger) + + def get_bfd_auth_keys_dump_entry(self): + """ get the entry in the auth keys dump 
corresponding to this key """ + result = self.test.vapi.bfd_auth_keys_dump() + for k in result: + if k.conf_key_id == self._conf_key_id: + return k + return None + + def query_vpp_config(self): + return self.get_bfd_auth_keys_dump_entry() is not None + + def remove_vpp_config(self): + self.test.vapi.bfd_auth_del_key(self._conf_key_id) + + def object_id(self): + return "bfd-auth-key-%s" % self._conf_key_id + + def __str__(self): + return self.object_id() + + class VppBFDUDPSession(VppObject): """ Represents BFD UDP session in VPP """ + def __init__(self, test, interface, peer_addr, local_addr=None, af=AF_INET, + desired_min_tx=100000, required_min_rx=100000, detect_mult=3, + sha1_key=None, bfd_key_id=None): + self._test = test + self._interface = interface + self._af = af + self._local_addr = local_addr + self._peer_addr = peer_addr + self._peer_addr_n = socket.inet_pton(af, peer_addr) + self._desired_min_tx = desired_min_tx + self._required_min_rx = required_min_rx + self._detect_mult = detect_mult + self._sha1_key = sha1_key + self._bfd_key_id = bfd_key_id if bfd_key_id else randint(0, 255) + @property def test(self): """ Test which created this session """ @@ -100,13 +229,6 @@ class VppBFDUDPSession(VppObject): """ Address family - AF_INET or AF_INET6 """ return self._af - @property - def bs_index(self): - """ BFD session index from VPP """ - if self._bs_index is not None: - return self._bs_index - raise NotConfiguredException("not configured") - @property def local_addr(self): """ BFD session local address (VPP address) """ @@ -141,19 +263,27 @@ class VppBFDUDPSession(VppObject): """ BFD session peer address - raw, suitable for API """ return self._peer_addr_n - @property - def state(self): - """ BFD session state """ + def get_bfd_udp_session_dump_entry(self): result = self.test.vapi.bfd_udp_session_dump() - session = None for s in result: + self.test.logger.debug("session entry: %s" % str(s)) if s.sw_if_index == self.interface.sw_if_index: if self.af == 
AF_INET \ and s.is_ipv6 == 0 \ and self.interface.local_ip4n == s.local_addr[:4] \ and self.interface.remote_ip4n == s.peer_addr[:4]: - session = s - break + return s + if self.af == AF_INET6 \ + and s.is_ipv6 == 1 \ + and self.interface.local_ip6n == s.local_addr \ + and self.interface.remote_ip6n == s.peer_addr: + return s + return None + + @property + def state(self): + """ BFD session state """ + session = self.get_bfd_udp_session_dump_entry() if session is None: raise Exception("Could not find BFD session in VPP response: %s" % repr(result)) @@ -171,61 +301,78 @@ class VppBFDUDPSession(VppObject): def detect_mult(self): return self._detect_mult - def __init__(self, test, interface, peer_addr, local_addr=None, af=AF_INET, - desired_min_tx=100000, required_min_rx=100000, detect_mult=3): - self._test = test - self._interface = interface - self._af = af - self._local_addr = local_addr - self._peer_addr = peer_addr - self._peer_addr_n = socket.inet_pton(af, peer_addr) - self._bs_index = None - self._desired_min_tx = desired_min_tx - self._required_min_rx = required_min_rx - self._detect_mult = detect_mult + @property + def sha1_key(self): + return self._sha1_key + + @property + def bfd_key_id(self): + return self._bfd_key_id + + def activate_auth(self, key, bfd_key_id=None, delayed=False): + self._bfd_key_id = bfd_key_id if bfd_key_id else randint(0, 255) + self._sha1_key = key + is_ipv6 = 1 if AF_INET6 == self.af else 0 + conf_key_id = self._sha1_key.conf_key_id + is_delayed = 1 if delayed else 0 + self.test.vapi.bfd_udp_auth_activate(self._interface.sw_if_index, + self.local_addr_n, + self.peer_addr_n, + is_ipv6=is_ipv6, + bfd_key_id=self._bfd_key_id, + conf_key_id=conf_key_id, + is_delayed=is_delayed) + + def deactivate_auth(self, delayed=False): + self._bfd_key_id = None + self._sha1_key = None + is_delayed = 1 if delayed else 0 + is_ipv6 = 1 if AF_INET6 == self.af else 0 + self.test.vapi.bfd_udp_auth_deactivate(self._interface.sw_if_index, + self.local_addr_n, 
+ self.peer_addr_n, + is_ipv6=is_ipv6, + is_delayed=is_delayed) def add_vpp_config(self): is_ipv6 = 1 if AF_INET6 == self.af else 0 - result = self.test.vapi.bfd_udp_add( - self._interface.sw_if_index, - self.desired_min_tx, - self.required_min_rx, - self.detect_mult, - self.local_addr_n, - self.peer_addr_n, - is_ipv6=is_ipv6) - self._bs_index = result.bs_index + bfd_key_id = self._bfd_key_id if self._sha1_key else None + conf_key_id = self._sha1_key.conf_key_id if self._sha1_key else None + self.test.vapi.bfd_udp_add(self._interface.sw_if_index, + self.desired_min_tx, + self.required_min_rx, + self.detect_mult, + self.local_addr_n, + self.peer_addr_n, + is_ipv6=is_ipv6, + bfd_key_id=bfd_key_id, + conf_key_id=conf_key_id) self._test.registry.register(self, self.test.logger) def query_vpp_config(self): - result = self.test.vapi.bfd_udp_session_dump() - session = None - for s in result: - if s.sw_if_index == self.interface.sw_if_index: - if self.af == AF_INET \ - and s.is_ipv6 == 0 \ - and self.interface.local_ip4n == s.local_addr[:4] \ - and self.interface.remote_ip4n == s.peer_addr[:4]: - session = s - break - if session is None: - return False - return True + session = self.get_bfd_udp_session_dump_entry() + return session is not None def remove_vpp_config(self): - if self._bs_index is not None: - is_ipv6 = 1 if AF_INET6 == self._af else 0 - self.test.vapi.bfd_udp_del( - self._interface.sw_if_index, - self.local_addr_n, - self.peer_addr_n, - is_ipv6=is_ipv6) + is_ipv6 = 1 if AF_INET6 == self._af else 0 + self.test.vapi.bfd_udp_del(self._interface.sw_if_index, + self.local_addr_n, + self.peer_addr_n, + is_ipv6=is_ipv6) def object_id(self): - return "bfd-udp-%d" % self.bs_index + return "bfd-udp-%s-%s-%s-%s" % (self._interface.sw_if_index, + self.local_addr, + self.peer_addr, + self.af) def __str__(self): return self.object_id() def admin_up(self): - self.test.vapi.bfd_session_set_flags(self.bs_index, 1) + is_ipv6 = 1 if AF_INET6 == self._af else 0 + 
self.test.vapi.bfd_udp_session_set_flags(1, + self._interface.sw_if_index, + self.local_addr_n, + self.peer_addr_n, + is_ipv6=is_ipv6) diff --git a/test/framework.py b/test/framework.py index 02935604..8ceb33c3 100644 --- a/test/framework.py +++ b/test/framework.py @@ -89,8 +89,8 @@ class VppTestCase(unittest.TestCase): if dl == "core": if resource.getrlimit(resource.RLIMIT_CORE)[0] <= 0: # give a heads up if this is actually useless - cls.logger.critical("WARNING: core size limit is set 0, core " - "files will NOT be created") + print(colorize("WARNING: core size limit is set 0, core files " + "will NOT be created", RED)) cls.debug_core = True elif dl == "gdb": cls.debug_gdb = True @@ -533,12 +533,11 @@ class VppTestCase(unittest.TestCase): self.assertEqual(real_value, expected_value, msg) - def assert_in_range( - self, - real_value, - expected_min, - expected_max, - name=None): + def assert_in_range(self, + real_value, + expected_min, + expected_max, + name=None): if name is None: msg = None else: diff --git a/test/test_bfd.py b/test/test_bfd.py index d047b5a3..5f861477 100644 --- a/test/test_bfd.py +++ b/test/test_bfd.py @@ -1,6 +1,8 @@ #!/usr/bin/env python import unittest +import hashlib +import binascii import time from random import randint from bfd import * @@ -10,6 +12,22 @@ from util import ppp us_in_sec = 1000000 +class AuthKeyFactory(object): + """Factory class for creating auth keys with unique conf key ID""" + + def __init__(self): + self._conf_key_ids = {} + + def create_random_key(self, test, auth_type=BFDAuthType.keyed_sha1): + conf_key_id = randint(0, 0xFFFFFFFF) + while conf_key_id in self._conf_key_ids: + conf_key_id = randint(0, 0xFFFFFFFF) + self._conf_key_ids[conf_key_id] = 1 + key = str(bytearray([randint(0, 255) for j in range(randint(1, 20))])) + return VppBFDAuthKey(test=test, auth_type=auth_type, + conf_key_id=conf_key_id, key=key) + + class BFDAPITestCase(VppTestCase): """Bidirectional Forwarding Detection (BFD) - API""" @@ -21,19 
+39,23 @@ class BFDAPITestCase(VppTestCase): cls.create_pg_interfaces(range(2)) for i in cls.pg_interfaces: i.config_ip4() + i.config_ip6() i.resolve_arp() except Exception: super(BFDAPITestCase, cls).tearDownClass() raise + def setUp(self): + super(BFDAPITestCase, self).setUp() + self.factory = AuthKeyFactory() + def test_add_bfd(self): """ create a BFD session """ session = VppBFDUDPSession(self, self.pg0, self.pg0.remote_ip4) session.add_vpp_config() self.logger.debug("Session state is %s" % str(session.state)) session.remove_vpp_config() - session = VppBFDUDPSession(self, self.pg0, self.pg0.remote_ip4) session.add_vpp_config() self.logger.debug("Session state is %s" % str(session.state)) session.remove_vpp_config() @@ -48,25 +70,155 @@ class BFDAPITestCase(VppTestCase): session.remove_vpp_config() - def test_add_two(self): - """ create two BFD sessions """ - session1 = VppBFDUDPSession(self, self.pg0, self.pg0.remote_ip4) - session1.add_vpp_config() - session2 = VppBFDUDPSession(self, self.pg1, self.pg1.remote_ip4) - session2.add_vpp_config() - self.assertNotEqual(session1.bs_index, session2.bs_index, - "Different BFD sessions share bs_index (%s)" % - session1.bs_index) + def test_add_bfd6(self): + """ create IPv6 BFD session """ + session = VppBFDUDPSession( + self, self.pg0, self.pg0.remote_ip6, af=AF_INET6) + session.add_vpp_config() + self.logger.debug("Session state is %s" % str(session.state)) + session.remove_vpp_config() + session.add_vpp_config() + self.logger.debug("Session state is %s" % str(session.state)) + session.remove_vpp_config() + + def test_add_sha1_keys(self): + """ add SHA1 keys """ + key_count = 10 + keys = [self.factory.create_random_key( + self) for i in range(0, key_count)] + for key in keys: + self.assertFalse(key.query_vpp_config()) + for key in keys: + key.add_vpp_config() + for key in keys: + self.assertTrue(key.query_vpp_config()) + # remove randomly + indexes = range(key_count) + random.shuffle(indexes) + removed = [] + for i in 
indexes: + key = keys[i] + key.remove_vpp_config() + removed.append(i) + for j in range(key_count): + key = keys[j] + if j in removed: + self.assertFalse(key.query_vpp_config()) + else: + self.assertTrue(key.query_vpp_config()) + # should be removed now + for key in keys: + self.assertFalse(key.query_vpp_config()) + # add back and remove again + for key in keys: + key.add_vpp_config() + for key in keys: + self.assertTrue(key.query_vpp_config()) + for key in keys: + key.remove_vpp_config() + for key in keys: + self.assertFalse(key.query_vpp_config()) + + def test_add_bfd_sha1(self): + """ create a BFD session (SHA1) """ + key = self.factory.create_random_key(self) + key.add_vpp_config() + session = VppBFDUDPSession(self, self.pg0, self.pg0.remote_ip4, + sha1_key=key) + session.add_vpp_config() + self.logger.debug("Session state is %s" % str(session.state)) + session.remove_vpp_config() + session.add_vpp_config() + self.logger.debug("Session state is %s" % str(session.state)) + session.remove_vpp_config() + + def test_double_add_sha1(self): + """ create the same BFD session twice (negative case) (SHA1) """ + key = self.factory.create_random_key(self) + key.add_vpp_config() + session = VppBFDUDPSession(self, self.pg0, self.pg0.remote_ip4, + sha1_key=key) + session.add_vpp_config() + with self.assertRaises(Exception): + session.add_vpp_config() + + def test_add_authenticated_with_nonexistent_key(self): + """ create BFD session using non-existent SHA1 (negative case) """ + session = VppBFDUDPSession( + self, self.pg0, self.pg0.remote_ip4, + sha1_key=self.factory.create_random_key(self)) + with self.assertRaises(Exception): + session.add_vpp_config() + + def test_shared_sha1_key(self): + """ share single SHA1 key between multiple BFD sessions """ + key = self.factory.create_random_key(self) + key.add_vpp_config() + sessions = [ + VppBFDUDPSession(self, self.pg0, self.pg0.remote_ip4, + sha1_key=key), + VppBFDUDPSession(self, self.pg0, self.pg0.remote_ip6, + sha1_key=key, 
af=AF_INET6), + VppBFDUDPSession(self, self.pg1, self.pg1.remote_ip4, + sha1_key=key), + VppBFDUDPSession(self, self.pg1, self.pg1.remote_ip6, + sha1_key=key, af=AF_INET6)] + for s in sessions: + s.add_vpp_config() + removed = 0 + for s in sessions: + e = key.get_bfd_auth_keys_dump_entry() + self.assert_equal(e.use_count, len(sessions) - removed, + "Use count for shared key") + s.remove_vpp_config() + removed += 1 + e = key.get_bfd_auth_keys_dump_entry() + self.assert_equal(e.use_count, len(sessions) - removed, + "Use count for shared key") + + def test_activate_auth(self): + """ activate SHA1 authentication """ + key = self.factory.create_random_key(self) + key.add_vpp_config() + session = VppBFDUDPSession(self, self.pg0, self.pg0.remote_ip4) + session.add_vpp_config() + session.activate_auth(key) + + def test_deactivate_auth(self): + """ deactivate SHA1 authentication """ + key = self.factory.create_random_key(self) + key.add_vpp_config() + session = VppBFDUDPSession(self, self.pg0, self.pg0.remote_ip4) + session.add_vpp_config() + session.activate_auth(key) + session.deactivate_auth() + + def test_change_key(self): + key1 = self.factory.create_random_key(self) + key2 = self.factory.create_random_key(self) + while key2.conf_key_id == key1.conf_key_id: + key2 = self.factory.create_random_key(self) + key1.add_vpp_config() + key2.add_vpp_config() + session = VppBFDUDPSession(self, self.pg0, self.pg0.remote_ip4, + sha1_key=key1) + session.add_vpp_config() + session.activate_auth(key2) class BFDTestSession(object): """ BFD session as seen from test framework side """ - def __init__(self, test, interface, af, detect_mult=3): + def __init__(self, test, interface, af, detect_mult=3, sha1_key=None, + bfd_key_id=None, our_seq_number=0xFFFFFFFF - 4): self.test = test self.af = af + self.sha1_key = sha1_key + self.bfd_key_id = bfd_key_id self.interface = interface self.udp_sport = 50000 + self.our_seq_number = our_seq_number + self.vpp_seq_number = None self.bfd_values = { 
'my_discriminator': 0, 'desired_min_tx_interval': 100000, @@ -74,10 +226,25 @@ class BFDTestSession(object): 'diag': BFDDiagCode.no_diagnostic, } + def inc_seq_num(self): + if self.our_seq_number == 0xFFFFFFFF: + self.our_seq_number = 0 + else: + self.our_seq_number += 1 + def update(self, **kwargs): self.bfd_values.update(kwargs) def create_packet(self): + if self.sha1_key: + bfd = BFD(flags="A") + bfd.auth_type = self.sha1_key.auth_type + bfd.auth_len = BFD.sha1_auth_len + bfd.auth_key_id = self.bfd_key_id + bfd.auth_seq_num = self.our_seq_number + bfd.length = BFD.sha1_auth_len + BFD.bfd_pkt_len + else: + bfd = BFD() if self.af == AF_INET6: packet = (Ether(src=self.interface.remote_mac, dst=self.interface.local_mac) / @@ -85,7 +252,7 @@ class BFDTestSession(object): dst=self.interface.local_ip6, hlim=255) / UDP(sport=self.udp_sport, dport=BFD.udp_dport) / - BFD()) + bfd) else: packet = (Ether(src=self.interface.remote_mac, dst=self.interface.local_mac) / @@ -93,11 +260,17 @@ class BFDTestSession(object): dst=self.interface.local_ip4, ttl=255) / UDP(sport=self.udp_sport, dport=BFD.udp_dport) / - BFD()) + bfd) self.test.logger.debug("BFD: Creating packet") for name, value in self.bfd_values.iteritems(): self.test.logger.debug("BFD: setting packet.%s=%s", name, value) packet[BFD].setfieldval(name, value) + if self.sha1_key: + hash_material = str(packet[BFD])[:32] + self.sha1_key.key + \ + "\0" * (20 - len(self.sha1_key.key)) + self.test.logger.debug("BFD: Calculated SHA1 hash: %s" % + hashlib.sha1(hash_material).hexdigest()) + packet[BFD].auth_key_hash = hashlib.sha1(hash_material).digest() return packet def send_packet(self): @@ -106,13 +279,60 @@ class BFDTestSession(object): self.test.pg0.add_stream([p]) self.test.pg_start() - def verify_packet(self, packet): + def verify_sha1_auth(self, packet): + """ Verify correctness of authentication in BFD layer. 
""" + bfd = packet[BFD] + self.test.assert_equal(bfd.auth_len, 28, "Auth section length") + self.test.assert_equal(bfd.auth_type, self.sha1_key.auth_type, + BFDAuthType) + self.test.assert_equal(bfd.auth_key_id, self.bfd_key_id, "Key ID") + self.test.assert_equal(bfd.auth_reserved, 0, "Reserved") + if self.vpp_seq_number is None: + self.vpp_seq_number = bfd.auth_seq_num + self.test.logger.debug("Received initial sequence number: %s" % + self.vpp_seq_number) + else: + recvd_seq_num = bfd.auth_seq_num + self.test.logger.debug("Received followup sequence number: %s" % + recvd_seq_num) + if self.vpp_seq_number < 0xffffffff: + if self.sha1_key.auth_type == \ + BFDAuthType.meticulous_keyed_sha1: + self.test.assert_equal(recvd_seq_num, + self.vpp_seq_number + 1, + "BFD sequence number") + else: + self.test.assert_in_range(recvd_seq_num, + self.vpp_seq_number, + self.vpp_seq_number + 1, + "BFD sequence number") + else: + if self.sha1_key.auth_type == \ + BFDAuthType.meticulous_keyed_sha1: + self.test.assert_equal(recvd_seq_num, 0, + "BFD sequence number") + else: + self.test.assertIn(recvd_seq_num, (self.vpp_seq_number, 0), + "BFD sequence number not one of " + "(%s, 0)" % self.vpp_seq_number) + self.vpp_seq_number = recvd_seq_num + # last 20 bytes represent the hash - so replace them with the key, + # pad the result with zeros and hash the result + hash_material = bfd.original[:-20] + self.sha1_key.key + \ + "\0" * (20 - len(self.sha1_key.key)) + expected_hash = hashlib.sha1(hash_material).hexdigest() + self.test.assert_equal(binascii.hexlify(bfd.auth_key_hash), + expected_hash, "Auth key hash") + + def verify_bfd(self, packet): """ Verify correctness of BFD layer. 
""" bfd = packet[BFD] self.test.assert_equal(bfd.version, 1, "BFD version") self.test.assert_equal(bfd.your_discriminator, self.bfd_values['my_discriminator'], "BFD - your discriminator") + if self.sha1_key: + self.verify_sha1_auth(packet) class BFDCommonCode: @@ -122,9 +342,9 @@ class BFDCommonCode: self.vapi.collect_events() # clear the event queue if not self.vpp_dead: self.vapi.want_bfd_events(enable_disable=0) - self.vpp_session.remove_vpp_config() def bfd_session_up(self): + """ Bring BFD session up """ self.pg_enable_capture([self.pg0]) self.logger.info("BFD: Waiting for slow hello") p, timeout = self.wait_for_bfd_packet(2) @@ -139,6 +359,18 @@ class BFDCommonCode: self.verify_event(e, expected_state=BFDState.up) self.logger.info("BFD: Session is Up") self.test_session.update(state=BFDState.up) + self.assert_equal(self.vpp_session.state, BFDState.up, BFDState) + + def bfd_session_down(self): + """ Bring BFD session down """ + self.assert_equal(self.vpp_session.state, BFDState.up, BFDState) + self.test_session.update(state=BFDState.down) + self.test_session.send_packet() + self.logger.info("BFD: Waiting for event") + e = self.vapi.wait_for_event(1, "bfd_udp_session_details") + self.verify_event(e, expected_state=BFDState.down) + self.logger.info("BFD: Session is Down") + self.assert_equal(self.vpp_session.state, BFDState.down, BFDState) def verify_ip(self, packet): """ Verify correctness of IP layer. """ @@ -166,12 +398,9 @@ class BFDCommonCode: """ Verify correctness of event values. 
""" e = event self.logger.debug("BFD: Event: %s" % repr(e)) - self.assert_equal(e.bs_index, self.vpp_session.bs_index, - "BFD session index") - self.assert_equal( - e.sw_if_index, - self.vpp_session.interface.sw_if_index, - "BFD interface index") + self.assert_equal(e.sw_if_index, + self.vpp_session.interface.sw_if_index, + "BFD interface index") is_ipv6 = 0 if self.vpp_session.af == AF_INET6: is_ipv6 = 1 @@ -199,7 +428,7 @@ class BFDCommonCode: before = time.time() p = self.pg0.wait_for_packet(timeout=timeout) after = time.time() - self.logger.debug(ppp("Got packet:", p)) + self.logger.debug(ppp("BFD: Got packet:", p)) bfd = p[BFD] if bfd is None: raise Exception(ppp("Unexpected or invalid BFD packet:", p)) @@ -207,20 +436,9 @@ class BFDCommonCode: raise Exception(ppp("Unexpected payload in BFD packet:", bfd)) self.verify_ip(p) self.verify_udp(p) - self.test_session.verify_packet(p) + self.test_session.verify_bfd(p) return p, after - before - def test_session_up(self): - """ bring BFD session up """ - self.bfd_session_up() - - def test_hold_up(self): - """ hold BFD session up """ - self.bfd_session_up() - for i in range(5): - self.wait_for_bfd_packet() - self.test_session.send_packet() - class BFD4TestCase(VppTestCase, BFDCommonCode): """Bidirectional Forwarding Detection (BFD)""" @@ -231,7 +449,6 @@ class BFD4TestCase(VppTestCase, BFDCommonCode): try: cls.create_pg_interfaces([0]) cls.pg0.config_ip4() - cls.pg0.generate_remote_hosts() cls.pg0.configure_ipv4_neighbors() cls.pg0.admin_up() cls.pg0.resolve_arp() @@ -242,6 +459,7 @@ class BFD4TestCase(VppTestCase, BFDCommonCode): def setUp(self): super(BFD4TestCase, self).setUp() + self.factory = AuthKeyFactory() self.vapi.want_bfd_events() try: self.vpp_session = VppBFDUDPSession(self, self.pg0, @@ -255,7 +473,25 @@ class BFD4TestCase(VppTestCase, BFDCommonCode): def tearDown(self): BFDCommonCode.tearDown(self) - super(BFD4TestCase, self).tearDown() + VppTestCase.tearDown(self) + + def test_session_up(self): + """ 
bring BFD session up """ + self.bfd_session_up() + + def test_session_down(self): + """ bring BFD session down """ + self.bfd_session_up() + self.bfd_session_down() + + def test_hold_up(self): + """ hold BFD session up """ + self.bfd_session_up() + for i in range(5): + self.wait_for_bfd_packet() + self.test_session.send_packet() + self.assert_equal(len(self.vapi.collect_events()), 0, + "number of bfd events") def test_slow_timer(self): """ verify slow periodic control frames while session down """ @@ -367,6 +603,7 @@ class BFD6TestCase(VppTestCase, BFDCommonCode): def setUp(self): super(BFD6TestCase, self).setUp() + self.factory = AuthKeyFactory() self.vapi.want_bfd_events() try: self.vpp_session = VppBFDUDPSession(self, self.pg0, @@ -382,7 +619,429 @@ class BFD6TestCase(VppTestCase, BFDCommonCode): def tearDown(self): BFDCommonCode.tearDown(self) - super(BFD6TestCase, self).tearDown() + VppTestCase.tearDown(self) + + def test_session_up(self): + """ bring BFD session up """ + self.bfd_session_up() + + def test_hold_up(self): + """ hold BFD session up """ + self.bfd_session_up() + for i in range(5): + self.wait_for_bfd_packet() + self.test_session.send_packet() + self.assert_equal(len(self.vapi.collect_events()), 0, + "number of bfd events") + self.assert_equal(self.vpp_session.state, BFDState.up, BFDState) + + +class BFDSHA1TestCase(VppTestCase, BFDCommonCode): + """Bidirectional Forwarding Detection (BFD) (SHA1 auth) """ + + @classmethod + def setUpClass(cls): + super(BFDSHA1TestCase, cls).setUpClass() + try: + cls.create_pg_interfaces([0]) + cls.pg0.config_ip4() + cls.pg0.admin_up() + cls.pg0.resolve_arp() + + except Exception: + super(BFDSHA1TestCase, cls).tearDownClass() + raise + + def setUp(self): + super(BFDSHA1TestCase, self).setUp() + self.factory = AuthKeyFactory() + self.vapi.want_bfd_events() + + def tearDown(self): + BFDCommonCode.tearDown(self) + VppTestCase.tearDown(self) + + def test_session_up(self): + """ bring BFD session up """ + key = 
self.factory.create_random_key(self) + key.add_vpp_config() + self.vpp_session = VppBFDUDPSession(self, self.pg0, + self.pg0.remote_ip4, + sha1_key=key) + self.vpp_session.add_vpp_config() + self.vpp_session.admin_up() + self.test_session = BFDTestSession( + self, self.pg0, AF_INET, sha1_key=key, + bfd_key_id=self.vpp_session.bfd_key_id) + self.bfd_session_up() + + def test_hold_up(self): + """ hold BFD session up """ + key = self.factory.create_random_key(self) + key.add_vpp_config() + self.vpp_session = VppBFDUDPSession(self, self.pg0, + self.pg0.remote_ip4, + sha1_key=key) + self.vpp_session.add_vpp_config() + self.vpp_session.admin_up() + self.test_session = BFDTestSession( + self, self.pg0, AF_INET, sha1_key=key, + bfd_key_id=self.vpp_session.bfd_key_id) + self.bfd_session_up() + for i in range(5): + self.wait_for_bfd_packet() + self.test_session.send_packet() + self.assert_equal(self.vpp_session.state, BFDState.up, BFDState) + + def test_hold_up_meticulous(self): + """ hold BFD session up - meticulous auth """ + key = self.factory.create_random_key( + self, BFDAuthType.meticulous_keyed_sha1) + key.add_vpp_config() + self.vpp_session = VppBFDUDPSession(self, self.pg0, + self.pg0.remote_ip4, sha1_key=key) + self.vpp_session.add_vpp_config() + self.vpp_session.admin_up() + self.test_session = BFDTestSession( + self, self.pg0, AF_INET, sha1_key=key, + bfd_key_id=self.vpp_session.bfd_key_id) + self.bfd_session_up() + for i in range(5): + self.wait_for_bfd_packet() + self.test_session.inc_seq_num() + self.test_session.send_packet() + self.assert_equal(self.vpp_session.state, BFDState.up, BFDState) + + def test_send_bad_seq_number(self): + """ session is not kept alive by msgs with bad seq numbers""" + key = self.factory.create_random_key( + self, BFDAuthType.meticulous_keyed_sha1) + key.add_vpp_config() + self.vpp_session = VppBFDUDPSession(self, self.pg0, + self.pg0.remote_ip4, sha1_key=key) + self.vpp_session.add_vpp_config() + self.vpp_session.admin_up() + 
self.test_session = BFDTestSession( + self, self.pg0, AF_INET, sha1_key=key, + bfd_key_id=self.vpp_session.bfd_key_id) + self.bfd_session_up() + self.wait_for_bfd_packet() + self.test_session.send_packet() + self.assert_equal(len(self.vapi.collect_events()), 0, + "number of bfd events") + self.wait_for_bfd_packet() + self.test_session.send_packet() + self.assert_equal(len(self.vapi.collect_events()), 0, + "number of bfd events") + self.wait_for_bfd_packet() + self.test_session.send_packet() + self.wait_for_bfd_packet() + self.test_session.send_packet() + e = self.vapi.collect_events() + # session should be down now, because the sequence numbers weren't + # updated + self.assert_equal(len(e), 1, "number of bfd events") + self.verify_event(e[0], expected_state=BFDState.down) + + def execute_rogue_session_scenario(self, vpp_bfd_udp_session, + legitimate_test_session, + rogue_test_session, + rogue_bfd_values=None): + """ execute a rogue session interaction scenario + + 1. create vpp session, add config + 2. bring the legitimate session up + 3. copy the bfd values from legitimate session to rogue session + 4. apply rogue_bfd_values to rogue session + 5. set rogue session state to down + 6. send message to take the session down from the rogue session + 7. 
assert that the legitimate session is unaffected + """ + + self.vpp_session = vpp_bfd_udp_session + self.vpp_session.add_vpp_config() + self.vpp_session.admin_up() + self.test_session = legitimate_test_session + # bring vpp session up + self.bfd_session_up() + # send packet from rogue session + rogue_test_session.bfd_values = self.test_session.bfd_values.copy() + if rogue_bfd_values: + rogue_test_session.update(**rogue_bfd_values) + rogue_test_session.update(state=BFDState.down) + rogue_test_session.send_packet() + self.wait_for_bfd_packet() + self.assert_equal(self.vpp_session.state, BFDState.up, BFDState) + + def test_mismatch_auth(self): + """ session is not brought down by unauthenticated msg """ + key = self.factory.create_random_key(self) + key.add_vpp_config() + vpp_session = VppBFDUDPSession( + self, self.pg0, self.pg0.remote_ip4, sha1_key=key) + legitimate_test_session = BFDTestSession( + self, self.pg0, AF_INET, sha1_key=key, + bfd_key_id=vpp_session.bfd_key_id) + rogue_test_session = BFDTestSession(self, self.pg0, AF_INET) + self.execute_rogue_session_scenario(vpp_session, + legitimate_test_session, + rogue_test_session) + + def test_mismatch_bfd_key_id(self): + """ session is not brought down by msg with non-existent key-id """ + key = self.factory.create_random_key(self) + key.add_vpp_config() + vpp_session = VppBFDUDPSession( + self, self.pg0, self.pg0.remote_ip4, sha1_key=key) + # pick a different random bfd key id + x = randint(0, 255) + while x == vpp_session.bfd_key_id: + x = randint(0, 255) + legitimate_test_session = BFDTestSession( + self, self.pg0, AF_INET, sha1_key=key, + bfd_key_id=vpp_session.bfd_key_id) + rogue_test_session = BFDTestSession( + self, self.pg0, AF_INET, sha1_key=key, bfd_key_id=x) + self.execute_rogue_session_scenario(vpp_session, + legitimate_test_session, + rogue_test_session) + + def test_mismatched_auth_type(self): + """ session is not brought down by msg with wrong auth type """ + key = 
self.factory.create_random_key(self) + key.add_vpp_config() + vpp_session = VppBFDUDPSession( + self, self.pg0, self.pg0.remote_ip4, sha1_key=key) + legitimate_test_session = BFDTestSession( + self, self.pg0, AF_INET, sha1_key=key, + bfd_key_id=vpp_session.bfd_key_id) + rogue_test_session = BFDTestSession( + self, self.pg0, AF_INET, sha1_key=key, + bfd_key_id=vpp_session.bfd_key_id) + self.execute_rogue_session_scenario( + vpp_session, legitimate_test_session, rogue_test_session, + {'auth_type': BFDAuthType.keyed_md5}) + + def test_restart(self): + """ simulate remote peer restart and resynchronization """ + key = self.factory.create_random_key( + self, BFDAuthType.meticulous_keyed_sha1) + key.add_vpp_config() + self.vpp_session = VppBFDUDPSession(self, self.pg0, + self.pg0.remote_ip4, sha1_key=key) + self.vpp_session.add_vpp_config() + self.vpp_session.admin_up() + self.test_session = BFDTestSession( + self, self.pg0, AF_INET, sha1_key=key, + bfd_key_id=self.vpp_session.bfd_key_id, our_seq_number=0) + self.bfd_session_up() + # now we need to not respond for 2*detection_time (4 packets) + self.wait_for_bfd_packet() + self.assert_equal(len(self.vapi.collect_events()), 0, + "number of bfd events") + self.wait_for_bfd_packet() + self.assert_equal(len(self.vapi.collect_events()), 0, + "number of bfd events") + e = self.vapi.wait_for_event(1, "bfd_udp_session_details") + self.verify_event(e, expected_state=BFDState.down) + self.test_session.update(state=BFDState.down) + self.wait_for_bfd_packet() + self.assert_equal(len(self.vapi.collect_events()), 0, + "number of bfd events") + self.wait_for_bfd_packet() + self.assert_equal(len(self.vapi.collect_events()), 0, + "number of bfd events") + # reset sequence number + self.test_session.our_seq_number = 0 + self.bfd_session_up() + + +class BFDAuthOnOffTestCase(VppTestCase, BFDCommonCode): + """Bidirectional Forwarding Detection (BFD) (changing auth) """ + + @classmethod + def setUpClass(cls): + super(BFDAuthOnOffTestCase, 
cls).setUpClass() + try: + cls.create_pg_interfaces([0]) + cls.pg0.config_ip4() + cls.pg0.admin_up() + cls.pg0.resolve_arp() + + except Exception: + super(BFDAuthOnOffTestCase, cls).tearDownClass() + raise + + def setUp(self): + super(BFDAuthOnOffTestCase, self).setUp() + self.factory = AuthKeyFactory() + self.vapi.want_bfd_events() + + def tearDown(self): + BFDCommonCode.tearDown(self) + VppTestCase.tearDown(self) + + def test_auth_on_immediate(self): + """ turn auth on without disturbing session state (immediate) """ + key = self.factory.create_random_key(self) + key.add_vpp_config() + self.vpp_session = VppBFDUDPSession(self, self.pg0, + self.pg0.remote_ip4) + self.vpp_session.add_vpp_config() + self.vpp_session.admin_up() + self.test_session = BFDTestSession(self, self.pg0, AF_INET) + self.bfd_session_up() + for i in range(5): + self.wait_for_bfd_packet() + self.test_session.send_packet() + self.vpp_session.activate_auth(key) + self.test_session.bfd_key_id = self.vpp_session.bfd_key_id + self.test_session.sha1_key = key + for i in range(5): + self.wait_for_bfd_packet() + self.test_session.send_packet() + self.assert_equal(self.vpp_session.state, BFDState.up, BFDState) + self.assert_equal(len(self.vapi.collect_events()), 0, + "number of bfd events") + + def test_auth_off_immediate(self): + """ turn auth off without disturbing session state (immediate) """ + key = self.factory.create_random_key(self) + key.add_vpp_config() + self.vpp_session = VppBFDUDPSession(self, self.pg0, + self.pg0.remote_ip4, sha1_key=key) + self.vpp_session.add_vpp_config() + self.vpp_session.admin_up() + self.test_session = BFDTestSession( + self, self.pg0, AF_INET, sha1_key=key, + bfd_key_id=self.vpp_session.bfd_key_id) + self.bfd_session_up() + for i in range(5): + self.wait_for_bfd_packet() + self.test_session.send_packet() + self.vpp_session.deactivate_auth() + self.test_session.bfd_key_id = None + self.test_session.sha1_key = None + for i in range(5): + self.wait_for_bfd_packet() + 
self.test_session.send_packet() + self.assert_equal(self.vpp_session.state, BFDState.up, BFDState) + self.assert_equal(len(self.vapi.collect_events()), 0, + "number of bfd events") + + def test_auth_change_key_immediate(self): + """ change auth key without disturbing session state (immediate) """ + key1 = self.factory.create_random_key(self) + key1.add_vpp_config() + key2 = self.factory.create_random_key(self) + key2.add_vpp_config() + self.vpp_session = VppBFDUDPSession(self, self.pg0, + self.pg0.remote_ip4, sha1_key=key1) + self.vpp_session.add_vpp_config() + self.vpp_session.admin_up() + self.test_session = BFDTestSession( + self, self.pg0, AF_INET, sha1_key=key1, + bfd_key_id=self.vpp_session.bfd_key_id) + self.bfd_session_up() + for i in range(5): + self.wait_for_bfd_packet() + self.test_session.send_packet() + self.vpp_session.activate_auth(key2) + self.test_session.bfd_key_id = self.vpp_session.bfd_key_id + self.test_session.sha1_key = key2 + for i in range(5): + self.wait_for_bfd_packet() + self.test_session.send_packet() + self.assert_equal(self.vpp_session.state, BFDState.up, BFDState) + self.assert_equal(len(self.vapi.collect_events()), 0, + "number of bfd events") + + def test_auth_on_delayed(self): + """ turn auth on without disturbing session state (delayed) """ + key = self.factory.create_random_key(self) + key.add_vpp_config() + self.vpp_session = VppBFDUDPSession(self, self.pg0, + self.pg0.remote_ip4) + self.vpp_session.add_vpp_config() + self.vpp_session.admin_up() + self.test_session = BFDTestSession(self, self.pg0, AF_INET) + self.bfd_session_up() + for i in range(5): + self.wait_for_bfd_packet() + self.test_session.send_packet() + self.vpp_session.activate_auth(key, delayed=True) + for i in range(5): + self.wait_for_bfd_packet() + self.test_session.send_packet() + self.test_session.bfd_key_id = self.vpp_session.bfd_key_id + self.test_session.sha1_key = key + self.test_session.send_packet() + for i in range(5): + self.wait_for_bfd_packet() + 
self.test_session.send_packet() + self.assert_equal(self.vpp_session.state, BFDState.up, BFDState) + self.assert_equal(len(self.vapi.collect_events()), 0, + "number of bfd events") + + def test_auth_off_delayed(self): + """ turn auth off without disturbing session state (delayed) """ + key = self.factory.create_random_key(self) + key.add_vpp_config() + self.vpp_session = VppBFDUDPSession(self, self.pg0, + self.pg0.remote_ip4, sha1_key=key) + self.vpp_session.add_vpp_config() + self.vpp_session.admin_up() + self.test_session = BFDTestSession( + self, self.pg0, AF_INET, sha1_key=key, + bfd_key_id=self.vpp_session.bfd_key_id) + self.bfd_session_up() + for i in range(5): + self.wait_for_bfd_packet() + self.test_session.send_packet() + self.vpp_session.deactivate_auth(delayed=True) + for i in range(5): + self.wait_for_bfd_packet() + self.test_session.send_packet() + self.test_session.bfd_key_id = None + self.test_session.sha1_key = None + self.test_session.send_packet() + for i in range(5): + self.wait_for_bfd_packet() + self.test_session.send_packet() + self.assert_equal(self.vpp_session.state, BFDState.up, BFDState) + self.assert_equal(len(self.vapi.collect_events()), 0, + "number of bfd events") + + def test_auth_change_key_delayed(self): + """ change auth key without disturbing session state (delayed) """ + key1 = self.factory.create_random_key(self) + key1.add_vpp_config() + key2 = self.factory.create_random_key(self) + key2.add_vpp_config() + self.vpp_session = VppBFDUDPSession(self, self.pg0, + self.pg0.remote_ip4, sha1_key=key1) + self.vpp_session.add_vpp_config() + self.vpp_session.admin_up() + self.test_session = BFDTestSession( + self, self.pg0, AF_INET, sha1_key=key1, + bfd_key_id=self.vpp_session.bfd_key_id) + self.bfd_session_up() + for i in range(5): + self.wait_for_bfd_packet() + self.test_session.send_packet() + self.vpp_session.activate_auth(key2, delayed=True) + for i in range(5): + self.wait_for_bfd_packet() + self.test_session.send_packet() + 
self.test_session.bfd_key_id = self.vpp_session.bfd_key_id + self.test_session.sha1_key = key2 + self.test_session.send_packet() + for i in range(5): + self.wait_for_bfd_packet() + self.test_session.send_packet() + self.assert_equal(self.vpp_session.state, BFDState.up, BFDState) + self.assert_equal(len(self.vapi.collect_events()), 0, + "number of bfd events") if __name__ == '__main__': unittest.main(testRunner=VppTestRunner) diff --git a/test/vpp_papi_provider.py b/test/vpp_papi_provider.py index 90c954dc..72c18e6c 100644 --- a/test/vpp_papi_provider.py +++ b/test/vpp_papi_provider.py @@ -992,16 +992,57 @@ class VppPapiProvider(object): self.api(self.papi.control_ping) def bfd_udp_add(self, sw_if_index, desired_min_tx, required_min_rx, - detect_mult, local_addr, peer_addr, is_ipv6=0): - return self.api(self.papi.bfd_udp_add, + detect_mult, local_addr, peer_addr, is_ipv6=0, + bfd_key_id=None, conf_key_id=None): + if bfd_key_id is None: + return self.api(self.papi.bfd_udp_add, + { + 'sw_if_index': sw_if_index, + 'desired_min_tx': desired_min_tx, + 'required_min_rx': required_min_rx, + 'local_addr': local_addr, + 'peer_addr': peer_addr, + 'is_ipv6': is_ipv6, + 'detect_mult': detect_mult, + }) + else: + return self.api(self.papi.bfd_udp_add, + { + 'sw_if_index': sw_if_index, + 'desired_min_tx': desired_min_tx, + 'required_min_rx': required_min_rx, + 'local_addr': local_addr, + 'peer_addr': peer_addr, + 'is_ipv6': is_ipv6, + 'detect_mult': detect_mult, + 'is_authenticated': 1, + 'bfd_key_id': bfd_key_id, + 'conf_key_id': conf_key_id, + }) + + def bfd_udp_auth_activate(self, sw_if_index, local_addr, peer_addr, + is_ipv6=0, bfd_key_id=None, conf_key_id=None, + is_delayed=False): + return self.api(self.papi.bfd_udp_auth_activate, { 'sw_if_index': sw_if_index, - 'desired_min_tx': desired_min_tx, - 'required_min_rx': required_min_rx, 'local_addr': local_addr, 'peer_addr': peer_addr, 'is_ipv6': is_ipv6, - 'detect_mult': detect_mult, + 'is_delayed': 1 if is_delayed else 0, + 
'bfd_key_id': bfd_key_id, + 'conf_key_id': conf_key_id, + }) + + def bfd_udp_auth_deactivate(self, sw_if_index, local_addr, peer_addr, + is_ipv6=0, is_delayed=False): + return self.api(self.papi.bfd_udp_auth_deactivate, + { + 'sw_if_index': sw_if_index, + 'local_addr': local_addr, + 'peer_addr': peer_addr, + 'is_ipv6': is_ipv6, + 'is_delayed': 1 if is_delayed else 0, }) def bfd_udp_del(self, sw_if_index, local_addr, peer_addr, is_ipv6=0): @@ -1016,10 +1057,14 @@ class VppPapiProvider(object): def bfd_udp_session_dump(self): return self.api(self.papi.bfd_udp_session_dump, {}) - def bfd_session_set_flags(self, bs_idx, admin_up_down): - return self.api(self.papi.bfd_session_set_flags, { - 'bs_index': bs_idx, + def bfd_udp_session_set_flags(self, admin_up_down, sw_if_index, local_addr, + peer_addr, is_ipv6=0): + return self.api(self.papi.bfd_udp_session_set_flags, { 'admin_up_down': admin_up_down, + 'sw_if_index': sw_if_index, + 'local_addr': local_addr, + 'peer_addr': peer_addr, + 'is_ipv6': is_ipv6, }) def want_bfd_events(self, enable_disable=1): @@ -1028,6 +1073,22 @@ class VppPapiProvider(object): 'pid': os.getpid(), }) + def bfd_auth_set_key(self, conf_key_id, auth_type, key): + return self.api(self.papi.bfd_auth_set_key, { + 'conf_key_id': conf_key_id, + 'auth_type': auth_type, + 'key': key, + 'key_len': len(key), + }) + + def bfd_auth_del_key(self, conf_key_id): + return self.api(self.papi.bfd_auth_del_key, { + 'conf_key_id': conf_key_id, + }) + + def bfd_auth_keys_dump(self): + return self.api(self.papi.bfd_auth_keys_dump, {}) + def classify_add_del_table( self, is_add, -- cgit 1.2.3-korg From a57a970952be2c3403c57dd7a16cd3d73660ef79 Mon Sep 17 00:00:00 2001 From: Klement Sekera Date: Thu, 2 Feb 2017 06:58:07 +0100 Subject: BFD: modify session parameters Change-Id: I666e5c0cc71a3693640960c93cdd1907f84fbe23 Signed-off-by: Klement Sekera --- src/vnet/api_errno.h | 3 +- src/vnet/bfd/bfd.api | 40 ++++++ src/vnet/bfd/bfd_api.c | 24 ++++ src/vnet/bfd/bfd_api.h | 14 
++- src/vnet/bfd/bfd_debug.h | 7 ++ src/vnet/bfd/bfd_main.c | 296 +++++++++++++++++++++++++++++++++----------- src/vnet/bfd/bfd_main.h | 57 ++++++--- src/vnet/bfd/bfd_protocol.c | 26 ++++ src/vnet/bfd/bfd_udp.c | 61 +++++---- test/bfd.py | 19 +++ test/framework.py | 4 + test/test_bfd.py | 164 ++++++++++++++++++++++-- test/vpp_papi_provider.py | 13 ++ 13 files changed, 601 insertions(+), 127 deletions(-) (limited to 'src/vnet/bfd') diff --git a/src/vnet/api_errno.h b/src/vnet/api_errno.h index 32880232..0daba169 100644 --- a/src/vnet/api_errno.h +++ b/src/vnet/api_errno.h @@ -95,7 +95,8 @@ _(BFD_EEXIST, -101, "Duplicate BFD object") \ _(BFD_ENOENT, -102, "No such BFD object") \ _(BFD_EINUSE, -103, "BFD object in use") \ _(BFD_NOTSUPP, -104, "BFD feature not supported") \ -_(LISP_RLOC_LOCAL, -105, "RLOC address is local") +_(LISP_RLOC_LOCAL, -105, "RLOC address is local") \ +_(BFD_EAGAIN, -106, "BFD object cannot be manipulated at this time") typedef enum { diff --git a/src/vnet/bfd/bfd.api b/src/vnet/bfd/bfd.api index 17ca35b6..f307ed2a 100644 --- a/src/vnet/bfd/bfd.api +++ b/src/vnet/bfd/bfd.api @@ -107,6 +107,40 @@ define bfd_udp_add_reply i32 retval; }; +/** \brief Modify UDP BFD session on interface + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - sw index of the interface + @param desired_min_tx - desired min transmit interval (microseconds) + @param required_min_rx - required min receive interval (microseconds) + @param local_addr - local address + @param peer_addr - peer address + @param is_ipv6 - local_addr, peer_addr are IPv6 if non-zero, otherwise IPv4 + @param detect_mult - detect multiplier (# of packets missed before connection goes down) +*/ +define bfd_udp_mod +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u32 desired_min_tx; + u32 required_min_rx; + u8 local_addr[16]; + u8 peer_addr[16]; + u8 is_ipv6; + u8 detect_mult; +}; + +/** \brief Modify 
UDP BFD session response + @param context - sender context, to match reply w/ request + @param retval - return code for the request +*/ +define bfd_udp_mod_reply +{ + u32 context; + i32 retval; +}; + /** \brief Delete UDP BFD session on interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -155,6 +189,9 @@ define bfd_udp_session_dump @param is_authenticated - non-zero if authentication in-use, zero otherwise @param bfd_key_id - ID of key currently in-use if auth is on @param conf_key_id - configured key ID for this session + @param required_min_rx - required min receive interval (microseconds) + @param desired_min_tx - desired min transmit interval (microseconds) + @param detect_mult - detect multiplier (# of packets missed before connection goes down) */ define bfd_udp_session_details { @@ -167,6 +204,9 @@ define bfd_udp_session_details u8 is_authenticated; u8 bfd_key_id; u32 conf_key_id; + u32 required_min_rx; + u32 desired_min_tx; + u8 detect_mult; }; /** \brief Set flags of BFD UDP session diff --git a/src/vnet/bfd/bfd_api.c b/src/vnet/bfd/bfd_api.c index cfc3a38d..af70f0ec 100644 --- a/src/vnet/bfd/bfd_api.c +++ b/src/vnet/bfd/bfd_api.c @@ -45,6 +45,7 @@ #define foreach_vpe_api_msg \ _ (BFD_UDP_ADD, bfd_udp_add) \ + _ (BFD_UDP_MOD, bfd_udp_mod) \ _ (BFD_UDP_DEL, bfd_udp_del) \ _ (BFD_UDP_SESSION_DUMP, bfd_udp_session_dump) \ _ (BFD_UDP_SESSION_SET_FLAGS, bfd_udp_session_set_flags) \ @@ -97,6 +98,25 @@ vl_api_bfd_udp_add_t_handler (vl_api_bfd_udp_add_t * mp) REPLY_MACRO (VL_API_BFD_UDP_ADD_REPLY); } +static void +vl_api_bfd_udp_mod_t_handler (vl_api_bfd_udp_mod_t * mp) +{ + vl_api_bfd_udp_mod_reply_t *rmp; + int rv; + + VALIDATE_SW_IF_INDEX (mp); + + BFD_UDP_API_PARAM_COMMON_CODE; + + rv = bfd_udp_mod_session (BFD_UDP_API_PARAM_FROM_MP (mp), + clib_net_to_host_u32 (mp->desired_min_tx), + clib_net_to_host_u32 (mp->required_min_rx), + mp->detect_mult); + + BAD_SW_IF_INDEX_LABEL; + 
REPLY_MACRO (VL_API_BFD_UDP_MOD_REPLY); +} + static void vl_api_bfd_udp_del_t_handler (vl_api_bfd_udp_del_t * mp) { @@ -146,6 +166,10 @@ send_bfd_udp_session_details (unix_shared_memory_queue_t * q, u32 context, sizeof (key->peer_addr.ip4.data)); } + mp->required_min_rx = + clib_host_to_net_u32 (bs->config_required_min_rx_usec); + mp->desired_min_tx = clib_host_to_net_u32 (bs->config_desired_min_tx_usec); + mp->detect_mult = bs->local_detect_mult; vl_msg_api_send_shmem (q, (u8 *) & mp); } diff --git a/src/vnet/bfd/bfd_api.h b/src/vnet/bfd/bfd_api.h index 128a3dc4..f4486a79 100644 --- a/src/vnet/bfd/bfd_api.h +++ b/src/vnet/bfd/bfd_api.h @@ -26,9 +26,17 @@ vnet_api_error_t bfd_udp_add_session (u32 sw_if_index, const ip46_address_t * local_addr, - const ip46_address_t * peer_addr, u32 desired_min_tx_us, - u32 required_min_rx_us, u8 detect_mult, - u8 is_authenticated, u32 conf_key_id, u8 bfd_key_id); + const ip46_address_t * peer_addr, + u32 desired_min_tx_usec, u32 required_min_rx_usec, + u8 detect_mult, u8 is_authenticated, u32 conf_key_id, + u8 bfd_key_id); + +vnet_api_error_t bfd_udp_mod_session (u32 sw_if_index, + const ip46_address_t * local_addr, + const ip46_address_t * peer_addr, + u32 desired_min_tx_usec, + u32 required_min_rx_usec, + u8 detect_mult); vnet_api_error_t bfd_udp_del_session (u32 sw_if_index, const ip46_address_t * local_addr, diff --git a/src/vnet/bfd/bfd_debug.h b/src/vnet/bfd/bfd_debug.h index 707ebab2..a06e934f 100644 --- a/src/vnet/bfd/bfd_debug.h +++ b/src/vnet/bfd/bfd_debug.h @@ -63,6 +63,13 @@ } \ while (0); +#define BFD_CLK_FMT "%luus/%lu clocks/%.2fs" +#define BFD_CLK_PRN(clocks) \ + (u64) ((((f64)clocks) / vlib_get_main ()->clib_time.clocks_per_second) * \ + USEC_PER_SECOND), \ + (clocks), \ + (((f64)clocks) / vlib_get_main ()->clib_time.clocks_per_second) + #else #define BFD_DBG(...) #define BFD_ERR(...) 
diff --git a/src/vnet/bfd/bfd_main.c b/src/vnet/bfd/bfd_main.c index 8f2fae2b..798d0631 100644 --- a/src/vnet/bfd/bfd_main.c +++ b/src/vnet/bfd/bfd_main.c @@ -30,11 +30,16 @@ #endif static u64 -bfd_us_to_clocks (bfd_main_t * bm, u64 us) +bfd_usec_to_clocks (const bfd_main_t * bm, u64 us) { return bm->cpu_cps * ((f64) us / USEC_PER_SECOND); } +// static u64 bfd_clocks_to_usec (const bfd_main_t *bm, u64 clocks) +//{ +// return (clocks / bm->cpu_cps) * USEC_PER_SECOND; +//} + static vlib_node_registration_t bfd_process_node; /* set to 0 here, real values filled at startup */ @@ -83,9 +88,11 @@ bfd_set_defaults (bfd_main_t * bm, bfd_session_t * bs) bs->remote_state = BFD_STATE_down; bs->local_demand = 0; bs->remote_discr = 0; - bs->desired_min_tx_us = BFD_DEFAULT_DESIRED_MIN_TX_US; - bs->desired_min_tx_clocks = bfd_us_to_clocks (bm, bs->desired_min_tx_us); - bs->remote_min_rx_us = 1; + bs->config_desired_min_tx_usec = BFD_DEFAULT_DESIRED_MIN_TX_US; + bs->config_desired_min_tx_clocks = bm->default_desired_min_tx_clocks; + bs->effective_desired_min_tx_clocks = bm->default_desired_min_tx_clocks; + bs->remote_min_rx_usec = 1; + bs->remote_min_rx_clocks = bfd_usec_to_clocks (bm, bs->remote_min_rx_usec); bs->remote_demand = 0; bs->auth.remote_seq_number = 0; bs->auth.remote_seq_number_known = 0; @@ -123,7 +130,8 @@ bfd_recalc_tx_interval (bfd_main_t * bm, bfd_session_t * bs) if (!bs->local_demand) { bs->transmit_interval_clocks = - clib_max (bs->desired_min_tx_clocks, bs->remote_min_rx_clocks); + clib_max (bs->effective_desired_min_tx_clocks, + bs->remote_min_rx_clocks); } else { @@ -193,18 +201,18 @@ bfd_recalc_detection_time (bfd_main_t * bm, bfd_session_t * bs) { if (!bs->local_demand) { + /* asynchronous mode */ bs->detection_time_clocks = bs->remote_detect_mult * - bfd_us_to_clocks (bm, clib_max (bs->required_min_rx_us, - bs->remote_desired_min_tx_us)); + clib_max (bs->effective_required_min_rx_clocks, + bs->remote_desired_min_tx_clocks); } else { + /* demand mode */ 
bs->detection_time_clocks = - bs->local_detect_mult * - bfd_us_to_clocks (bm, - clib_max (bs->desired_min_tx_us, - bs->remote_min_rx_us)); + bs->local_detect_mult * clib_max (bs->config_desired_min_tx_clocks, + bs->remote_min_rx_clocks); } BFD_DBG ("Recalculated detection time %lu clocks/%.2fs", bs->detection_time_clocks, @@ -259,30 +267,44 @@ bfd_set_timer (bfd_main_t * bm, bfd_session_t * bs, u64 now, } static void -bfd_set_desired_min_tx (bfd_main_t * bm, bfd_session_t * bs, u64 now, - u32 desired_min_tx_us, int handling_wakeup) +bfd_set_effective_desired_min_tx (bfd_main_t * bm, + bfd_session_t * bs, u64 now, + u64 desired_min_tx_clocks, + int handling_wakeup) { - bs->desired_min_tx_us = desired_min_tx_us; - bs->desired_min_tx_clocks = bfd_us_to_clocks (bm, bs->desired_min_tx_us); - BFD_DBG ("Set desired min tx to %uus/%lu clocks/%.2fs", - bs->desired_min_tx_us, bs->desired_min_tx_clocks, - bs->desired_min_tx_clocks / bm->cpu_cps); + bs->effective_desired_min_tx_clocks = desired_min_tx_clocks; + BFD_DBG ("Set effective desired min tx to " BFD_CLK_FMT, + BFD_CLK_PRN (bs->effective_desired_min_tx_clocks)); bfd_recalc_detection_time (bm, bs); bfd_recalc_tx_interval (bm, bs); bfd_calc_next_tx (bm, bs, now); bfd_set_timer (bm, bs, now, handling_wakeup); } +static void +bfd_set_effective_required_min_rx (bfd_main_t * bm, + bfd_session_t * bs, u64 now, + u64 required_min_rx_clocks, + int handling_wakeup) +{ + bs->effective_required_min_rx_clocks = required_min_rx_clocks; + BFD_DBG ("Set effective required min rx to " BFD_CLK_FMT, + BFD_CLK_PRN (bs->effective_required_min_rx_clocks)); + bfd_recalc_detection_time (bm, bs); + bfd_set_timer (bm, bs, now, handling_wakeup); +} + static void bfd_set_remote_required_min_rx (bfd_main_t * bm, bfd_session_t * bs, u64 now, - u32 remote_required_min_rx_us, + u32 remote_required_min_rx_usec, int handling_wakeup) { - bs->remote_min_rx_us = remote_required_min_rx_us; - bs->remote_min_rx_clocks = bfd_us_to_clocks (bm, 
bs->remote_min_rx_us); - BFD_DBG ("Set remote min rx to %uus/%lu clocks/%.2fs", bs->remote_min_rx_us, - bs->remote_min_rx_clocks, bs->remote_min_rx_clocks / bm->cpu_cps); + bs->remote_min_rx_usec = remote_required_min_rx_usec; + bs->remote_min_rx_clocks = + bfd_usec_to_clocks (bm, remote_required_min_rx_usec); + BFD_DBG ("Set remote min rx to " BFD_CLK_FMT, + BFD_CLK_PRN (bs->remote_min_rx_clocks)); bfd_recalc_detection_time (bm, bs); bfd_recalc_tx_interval (bm, bs); bfd_calc_next_tx (bm, bs, now); @@ -316,32 +338,6 @@ bfd_del_session (uword bs_idx) return 0; } -const char * -bfd_diag_code_string (bfd_diag_code_e diag) -{ -#define F(n, t, s) \ - case BFD_DIAG_CODE_NAME (t): \ - return s; - switch (diag) - { - foreach_bfd_diag_code (F)} - return "UNKNOWN"; -#undef F -} - -const char * -bfd_state_string (bfd_state_e state) -{ -#define F(n, t, s) \ - case BFD_STATE_NAME (t): \ - return s; - switch (state) - { - foreach_bfd_state (F)} - return "UNKNOWN"; -#undef F -} - void bfd_session_set_flags (bfd_session_t * bs, u8 admin_up_down) { @@ -404,30 +400,62 @@ bfd_on_state_change (bfd_main_t * bm, bfd_session_t * bs, u64 now, switch (bs->local_state) { case BFD_STATE_admin_down: - bfd_set_desired_min_tx (bm, bs, now, - clib_max (bs->config_desired_min_tx_us, - BFD_DEFAULT_DESIRED_MIN_TX_US), - handling_wakeup); + bfd_set_effective_required_min_rx (bm, bs, now, + bs->config_required_min_rx_clocks, + handling_wakeup); + bfd_set_effective_desired_min_tx (bm, bs, now, + clib_max + (bs->config_desired_min_tx_clocks, + bm->default_desired_min_tx_clocks), + handling_wakeup); break; case BFD_STATE_down: - bfd_set_desired_min_tx (bm, bs, now, - clib_max (bs->config_desired_min_tx_us, - BFD_DEFAULT_DESIRED_MIN_TX_US), - handling_wakeup); + bfd_set_effective_required_min_rx (bm, bs, now, + bs->config_required_min_rx_clocks, + handling_wakeup); + bfd_set_effective_desired_min_tx (bm, bs, now, + clib_max + (bs->config_desired_min_tx_clocks, + bm->default_desired_min_tx_clocks), + 
handling_wakeup); break; case BFD_STATE_init: - bfd_set_desired_min_tx (bm, bs, now, - clib_max (bs->config_desired_min_tx_us, - BFD_DEFAULT_DESIRED_MIN_TX_US), - handling_wakeup); + bfd_set_effective_desired_min_tx (bm, bs, now, + clib_max + (bs->config_desired_min_tx_clocks, + bm->default_desired_min_tx_clocks), + handling_wakeup); break; case BFD_STATE_up: - bfd_set_desired_min_tx (bm, bs, now, bs->config_desired_min_tx_us, - handling_wakeup); + if (POLL_NOT_NEEDED == bs->poll_state) + { + bfd_set_effective_required_min_rx (bm, bs, now, + bs->config_required_min_rx_clocks, + handling_wakeup); + } + bfd_set_effective_desired_min_tx (bm, bs, now, + bs->config_desired_min_tx_clocks, + handling_wakeup); break; } } +static void +bfd_on_config_change (vlib_main_t * vm, vlib_node_runtime_t * rt, + bfd_main_t * bm, bfd_session_t * bs, u64 now) +{ + if (bs->remote_demand) + { + /* TODO - initiate poll sequence here */ + } + else + { + /* asynchronous - poll is part of periodic - nothing to do here */ + } + bfd_recalc_detection_time (bm, bs); + bfd_set_timer (bm, bs, now, 0); +} + static void bfd_add_transport_layer (vlib_main_t * vm, vlib_buffer_t * b, bfd_session_t * bs) @@ -557,11 +585,10 @@ bfd_init_control_frame (vlib_buffer_t * b, bfd_session_t * bs) pkt->head.length = clib_host_to_net_u32 (bfd_length); pkt->my_disc = bs->local_discr; pkt->your_disc = bs->remote_discr; - pkt->des_min_tx = clib_host_to_net_u32 (bs->desired_min_tx_us); - pkt->req_min_rx = clib_host_to_net_u32 (bs->required_min_rx_us); - pkt->req_min_echo_rx = clib_host_to_net_u32 (bs->required_min_echo_rx_us); + pkt->des_min_tx = clib_host_to_net_u32 (bs->config_desired_min_tx_usec); + pkt->req_min_rx = clib_host_to_net_u32 (bs->config_required_min_rx_usec); + pkt->req_min_echo_rx = clib_host_to_net_u32 (1); b->current_length = bfd_length; - bfd_add_auth_section (b, bs); } static void @@ -569,7 +596,7 @@ bfd_send_periodic (vlib_main_t * vm, vlib_node_runtime_t * rt, bfd_main_t * bm, bfd_session_t * 
bs, u64 now, int handling_wakeup) { - if (!bs->remote_min_rx_us) + if (!bs->remote_min_rx_usec) { BFD_DBG ("bfd.RemoteMinRxInterval is zero, not sending periodic control " @@ -593,6 +620,14 @@ bfd_send_periodic (vlib_main_t * vm, vlib_node_runtime_t * rt, return; } bfd_init_control_frame (b, bs); + if (POLL_NOT_NEEDED != bs->poll_state) + { + /* here we are either beginning a new poll sequence or retrying .. */ + bfd_pkt_set_poll (vlib_buffer_get_current (b)); + bs->poll_state = POLL_IN_PROGRESS; + BFD_DBG ("Setting poll bit in packet, bs_idx=%u", bs->bs_idx); + } + bfd_add_auth_section (b, bs); bfd_add_transport_layer (vm, b, bs); bs->last_tx_clocks = now; bfd_calc_next_tx (bm, bs, now); @@ -613,8 +648,14 @@ bfd_init_final_control_frame (vlib_main_t * vm, vlib_buffer_t * b, BFD_DBG ("Send final control frame for bs_idx=%lu", bs->bs_idx); bfd_init_control_frame (b, bs); bfd_pkt_set_final (vlib_buffer_get_current (b)); + bfd_add_auth_section (b, bs); bfd_add_transport_layer (vm, b, bs); bs->last_tx_clocks = clib_cpu_time_now (); + /* + * RFC allows to include changes in final frame, so if there were any + * pending, we already did that, thus we can clear any pending poll needs + */ + bs->poll_state = POLL_NOT_NEEDED; } static void @@ -629,6 +670,14 @@ bfd_check_rx_timeout (bfd_main_t * bm, bfd_session_t * bs, u64 now, BFD_DBG ("Rx timeout, session goes down"); bfd_set_diag (bs, BFD_DIAG_CODE_det_time_exp); bfd_set_state (bm, bs, BFD_STATE_down, handling_wakeup); + /* + * If the remote system does not receive any + * BFD Control packets for a Detection Time, it SHOULD reset + * bfd.RemoteMinRxInterval to its initial value of 1 (per section 6.8.1, + * since it is no longer required to maintain previous session state) + * and then can transmit at its own rate. 
+ */ + bfd_set_remote_required_min_rx (bm, bs, now, 1, handling_wakeup); } } @@ -722,6 +771,19 @@ bfd_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) (u32) * event_data); } break; + case BFD_EVENT_CONFIG_CHANGED: + if (!pool_is_free_index (bm->sessions, *event_data)) + { + bfd_session_t *bs = + pool_elt_at_index (bm->sessions, *event_data); + bfd_on_config_change (vm, rt, bm, bs, now); + } + else + { + BFD_DBG ("Ignoring event for non-existent session index %u", + (u32) * event_data); + } + break; default: clib_warning ("BUG: event type 0x%wx", event_type); break; @@ -810,6 +872,8 @@ bfd_main_init (vlib_main_t * vm) memset (&bm->wheel, 0, sizeof (bm->wheel)); bm->cpu_cps = vm->clib_time.clocks_per_second; BFD_DBG ("cps is %.2f", bm->cpu_cps); + bm->default_desired_min_tx_clocks = + bfd_usec_to_clocks (bm, BFD_DEFAULT_DESIRED_MIN_TX_US); const u64 now = clib_cpu_time_now (); timing_wheel_init (&bm->wheel, now, bm->cpu_cps); bm->wheel_inaccuracy = 2 << bm->wheel.log2_clocks_per_bin; @@ -1283,7 +1347,8 @@ bfd_consume_pkt (bfd_main_t * bm, const bfd_pkt_t * pkt, u32 bs_idx) while (0); } } - bs->remote_desired_min_tx_us = clib_net_to_host_u32 (pkt->des_min_tx); + bs->remote_desired_min_tx_clocks = + bfd_usec_to_clocks (bm, clib_net_to_host_u32 (pkt->des_min_tx)); bs->remote_detect_mult = pkt->head.detect_mult; bfd_set_remote_required_min_rx (bm, bs, now, clib_net_to_host_u32 (pkt->req_min_rx), 0); @@ -1297,6 +1362,18 @@ bfd_consume_pkt (bfd_main_t * bm, const bfd_pkt_t * pkt, u32 bs_idx) */ /* FIXME 6.8.2 */ /* FIXME 6.8.4 */ + if (bs->poll_state == POLL_IN_PROGRESS && bfd_pkt_get_final (pkt)) + { + bs->poll_state = POLL_NOT_NEEDED; + BFD_DBG ("Poll sequence terminated, bs_idx=%u", bs->bs_idx); + if (BFD_STATE_up == bs->local_state) + { + bfd_set_effective_required_min_rx (bm, bs, now, + bs->config_required_min_rx_clocks, + 0); + bfd_recalc_detection_time (bm, bs); + } + } if (BFD_STATE_admin_down == bs->local_state) return; if 
(BFD_STATE_admin_down == bs->remote_state) @@ -1333,6 +1410,20 @@ bfd_consume_pkt (bfd_main_t * bm, const bfd_pkt_t * pkt, u32 bs_idx) } } +static const char * +bfd_poll_state_string (bfd_poll_state_e state) +{ + switch (state) + { +#define F(x) \ + case POLL_##x: \ + return "POLL_" #x; + foreach_bfd_poll_state (F) +#undef F + } + return "UNKNOWN"; +} + u8 * format_bfd_session (u8 * s, va_list * args) { @@ -1353,17 +1444,18 @@ format_bfd_session (u8 * s, va_list * args) "remote-seq-num=%u, " "is-delayed=%s, " "curr-key=%U, " - "next-key=%U}", + "next-key=%U}," + "poll-state: %s", bs->bs_idx, bfd_state_string (bs->local_state), bfd_state_string (bs->remote_state), bs->local_discr, bs->remote_discr, bfd_diag_code_string (bs->local_diag), - bs->desired_min_tx_us, bs->required_min_rx_us, - bs->required_min_echo_rx_us, bs->remote_min_rx_us, - (bs->local_demand ? "yes" : "no"), + bs->config_desired_min_tx_usec, bs->config_required_min_rx_usec, + 1, bs->remote_min_rx_usec, (bs->local_demand ? "yes" : "no"), (bs->remote_demand ? "yes" : "no"), bs->local_detect_mult, bs->auth.local_seq_number, bs->auth.remote_seq_number, (bs->auth.is_delayed ? 
"yes" : "no"), format_bfd_auth_key, - bs->auth.curr_key, format_bfd_auth_key, bs->auth.next_key); + bs->auth.curr_key, format_bfd_auth_key, bs->auth.next_key, + bfd_poll_state_string (bs->poll_state)); return s; } @@ -1462,6 +1554,62 @@ bfd_auth_deactivate (bfd_session_t * bs, u8 is_delayed) #endif } +vnet_api_error_t +bfd_session_set_params (bfd_main_t * bm, bfd_session_t * bs, + u32 desired_min_tx_usec, + u32 required_min_rx_usec, u8 detect_mult) +{ + if (bs->local_detect_mult != detect_mult || + bs->config_desired_min_tx_usec != desired_min_tx_usec || + bs->config_required_min_rx_usec != required_min_rx_usec) + { + BFD_DBG ("Changing session params: %U", format_bfd_session, bs); + switch (bs->poll_state) + { + case POLL_NOT_NEEDED: + if (BFD_STATE_up == bs->local_state || + BFD_STATE_init == bs->local_state) + { + /* poll sequence is not needed for detect multiplier change */ + if (bs->config_desired_min_tx_usec != desired_min_tx_usec || + bs->config_required_min_rx_usec != required_min_rx_usec) + { + bs->poll_state = POLL_NEEDED; + BFD_DBG ("Set poll state=%s, bs_idx=%u", + bfd_poll_state_string (bs->poll_state), + bs->bs_idx); + } + } + break; + case POLL_NEEDED: + /* nothing to do */ + break; + case POLL_IN_PROGRESS: + /* can't change params now ... 
*/ + BFD_ERR ("Poll in progress, cannot change params for session with " + "bs_idx=%u", bs->bs_idx); + return VNET_API_ERROR_BFD_EAGAIN; + } + + bs->local_detect_mult = detect_mult; + bs->config_desired_min_tx_usec = desired_min_tx_usec; + bs->config_desired_min_tx_clocks = + bfd_usec_to_clocks (bm, desired_min_tx_usec); + bs->config_required_min_rx_usec = required_min_rx_usec; + bs->config_required_min_rx_clocks = + bfd_usec_to_clocks (bm, required_min_rx_usec); + BFD_DBG ("Changed session params: %U", format_bfd_session, bs); + + vlib_process_signal_event (bm->vlib_main, bm->bfd_process_node_index, + BFD_EVENT_CONFIG_CHANGED, bs->bs_idx); + } + else + { + BFD_DBG ("Ignore parameter change - no change, bs_idx=%u", bs->bs_idx); + } + return 0; +} + bfd_main_t bfd_main; /* diff --git a/src/vnet/bfd/bfd_main.h b/src/vnet/bfd/bfd_main.h index b66b79e7..361ff0b7 100644 --- a/src/vnet/bfd/bfd_main.h +++ b/src/vnet/bfd/bfd_main.h @@ -64,6 +64,18 @@ typedef struct bfd_auth_type_e auth_type; } bfd_auth_key_t; +#define foreach_bfd_poll_state(F)\ + F(NOT_NEEDED)\ +F(NEEDED)\ +F(IN_PROGRESS) + +typedef enum +{ +#define F(x) POLL_##x, + foreach_bfd_poll_state (F) +#undef F +} bfd_poll_state_e; + typedef struct { /* index in bfd_main.sessions pool */ @@ -85,28 +97,34 @@ typedef struct u32 remote_discr; /* configured desired min tx interval (microseconds) */ - u32 config_desired_min_tx_us; + u32 config_desired_min_tx_usec; + + /* configured desired min tx interval (clocks) */ + u64 config_desired_min_tx_clocks; - /* desired min tx interval (microseconds) */ - u32 desired_min_tx_us; + /* effective desired min tx interval (clocks) */ + u64 effective_desired_min_tx_clocks; - /* desired min tx interval (clocks) */ - u64 desired_min_tx_clocks; + /* configured required min rx interval (microseconds) */ + u32 config_required_min_rx_usec; - /* required min rx interval (microseconds) */ - u32 required_min_rx_us; + /* configured required min rx interval (clocks) */ + u64 
config_required_min_rx_clocks; - /* required min echo rx interval (microseconds) */ - u32 required_min_echo_rx_us; + /* effective required min rx interval (clocks) */ + u64 effective_required_min_rx_clocks; /* remote min rx interval (microseconds) */ - u32 remote_min_rx_us; + u64 remote_min_rx_usec; /* remote min rx interval (clocks) */ u64 remote_min_rx_clocks; - /* remote desired min tx interval (microseconds) */ - u32 remote_desired_min_tx_us; + /* remote desired min tx interval (clocks) */ + u64 remote_desired_min_tx_clocks; + + /* configured detect multiplier */ + u8 local_detect_mult; /* 1 if in demand mode, 0 otherwise */ u8 local_demand; @@ -114,9 +132,6 @@ typedef struct /* 1 if remote system sets demand mode, 0 otherwise */ u8 remote_demand; - /* local detect multiplier */ - u8 local_detect_mult; - /* remote detect multiplier */ u8 remote_detect_mult; @@ -138,6 +153,9 @@ typedef struct /* detection time */ u64 detection_time_clocks; + /* state info regarding poll sequence */ + bfd_poll_state_e poll_state; + /* authentication information */ struct { @@ -175,6 +193,7 @@ typedef struct /* transport type for this session */ bfd_transport_t transport; + /* union of transport-specific data */ union { bfd_udp_session_t udp; @@ -205,6 +224,9 @@ typedef struct /* cpu clocks per second */ f64 cpu_cps; + /* default desired min tx in clocks */ + u64 default_desired_min_tx_clocks; + /* for generating random numbers */ u32 random_seed; @@ -243,6 +265,7 @@ enum { BFD_EVENT_RESCHEDULE = 1, BFD_EVENT_NEW_SESSION, + BFD_EVENT_CONFIG_CHANGED, } bfd_process_event_e; u8 *bfd_input_format_trace (u8 * s, va_list * args); @@ -265,6 +288,10 @@ unsigned bfd_auth_type_supported (bfd_auth_type_e auth_type); vnet_api_error_t bfd_auth_activate (bfd_session_t * bs, u32 conf_key_id, u8 bfd_key_id, u8 is_delayed); vnet_api_error_t bfd_auth_deactivate (bfd_session_t * bs, u8 is_delayed); +vnet_api_error_t +bfd_session_set_params (bfd_main_t * bm, bfd_session_t * bs, + u32 
desired_min_tx_usec, + u32 required_min_rx_usec, u8 detect_mult); #define USEC_PER_MS 1000LL #define USEC_PER_SECOND (1000 * USEC_PER_MS) diff --git a/src/vnet/bfd/bfd_protocol.c b/src/vnet/bfd/bfd_protocol.c index 180fc6df..92b226bd 100644 --- a/src/vnet/bfd/bfd_protocol.c +++ b/src/vnet/bfd/bfd_protocol.c @@ -150,6 +150,32 @@ bfd_auth_type_str (bfd_auth_type_e auth_type) return "UNKNOWN"; } +const char * +bfd_diag_code_string (bfd_diag_code_e diag) +{ +#define F(n, t, s) \ + case BFD_DIAG_CODE_NAME (t): \ + return s; + switch (diag) + { + foreach_bfd_diag_code (F)} + return "UNKNOWN"; +#undef F +} + +const char * +bfd_state_string (bfd_state_e state) +{ +#define F(n, t, s) \ + case BFD_STATE_NAME (t): \ + return s; + switch (state) + { + foreach_bfd_state (F)} + return "UNKNOWN"; +#undef F +} + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/bfd/bfd_udp.c b/src/vnet/bfd/bfd_udp.c index 443f4253..e21b887c 100644 --- a/src/vnet/bfd/bfd_udp.c +++ b/src/vnet/bfd/bfd_udp.c @@ -141,8 +141,8 @@ bfd_udp_key_init (bfd_udp_key_t * key, u32 sw_if_index, static vnet_api_error_t bfd_udp_add_session_internal (bfd_udp_main_t * bum, u32 sw_if_index, - u32 desired_min_tx_us, u32 required_min_rx_us, - u8 detect_mult, + u32 desired_min_tx_usec, + u32 required_min_rx_usec, u8 detect_mult, const ip46_address_t * local_addr, const ip46_address_t * peer_addr, bfd_session_t ** bs_out) @@ -189,12 +189,9 @@ bfd_udp_add_session_internal (bfd_udp_main_t * bum, u32 sw_if_index, "returns %d", format_ip46_address, &key->peer_addr, IP46_TYPE_ANY, key->sw_if_index, bus->adj_index); } - bs->config_desired_min_tx_us = desired_min_tx_us; - bs->required_min_rx_us = required_min_rx_us; - bs->required_min_echo_rx_us = required_min_rx_us; /* FIXME */ - bs->local_detect_mult = detect_mult; *bs_out = bs; - return 0; + return bfd_session_set_params (bum->bfd_main, bs, desired_min_tx_usec, + required_min_rx_usec, detect_mult); } static vnet_api_error_t @@ -298,8 +295,8 @@ 
bfd_udp_find_session_by_api_input (u32 sw_if_index, } static vnet_api_error_t -bfd_api_verify_common (u32 sw_if_index, u32 desired_min_tx_us, - u32 required_min_rx_us, u8 detect_mult, +bfd_api_verify_common (u32 sw_if_index, u32 desired_min_tx_usec, + u32 required_min_rx_usec, u8 detect_mult, const ip46_address_t * local_addr, const ip46_address_t * peer_addr) { @@ -314,9 +311,9 @@ bfd_api_verify_common (u32 sw_if_index, u32 desired_min_tx_us, clib_warning ("detect_mult < 1"); return VNET_API_ERROR_INVALID_ARGUMENT; } - if (desired_min_tx_us < 1) + if (desired_min_tx_usec < 1) { - clib_warning ("desired_min_tx_us < 1"); + clib_warning ("desired_min_tx_usec < 1"); return VNET_API_ERROR_INVALID_ARGUMENT; } return 0; @@ -334,22 +331,23 @@ bfd_udp_del_session_internal (bfd_session_t * bs) vnet_api_error_t bfd_udp_add_session (u32 sw_if_index, const ip46_address_t * local_addr, - const ip46_address_t * peer_addr, u32 desired_min_tx_us, - u32 required_min_rx_us, u8 detect_mult, - u8 is_authenticated, u32 conf_key_id, u8 bfd_key_id) + const ip46_address_t * peer_addr, + u32 desired_min_tx_usec, u32 required_min_rx_usec, + u8 detect_mult, u8 is_authenticated, u32 conf_key_id, + u8 bfd_key_id) { - vnet_api_error_t rv = bfd_api_verify_common (sw_if_index, desired_min_tx_us, - required_min_rx_us, - detect_mult, - local_addr, peer_addr); + vnet_api_error_t rv = + bfd_api_verify_common (sw_if_index, desired_min_tx_usec, + required_min_rx_usec, detect_mult, + local_addr, peer_addr); bfd_session_t *bs = NULL; if (!rv) { rv = bfd_udp_add_session_internal (&bfd_udp_main, sw_if_index, - desired_min_tx_us, required_min_rx_us, - detect_mult, local_addr, peer_addr, - &bs); + desired_min_tx_usec, + required_min_rx_usec, detect_mult, + local_addr, peer_addr, &bs); } if (!rv && is_authenticated) { @@ -373,6 +371,27 @@ bfd_udp_add_session (u32 sw_if_index, const ip46_address_t * local_addr, return rv; } +vnet_api_error_t +bfd_udp_mod_session (u32 sw_if_index, + const ip46_address_t * 
local_addr, + const ip46_address_t * peer_addr, + u32 desired_min_tx_usec, + u32 required_min_rx_usec, u8 detect_mult) +{ + bfd_session_t *bs = NULL; + vnet_api_error_t rv = + bfd_udp_find_session_by_api_input (sw_if_index, local_addr, peer_addr, + &bs); + if (rv) + { + return rv; + } + + return bfd_session_set_params (bfd_udp_main.bfd_main, bs, + desired_min_tx_usec, required_min_rx_usec, + detect_mult); +} + vnet_api_error_t bfd_udp_del_session (u32 sw_if_index, const ip46_address_t * local_addr, diff --git a/test/bfd.py b/test/bfd.py index 475a1707..dc6f9674 100644 --- a/test/bfd.py +++ b/test/bfd.py @@ -334,6 +334,25 @@ class VppBFDUDPSession(VppObject): is_ipv6=is_ipv6, is_delayed=is_delayed) + def modify_parameters(self, + detect_mult=None, + desired_min_tx=None, + required_min_rx=None): + if detect_mult: + self._detect_mult = detect_mult + if desired_min_tx: + self._desired_min_tx = desired_min_tx + if required_min_rx: + self._required_min_rx = required_min_rx + is_ipv6 = 1 if AF_INET6 == self.af else 0 + self.test.vapi.bfd_udp_mod(self._interface.sw_if_index, + self.desired_min_tx, + self.required_min_rx, + self.detect_mult, + self.local_addr_n, + self.peer_addr_n, + is_ipv6=is_ipv6) + def add_vpp_config(self): is_ipv6 = 1 if AF_INET6 == self.af else 0 bfd_key_id = self._bfd_key_id if self._sha1_key else None diff --git a/test/framework.py b/test/framework.py index 889a3046..4185dbfb 100644 --- a/test/framework.py +++ b/test/framework.py @@ -545,6 +545,10 @@ class VppTestCase(unittest.TestCase): name, real_value, expected_min, expected_max) self.assertTrue(expected_min <= real_value <= expected_max, msg) + def sleep(self, timeout): + self.logger.debug("Sleeping for %ss" % timeout) + time.sleep(timeout) + class VppTestResult(unittest.TestResult): """ diff --git a/test/test_bfd.py b/test/test_bfd.py index 5f861477..aedd56e4 100644 --- a/test/test_bfd.py +++ b/test/test_bfd.py @@ -1,5 +1,6 @@ #!/usr/bin/env python +from __future__ import division import 
unittest import hashlib import binascii @@ -81,6 +82,33 @@ class BFDAPITestCase(VppTestCase): self.logger.debug("Session state is %s" % str(session.state)) session.remove_vpp_config() + def test_mod_bfd(self): + """ modify BFD session parameters """ + session = VppBFDUDPSession(self, self.pg0, self.pg0.remote_ip4, + desired_min_tx=50000, + required_min_rx=10000, + detect_mult=1) + session.add_vpp_config() + e = session.get_bfd_udp_session_dump_entry() + self.assert_equal(session.desired_min_tx, + e.desired_min_tx, + "desired min transmit interval") + self.assert_equal(session.required_min_rx, + e.required_min_rx, + "required min receive interval") + self.assert_equal(session.detect_mult, e.detect_mult, "detect mult") + session.modify_parameters(desired_min_tx=session.desired_min_tx * 2, + required_min_rx=session.required_min_rx * 2, + detect_mult=session.detect_mult * 2) + e = session.get_bfd_udp_session_dump_entry() + self.assert_equal(session.desired_min_tx, + e.desired_min_tx, + "desired min transmit interval") + self.assert_equal(session.required_min_rx, + e.required_min_rx, + "required min receive interval") + self.assert_equal(session.detect_mult, e.detect_mult, "detect mult") + def test_add_sha1_keys(self): """ add SHA1 keys """ key_count = 10 @@ -194,6 +222,7 @@ class BFDAPITestCase(VppTestCase): session.deactivate_auth() def test_change_key(self): + """ change SHA1 key """ key1 = self.factory.create_random_key(self) key2 = self.factory.create_random_key(self) while key2.conf_key_id == key1.conf_key_id: @@ -273,10 +302,11 @@ class BFDTestSession(object): packet[BFD].auth_key_hash = hashlib.sha1(hash_material).digest() return packet - def send_packet(self): - p = self.create_packet() - self.test.logger.debug(ppp("Sending packet:", p)) - self.test.pg0.add_stream([p]) + def send_packet(self, packet=None): + if packet is None: + packet = self.create_packet() + self.test.logger.debug(ppp("Sending packet:", packet)) + self.test.pg0.add_stream([packet]) 
self.test.pg_start() def verify_sha1_auth(self, packet): @@ -521,11 +551,25 @@ class BFD4TestCase(VppTestCase, BFDCommonCode): e = self.vapi.wait_for_event(1, "bfd_udp_session_details") self.verify_event(e, expected_state=BFDState.up) - try: - p = self.pg0.wait_for_packet(timeout=1) - except: - return - raise Exception(ppp("Received unexpected BFD packet:", p)) + cap = 2 * self.vpp_session.desired_min_tx *\ + self.vpp_session.detect_mult + now = time.time() + count = 0 + # busy wait here, trying to collect a packet or event, vpp is not + # allowed to send packets and the session will timeout first - so the + # Up->Down event must arrive before any packets do + while time.time() < now + cap / us_in_sec: + try: + p, ttp = self.wait_for_bfd_packet(timeout=0) + self.logger.error(ppp("Received unexpected packet:", p)) + count += 1 + except: + pass + events = self.vapi.collect_events() + if len(events) > 0: + self.verify_event(events[0], BFDState.down) + break + self.assert_equal(count, 0, "number of packets received") def test_conn_down(self): """ verify session goes down after inactivity """ @@ -542,20 +586,27 @@ class BFD4TestCase(VppTestCase, BFDCommonCode): def test_large_required_min_rx(self): """ large remote RequiredMinRxInterval """ self.bfd_session_up() + self.wait_for_bfd_packet() interval = 3000000 self.test_session.update(required_min_rx_interval=interval) self.test_session.send_packet() now = time.time() count = 0 + # busy wait here, trying to collect a packet or event, vpp is not + # allowed to send packets and the session will timeout first - so the + # Up->Down event must arrive before any packets do while time.time() < now + interval / us_in_sec: try: - p = self.wait_for_bfd_packet() - if count > 1: - self.logger.error(ppp("Received unexpected packet:", p)) + p, ttp = self.wait_for_bfd_packet(timeout=0) + self.logger.error(ppp("Received unexpected packet:", p)) count += 1 except: pass - self.assert_in_range(count, 0, 1, "number of packets received") + 
events = self.vapi.collect_events() + if len(events) > 0: + self.verify_event(events[0], BFDState.down) + break + self.assert_equal(count, 0, "number of packets received") def test_immediate_remote_min_rx_reduce(self): """ immediately honor remote min rx reduction """ @@ -583,6 +634,93 @@ class BFD4TestCase(VppTestCase, BFDCommonCode): 1.10 * interval / us_in_sec, "time between BFD packets") + def test_modify_req_min_rx_double(self): + """ modify session - double required min rx """ + self.bfd_session_up() + self.wait_for_bfd_packet() + self.test_session.update(desired_min_tx_interval=10000, + required_min_rx_interval=10000) + self.test_session.send_packet() + # first double required min rx + self.vpp_session.modify_parameters( + required_min_rx=2 * self.vpp_session.required_min_rx) + p, ttp = self.wait_for_bfd_packet() + # poll bit needs to be set + self.assertIn("P", p.sprintf("%BFD.flags%"), + "Poll bit not set in BFD packet") + # finish poll sequence with final packet + final = self.test_session.create_packet() + final[BFD].flags = "F" + self.test_session.send_packet(final) + # now we can wait 0.9*3*req-min-rx and the session should still be up + self.sleep(0.9 * self.vpp_session.detect_mult * + self.vpp_session.required_min_rx / us_in_sec) + self.assert_equal(len(self.vapi.collect_events()), 0, + "number of bfd events") + + def test_modify_req_min_rx_halve(self): + """ modify session - halve required min rx """ + self.vpp_session.modify_parameters( + required_min_rx=2 * self.vpp_session.required_min_rx) + self.bfd_session_up() + self.wait_for_bfd_packet() + self.test_session.update(desired_min_tx_interval=10000, + required_min_rx_interval=10000) + self.test_session.send_packet() + p, ttp = self.wait_for_bfd_packet() + # halve required min rx + old_required_min_rx = self.vpp_session.required_min_rx + self.vpp_session.modify_parameters( + required_min_rx=0.5 * self.vpp_session.required_min_rx) + # now we wait 0.8*3*old-req-min-rx and the session should still be 
up + self.sleep(0.8 * self.vpp_session.detect_mult * + old_required_min_rx / us_in_sec) + self.assert_equal(len(self.vapi.collect_events()), 0, + "number of bfd events") + p, ttp = self.wait_for_bfd_packet() + # poll bit needs to be set + self.assertIn("P", p.sprintf("%BFD.flags%"), + "Poll bit not set in BFD packet") + # finish poll sequence with final packet + final = self.test_session.create_packet() + final[BFD].flags = "F" + self.test_session.send_packet(final) + # now the session should time out under new conditions + before = time.time() + e = self.vapi.wait_for_event(1, "bfd_udp_session_details") + after = time.time() + detection_time = self.vpp_session.detect_mult *\ + self.vpp_session.required_min_rx / us_in_sec + self.assert_in_range(after - before, + 0.9 * detection_time, + 1.1 * detection_time, + "time before bfd session goes down") + self.verify_event(e, expected_state=BFDState.down) + + def test_modify_des_min_tx(self): + """ modify desired min tx interval """ + pass + + def test_modify_detect_mult(self): + """ modify detect multiplier """ + self.bfd_session_up() + self.vpp_session.modify_parameters(detect_mult=1) + p, ttp = self.wait_for_bfd_packet() + self.assert_equal(self.vpp_session.detect_mult, + p[BFD].detect_mult, + "detect mult") + # poll bit must not be set + self.assertNotIn("P", p.sprintf("%BFD.flags%"), + "Poll bit not set in BFD packet") + self.vpp_session.modify_parameters(detect_mult=10) + p, ttp = self.wait_for_bfd_packet() + self.assert_equal(self.vpp_session.detect_mult, + p[BFD].detect_mult, + "detect mult") + # poll bit must not be set + self.assertNotIn("P", p.sprintf("%BFD.flags%"), + "Poll bit not set in BFD packet") + class BFD6TestCase(VppTestCase, BFDCommonCode): """Bidirectional Forwarding Detection (BFD) (IPv6) """ diff --git a/test/vpp_papi_provider.py b/test/vpp_papi_provider.py index 2cd02cc7..39efa9e4 100644 --- a/test/vpp_papi_provider.py +++ b/test/vpp_papi_provider.py @@ -1023,6 +1023,19 @@ class 
VppPapiProvider(object): 'conf_key_id': conf_key_id, }) + def bfd_udp_mod(self, sw_if_index, desired_min_tx, required_min_rx, + detect_mult, local_addr, peer_addr, is_ipv6=0): + return self.api(self.papi.bfd_udp_mod, + { + 'sw_if_index': sw_if_index, + 'desired_min_tx': desired_min_tx, + 'required_min_rx': required_min_rx, + 'local_addr': local_addr, + 'peer_addr': peer_addr, + 'is_ipv6': is_ipv6, + 'detect_mult': detect_mult, + }) + def bfd_udp_auth_activate(self, sw_if_index, local_addr, peer_addr, is_ipv6=0, bfd_key_id=None, conf_key_id=None, is_delayed=False): -- cgit 1.2.3-korg From 6f96649d1b6ade7e4730a3b3d0f479c33fd4d764 Mon Sep 17 00:00:00 2001 From: Klement Sekera Date: Wed, 8 Feb 2017 07:42:08 +0100 Subject: BFD: minor fixes Change-Id: I1c93f96a752eb2ffd1117a656552131cde1fa489 Signed-off-by: Klement Sekera --- src/vnet/bfd/bfd_main.c | 2 +- src/vnet/bfd/bfd_udp.c | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'src/vnet/bfd') diff --git a/src/vnet/bfd/bfd_main.c b/src/vnet/bfd/bfd_main.c index 798d0631..0c5f1986 100644 --- a/src/vnet/bfd/bfd_main.c +++ b/src/vnet/bfd/bfd_main.c @@ -526,8 +526,8 @@ bfd_add_sha1_auth_section (vlib_buffer_t * b, bfd_session_t * bs) SHA1 ((unsigned char *) pkt, sizeof (*pkt), hash); BFD_DBG ("hashing: %U", format_hex_bytes, pkt, sizeof (*pkt)); clib_memcpy (auth->hash, hash, sizeof (hash)); -#endif } +#endif static void bfd_add_auth_section (vlib_buffer_t * b, bfd_session_t * bs) diff --git a/src/vnet/bfd/bfd_udp.c b/src/vnet/bfd/bfd_udp.c index e21b887c..e1ff8a9d 100644 --- a/src/vnet/bfd/bfd_udp.c +++ b/src/vnet/bfd/bfd_udp.c @@ -637,8 +637,7 @@ bfd_udp4_verify_transport (const ip4_header_t * ip4, expected_ttl); return BFD_UDP_ERROR_BAD; } - if (clib_net_to_host_u16 (udp->src_port) < 49152 || - clib_net_to_host_u16 (udp->src_port) > 65535) + if (clib_net_to_host_u16 (udp->src_port) < 49152) { BFD_ERR ("Invalid UDP src port %u, out of range <49152,65535>", udp->src_port); @@ -805,8 +804,7 @@ 
bfd_udp6_verify_transport (const ip6_header_t * ip6, ip6->hop_limit, expected_hop_limit); return BFD_UDP_ERROR_BAD; } - if (clib_net_to_host_u16 (udp->src_port) < 49152 || - clib_net_to_host_u16 (udp->src_port) > 65535) + if (clib_net_to_host_u16 (udp->src_port) < 49152) { BFD_ERR ("Invalid UDP src port %u, out of range <49152,65535>", udp->src_port); -- cgit 1.2.3-korg From 2bce0332d368901ea66c7e582119719757e37e42 Mon Sep 17 00:00:00 2001 From: Klement Sekera Date: Thu, 9 Feb 2017 06:03:46 +0100 Subject: BFD: set per session UDP source port per RFC Change-Id: Id294dbbd6499ae8221cc8143e1027adc08866ae6 Signed-off-by: Klement Sekera --- src/vnet/bfd/bfd_main.c | 9 ++------- src/vnet/bfd/bfd_main.h | 2 +- src/vnet/bfd/bfd_udp.c | 40 ++++++++++++++++++++++++++++++++++++---- src/vnet/bfd/bfd_udp.h | 8 ++++---- 4 files changed, 43 insertions(+), 16 deletions(-) (limited to 'src/vnet/bfd') diff --git a/src/vnet/bfd/bfd_main.c b/src/vnet/bfd/bfd_main.c index 0c5f1986..7e06962a 100644 --- a/src/vnet/bfd/bfd_main.c +++ b/src/vnet/bfd/bfd_main.c @@ -35,11 +35,6 @@ bfd_usec_to_clocks (const bfd_main_t * bm, u64 us) return bm->cpu_cps * ((f64) us / USEC_PER_SECOND); } -// static u64 bfd_clocks_to_usec (const bfd_main_t *bm, u64 clocks) -//{ -// return (clocks / bm->cpu_cps) * USEC_PER_SECOND; -//} - static vlib_node_registration_t bfd_process_node; /* set to 0 here, real values filled at startup */ @@ -464,11 +459,11 @@ bfd_add_transport_layer (vlib_main_t * vm, vlib_buffer_t * b, { case BFD_TRANSPORT_UDP4: BFD_DBG ("Transport bfd via udp4, bs_idx=%u", bs->bs_idx); - bfd_add_udp4_transport (vm, b, &bs->udp); + bfd_add_udp4_transport (vm, b, bs); break; case BFD_TRANSPORT_UDP6: BFD_DBG ("Transport bfd via udp6, bs_idx=%u", bs->bs_idx); - bfd_add_udp6_transport (vm, b, &bs->udp); + bfd_add_udp6_transport (vm, b, bs); break; } } diff --git a/src/vnet/bfd/bfd_main.h b/src/vnet/bfd/bfd_main.h index 361ff0b7..14a54d6f 100644 --- a/src/vnet/bfd/bfd_main.h +++ b/src/vnet/bfd/bfd_main.h 
@@ -76,7 +76,7 @@ typedef enum #undef F } bfd_poll_state_e; -typedef struct +typedef struct bfd_session_s { /* index in bfd_main.sessions pool */ u32 bs_idx; diff --git a/src/vnet/bfd/bfd_udp.c b/src/vnet/bfd/bfd_udp.c index e1ff8a9d..75b35974 100644 --- a/src/vnet/bfd/bfd_udp.c +++ b/src/vnet/bfd/bfd_udp.c @@ -1,3 +1,17 @@ +/* + * Copyright (c) 2011-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #include #include #include @@ -31,10 +45,25 @@ static vlib_node_registration_t bfd_udp6_input_node; bfd_udp_main_t bfd_udp_main; +static u16 +bfd_udp_bs_idx_to_sport (u32 bs_idx) +{ + /* The source port MUST be in the range 49152 through 65535. The same UDP + * source port number MUST be used for all BFD Control packets associated + * with a particular session. The source port number SHOULD be unique among + * all BFD sessions on the system. If more than 16384 BFD sessions are + * simultaneously active, UDP source port numbers MAY be reused on + * multiple sessions, but the number of distinct uses of the same UDP + * source port number SHOULD be minimized. 
+ */ + return 49152 + bs_idx % (65535 - 49152 + 1); +} + void bfd_add_udp4_transport (vlib_main_t * vm, vlib_buffer_t * b, - bfd_udp_session_t * bus) + const bfd_session_t * bs) { + const bfd_udp_session_t *bus = &bs->udp; const bfd_udp_key_t *key = &bus->key; b->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; @@ -55,7 +84,8 @@ bfd_add_udp4_transport (vlib_main_t * vm, vlib_buffer_t * b, headers->ip4.src_address.as_u32 = key->local_addr.ip4.as_u32; headers->ip4.dst_address.as_u32 = key->peer_addr.ip4.as_u32; - headers->udp.src_port = clib_host_to_net_u16 (50000); /* FIXME */ + headers->udp.src_port = + clib_host_to_net_u16 (bfd_udp_bs_idx_to_sport (bs->bs_idx)); headers->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_bfd4); /* fix ip length, checksum and udp length */ @@ -70,8 +100,9 @@ bfd_add_udp4_transport (vlib_main_t * vm, vlib_buffer_t * b, void bfd_add_udp6_transport (vlib_main_t * vm, vlib_buffer_t * b, - bfd_udp_session_t * bus) + const bfd_session_t * bs) { + const bfd_udp_session_t *bus = &bs->udp; const bfd_udp_key_t *key = &bus->key; b->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; @@ -95,7 +126,8 @@ bfd_add_udp6_transport (vlib_main_t * vm, vlib_buffer_t * b, clib_memcpy (&headers->ip6.dst_address, &key->peer_addr.ip6, sizeof (headers->ip6.dst_address)); - headers->udp.src_port = clib_host_to_net_u16 (50000); /* FIXME */ + headers->udp.src_port = + clib_host_to_net_u16 (bfd_udp_bs_idx_to_sport (bs->bs_idx)); headers->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_bfd6); /* fix ip payload length and udp length */ diff --git a/src/vnet/bfd/bfd_udp.h b/src/vnet/bfd/bfd_udp.h index 2cd89ca2..26e89851 100644 --- a/src/vnet/bfd/bfd_udp.h +++ b/src/vnet/bfd/bfd_udp.h @@ -23,8 +23,6 @@ #include #include -#define BFD_UDP_KEY_BODY - /* *INDENT-OFF* */ typedef CLIB_PACKED (struct { @@ -42,10 +40,12 @@ typedef struct adj_index_t adj_index; } bfd_udp_session_t; +struct bfd_session_s; + void bfd_add_udp4_transport (vlib_main_t * vm, vlib_buffer_t * b, - bfd_udp_session_t 
* bs); + const struct bfd_session_s *bs); void bfd_add_udp6_transport (vlib_main_t * vm, vlib_buffer_t * b, - bfd_udp_session_t * bs); + const struct bfd_session_s *bs); #endif /* __included_bfd_udp_h__ */ -- cgit 1.2.3-korg From d3ba515d99551ee87096931b34a5b8d4222b385d Mon Sep 17 00:00:00 2001 From: Klement Sekera Date: Tue, 14 Feb 2017 03:09:17 +0100 Subject: BFD: respect remote demand mode Change-Id: I5063d31f5305c848043afb32fcacff6e61aed79f Signed-off-by: Klement Sekera --- src/vnet/bfd/bfd_main.c | 55 +++- test/bfd.py | 46 ++- test/test_bfd.py | 837 +++++++++++++++++++++++++++++------------------- 3 files changed, 590 insertions(+), 348 deletions(-) (limited to 'src/vnet/bfd') diff --git a/src/vnet/bfd/bfd_main.c b/src/vnet/bfd/bfd_main.c index 7e06962a..8bb8de33 100644 --- a/src/vnet/bfd/bfd_main.c +++ b/src/vnet/bfd/bfd_main.c @@ -372,8 +372,10 @@ bfd_input_format_trace (u8 * s, va_list * args) if (t->len >= sizeof (bfd_pkt_t) && pkt->head.length >= sizeof (bfd_pkt_t)) { - s = format (s, " my discriminator: %u\n", pkt->my_disc); - s = format (s, " your discriminator: %u\n", pkt->your_disc); + s = format (s, " my discriminator: %u\n", + clib_net_to_host_u32 (pkt->my_disc)); + s = format (s, " your discriminator: %u\n", + clib_net_to_host_u32 (pkt->your_disc)); s = format (s, " desired min tx interval: %u\n", clib_net_to_host_u32 (pkt->des_min_tx)); s = format (s, " required min rx interval: %u\n", @@ -381,6 +383,33 @@ bfd_input_format_trace (u8 * s, va_list * args) s = format (s, " required min echo rx interval: %u", clib_net_to_host_u32 (pkt->req_min_echo_rx)); } + if (t->len >= sizeof (bfd_pkt_with_common_auth_t) && + pkt->head.length >= sizeof (bfd_pkt_with_common_auth_t) && + bfd_pkt_get_auth_present (pkt)) + { + const bfd_pkt_with_common_auth_t *with_auth = (void *) pkt; + const bfd_auth_common_t *common = &with_auth->common_auth; + s = format (s, "\n auth len: %u\n", common->len); + s = format (s, " auth type: %u:%s\n", common->type, + bfd_auth_type_str 
(common->type)); + if (t->len >= sizeof (bfd_pkt_with_sha1_auth_t) && + pkt->head.length >= sizeof (bfd_pkt_with_sha1_auth_t) && + (BFD_AUTH_TYPE_keyed_sha1 == common->type || + BFD_AUTH_TYPE_meticulous_keyed_sha1 == common->type)) + { + const bfd_pkt_with_sha1_auth_t *with_sha1 = (void *) pkt; + const bfd_auth_sha1_t *sha1 = &with_sha1->sha1_auth; + s = format (s, " seq num: %u\n", + clib_net_to_host_u32 (sha1->seq_num)); + s = format (s, " key id: %u\n", sha1->key_id); + s = format (s, " hash: %U", format_hex_bytes, sha1->hash, + sizeof (sha1->hash)); + } + } + else + { + s = format (s, "\n"); + } } return s; @@ -598,17 +627,25 @@ bfd_send_periodic (vlib_main_t * vm, vlib_node_runtime_t * rt, "frame"); return; } - /* FIXME - A system MUST NOT periodically transmit BFD Control packets if Demand - mode is active on the remote system (bfd.RemoteDemandMode is 1, - bfd.SessionState is Up, and bfd.RemoteSessionState is Up) and a Poll - Sequence is not being transmitted. - */ + if (POLL_NOT_NEEDED == bs->poll_state && bs->remote_demand && + BFD_STATE_up == bs->local_state && BFD_STATE_up == bs->remote_state) + { + /* + * A system MUST NOT periodically transmit BFD Control packets if Demand + * mode is active on the remote system (bfd.RemoteDemandMode is 1, + * bfd.SessionState is Up, and bfd.RemoteSessionState is Up) and a Poll + * Sequence is not being transmitted. 
+ */ + BFD_DBG ("bfd.RemoteDemand is non-zero, not sending periodic control " + "frame"); + return; + } /* sometimes the wheel expires an event a bit sooner than requested, account for that here */ if (now + bm->wheel_inaccuracy >= bs->tx_timeout_clocks) { - BFD_DBG ("Send periodic control frame for bs_idx=%lu", bs->bs_idx); + BFD_DBG ("Send periodic control frame for bs_idx=%lu: %U", bs->bs_idx, + format_bfd_session, bs); vlib_buffer_t *b = bfd_create_frame_to_next_node (vm, bs); if (!b) { diff --git a/test/bfd.py b/test/bfd.py index dc6f9674..09a7681c 100644 --- a/test/bfd.py +++ b/test/bfd.py @@ -1,10 +1,13 @@ +""" BFD protocol implementation """ + from random import randint -from socket import AF_INET, AF_INET6 -from scapy.all import * -from scapy.packet import * -from scapy.fields import * -from framework import * -from vpp_object import * +from socket import AF_INET, AF_INET6, inet_pton +from scapy.all import bind_layers +from scapy.layers.inet import UDP +from scapy.packet import Packet +from scapy.fields import BitField, BitEnumField, XByteField, FlagsField,\ + ConditionalField, StrField +from vpp_object import VppObject from util import NumericConstant @@ -77,28 +80,34 @@ class BFDAuthType(NumericConstant): def bfd_is_auth_used(pkt): + """ is packet authenticated? """ return "A" in pkt.sprintf("%BFD.flags%") def bfd_is_simple_pwd_used(pkt): + """ is simple password authentication used? """ return bfd_is_auth_used(pkt) and pkt.auth_type == BFDAuthType.simple_pwd def bfd_is_sha1_used(pkt): + """ is sha1 authentication used? """ return bfd_is_auth_used(pkt) and pkt.auth_type in \ (BFDAuthType.keyed_sha1, BFDAuthType.meticulous_keyed_sha1) def bfd_is_md5_used(pkt): + """ is md5 authentication used? """ return bfd_is_auth_used(pkt) and pkt.auth_type in \ (BFDAuthType.keyed_md5, BFDAuthType.meticulous_keyed_md5) def bfd_is_md5_or_sha1_used(pkt): + """ is md5 or sha1 used? 
""" return bfd_is_md5_used(pkt) or bfd_is_sha1_used(pkt) class BFD(Packet): + """ BFD protocol layer for scapy """ udp_dport = 3784 #: BFD destination port per RFC 5881 udp_sport_min = 49152 #: BFD source port min value per RFC 5881 @@ -164,10 +173,12 @@ class VppBFDAuthKey(VppObject): @property def key(self): + """ key data """ return self._key @property def conf_key_id(self): + """ configuration key ID """ return self._conf_key_id def add_vpp_config(self): @@ -206,8 +217,12 @@ class VppBFDUDPSession(VppObject): self._interface = interface self._af = af self._local_addr = local_addr + if local_addr is not None: + self._local_addr_n = inet_pton(af, local_addr) + else: + self._local_addr_n = None self._peer_addr = peer_addr - self._peer_addr_n = socket.inet_pton(af, peer_addr) + self._peer_addr_n = inet_pton(af, peer_addr) self._desired_min_tx = desired_min_tx self._required_min_rx = required_min_rx self._detect_mult = detect_mult @@ -238,7 +253,7 @@ class VppBFDUDPSession(VppObject): elif self.af == AF_INET6: return self._interface.local_ip6 else: - raise Exception("Unexpected af %s' % af" % self.af) + raise Exception("Unexpected af '%s'" % self.af) return self._local_addr @property @@ -250,7 +265,7 @@ class VppBFDUDPSession(VppObject): elif self.af == AF_INET6: return self._interface.local_ip6n else: - raise Exception("Unexpected af %s' % af" % self.af) + raise Exception("Unexpected af '%s'" % self.af) return self._local_addr_n @property @@ -264,6 +279,7 @@ class VppBFDUDPSession(VppObject): return self._peer_addr_n def get_bfd_udp_session_dump_entry(self): + """ get the namedtuple entry from bfd udp session dump """ result = self.test.vapi.bfd_udp_session_dump() for s in result: self.test.logger.debug("session entry: %s" % str(s)) @@ -285,31 +301,36 @@ class VppBFDUDPSession(VppObject): """ BFD session state """ session = self.get_bfd_udp_session_dump_entry() if session is None: - raise Exception("Could not find BFD session in VPP response: %s" % - repr(result)) 
+ raise Exception("Could not find BFD session in VPP response") return session.state @property def desired_min_tx(self): + """ desired minimum tx interval """ return self._desired_min_tx @property def required_min_rx(self): + """ required minimum rx interval """ return self._required_min_rx @property def detect_mult(self): + """ detect multiplier """ return self._detect_mult @property def sha1_key(self): + """ sha1 key """ return self._sha1_key @property def bfd_key_id(self): + """ bfd key id in use """ return self._bfd_key_id def activate_auth(self, key, bfd_key_id=None, delayed=False): + """ activate authentication for this session """ self._bfd_key_id = bfd_key_id if bfd_key_id else randint(0, 255) self._sha1_key = key is_ipv6 = 1 if AF_INET6 == self.af else 0 @@ -324,6 +345,7 @@ class VppBFDUDPSession(VppObject): is_delayed=is_delayed) def deactivate_auth(self, delayed=False): + """ deactivate authentication """ self._bfd_key_id = None self._sha1_key = None is_delayed = 1 if delayed else 0 @@ -338,6 +360,7 @@ class VppBFDUDPSession(VppObject): detect_mult=None, desired_min_tx=None, required_min_rx=None): + """ modify session parameters """ if detect_mult: self._detect_mult = detect_mult if desired_min_tx: @@ -389,6 +412,7 @@ class VppBFDUDPSession(VppObject): return self.object_id() def admin_up(self): + """ set bfd session admin-up """ is_ipv6 = 1 if AF_INET6 == self._af else 0 self.test.vapi.bfd_udp_session_set_flags(1, self._interface.sw_if_index, diff --git a/test/test_bfd.py b/test/test_bfd.py index b4f082a5..0ba0b46d 100644 --- a/test/test_bfd.py +++ b/test/test_bfd.py @@ -1,16 +1,23 @@ #!/usr/bin/env python +""" BFD tests """ from __future__ import division import unittest import hashlib import binascii import time -from random import randint -from bfd import * -from framework import * +from random import randint, shuffle +from socket import AF_INET, AF_INET6 +from scapy.layers.l2 import Ether +from scapy.layers.inet import UDP, IP +from 
scapy.layers.inet6 import IPv6 +from bfd import VppBFDAuthKey, BFD, BFDAuthType, VppBFDUDPSession, \ + BFDDiagCode, BFDState +from framework import VppTestCase, VppTestRunner +from vpp_pg_interface import CaptureTimeoutError from util import ppp -us_in_sec = 1000000 +USEC_IN_SEC = 1000000 class AuthKeyFactory(object): @@ -20,11 +27,12 @@ class AuthKeyFactory(object): self._conf_key_ids = {} def create_random_key(self, test, auth_type=BFDAuthType.keyed_sha1): + """ create a random key with unique conf key id """ conf_key_id = randint(0, 0xFFFFFFFF) while conf_key_id in self._conf_key_ids: conf_key_id = randint(0, 0xFFFFFFFF) self._conf_key_ids[conf_key_id] = 1 - key = str(bytearray([randint(0, 255) for j in range(randint(1, 20))])) + key = str(bytearray([randint(0, 255) for _ in range(randint(1, 20))])) return VppBFDAuthKey(test=test, auth_type=auth_type, conf_key_id=conf_key_id, key=key) @@ -32,6 +40,9 @@ class AuthKeyFactory(object): class BFDAPITestCase(VppTestCase): """Bidirectional Forwarding Detection (BFD) - API""" + pg0 = None + pg1 = None + @classmethod def setUpClass(cls): super(BFDAPITestCase, cls).setUpClass() @@ -55,10 +66,10 @@ class BFDAPITestCase(VppTestCase): """ create a BFD session """ session = VppBFDUDPSession(self, self.pg0, self.pg0.remote_ip4) session.add_vpp_config() - self.logger.debug("Session state is %s" % str(session.state)) + self.logger.debug("Session state is %s", session.state) session.remove_vpp_config() session.add_vpp_config() - self.logger.debug("Session state is %s" % str(session.state)) + self.logger.debug("Session state is %s", session.state) session.remove_vpp_config() def test_double_add(self): @@ -76,10 +87,10 @@ class BFDAPITestCase(VppTestCase): session = VppBFDUDPSession( self, self.pg0, self.pg0.remote_ip6, af=AF_INET6) session.add_vpp_config() - self.logger.debug("Session state is %s" % str(session.state)) + self.logger.debug("Session state is %s", session.state) session.remove_vpp_config() session.add_vpp_config() - 
self.logger.debug("Session state is %s" % str(session.state)) + self.logger.debug("Session state is %s", session.state) session.remove_vpp_config() def test_mod_bfd(self): @@ -89,25 +100,25 @@ class BFDAPITestCase(VppTestCase): required_min_rx=10000, detect_mult=1) session.add_vpp_config() - e = session.get_bfd_udp_session_dump_entry() + s = session.get_bfd_udp_session_dump_entry() self.assert_equal(session.desired_min_tx, - e.desired_min_tx, + s.desired_min_tx, "desired min transmit interval") self.assert_equal(session.required_min_rx, - e.required_min_rx, + s.required_min_rx, "required min receive interval") - self.assert_equal(session.detect_mult, e.detect_mult, "detect mult") + self.assert_equal(session.detect_mult, s.detect_mult, "detect mult") session.modify_parameters(desired_min_tx=session.desired_min_tx * 2, required_min_rx=session.required_min_rx * 2, detect_mult=session.detect_mult * 2) - e = session.get_bfd_udp_session_dump_entry() + s = session.get_bfd_udp_session_dump_entry() self.assert_equal(session.desired_min_tx, - e.desired_min_tx, + s.desired_min_tx, "desired min transmit interval") self.assert_equal(session.required_min_rx, - e.required_min_rx, + s.required_min_rx, "required min receive interval") - self.assert_equal(session.detect_mult, e.detect_mult, "detect mult") + self.assert_equal(session.detect_mult, s.detect_mult, "detect mult") def test_add_sha1_keys(self): """ add SHA1 keys """ @@ -122,7 +133,7 @@ class BFDAPITestCase(VppTestCase): self.assertTrue(key.query_vpp_config()) # remove randomly indexes = range(key_count) - random.shuffle(indexes) + shuffle(indexes) removed = [] for i in indexes: key = keys[i] @@ -154,10 +165,10 @@ class BFDAPITestCase(VppTestCase): session = VppBFDUDPSession(self, self.pg0, self.pg0.remote_ip4, sha1_key=key) session.add_vpp_config() - self.logger.debug("Session state is %s" % str(session.state)) + self.logger.debug("Session state is %s", session.state) session.remove_vpp_config() session.add_vpp_config() - 
self.logger.debug("Session state is %s" % str(session.state)) + self.logger.debug("Session state is %s", session.state) session.remove_vpp_config() def test_double_add_sha1(self): @@ -170,7 +181,7 @@ class BFDAPITestCase(VppTestCase): with self.assertRaises(Exception): session.add_vpp_config() - def test_add_authenticated_with_nonexistent_key(self): + def test_add_auth_nonexistent_key(self): """ create BFD session using non-existent SHA1 (negative case) """ session = VppBFDUDPSession( self, self.pg0, self.pg0.remote_ip4, @@ -239,32 +250,94 @@ class BFDTestSession(object): """ BFD session as seen from test framework side """ def __init__(self, test, interface, af, detect_mult=3, sha1_key=None, - bfd_key_id=None, our_seq_number=0xFFFFFFFF - 4): + bfd_key_id=None, our_seq_number=None): self.test = test self.af = af self.sha1_key = sha1_key self.bfd_key_id = bfd_key_id self.interface = interface - self.udp_sport = 50000 - self.our_seq_number = our_seq_number + self.udp_sport = randint(49152, 65535) + if our_seq_number is None: + self.our_seq_number = randint(0, 40000000) + else: + self.our_seq_number = our_seq_number self.vpp_seq_number = None - self.bfd_values = { - 'my_discriminator': 0, - 'desired_min_tx_interval': 100000, - 'detect_mult': detect_mult, - 'diag': BFDDiagCode.no_diagnostic, - } + self.my_discriminator = 0 + self.desired_min_tx = 100000 + self.required_min_rx = 100000 + self.detect_mult = detect_mult + self.diag = BFDDiagCode.no_diagnostic + self.your_discriminator = None + self.state = BFDState.down + self.auth_type = BFDAuthType.no_auth def inc_seq_num(self): + """ increment sequence number, wrapping if needed """ if self.our_seq_number == 0xFFFFFFFF: self.our_seq_number = 0 else: self.our_seq_number += 1 - def update(self, **kwargs): - self.bfd_values.update(kwargs) + def update(self, my_discriminator=None, your_discriminator=None, + desired_min_tx=None, required_min_rx=None, detect_mult=None, + diag=None, state=None, auth_type=None): + """ update 
BFD parameters associated with session """ + if my_discriminator: + self.my_discriminator = my_discriminator + if your_discriminator: + self.your_discriminator = your_discriminator + if required_min_rx: + self.required_min_rx = required_min_rx + if desired_min_tx: + self.desired_min_tx = desired_min_tx + if detect_mult: + self.detect_mult = detect_mult + if diag: + self.diag = diag + if state: + self.state = state + if auth_type: + self.auth_type = auth_type + + def fill_packet_fields(self, packet): + """ set packet fields with known values in packet """ + bfd = packet[BFD] + if self.my_discriminator: + self.test.logger.debug("BFD: setting packet.my_discriminator=%s", + self.my_discriminator) + bfd.my_discriminator = self.my_discriminator + if self.your_discriminator: + self.test.logger.debug("BFD: setting packet.your_discriminator=%s", + self.your_discriminator) + bfd.your_discriminator = self.your_discriminator + if self.required_min_rx: + self.test.logger.debug( + "BFD: setting packet.required_min_rx_interval=%s", + self.required_min_rx) + bfd.required_min_rx_interval = self.required_min_rx + if self.desired_min_tx: + self.test.logger.debug( + "BFD: setting packet.desired_min_tx_interval=%s", + self.desired_min_tx) + bfd.desired_min_tx_interval = self.desired_min_tx + if self.detect_mult: + self.test.logger.debug( + "BFD: setting packet.detect_mult=%s", self.detect_mult) + bfd.detect_mult = self.detect_mult + if self.diag: + self.test.logger.debug("BFD: setting packet.diag=%s", self.diag) + bfd.diag = self.diag + if self.state: + self.test.logger.debug("BFD: setting packet.state=%s", self.state) + bfd.state = self.state + if self.auth_type: + # this is used by a negative test-case + self.test.logger.debug("BFD: setting packet.auth_type=%s", + self.auth_type) + bfd.auth_type = self.auth_type def create_packet(self): + """ create a BFD packet, reflecting the current state of session """ if self.sha1_key: bfd = BFD(flags="A") bfd.auth_type = self.sha1_key.auth_type 
@@ -291,9 +364,7 @@ class BFDTestSession(object): UDP(sport=self.udp_sport, dport=BFD.udp_dport) / bfd) self.test.logger.debug("BFD: Creating packet") - for name, value in self.bfd_values.iteritems(): - self.test.logger.debug("BFD: setting packet.%s=%s", name, value) - packet[BFD].setfieldval(name, value) + self.fill_packet_fields(packet) if self.sha1_key: hash_material = str(packet[BFD])[:32] + self.sha1_key.key + \ "\0" * (20 - len(self.sha1_key.key)) @@ -302,11 +373,14 @@ class BFDTestSession(object): packet[BFD].auth_key_hash = hashlib.sha1(hash_material).digest() return packet - def send_packet(self, packet=None): + def send_packet(self, packet=None, interface=None): + """ send packet on interface, creating the packet if needed """ if packet is None: packet = self.create_packet() + if interface is None: + interface = self.test.pg0 self.test.logger.debug(ppp("Sending packet:", packet)) - self.test.pg0.add_stream([packet]) + interface.add_stream(packet) self.test.pg_start() def verify_sha1_auth(self, packet): @@ -359,119 +433,146 @@ class BFDTestSession(object): bfd = packet[BFD] self.test.assert_equal(bfd.version, 1, "BFD version") self.test.assert_equal(bfd.your_discriminator, - self.bfd_values['my_discriminator'], + self.my_discriminator, "BFD - your discriminator") if self.sha1_key: self.verify_sha1_auth(packet) -class BFDCommonCode: - """Common code used by both IPv4 and IPv6 Test Cases""" - - def tearDown(self): - self.vapi.collect_events() # clear the event queue - if not self.vpp_dead: - self.vapi.want_bfd_events(enable_disable=0) - - def bfd_session_up(self): - """ Bring BFD session up """ - self.logger.info("BFD: Waiting for slow hello") - p, timeout = self.wait_for_bfd_packet(2) - self.logger.info("BFD: Sending Init") - self.test_session.update(my_discriminator=randint(0, 40000000), - your_discriminator=p[BFD].my_discriminator, - state=BFDState.init, - required_min_rx_interval=100000) - self.test_session.send_packet() - self.logger.info("BFD: Waiting 
for event") - e = self.vapi.wait_for_event(1, "bfd_udp_session_details") - self.verify_event(e, expected_state=BFDState.up) - self.logger.info("BFD: Session is Up") - self.test_session.update(state=BFDState.up) - self.assert_equal(self.vpp_session.state, BFDState.up, BFDState) - - def bfd_session_down(self): - """ Bring BFD session down """ - self.assert_equal(self.vpp_session.state, BFDState.up, BFDState) - self.test_session.update(state=BFDState.down) - self.test_session.send_packet() - self.logger.info("BFD: Waiting for event") - e = self.vapi.wait_for_event(1, "bfd_udp_session_details") - self.verify_event(e, expected_state=BFDState.down) - self.logger.info("BFD: Session is Down") - self.assert_equal(self.vpp_session.state, BFDState.down, BFDState) - - def verify_ip(self, packet): - """ Verify correctness of IP layer. """ - if self.vpp_session.af == AF_INET6: - ip = packet[IPv6] - local_ip = self.pg0.local_ip6 - remote_ip = self.pg0.remote_ip6 - self.assert_equal(ip.hlim, 255, "IPv6 hop limit") - else: - ip = packet[IP] - local_ip = self.pg0.local_ip4 - remote_ip = self.pg0.remote_ip4 - self.assert_equal(ip.ttl, 255, "IPv4 TTL") - self.assert_equal(ip.src, local_ip, "IP source address") - self.assert_equal(ip.dst, remote_ip, "IP destination address") - - def verify_udp(self, packet): - """ Verify correctness of UDP layer. """ - udp = packet[UDP] - self.assert_equal(udp.dport, BFD.udp_dport, "UDP destination port") - self.assert_in_range(udp.sport, BFD.udp_sport_min, BFD.udp_sport_max, - "UDP source port") - - def verify_event(self, event, expected_state): - """ Verify correctness of event values. 
""" - e = event - self.logger.debug("BFD: Event: %s" % repr(e)) - self.assert_equal(e.sw_if_index, - self.vpp_session.interface.sw_if_index, - "BFD interface index") - is_ipv6 = 0 - if self.vpp_session.af == AF_INET6: - is_ipv6 = 1 - self.assert_equal(e.is_ipv6, is_ipv6, "is_ipv6") - if self.vpp_session.af == AF_INET: - self.assert_equal(e.local_addr[:4], self.vpp_session.local_addr_n, - "Local IPv4 address") - self.assert_equal(e.peer_addr[:4], self.vpp_session.peer_addr_n, - "Peer IPv4 address") +def bfd_session_up(test): + """ Bring BFD session up """ + test.logger.info("BFD: Waiting for slow hello") + p = wait_for_bfd_packet(test, 2) + old_offset = None + if hasattr(test, 'vpp_clock_offset'): + old_offset = test.vpp_clock_offset + test.vpp_clock_offset = time.time() - p.time + test.logger.debug("BFD: Calculated vpp clock offset: %s", + test.vpp_clock_offset) + if old_offset: + test.assertAlmostEqual( + old_offset, test.vpp_clock_offset, delta=0.1, + msg="vpp clock offset not stable (new: %s, old: %s)" % + (test.vpp_clock_offset, old_offset)) + test.logger.info("BFD: Sending Init") + test.test_session.update(my_discriminator=randint(0, 40000000), + your_discriminator=p[BFD].my_discriminator, + state=BFDState.init) + test.test_session.send_packet() + test.logger.info("BFD: Waiting for event") + e = test.vapi.wait_for_event(1, "bfd_udp_session_details") + verify_event(test, e, expected_state=BFDState.up) + test.logger.info("BFD: Session is Up") + test.test_session.update(state=BFDState.up) + test.test_session.send_packet() + test.assert_equal(test.vpp_session.state, BFDState.up, BFDState) + + +def bfd_session_down(test): + """ Bring BFD session down """ + test.assert_equal(test.vpp_session.state, BFDState.up, BFDState) + test.test_session.update(state=BFDState.down) + test.test_session.send_packet() + test.logger.info("BFD: Waiting for event") + e = test.vapi.wait_for_event(1, "bfd_udp_session_details") + verify_event(test, e, expected_state=BFDState.down) + 
test.logger.info("BFD: Session is Down") + test.assert_equal(test.vpp_session.state, BFDState.down, BFDState) + + +def verify_ip(test, packet): + """ Verify correctness of IP layer. """ + if test.vpp_session.af == AF_INET6: + ip = packet[IPv6] + local_ip = test.pg0.local_ip6 + remote_ip = test.pg0.remote_ip6 + test.assert_equal(ip.hlim, 255, "IPv6 hop limit") + else: + ip = packet[IP] + local_ip = test.pg0.local_ip4 + remote_ip = test.pg0.remote_ip4 + test.assert_equal(ip.ttl, 255, "IPv4 TTL") + test.assert_equal(ip.src, local_ip, "IP source address") + test.assert_equal(ip.dst, remote_ip, "IP destination address") + + +def verify_udp(test, packet): + """ Verify correctness of UDP layer. """ + udp = packet[UDP] + test.assert_equal(udp.dport, BFD.udp_dport, "UDP destination port") + test.assert_in_range(udp.sport, BFD.udp_sport_min, BFD.udp_sport_max, + "UDP source port") + + +def verify_event(test, event, expected_state): + """ Verify correctness of event values. """ + e = event + test.logger.debug("BFD: Event: %s" % repr(e)) + test.assert_equal(e.sw_if_index, + test.vpp_session.interface.sw_if_index, + "BFD interface index") + is_ipv6 = 0 + if test.vpp_session.af == AF_INET6: + is_ipv6 = 1 + test.assert_equal(e.is_ipv6, is_ipv6, "is_ipv6") + if test.vpp_session.af == AF_INET: + test.assert_equal(e.local_addr[:4], test.vpp_session.local_addr_n, + "Local IPv4 address") + test.assert_equal(e.peer_addr[:4], test.vpp_session.peer_addr_n, + "Peer IPv4 address") + else: + test.assert_equal(e.local_addr, test.vpp_session.local_addr_n, + "Local IPv6 address") + test.assert_equal(e.peer_addr, test.vpp_session.peer_addr_n, + "Peer IPv6 address") + test.assert_equal(e.state, expected_state, BFDState) + + +def wait_for_bfd_packet(test, timeout=1, pcap_time_min=None): + """ wait for BFD packet and verify its correctness + + :param timeout: how long to wait + :param pcap_time_min: ignore packets with pcap timestamp lower than this + + :returns: tuple (packet, time spent waiting 
for packet) + """ + test.logger.info("BFD: Waiting for BFD packet") + deadline = time.time() + timeout + counter = 0 + while True: + counter += 1 + # sanity check + test.assert_in_range(counter, 0, 100, "number of packets ignored") + time_left = deadline - time.time() + if time_left < 0: + raise CaptureTimeoutError("Packet did not arrive within timeout") + p = test.pg0.wait_for_packet(timeout=time_left) + test.logger.debug(ppp("BFD: Got packet:", p)) + if pcap_time_min is not None and p.time < pcap_time_min: + test.logger.debug(ppp("BFD: ignoring packet (pcap time %s < " + "pcap time min %s):" % + (p.time, pcap_time_min), p)) else: - self.assert_equal(e.local_addr, self.vpp_session.local_addr_n, - "Local IPv6 address") - self.assert_equal(e.peer_addr, self.vpp_session.peer_addr_n, - "Peer IPv6 address") - self.assert_equal(e.state, expected_state, BFDState) - - def wait_for_bfd_packet(self, timeout=1): - """ wait for BFD packet - - :param timeout: how long to wait max - - :returns: tuple (packet, time spent waiting for packet) - """ - self.logger.info("BFD: Waiting for BFD packet") - before = time.time() - p = self.pg0.wait_for_packet(timeout=timeout) - after = time.time() - self.logger.debug(ppp("BFD: Got packet:", p)) - bfd = p[BFD] - if bfd is None: - raise Exception(ppp("Unexpected or invalid BFD packet:", p)) - if bfd.payload: - raise Exception(ppp("Unexpected payload in BFD packet:", bfd)) - self.verify_ip(p) - self.verify_udp(p) - self.test_session.verify_bfd(p) - return p, after - before - - -class BFD4TestCase(VppTestCase, BFDCommonCode): + break + bfd = p[BFD] + if bfd is None: + raise Exception(ppp("Unexpected or invalid BFD packet:", p)) + if bfd.payload: + raise Exception(ppp("Unexpected payload in BFD packet:", bfd)) + verify_ip(test, p) + verify_udp(test, p) + test.test_session.verify_bfd(p) + return p + + +class BFD4TestCase(VppTestCase): """Bidirectional Forwarding Detection (BFD)""" + pg0 = None + vpp_clock_offset = None + vpp_session = None + 
test_session = None + @classmethod def setUpClass(cls): super(BFD4TestCase, cls).setUpClass() @@ -490,7 +591,7 @@ class BFD4TestCase(VppTestCase, BFDCommonCode): super(BFD4TestCase, self).setUp() self.factory = AuthKeyFactory() self.vapi.want_bfd_events() - self.pg_enable_capture([self.pg0]) + self.pg0.enable_capture() try: self.vpp_session = VppBFDUDPSession(self, self.pg0, self.pg0.remote_ip4) @@ -502,182 +603,191 @@ class BFD4TestCase(VppTestCase, BFDCommonCode): raise def tearDown(self): - BFDCommonCode.tearDown(self) - VppTestCase.tearDown(self) + if not self.vpp_dead: + self.vapi.want_bfd_events(enable_disable=0) + self.vapi.collect_events() # clear the event queue + super(BFD4TestCase, self).tearDown() def test_session_up(self): """ bring BFD session up """ - self.bfd_session_up() + bfd_session_up(self) def test_session_down(self): """ bring BFD session down """ - self.bfd_session_up() - self.bfd_session_down() + bfd_session_up(self) + bfd_session_down(self) def test_hold_up(self): """ hold BFD session up """ - self.bfd_session_up() - for i in range(5): - self.wait_for_bfd_packet() + bfd_session_up(self) + for dummy in range(self.test_session.detect_mult * 2): + wait_for_bfd_packet(self) self.test_session.send_packet() self.assert_equal(len(self.vapi.collect_events()), 0, "number of bfd events") def test_slow_timer(self): """ verify slow periodic control frames while session down """ - expected_packets = 3 - self.logger.info("BFD: Waiting for %d BFD packets" % expected_packets) - self.wait_for_bfd_packet(2) - for i in range(expected_packets): - before = time.time() - self.wait_for_bfd_packet(2) - after = time.time() + packet_count = 3 + self.logger.info("BFD: Waiting for %d BFD packets", packet_count) + prev_packet = wait_for_bfd_packet(self, 2) + for dummy in range(packet_count): + next_packet = wait_for_bfd_packet(self, 2) + time_diff = next_packet.time - prev_packet.time # spec says the range should be <0.75, 1>, allow extra 0.05 margin # to work around 
timing issues self.assert_in_range( - after - before, 0.70, 1.05, "time between slow packets") - before = after + time_diff, 0.70, 1.05, "time between slow packets") + prev_packet = next_packet def test_zero_remote_min_rx(self): - """ no packets when zero BFD RemoteMinRxInterval """ - self.pg_enable_capture([self.pg0]) - p, timeout = self.wait_for_bfd_packet(2) - self.test_session.update(my_discriminator=randint(0, 40000000), - your_discriminator=p[BFD].my_discriminator, - state=BFDState.init, - required_min_rx_interval=0) + """ no packets when zero remote required min rx interval """ + bfd_session_up(self) + self.test_session.update(required_min_rx=0) self.test_session.send_packet() - e = self.vapi.wait_for_event(1, "bfd_udp_session_details") - self.verify_event(e, expected_state=BFDState.up) - cap = 2 * self.vpp_session.desired_min_tx *\ - self.vpp_session.detect_mult - now = time.time() + self.test_session.detect_mult + time_mark = time.time() count = 0 # busy wait here, trying to collect a packet or event, vpp is not # allowed to send packets and the session will timeout first - so the # Up->Down event must arrive before any packets do - while time.time() < now + cap / us_in_sec: + while time.time() < time_mark + cap / USEC_IN_SEC: try: - p, ttp = self.wait_for_bfd_packet(timeout=0) + p = wait_for_bfd_packet( + self, timeout=0, + pcap_time_min=time_mark - self.vpp_clock_offset) self.logger.error(ppp("Received unexpected packet:", p)) count += 1 - except: + except CaptureTimeoutError: pass events = self.vapi.collect_events() if len(events) > 0: - self.verify_event(events[0], BFDState.down) + verify_event(self, events[0], BFDState.down) break self.assert_equal(count, 0, "number of packets received") def test_conn_down(self): """ verify session goes down after inactivity """ - self.bfd_session_up() - self.wait_for_bfd_packet() - self.assert_equal(len(self.vapi.collect_events()), 0, - "number of bfd events") - self.wait_for_bfd_packet() - 
self.assert_equal(len(self.vapi.collect_events()), 0, - "number of bfd events") + bfd_session_up(self) + for dummy in range(self.test_session.detect_mult): + wait_for_bfd_packet(self) + self.assert_equal(len(self.vapi.collect_events()), 0, + "number of bfd events") e = self.vapi.wait_for_event(1, "bfd_udp_session_details") - self.verify_event(e, expected_state=BFDState.down) + verify_event(self, e, expected_state=BFDState.down) def test_large_required_min_rx(self): - """ large remote RequiredMinRxInterval """ - self.bfd_session_up() - self.wait_for_bfd_packet() + """ large remote required min rx interval """ + bfd_session_up(self) + p = wait_for_bfd_packet(self) interval = 3000000 - self.test_session.update(required_min_rx_interval=interval) + self.test_session.update(required_min_rx=interval) self.test_session.send_packet() - now = time.time() + time_mark = time.time() count = 0 # busy wait here, trying to collect a packet or event, vpp is not # allowed to send packets and the session will timeout first - so the # Up->Down event must arrive before any packets do - while time.time() < now + interval / us_in_sec: + while time.time() < time_mark + interval / USEC_IN_SEC: try: - p, ttp = self.wait_for_bfd_packet(timeout=0) + p = wait_for_bfd_packet(self, timeout=0) + # if vpp managed to send a packet before we did the session + # session update, then that's fine, ignore it + if p.time < time_mark - self.vpp_clock_offset: + continue self.logger.error(ppp("Received unexpected packet:", p)) count += 1 - except: + except CaptureTimeoutError: pass events = self.vapi.collect_events() if len(events) > 0: - self.verify_event(events[0], BFDState.down) + verify_event(self, events[0], BFDState.down) break self.assert_equal(count, 0, "number of packets received") - def test_immediate_remote_min_rx_reduce(self): - """ immediately honor remote min rx reduction """ + def test_immediate_remote_min_rx_reduction(self): + """ immediately honor remote required min rx reduction """ 
self.vpp_session.remove_vpp_config() self.vpp_session = VppBFDUDPSession( self, self.pg0, self.pg0.remote_ip4, desired_min_tx=10000) - self.pg_enable_capture([self.pg0]) + self.pg0.enable_capture() self.vpp_session.add_vpp_config() - self.test_session.update(desired_min_tx_interval=1000000, - required_min_rx_interval=1000000) - self.bfd_session_up() - self.wait_for_bfd_packet() - interval = 100000 - self.test_session.update(required_min_rx_interval=interval) + self.test_session.update(desired_min_tx=1000000, + required_min_rx=1000000) + bfd_session_up(self) + reference_packet = wait_for_bfd_packet(self) + time_mark = time.time() + interval = 300000 + self.test_session.update(required_min_rx=interval) self.test_session.send_packet() - p, ttp = self.wait_for_bfd_packet() - # allow extra 10% to work around timing issues, first packet is special - self.assert_in_range(ttp, 0, 1.10 * interval / us_in_sec, - "time between BFD packets") - p, ttp = self.wait_for_bfd_packet() - self.assert_in_range(ttp, .9 * .75 * interval / us_in_sec, - 1.10 * interval / us_in_sec, - "time between BFD packets") - p, ttp = self.wait_for_bfd_packet() - self.assert_in_range(ttp, .9 * .75 * interval / us_in_sec, - 1.10 * interval / us_in_sec, + extra_time = time.time() - time_mark + p = wait_for_bfd_packet(self) + # first packet is allowed to be late by time we spent doing the update + # calculated in extra_time + self.assert_in_range(p.time - reference_packet.time, + .95 * 0.75 * interval / USEC_IN_SEC, + 1.05 * interval / USEC_IN_SEC + extra_time, "time between BFD packets") + reference_packet = p + for dummy in range(3): + p = wait_for_bfd_packet(self) + diff = p.time - reference_packet.time + self.assert_in_range(diff, .95 * .75 * interval / USEC_IN_SEC, + 1.05 * interval / USEC_IN_SEC, + "time between BFD packets") + reference_packet = p def test_modify_req_min_rx_double(self): """ modify session - double required min rx """ - self.bfd_session_up() - self.wait_for_bfd_packet() - 
self.test_session.update(desired_min_tx_interval=10000, - required_min_rx_interval=10000) + bfd_session_up(self) + p = wait_for_bfd_packet(self) + self.test_session.update(desired_min_tx=10000, + required_min_rx=10000) self.test_session.send_packet() - # first double required min rx + # double required min rx self.vpp_session.modify_parameters( required_min_rx=2 * self.vpp_session.required_min_rx) - p, ttp = self.wait_for_bfd_packet() + p = wait_for_bfd_packet( + self, pcap_time_min=time.time() - self.vpp_clock_offset) # poll bit needs to be set self.assertIn("P", p.sprintf("%BFD.flags%"), "Poll bit not set in BFD packet") # finish poll sequence with final packet final = self.test_session.create_packet() final[BFD].flags = "F" + timeout = self.test_session.detect_mult * \ + max(self.test_session.desired_min_tx, + self.vpp_session.required_min_rx) / USEC_IN_SEC self.test_session.send_packet(final) - # now we can wait 0.9*3*req-min-rx and the session should still be up - self.sleep(0.9 * self.vpp_session.detect_mult * - self.vpp_session.required_min_rx / us_in_sec) - self.assert_equal(len(self.vapi.collect_events()), 0, - "number of bfd events") + time_mark = time.time() + e = self.vapi.wait_for_event(2 * timeout, "bfd_udp_session_details") + verify_event(self, e, expected_state=BFDState.down) + time_to_event = time.time() - time_mark + self.assert_in_range(time_to_event, .9 * timeout, + 1.1 * timeout, "session timeout") def test_modify_req_min_rx_halve(self): """ modify session - halve required min rx """ self.vpp_session.modify_parameters( required_min_rx=2 * self.vpp_session.required_min_rx) - self.bfd_session_up() - self.wait_for_bfd_packet() - self.test_session.update(desired_min_tx_interval=10000, - required_min_rx_interval=10000) + bfd_session_up(self) + p = wait_for_bfd_packet(self) + self.test_session.update(desired_min_tx=10000, + required_min_rx=10000) self.test_session.send_packet() - p, ttp = self.wait_for_bfd_packet() + p = wait_for_bfd_packet( + self, 
pcap_time_min=time.time() - self.vpp_clock_offset) # halve required min rx old_required_min_rx = self.vpp_session.required_min_rx self.vpp_session.modify_parameters( required_min_rx=0.5 * self.vpp_session.required_min_rx) # now we wait 0.8*3*old-req-min-rx and the session should still be up self.sleep(0.8 * self.vpp_session.detect_mult * - old_required_min_rx / us_in_sec) + old_required_min_rx / USEC_IN_SEC) self.assert_equal(len(self.vapi.collect_events()), 0, "number of bfd events") - p, ttp = self.wait_for_bfd_packet() + p = wait_for_bfd_packet(self) # poll bit needs to be set self.assertIn("P", p.sprintf("%BFD.flags%"), "Poll bit not set in BFD packet") @@ -690,12 +800,12 @@ class BFD4TestCase(VppTestCase, BFDCommonCode): e = self.vapi.wait_for_event(1, "bfd_udp_session_details") after = time.time() detection_time = self.vpp_session.detect_mult *\ - self.vpp_session.required_min_rx / us_in_sec + self.vpp_session.required_min_rx / USEC_IN_SEC self.assert_in_range(after - before, 0.9 * detection_time, 1.1 * detection_time, "time before bfd session goes down") - self.verify_event(e, expected_state=BFDState.down) + verify_event(self, e, expected_state=BFDState.down) def test_modify_des_min_tx(self): """ modify desired min tx interval """ @@ -703,9 +813,11 @@ class BFD4TestCase(VppTestCase, BFDCommonCode): def test_modify_detect_mult(self): """ modify detect multiplier """ - self.bfd_session_up() + bfd_session_up(self) + p = wait_for_bfd_packet(self) self.vpp_session.modify_parameters(detect_mult=1) - p, ttp = self.wait_for_bfd_packet() + p = wait_for_bfd_packet( + self, pcap_time_min=time.time() - self.vpp_clock_offset) self.assert_equal(self.vpp_session.detect_mult, p[BFD].detect_mult, "detect mult") @@ -713,7 +825,8 @@ class BFD4TestCase(VppTestCase, BFDCommonCode): self.assertNotIn("P", p.sprintf("%BFD.flags%"), "Poll bit not set in BFD packet") self.vpp_session.modify_parameters(detect_mult=10) - p, ttp = self.wait_for_bfd_packet() + p = wait_for_bfd_packet( + 
self, pcap_time_min=time.time() - self.vpp_clock_offset) self.assert_equal(self.vpp_session.detect_mult, p[BFD].detect_mult, "detect mult") @@ -721,10 +834,42 @@ class BFD4TestCase(VppTestCase, BFDCommonCode): self.assertNotIn("P", p.sprintf("%BFD.flags%"), "Poll bit not set in BFD packet") + def test_no_periodic_if_remote_demand(self): + """ no periodic frames outside poll sequence if remote demand set """ + self.test_session.update(detect_mult=10) + bfd_session_up(self) + demand = self.test_session.create_packet() + demand[BFD].flags = "D" + self.test_session.send_packet(demand) + transmit_time = 0.9 \ + * max(self.vpp_session.required_min_rx, + self.test_session.desired_min_tx) \ + / USEC_IN_SEC + count = 0 + for dummy in range(self.test_session.detect_mult): + time.sleep(transmit_time) + self.test_session.send_packet(demand) + try: + p = wait_for_bfd_packet(self, timeout=0) + self.logger.error(ppp("Received unexpected packet:", p)) + count += 1 + except CaptureTimeoutError: + pass + events = self.vapi.collect_events() + for e in events: + self.logger.error("Received unexpected event: %s", e) + self.assert_equal(count, 0, "number of packets received") + self.assert_equal(len(events), 0, "number of events received") + -class BFD6TestCase(VppTestCase, BFDCommonCode): +class BFD6TestCase(VppTestCase): """Bidirectional Forwarding Detection (BFD) (IPv6) """ + pg0 = None + vpp_clock_offset = None + vpp_session = None + test_session = None + @classmethod def setUpClass(cls): super(BFD6TestCase, cls).setUpClass() @@ -743,7 +888,7 @@ class BFD6TestCase(VppTestCase, BFDCommonCode): super(BFD6TestCase, self).setUp() self.factory = AuthKeyFactory() self.vapi.want_bfd_events() - self.pg_enable_capture([self.pg0]) + self.pg0.enable_capture() try: self.vpp_session = VppBFDUDPSession(self, self.pg0, self.pg0.remote_ip6, @@ -757,27 +902,34 @@ class BFD6TestCase(VppTestCase, BFDCommonCode): raise def tearDown(self): - BFDCommonCode.tearDown(self) - VppTestCase.tearDown(self) + if 
not self.vpp_dead: + self.vapi.want_bfd_events(enable_disable=0) + self.vapi.collect_events() # clear the event queue + super(BFD6TestCase, self).tearDown() def test_session_up(self): """ bring BFD session up """ - self.bfd_session_up() + bfd_session_up(self) def test_hold_up(self): """ hold BFD session up """ - self.bfd_session_up() - for i in range(5): - self.wait_for_bfd_packet() + bfd_session_up(self) + for dummy in range(self.test_session.detect_mult*2): + wait_for_bfd_packet(self) self.test_session.send_packet() self.assert_equal(len(self.vapi.collect_events()), 0, "number of bfd events") self.assert_equal(self.vpp_session.state, BFDState.up, BFDState) -class BFDSHA1TestCase(VppTestCase, BFDCommonCode): +class BFDSHA1TestCase(VppTestCase): """Bidirectional Forwarding Detection (BFD) (SHA1 auth) """ + pg0 = None + vpp_clock_offset = None + vpp_session = None + test_session = None + @classmethod def setUpClass(cls): super(BFDSHA1TestCase, cls).setUpClass() @@ -795,11 +947,13 @@ class BFDSHA1TestCase(VppTestCase, BFDCommonCode): super(BFDSHA1TestCase, self).setUp() self.factory = AuthKeyFactory() self.vapi.want_bfd_events() - self.pg_enable_capture([self.pg0]) + self.pg0.enable_capture() def tearDown(self): - BFDCommonCode.tearDown(self) - VppTestCase.tearDown(self) + if not self.vpp_dead: + self.vapi.want_bfd_events(enable_disable=0) + self.vapi.collect_events() # clear the event queue + super(BFDSHA1TestCase, self).tearDown() def test_session_up(self): """ bring BFD session up """ @@ -813,7 +967,7 @@ class BFDSHA1TestCase(VppTestCase, BFDCommonCode): self.test_session = BFDTestSession( self, self.pg0, AF_INET, sha1_key=key, bfd_key_id=self.vpp_session.bfd_key_id) - self.bfd_session_up() + bfd_session_up(self) def test_hold_up(self): """ hold BFD session up """ @@ -827,9 +981,9 @@ class BFDSHA1TestCase(VppTestCase, BFDCommonCode): self.test_session = BFDTestSession( self, self.pg0, AF_INET, sha1_key=key, bfd_key_id=self.vpp_session.bfd_key_id) - 
self.bfd_session_up() - for i in range(5): - self.wait_for_bfd_packet() + bfd_session_up(self) + for dummy in range(self.test_session.detect_mult*2): + wait_for_bfd_packet(self) self.test_session.send_packet() self.assert_equal(self.vpp_session.state, BFDState.up, BFDState) @@ -842,12 +996,14 @@ class BFDSHA1TestCase(VppTestCase, BFDCommonCode): self.pg0.remote_ip4, sha1_key=key) self.vpp_session.add_vpp_config() self.vpp_session.admin_up() + # specify sequence number so that it wraps self.test_session = BFDTestSession( self, self.pg0, AF_INET, sha1_key=key, - bfd_key_id=self.vpp_session.bfd_key_id) - self.bfd_session_up() - for i in range(5): - self.wait_for_bfd_packet() + bfd_key_id=self.vpp_session.bfd_key_id, + our_seq_number=0xFFFFFFFF - 4) + bfd_session_up(self) + for dummy in range(30): + wait_for_bfd_packet(self) self.test_session.inc_seq_num() self.test_session.send_packet() self.assert_equal(self.vpp_session.state, BFDState.up, BFDState) @@ -864,24 +1020,22 @@ class BFDSHA1TestCase(VppTestCase, BFDCommonCode): self.test_session = BFDTestSession( self, self.pg0, AF_INET, sha1_key=key, bfd_key_id=self.vpp_session.bfd_key_id) - self.bfd_session_up() - self.wait_for_bfd_packet() - self.test_session.send_packet() - self.assert_equal(len(self.vapi.collect_events()), 0, - "number of bfd events") - self.wait_for_bfd_packet() - self.test_session.send_packet() - self.assert_equal(len(self.vapi.collect_events()), 0, - "number of bfd events") - self.wait_for_bfd_packet() - self.test_session.send_packet() - self.wait_for_bfd_packet() + bfd_session_up(self) + detection_time = self.vpp_session.detect_mult *\ + self.vpp_session.required_min_rx / USEC_IN_SEC + session_timeout = time.time() + detection_time + while time.time() < session_timeout: + self.assert_equal(len(self.vapi.collect_events()), 0, + "number of bfd events") + wait_for_bfd_packet(self) + self.test_session.send_packet() + wait_for_bfd_packet(self) self.test_session.send_packet() e = 
self.vapi.collect_events() # session should be down now, because the sequence numbers weren't # updated self.assert_equal(len(e), 1, "number of bfd events") - self.verify_event(e[0], expected_state=BFDState.down) + verify_event(self, e[0], expected_state=BFDState.down) def execute_rogue_session_scenario(self, vpp_bfd_udp_session, legitimate_test_session, @@ -903,14 +1057,22 @@ class BFDSHA1TestCase(VppTestCase, BFDCommonCode): self.vpp_session.admin_up() self.test_session = legitimate_test_session # bring vpp session up - self.bfd_session_up() + bfd_session_up(self) # send packet from rogue session - rogue_test_session.bfd_values = self.test_session.bfd_values.copy() + rogue_test_session.update( + my_discriminator=self.test_session.my_discriminator, + your_discriminator=self.test_session.your_discriminator, + desired_min_tx=self.test_session.desired_min_tx, + required_min_rx=self.test_session.required_min_rx, + detect_mult=self.test_session.detect_mult, + diag=self.test_session.diag, + state=self.test_session.state, + auth_type=self.test_session.auth_type) if rogue_bfd_values: rogue_test_session.update(**rogue_bfd_values) rogue_test_session.update(state=BFDState.down) rogue_test_session.send_packet() - self.wait_for_bfd_packet() + wait_for_bfd_packet(self) self.assert_equal(self.vpp_session.state, BFDState.up, BFDState) def test_mismatch_auth(self): @@ -974,31 +1136,30 @@ class BFDSHA1TestCase(VppTestCase, BFDCommonCode): self.test_session = BFDTestSession( self, self.pg0, AF_INET, sha1_key=key, bfd_key_id=self.vpp_session.bfd_key_id, our_seq_number=0) - self.bfd_session_up() - # now we need to not respond for 2*detection_time (4 packets) - self.wait_for_bfd_packet() - self.assert_equal(len(self.vapi.collect_events()), 0, - "number of bfd events") - self.wait_for_bfd_packet() - self.assert_equal(len(self.vapi.collect_events()), 0, - "number of bfd events") - e = self.vapi.wait_for_event(1, "bfd_udp_session_details") - self.verify_event(e, 
expected_state=BFDState.down) + bfd_session_up(self) + # don't send any packets for 2*detection_time + detection_time = self.vpp_session.detect_mult *\ + self.vpp_session.required_min_rx / USEC_IN_SEC + self.sleep(detection_time, "simulating peer restart") + events = self.vapi.collect_events() + self.assert_equal(len(events), 1, "number of bfd events") + verify_event(self, events[0], expected_state=BFDState.down) self.test_session.update(state=BFDState.down) - self.wait_for_bfd_packet() - self.assert_equal(len(self.vapi.collect_events()), 0, - "number of bfd events") - self.wait_for_bfd_packet() - self.assert_equal(len(self.vapi.collect_events()), 0, - "number of bfd events") # reset sequence number self.test_session.our_seq_number = 0 - self.bfd_session_up() + self.test_session.vpp_seq_number = None + # now throw away any pending packets + self.pg0.enable_capture() + bfd_session_up(self) -class BFDAuthOnOffTestCase(VppTestCase, BFDCommonCode): +class BFDAuthOnOffTestCase(VppTestCase): """Bidirectional Forwarding Detection (BFD) (changing auth) """ + pg0 = None + vpp_session = None + test_session = None + @classmethod def setUpClass(cls): super(BFDAuthOnOffTestCase, cls).setUpClass() @@ -1016,10 +1177,13 @@ class BFDAuthOnOffTestCase(VppTestCase, BFDCommonCode): super(BFDAuthOnOffTestCase, self).setUp() self.factory = AuthKeyFactory() self.vapi.want_bfd_events() + self.pg0.enable_capture() def tearDown(self): - BFDCommonCode.tearDown(self) - VppTestCase.tearDown(self) + if not self.vpp_dead: + self.vapi.want_bfd_events(enable_disable=0) + self.vapi.collect_events() # clear the event queue + super(BFDAuthOnOffTestCase, self).tearDown() def test_auth_on_immediate(self): """ turn auth on without disturbing session state (immediate) """ @@ -1030,15 +1194,17 @@ class BFDAuthOnOffTestCase(VppTestCase, BFDCommonCode): self.vpp_session.add_vpp_config() self.vpp_session.admin_up() self.test_session = BFDTestSession(self, self.pg0, AF_INET) - self.bfd_session_up() - for i in 
range(5): - self.wait_for_bfd_packet() + bfd_session_up(self) + for dummy in range(self.test_session.detect_mult*2): + p = wait_for_bfd_packet(self) + self.assert_equal(p[BFD].state, BFDState.up, BFDState) self.test_session.send_packet() self.vpp_session.activate_auth(key) self.test_session.bfd_key_id = self.vpp_session.bfd_key_id self.test_session.sha1_key = key - for i in range(5): - self.wait_for_bfd_packet() + for dummy in range(self.test_session.detect_mult*2): + p = wait_for_bfd_packet(self) + self.assert_equal(p[BFD].state, BFDState.up, BFDState) self.test_session.send_packet() self.assert_equal(self.vpp_session.state, BFDState.up, BFDState) self.assert_equal(len(self.vapi.collect_events()), 0, @@ -1055,15 +1221,20 @@ class BFDAuthOnOffTestCase(VppTestCase, BFDCommonCode): self.test_session = BFDTestSession( self, self.pg0, AF_INET, sha1_key=key, bfd_key_id=self.vpp_session.bfd_key_id) - self.bfd_session_up() - for i in range(5): - self.wait_for_bfd_packet() + bfd_session_up(self) + # self.vapi.want_bfd_events(enable_disable=0) + for dummy in range(self.test_session.detect_mult*2): + p = wait_for_bfd_packet(self) + self.assert_equal(p[BFD].state, BFDState.up, BFDState) + self.test_session.inc_seq_num() self.test_session.send_packet() self.vpp_session.deactivate_auth() self.test_session.bfd_key_id = None self.test_session.sha1_key = None - for i in range(5): - self.wait_for_bfd_packet() + for dummy in range(self.test_session.detect_mult*2): + p = wait_for_bfd_packet(self) + self.assert_equal(p[BFD].state, BFDState.up, BFDState) + self.test_session.inc_seq_num() self.test_session.send_packet() self.assert_equal(self.vpp_session.state, BFDState.up, BFDState) self.assert_equal(len(self.vapi.collect_events()), 0, @@ -1082,15 +1253,17 @@ class BFDAuthOnOffTestCase(VppTestCase, BFDCommonCode): self.test_session = BFDTestSession( self, self.pg0, AF_INET, sha1_key=key1, bfd_key_id=self.vpp_session.bfd_key_id) - self.bfd_session_up() - for i in range(5): - 
self.wait_for_bfd_packet() + bfd_session_up(self) + for dummy in range(self.test_session.detect_mult*2): + p = wait_for_bfd_packet(self) + self.assert_equal(p[BFD].state, BFDState.up, BFDState) self.test_session.send_packet() self.vpp_session.activate_auth(key2) self.test_session.bfd_key_id = self.vpp_session.bfd_key_id self.test_session.sha1_key = key2 - for i in range(5): - self.wait_for_bfd_packet() + for dummy in range(self.test_session.detect_mult*2): + p = wait_for_bfd_packet(self) + self.assert_equal(p[BFD].state, BFDState.up, BFDState) self.test_session.send_packet() self.assert_equal(self.vpp_session.state, BFDState.up, BFDState) self.assert_equal(len(self.vapi.collect_events()), 0, @@ -1105,19 +1278,21 @@ class BFDAuthOnOffTestCase(VppTestCase, BFDCommonCode): self.vpp_session.add_vpp_config() self.vpp_session.admin_up() self.test_session = BFDTestSession(self, self.pg0, AF_INET) - self.bfd_session_up() - for i in range(5): - self.wait_for_bfd_packet() + bfd_session_up(self) + for dummy in range(self.test_session.detect_mult*2): + wait_for_bfd_packet(self) self.test_session.send_packet() self.vpp_session.activate_auth(key, delayed=True) - for i in range(5): - self.wait_for_bfd_packet() + for dummy in range(self.test_session.detect_mult*2): + p = wait_for_bfd_packet(self) + self.assert_equal(p[BFD].state, BFDState.up, BFDState) self.test_session.send_packet() self.test_session.bfd_key_id = self.vpp_session.bfd_key_id self.test_session.sha1_key = key self.test_session.send_packet() - for i in range(5): - self.wait_for_bfd_packet() + for dummy in range(self.test_session.detect_mult*2): + p = wait_for_bfd_packet(self) + self.assert_equal(p[BFD].state, BFDState.up, BFDState) self.test_session.send_packet() self.assert_equal(self.vpp_session.state, BFDState.up, BFDState) self.assert_equal(len(self.vapi.collect_events()), 0, @@ -1134,19 +1309,22 @@ class BFDAuthOnOffTestCase(VppTestCase, BFDCommonCode): self.test_session = BFDTestSession( self, self.pg0, 
AF_INET, sha1_key=key, bfd_key_id=self.vpp_session.bfd_key_id) - self.bfd_session_up() - for i in range(5): - self.wait_for_bfd_packet() + bfd_session_up(self) + for dummy in range(self.test_session.detect_mult*2): + p = wait_for_bfd_packet(self) + self.assert_equal(p[BFD].state, BFDState.up, BFDState) self.test_session.send_packet() self.vpp_session.deactivate_auth(delayed=True) - for i in range(5): - self.wait_for_bfd_packet() + for dummy in range(self.test_session.detect_mult*2): + p = wait_for_bfd_packet(self) + self.assert_equal(p[BFD].state, BFDState.up, BFDState) self.test_session.send_packet() self.test_session.bfd_key_id = None self.test_session.sha1_key = None self.test_session.send_packet() - for i in range(5): - self.wait_for_bfd_packet() + for dummy in range(self.test_session.detect_mult*2): + p = wait_for_bfd_packet(self) + self.assert_equal(p[BFD].state, BFDState.up, BFDState) self.test_session.send_packet() self.assert_equal(self.vpp_session.state, BFDState.up, BFDState) self.assert_equal(len(self.vapi.collect_events()), 0, @@ -1165,19 +1343,22 @@ class BFDAuthOnOffTestCase(VppTestCase, BFDCommonCode): self.test_session = BFDTestSession( self, self.pg0, AF_INET, sha1_key=key1, bfd_key_id=self.vpp_session.bfd_key_id) - self.bfd_session_up() - for i in range(5): - self.wait_for_bfd_packet() + bfd_session_up(self) + for dummy in range(self.test_session.detect_mult*2): + p = wait_for_bfd_packet(self) + self.assert_equal(p[BFD].state, BFDState.up, BFDState) self.test_session.send_packet() self.vpp_session.activate_auth(key2, delayed=True) - for i in range(5): - self.wait_for_bfd_packet() + for dummy in range(self.test_session.detect_mult*2): + p = wait_for_bfd_packet(self) + self.assert_equal(p[BFD].state, BFDState.up, BFDState) self.test_session.send_packet() self.test_session.bfd_key_id = self.vpp_session.bfd_key_id self.test_session.sha1_key = key2 self.test_session.send_packet() - for i in range(5): - self.wait_for_bfd_packet() + for dummy in 
range(self.test_session.detect_mult*2): + p = wait_for_bfd_packet(self) + self.assert_equal(p[BFD].state, BFDState.up, BFDState) self.test_session.send_packet() self.assert_equal(self.vpp_session.state, BFDState.up, BFDState) self.assert_equal(len(self.vapi.collect_events()), 0, -- cgit 1.2.3-korg From aeeac3bf4429235c952ce54abad7d3729be6a2f5 Mon Sep 17 00:00:00 2001 From: Klement Sekera Date: Tue, 14 Feb 2017 07:11:52 +0100 Subject: BFD: loop back echo packets Change-Id: I772b63ac25ebfccaff9ab9d8d0b1445e85f21df7 Signed-off-by: Klement Sekera --- src/vnet/bfd/bfd_main.c | 43 ++++----- src/vnet/bfd/bfd_udp.c | 229 +++++++++++++++++++++++++++++++++++++++++++++--- src/vnet/bfd/bfd_udp.h | 7 ++ src/vnet/ip/udp.h | 6 +- test/bfd.py | 3 +- test/framework.py | 1 + test/test_bfd.py | 122 ++++++++++++++++++++++---- 7 files changed, 353 insertions(+), 58 deletions(-) (limited to 'src/vnet/bfd') diff --git a/src/vnet/bfd/bfd_main.c b/src/vnet/bfd/bfd_main.c index 8bb8de33..0959d0e0 100644 --- a/src/vnet/bfd/bfd_main.c +++ b/src/vnet/bfd/bfd_main.c @@ -264,8 +264,7 @@ bfd_set_timer (bfd_main_t * bm, bfd_session_t * bs, u64 now, static void bfd_set_effective_desired_min_tx (bfd_main_t * bm, bfd_session_t * bs, u64 now, - u64 desired_min_tx_clocks, - int handling_wakeup) + u64 desired_min_tx_clocks) { bs->effective_desired_min_tx_clocks = desired_min_tx_clocks; BFD_DBG ("Set effective desired min tx to " BFD_CLK_FMT, @@ -273,20 +272,17 @@ bfd_set_effective_desired_min_tx (bfd_main_t * bm, bfd_recalc_detection_time (bm, bs); bfd_recalc_tx_interval (bm, bs); bfd_calc_next_tx (bm, bs, now); - bfd_set_timer (bm, bs, now, handling_wakeup); } static void bfd_set_effective_required_min_rx (bfd_main_t * bm, bfd_session_t * bs, u64 now, - u64 required_min_rx_clocks, - int handling_wakeup) + u64 required_min_rx_clocks) { bs->effective_required_min_rx_clocks = required_min_rx_clocks; BFD_DBG ("Set effective required min rx to " BFD_CLK_FMT, BFD_CLK_PRN 
(bs->effective_required_min_rx_clocks)); bfd_recalc_detection_time (bm, bs); - bfd_set_timer (bm, bs, now, handling_wakeup); } static void @@ -424,42 +420,39 @@ bfd_on_state_change (bfd_main_t * bm, bfd_session_t * bs, u64 now, switch (bs->local_state) { case BFD_STATE_admin_down: - bfd_set_effective_required_min_rx (bm, bs, now, - bs->config_required_min_rx_clocks, - handling_wakeup); bfd_set_effective_desired_min_tx (bm, bs, now, clib_max (bs->config_desired_min_tx_clocks, - bm->default_desired_min_tx_clocks), - handling_wakeup); + bm->default_desired_min_tx_clocks)); + bfd_set_effective_required_min_rx (bm, bs, now, + bs->config_required_min_rx_clocks); + bfd_set_timer (bm, bs, now, handling_wakeup); break; case BFD_STATE_down: - bfd_set_effective_required_min_rx (bm, bs, now, - bs->config_required_min_rx_clocks, - handling_wakeup); bfd_set_effective_desired_min_tx (bm, bs, now, clib_max (bs->config_desired_min_tx_clocks, - bm->default_desired_min_tx_clocks), - handling_wakeup); + bm->default_desired_min_tx_clocks)); + bfd_set_effective_required_min_rx (bm, bs, now, + bs->config_required_min_rx_clocks); + bfd_set_timer (bm, bs, now, handling_wakeup); break; case BFD_STATE_init: bfd_set_effective_desired_min_tx (bm, bs, now, clib_max (bs->config_desired_min_tx_clocks, - bm->default_desired_min_tx_clocks), - handling_wakeup); + bm->default_desired_min_tx_clocks)); + bfd_set_timer (bm, bs, now, handling_wakeup); break; case BFD_STATE_up: + bfd_set_effective_desired_min_tx (bm, bs, now, + bs->config_desired_min_tx_clocks); if (POLL_NOT_NEEDED == bs->poll_state) { bfd_set_effective_required_min_rx (bm, bs, now, - bs->config_required_min_rx_clocks, - handling_wakeup); + bs->config_required_min_rx_clocks); } - bfd_set_effective_desired_min_tx (bm, bs, now, - bs->config_desired_min_tx_clocks, - handling_wakeup); + bfd_set_timer (bm, bs, now, handling_wakeup); break; } } @@ -1401,9 +1394,9 @@ bfd_consume_pkt (bfd_main_t * bm, const bfd_pkt_t * pkt, u32 bs_idx) if 
(BFD_STATE_up == bs->local_state) { bfd_set_effective_required_min_rx (bm, bs, now, - bs->config_required_min_rx_clocks, - 0); + bs->config_required_min_rx_clocks); bfd_recalc_detection_time (bm, bs); + bfd_set_timer (bm, bs, now, 0); } } if (BFD_STATE_admin_down == bs->local_state) diff --git a/src/vnet/bfd/bfd_udp.c b/src/vnet/bfd/bfd_udp.c index 75b35974..8519009d 100644 --- a/src/vnet/bfd/bfd_udp.c +++ b/src/vnet/bfd/bfd_udp.c @@ -42,6 +42,8 @@ typedef struct static vlib_node_registration_t bfd_udp4_input_node; static vlib_node_registration_t bfd_udp6_input_node; +static vlib_node_registration_t bfd_udp_echo4_input_node; +static vlib_node_registration_t bfd_udp_echo6_input_node; bfd_udp_main_t bfd_udp_main; @@ -594,11 +596,10 @@ typedef enum BFD_UDP_INPUT_N_NEXT, } bfd_udp_input_next_t; -/* Packet counters */ +/* Packet counters - BFD control frames */ #define foreach_bfd_udp_error(F) \ F (NONE, "good bfd packets (processed)") \ - F (BAD, "invalid bfd packets") \ - F (DISABLED, "bfd packets received on disabled interfaces") + F (BAD, "invalid bfd packets") #define F(sym, string) static char BFD_UDP_ERR_##sym##_STR[] = string; foreach_bfd_udp_error (F); @@ -618,9 +619,32 @@ typedef enum BFD_UDP_N_ERROR, } bfd_udp_error_t; +/* Packet counters - BFD ECHO packets */ +#define foreach_bfd_udp_echo_error(F) \ + F (NONE, "good bfd echo packets (processed)") \ + F (BAD, "invalid bfd echo packets") + +#define F(sym, string) static char BFD_UDP_ECHO_ERR_##sym##_STR[] = string; +foreach_bfd_udp_echo_error (F); +#undef F + +static char *bfd_udp_echo_error_strings[] = { +#define F(sym, string) BFD_UDP_ECHO_ERR_##sym##_STR, + foreach_bfd_udp_echo_error (F) +#undef F +}; + +typedef enum +{ +#define F(sym, str) BFD_UDP_ECHO_ERROR_##sym, + foreach_bfd_udp_echo_error (F) +#undef F + BFD_UDP_ECHO_N_ERROR, +} bfd_udp_echo_error_t; + static void -bfd_udp4_find_headers (vlib_buffer_t * b, const ip4_header_t ** ip4, - const udp_header_t ** udp) +bfd_udp4_find_headers (vlib_buffer_t * 
b, ip4_header_t ** ip4, + udp_header_t ** udp) { /* sanity check first */ const i32 start = vnet_buffer (b)->ip.start_of_ip_header; @@ -714,8 +738,8 @@ bfd_udp4_scan (vlib_main_t * vm, vlib_node_runtime_t * rt, b->current_length, sizeof (*pkt)); return BFD_UDP_ERROR_BAD; } - const ip4_header_t *ip4; - const udp_header_t *udp; + ip4_header_t *ip4; + udp_header_t *udp; bfd_udp4_find_headers (b, &ip4, &udp); if (!ip4 || !udp) { @@ -776,8 +800,8 @@ bfd_udp4_scan (vlib_main_t * vm, vlib_node_runtime_t * rt, } static void -bfd_udp6_find_headers (vlib_buffer_t * b, const ip6_header_t ** ip6, - const udp_header_t ** udp) +bfd_udp6_find_headers (vlib_buffer_t * b, ip6_header_t ** ip6, + udp_header_t ** udp) { /* sanity check first */ const i32 start = vnet_buffer (b)->ip.start_of_ip_header; @@ -856,8 +880,8 @@ bfd_udp6_scan (vlib_main_t * vm, vlib_node_runtime_t * rt, b->current_length, sizeof (*pkt)); return BFD_UDP_ERROR_BAD; } - const ip6_header_t *ip6; - const udp_header_t *udp; + ip6_header_t *ip6; + udp_header_t *udp; bfd_udp6_find_headers (b, &ip6, &udp); if (!ip6 || !udp) { @@ -1057,6 +1081,185 @@ VLIB_REGISTER_NODE (bfd_udp6_input_node, static) = { }; /* *INDENT-ON* */ +/** + * @brief swap the source and destination IP addresses in the packet + */ +static int +bfd_echo_address_swap (vlib_buffer_t * b, int is_ipv6) +{ + udp_header_t *dummy = NULL; + if (is_ipv6) + { + ip6_header_t *ip6 = NULL; + bfd_udp6_find_headers (b, &ip6, &dummy); + if (!ip6) + { + return 0; + } + ip6_address_t tmp = ip6->dst_address; + ip6->dst_address = ip6->src_address; + ip6->src_address = tmp; + vlib_buffer_advance (b, + (u8 *) ip6 - (u8 *) vlib_buffer_get_current (b)); + } + else + { + ip4_header_t *ip4 = NULL; + bfd_udp4_find_headers (b, &ip4, &dummy); + if (!ip4) + { + return 0; + } + ip4_address_t tmp = ip4->dst_address; + ip4->dst_address = ip4->src_address; + ip4->src_address = tmp; + vlib_buffer_advance (b, + (u8 *) ip4 - (u8 *) vlib_buffer_get_current (b)); + } + return 1; +} + +/* 
+ * Process a frame of bfd echo packets + * Expect 1 packet / frame + */ +static uword +bfd_udp_echo_input (vlib_main_t * vm, vlib_node_runtime_t * rt, + vlib_frame_t * f, int is_ipv6) +{ + u32 n_left_from, *from; + bfd_input_trace_t *t0; + + from = vlib_frame_vector_args (f); /* array of buffer indices */ + n_left_from = f->n_vectors; /* number of buffer indices */ + + while (n_left_from > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0; + + bi0 = from[0]; + b0 = vlib_get_buffer (vm, bi0); + + /* If this pkt is traced, snapshot the data */ + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + int len; + t0 = vlib_add_trace (vm, rt, b0, sizeof (*t0)); + len = (b0->current_length < sizeof (t0->data)) ? b0->current_length + : sizeof (t0->data); + t0->len = len; + clib_memcpy (t0->data, vlib_buffer_get_current (b0), len); + } + + if (bfd_echo_address_swap (b0, is_ipv6)) + { + /* loop back the packet */ + b0->error = rt->errors[BFD_UDP_ERROR_NONE]; + if (is_ipv6) + { + vlib_node_increment_counter (vm, bfd_udp_echo6_input_node.index, + b0->error, 1); + } + else + { + vlib_node_increment_counter (vm, bfd_udp_echo4_input_node.index, + b0->error, 1); + } + next0 = BFD_UDP_INPUT_NEXT_REPLY; + } + else + { + b0->error = rt->errors[BFD_UDP_ERROR_BAD]; + next0 = BFD_UDP_INPUT_NEXT_NORMAL; + } + + vlib_set_next_frame_buffer (vm, rt, next0, bi0); + + from += 1; + n_left_from -= 1; + } + + return f->n_vectors; +} + +static uword +bfd_udp_echo4_input (vlib_main_t * vm, vlib_node_runtime_t * rt, + vlib_frame_t * f) +{ + return bfd_udp_echo_input (vm, rt, f, 0); +} + +u8 * +bfd_echo_input_format_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + const bfd_udp_echo_input_trace_t *t = + va_arg (*args, bfd_udp_echo_input_trace_t *); + if (t->len > STRUCT_SIZE_OF (bfd_pkt_t, head)) + { + s = format (s, "BFD ECHO:\n"); + s = format (s, " data: %U", format_hexdump, t->data, 
t->len); + } + + return s; +} + +/* + * bfd input graph node declaration + */ +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (bfd_udp_echo4_input_node, static) = { + .function = bfd_udp_echo4_input, + .name = "bfd-udp-echo4-input", + .vector_size = sizeof (u32), + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = BFD_UDP_ECHO_N_ERROR, + .error_strings = bfd_udp_error_strings, + + .format_trace = bfd_echo_input_format_trace, + + .n_next_nodes = BFD_UDP_INPUT_N_NEXT, + .next_nodes = + { + [BFD_UDP_INPUT_NEXT_NORMAL] = "error-drop", + [BFD_UDP_INPUT_NEXT_REPLY] = "ip4-lookup", + }, +}; +/* *INDENT-ON* */ + +static uword +bfd_udp_echo6_input (vlib_main_t * vm, vlib_node_runtime_t * rt, + vlib_frame_t * f) +{ + return bfd_udp_echo_input (vm, rt, f, 1); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (bfd_udp_echo6_input_node, static) = { + .function = bfd_udp_echo6_input, + .name = "bfd-udp-echo6-input", + .vector_size = sizeof (u32), + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = BFD_UDP_ECHO_N_ERROR, + .error_strings = bfd_udp_echo_error_strings, + + .format_trace = bfd_echo_input_format_trace, + + .n_next_nodes = BFD_UDP_INPUT_N_NEXT, + .next_nodes = + { + [BFD_UDP_INPUT_NEXT_NORMAL] = "error-drop", + [BFD_UDP_INPUT_NEXT_REPLY] = "ip6-lookup", + }, +}; + +/* *INDENT-ON* */ + static clib_error_t * bfd_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags) { @@ -1093,6 +1296,10 @@ bfd_udp_init (vlib_main_t * vm) bfd_udp_main.bfd_main = &bfd_main; udp_register_dst_port (vm, UDP_DST_PORT_bfd4, bfd_udp4_input_node.index, 1); udp_register_dst_port (vm, UDP_DST_PORT_bfd6, bfd_udp6_input_node.index, 0); + udp_register_dst_port (vm, UDP_DST_PORT_bfd_echo4, + bfd_udp_echo4_input_node.index, 1); + udp_register_dst_port (vm, UDP_DST_PORT_bfd_echo6, + bfd_udp_echo6_input_node.index, 0); return 0; } diff --git a/src/vnet/bfd/bfd_udp.h b/src/vnet/bfd/bfd_udp.h index 26e89851..502e2314 100644 --- a/src/vnet/bfd/bfd_udp.h +++ b/src/vnet/bfd/bfd_udp.h @@ -40,6 +40,13 @@ typedef 
struct adj_index_t adj_index; } bfd_udp_session_t; +/* bfd udp echo packet trace capture */ +typedef struct +{ + u32 len; + u8 data[400]; +} bfd_udp_echo_input_trace_t; + struct bfd_session_s; void bfd_add_udp4_transport (vlib_main_t * vm, vlib_buffer_t * b, diff --git a/src/vnet/ip/udp.h b/src/vnet/ip/udp.h index 03c62e0b..bad58b5d 100644 --- a/src/vnet/ip/udp.h +++ b/src/vnet/ip/udp.h @@ -38,7 +38,8 @@ typedef enum _ (67, dhcp_to_server) \ _ (68, dhcp_to_client) \ _ (500, ikev2) \ -_ (3784, bfd4) \ +_ (3784, bfd4) \ +_ (3785, bfd_echo4) \ _ (4341, lisp_gpe) \ _ (4342, lisp_cp) \ _ (4739, ipfix) \ @@ -51,7 +52,8 @@ _ (6633, vpath_3) #define foreach_udp6_dst_port \ _ (547, dhcpv6_to_server) \ _ (546, dhcpv6_to_client) \ -_ (3784, bfd6) \ +_ (3784, bfd6) \ +_ (3785, bfd_echo6) \ _ (4341, lisp_gpe6) \ _ (4342, lisp_cp6) \ _ (4790, vxlan6_gpe) \ diff --git a/test/bfd.py b/test/bfd.py index 09a7681c..8eb3b36a 100644 --- a/test/bfd.py +++ b/test/bfd.py @@ -6,7 +6,7 @@ from scapy.all import bind_layers from scapy.layers.inet import UDP from scapy.packet import Packet from scapy.fields import BitField, BitEnumField, XByteField, FlagsField,\ - ConditionalField, StrField + ConditionalField, StrField from vpp_object import VppObject from util import NumericConstant @@ -110,6 +110,7 @@ class BFD(Packet): """ BFD protocol layer for scapy """ udp_dport = 3784 #: BFD destination port per RFC 5881 + udp_dport_echo = 3785 # : BFD destination port for ECHO per RFC 5881 udp_sport_min = 49152 #: BFD source port min value per RFC 5881 udp_sport_max = 65535 #: BFD source port max value per RFC 5881 bfd_pkt_len = 24 # : length of BFD pkt without authentication section diff --git a/test/framework.py b/test/framework.py index 8dd61aa1..beed1803 100644 --- a/test/framework.py +++ b/test/framework.py @@ -236,6 +236,7 @@ class VppTestCase(unittest.TestCase): cls.pump_thread_stop_flag = Event() cls.pump_thread_wakeup_pipe = os.pipe() cls.pump_thread = Thread(target=pump_output, args=(cls,)) + 
cls.pump_thread.daemon = True cls.pump_thread.start() cls.vapi = VppPapiProvider(cls.shm_prefix, cls.shm_prefix, cls) if cls.step: diff --git a/test/test_bfd.py b/test/test_bfd.py index 0ba0b46d..64e9301a 100644 --- a/test/test_bfd.py +++ b/test/test_bfd.py @@ -8,6 +8,7 @@ import binascii import time from random import randint, shuffle from socket import AF_INET, AF_INET6 +from scapy.packet import Raw from scapy.layers.l2 import Ether from scapy.layers.inet import UDP, IP from scapy.layers.inet6 import IPv6 @@ -836,7 +837,6 @@ class BFD4TestCase(VppTestCase): def test_no_periodic_if_remote_demand(self): """ no periodic frames outside poll sequence if remote demand set """ - self.test_session.update(detect_mult=10) bfd_session_up(self) demand = self.test_session.create_packet() demand[BFD].flags = "D" @@ -846,7 +846,7 @@ class BFD4TestCase(VppTestCase): self.test_session.desired_min_tx) \ / USEC_IN_SEC count = 0 - for dummy in range(self.test_session.detect_mult): + for dummy in range(self.test_session.detect_mult * 2): time.sleep(transmit_time) self.test_session.send_packet(demand) try: @@ -861,6 +861,48 @@ class BFD4TestCase(VppTestCase): self.assert_equal(count, 0, "number of packets received") self.assert_equal(len(events), 0, "number of events received") + def test_echo_looped_back(self): + """ echo packets looped back """ + # don't need a session in this case.. + self.vpp_session.remove_vpp_config() + self.pg0.enable_capture() + echo_packet_count = 10 + # random source port low enough to increment a few times.. 
+ udp_sport_tx = randint(1, 50000) + udp_sport_rx = udp_sport_tx + echo_packet = (Ether(src=self.pg0.remote_mac, + dst=self.pg0.local_mac) / + IP(src=self.pg0.remote_ip4, + dst=self.pg0.local_ip4) / + UDP(dport=BFD.udp_dport_echo) / + Raw("this should be looped back")) + for dummy in range(echo_packet_count): + self.sleep(.01, "delay between echo packets") + echo_packet[UDP].sport = udp_sport_tx + udp_sport_tx += 1 + self.logger.debug(ppp("Sending packet:", echo_packet)) + self.pg0.add_stream(echo_packet) + self.pg_start() + for dummy in range(echo_packet_count): + p = self.pg0.wait_for_packet(1) + self.logger.debug(ppp("Got packet:", p)) + ether = p[Ether] + self.assert_equal(self.pg0.remote_mac, + ether.dst, "Destination MAC") + self.assert_equal(self.pg0.local_mac, ether.src, "Source MAC") + ip = p[IP] + self.assert_equal(self.pg0.remote_ip4, ip.dst, "Destination IP") + self.assert_equal(self.pg0.local_ip4, ip.src, "Destination IP") + udp = p[UDP] + self.assert_equal(udp.dport, BFD.udp_dport_echo, + "UDP destination port") + self.assert_equal(udp.sport, udp_sport_rx, "UDP source port") + udp_sport_rx += 1 + self.assertTrue(p.haslayer(Raw) and p[Raw] == echo_packet[Raw], + "Received packet is not the echo packet sent") + self.assert_equal(udp_sport_tx, udp_sport_rx, "UDP source port (== " + "ECHO packet identifier for test purposes)") + class BFD6TestCase(VppTestCase): """Bidirectional Forwarding Detection (BFD) (IPv6) """ @@ -914,13 +956,55 @@ class BFD6TestCase(VppTestCase): def test_hold_up(self): """ hold BFD session up """ bfd_session_up(self) - for dummy in range(self.test_session.detect_mult*2): + for dummy in range(self.test_session.detect_mult * 2): wait_for_bfd_packet(self) self.test_session.send_packet() self.assert_equal(len(self.vapi.collect_events()), 0, "number of bfd events") self.assert_equal(self.vpp_session.state, BFDState.up, BFDState) + def test_echo_looped_back(self): + """ echo packets looped back """ + # don't need a session in this case.. 
+ self.vpp_session.remove_vpp_config() + self.pg0.enable_capture() + echo_packet_count = 10 + # random source port low enough to increment a few times.. + udp_sport_tx = randint(1, 50000) + udp_sport_rx = udp_sport_tx + echo_packet = (Ether(src=self.pg0.remote_mac, + dst=self.pg0.local_mac) / + IPv6(src=self.pg0.remote_ip6, + dst=self.pg0.local_ip6) / + UDP(dport=BFD.udp_dport_echo) / + Raw("this should be looped back")) + for dummy in range(echo_packet_count): + self.sleep(.01, "delay between echo packets") + echo_packet[UDP].sport = udp_sport_tx + udp_sport_tx += 1 + self.logger.debug(ppp("Sending packet:", echo_packet)) + self.pg0.add_stream(echo_packet) + self.pg_start() + for dummy in range(echo_packet_count): + p = self.pg0.wait_for_packet(1) + self.logger.debug(ppp("Got packet:", p)) + ether = p[Ether] + self.assert_equal(self.pg0.remote_mac, + ether.dst, "Destination MAC") + self.assert_equal(self.pg0.local_mac, ether.src, "Source MAC") + ip = p[IPv6] + self.assert_equal(self.pg0.remote_ip6, ip.dst, "Destination IP") + self.assert_equal(self.pg0.local_ip6, ip.src, "Destination IP") + udp = p[UDP] + self.assert_equal(udp.dport, BFD.udp_dport_echo, + "UDP destination port") + self.assert_equal(udp.sport, udp_sport_rx, "UDP source port") + udp_sport_rx += 1 + self.assertTrue(p.haslayer(Raw) and p[Raw] == echo_packet[Raw], + "Received packet is not the echo packet sent") + self.assert_equal(udp_sport_tx, udp_sport_rx, "UDP source port (== " + "ECHO packet identifier for test purposes)") + class BFDSHA1TestCase(VppTestCase): """Bidirectional Forwarding Detection (BFD) (SHA1 auth) """ @@ -982,7 +1066,7 @@ class BFDSHA1TestCase(VppTestCase): self, self.pg0, AF_INET, sha1_key=key, bfd_key_id=self.vpp_session.bfd_key_id) bfd_session_up(self) - for dummy in range(self.test_session.detect_mult*2): + for dummy in range(self.test_session.detect_mult * 2): wait_for_bfd_packet(self) self.test_session.send_packet() self.assert_equal(self.vpp_session.state, BFDState.up, 
BFDState) @@ -1195,14 +1279,14 @@ class BFDAuthOnOffTestCase(VppTestCase): self.vpp_session.admin_up() self.test_session = BFDTestSession(self, self.pg0, AF_INET) bfd_session_up(self) - for dummy in range(self.test_session.detect_mult*2): + for dummy in range(self.test_session.detect_mult * 2): p = wait_for_bfd_packet(self) self.assert_equal(p[BFD].state, BFDState.up, BFDState) self.test_session.send_packet() self.vpp_session.activate_auth(key) self.test_session.bfd_key_id = self.vpp_session.bfd_key_id self.test_session.sha1_key = key - for dummy in range(self.test_session.detect_mult*2): + for dummy in range(self.test_session.detect_mult * 2): p = wait_for_bfd_packet(self) self.assert_equal(p[BFD].state, BFDState.up, BFDState) self.test_session.send_packet() @@ -1223,7 +1307,7 @@ class BFDAuthOnOffTestCase(VppTestCase): bfd_key_id=self.vpp_session.bfd_key_id) bfd_session_up(self) # self.vapi.want_bfd_events(enable_disable=0) - for dummy in range(self.test_session.detect_mult*2): + for dummy in range(self.test_session.detect_mult * 2): p = wait_for_bfd_packet(self) self.assert_equal(p[BFD].state, BFDState.up, BFDState) self.test_session.inc_seq_num() @@ -1231,7 +1315,7 @@ class BFDAuthOnOffTestCase(VppTestCase): self.vpp_session.deactivate_auth() self.test_session.bfd_key_id = None self.test_session.sha1_key = None - for dummy in range(self.test_session.detect_mult*2): + for dummy in range(self.test_session.detect_mult * 2): p = wait_for_bfd_packet(self) self.assert_equal(p[BFD].state, BFDState.up, BFDState) self.test_session.inc_seq_num() @@ -1254,14 +1338,14 @@ class BFDAuthOnOffTestCase(VppTestCase): self, self.pg0, AF_INET, sha1_key=key1, bfd_key_id=self.vpp_session.bfd_key_id) bfd_session_up(self) - for dummy in range(self.test_session.detect_mult*2): + for dummy in range(self.test_session.detect_mult * 2): p = wait_for_bfd_packet(self) self.assert_equal(p[BFD].state, BFDState.up, BFDState) self.test_session.send_packet() self.vpp_session.activate_auth(key2) 
self.test_session.bfd_key_id = self.vpp_session.bfd_key_id self.test_session.sha1_key = key2 - for dummy in range(self.test_session.detect_mult*2): + for dummy in range(self.test_session.detect_mult * 2): p = wait_for_bfd_packet(self) self.assert_equal(p[BFD].state, BFDState.up, BFDState) self.test_session.send_packet() @@ -1279,18 +1363,18 @@ class BFDAuthOnOffTestCase(VppTestCase): self.vpp_session.admin_up() self.test_session = BFDTestSession(self, self.pg0, AF_INET) bfd_session_up(self) - for dummy in range(self.test_session.detect_mult*2): + for dummy in range(self.test_session.detect_mult * 2): wait_for_bfd_packet(self) self.test_session.send_packet() self.vpp_session.activate_auth(key, delayed=True) - for dummy in range(self.test_session.detect_mult*2): + for dummy in range(self.test_session.detect_mult * 2): p = wait_for_bfd_packet(self) self.assert_equal(p[BFD].state, BFDState.up, BFDState) self.test_session.send_packet() self.test_session.bfd_key_id = self.vpp_session.bfd_key_id self.test_session.sha1_key = key self.test_session.send_packet() - for dummy in range(self.test_session.detect_mult*2): + for dummy in range(self.test_session.detect_mult * 2): p = wait_for_bfd_packet(self) self.assert_equal(p[BFD].state, BFDState.up, BFDState) self.test_session.send_packet() @@ -1310,19 +1394,19 @@ class BFDAuthOnOffTestCase(VppTestCase): self, self.pg0, AF_INET, sha1_key=key, bfd_key_id=self.vpp_session.bfd_key_id) bfd_session_up(self) - for dummy in range(self.test_session.detect_mult*2): + for dummy in range(self.test_session.detect_mult * 2): p = wait_for_bfd_packet(self) self.assert_equal(p[BFD].state, BFDState.up, BFDState) self.test_session.send_packet() self.vpp_session.deactivate_auth(delayed=True) - for dummy in range(self.test_session.detect_mult*2): + for dummy in range(self.test_session.detect_mult * 2): p = wait_for_bfd_packet(self) self.assert_equal(p[BFD].state, BFDState.up, BFDState) self.test_session.send_packet() self.test_session.bfd_key_id = 
None self.test_session.sha1_key = None self.test_session.send_packet() - for dummy in range(self.test_session.detect_mult*2): + for dummy in range(self.test_session.detect_mult * 2): p = wait_for_bfd_packet(self) self.assert_equal(p[BFD].state, BFDState.up, BFDState) self.test_session.send_packet() @@ -1344,19 +1428,19 @@ class BFDAuthOnOffTestCase(VppTestCase): self, self.pg0, AF_INET, sha1_key=key1, bfd_key_id=self.vpp_session.bfd_key_id) bfd_session_up(self) - for dummy in range(self.test_session.detect_mult*2): + for dummy in range(self.test_session.detect_mult * 2): p = wait_for_bfd_packet(self) self.assert_equal(p[BFD].state, BFDState.up, BFDState) self.test_session.send_packet() self.vpp_session.activate_auth(key2, delayed=True) - for dummy in range(self.test_session.detect_mult*2): + for dummy in range(self.test_session.detect_mult * 2): p = wait_for_bfd_packet(self) self.assert_equal(p[BFD].state, BFDState.up, BFDState) self.test_session.send_packet() self.test_session.bfd_key_id = self.vpp_session.bfd_key_id self.test_session.sha1_key = key2 self.test_session.send_packet() - for dummy in range(self.test_session.detect_mult*2): + for dummy in range(self.test_session.detect_mult * 2): p = wait_for_bfd_packet(self) self.assert_equal(p[BFD].state, BFDState.up, BFDState) self.test_session.send_packet() -- cgit 1.2.3-korg From c48829bb0a29e7b53a5e0b6bcecd13a328b19dcf Mon Sep 17 00:00:00 2001 From: Klement Sekera Date: Tue, 14 Feb 2017 07:55:57 +0100 Subject: BFD: put session admin-up/admin-down Change-Id: I7d8889dce8495607106593ad83320c9af0f2fa07 Signed-off-by: Klement Sekera --- src/vnet/bfd/bfd_main.c | 24 ++++++++++++------------ test/bfd.py | 9 +++++++++ test/test_bfd.py | 44 ++++++++++++++++++++++++++++++++++++-------- 3 files changed, 57 insertions(+), 20 deletions(-) (limited to 'src/vnet/bfd') diff --git a/src/vnet/bfd/bfd_main.c b/src/vnet/bfd/bfd_main.c index 0959d0e0..c0fd18df 100644 --- a/src/vnet/bfd/bfd_main.c +++ b/src/vnet/bfd/bfd_main.c @@ 
-335,11 +335,14 @@ bfd_session_set_flags (bfd_session_t * bs, u8 admin_up_down) bfd_main_t *bm = &bfd_main; if (admin_up_down) { + BFD_DBG ("Session set admin-up, bs-idx=%u", bs->bs_idx); bfd_set_state (bm, bs, BFD_STATE_down, 0); + bfd_set_diag (bs, BFD_DIAG_CODE_no_diag); } else { - bfd_set_diag (bs, BFD_DIAG_CODE_neighbor_sig_down); + BFD_DBG ("Session set admin-down, bs-idx=%u", bs->bs_idx); + bfd_set_diag (bs, BFD_DIAG_CODE_admin_down); bfd_set_state (bm, bs, BFD_STATE_admin_down, 0); } } @@ -439,9 +442,7 @@ bfd_on_state_change (bfd_main_t * bm, bfd_session_t * bs, u64 now, break; case BFD_STATE_init: bfd_set_effective_desired_min_tx (bm, bs, now, - clib_max - (bs->config_desired_min_tx_clocks, - bm->default_desired_min_tx_clocks)); + bs->config_desired_min_tx_clocks); bfd_set_timer (bm, bs, now, handling_wakeup); break; case BFD_STATE_up: @@ -714,18 +715,13 @@ bfd_on_timeout (vlib_main_t * vm, vlib_node_runtime_t * rt, bfd_main_t * bm, switch (bs->local_state) { case BFD_STATE_admin_down: - BFD_ERR ("Unexpected timeout when in %s state", - bfd_state_string (bs->local_state)); - abort (); + bfd_send_periodic (vm, rt, bm, bs, now, 1); break; case BFD_STATE_down: bfd_send_periodic (vm, rt, bm, bs, now, 1); break; case BFD_STATE_init: - BFD_ERR ("Unexpected timeout when in %s state", - bfd_state_string (bs->local_state)); - abort (); - break; + /* fallthrough */ case BFD_STATE_up: bfd_check_rx_timeout (bm, bs, now, 1); bfd_send_periodic (vm, rt, bm, bs, now, 1); @@ -1400,7 +1396,11 @@ bfd_consume_pkt (bfd_main_t * bm, const bfd_pkt_t * pkt, u32 bs_idx) } } if (BFD_STATE_admin_down == bs->local_state) - return; + { + BFD_DBG ("Session is admin-down, ignoring packet, bs_idx=%u", + bs->bs_idx); + return; + } if (BFD_STATE_admin_down == bs->remote_state) { bfd_set_diag (bs, BFD_DIAG_CODE_neighbor_sig_down); diff --git a/test/bfd.py b/test/bfd.py index 8eb3b36a..8bd9f9a3 100644 --- a/test/bfd.py +++ b/test/bfd.py @@ -420,3 +420,12 @@ class VppBFDUDPSession(VppObject): 
self.local_addr_n, self.peer_addr_n, is_ipv6=is_ipv6) + + def admin_down(self): + """ set bfd session admin-down """ + is_ipv6 = 1 if AF_INET6 == self._af else 0 + self.test.vapi.bfd_udp_session_set_flags(0, + self._interface.sw_if_index, + self.local_addr_n, + self.peer_addr_n, + is_ipv6=is_ipv6) diff --git a/test/test_bfd.py b/test/test_bfd.py index 64e9301a..68baf837 100644 --- a/test/test_bfd.py +++ b/test/test_bfd.py @@ -671,10 +671,9 @@ class BFD4TestCase(VppTestCase): def test_conn_down(self): """ verify session goes down after inactivity """ bfd_session_up(self) - for dummy in range(self.test_session.detect_mult): - wait_for_bfd_packet(self) - self.assert_equal(len(self.vapi.collect_events()), 0, - "number of bfd events") + detection_time = self.vpp_session.detect_mult *\ + self.vpp_session.required_min_rx / USEC_IN_SEC + self.sleep(detection_time, "waiting for BFD session time-out") e = self.vapi.wait_for_event(1, "bfd_udp_session_details") verify_event(self, e, expected_state=BFDState.down) @@ -808,10 +807,6 @@ class BFD4TestCase(VppTestCase): "time before bfd session goes down") verify_event(self, e, expected_state=BFDState.down) - def test_modify_des_min_tx(self): - """ modify desired min tx interval """ - pass - def test_modify_detect_mult(self): """ modify detect multiplier """ bfd_session_up(self) @@ -903,6 +898,39 @@ class BFD4TestCase(VppTestCase): self.assert_equal(udp_sport_tx, udp_sport_rx, "UDP source port (== " "ECHO packet identifier for test purposes)") + def test_admin_up_down(self): + bfd_session_up(self) + self.vpp_session.admin_down() + self.pg0.enable_capture() + e = self.vapi.wait_for_event(1, "bfd_udp_session_details") + verify_event(self, e, expected_state=BFDState.admin_down) + for dummy in range(2): + p = wait_for_bfd_packet(self) + self.assert_equal(BFDState.admin_down, p[BFD].state, BFDState) + # try to bring session up - shouldn't be possible + self.test_session.update(state=BFDState.init) + self.test_session.send_packet() + for 
dummy in range(2): + p = wait_for_bfd_packet(self) + self.assert_equal(BFDState.admin_down, p[BFD].state, BFDState) + self.vpp_session.admin_up() + self.test_session.update(state=BFDState.down) + e = self.vapi.wait_for_event(1, "bfd_udp_session_details") + verify_event(self, e, expected_state=BFDState.down) + p = wait_for_bfd_packet(self) + self.assert_equal(BFDState.down, p[BFD].state, BFDState) + self.test_session.send_packet() + p = wait_for_bfd_packet(self) + self.assert_equal(BFDState.init, p[BFD].state, BFDState) + e = self.vapi.wait_for_event(1, "bfd_udp_session_details") + verify_event(self, e, expected_state=BFDState.init) + self.test_session.update(state=BFDState.up) + self.test_session.send_packet() + p = wait_for_bfd_packet(self) + self.assert_equal(BFDState.up, p[BFD].state, BFDState) + e = self.vapi.wait_for_event(1, "bfd_udp_session_details") + verify_event(self, e, expected_state=BFDState.up) + class BFD6TestCase(VppTestCase): """Bidirectional Forwarding Detection (BFD) (IPv6) """ -- cgit 1.2.3-korg From 239790fd91b3f62e5eda1042a97f9216fe59856e Mon Sep 17 00:00:00 2001 From: Klement Sekera Date: Thu, 16 Feb 2017 10:53:53 +0100 Subject: BFD: echo function Change-Id: Ib1e301d62b687d4e42434239e7cd412065c28da0 Signed-off-by: Klement Sekera --- src/vnet/bfd/bfd.api | 38 ++- src/vnet/bfd/bfd_api.c | 31 ++- src/vnet/bfd/bfd_api.h | 26 +- src/vnet/bfd/bfd_debug.h | 2 +- src/vnet/bfd/bfd_main.c | 689 ++++++++++++++++++++++++++++++++-------------- src/vnet/bfd/bfd_main.h | 94 +++++-- src/vnet/bfd/bfd_udp.c | 286 +++++++++++++++---- src/vnet/bfd/bfd_udp.h | 16 +- test/bfd.py | 21 ++ test/framework.py | 2 +- test/test_bfd.py | 477 ++++++++++++++++++++++++++++---- test/vpp_papi_provider.py | 4 + 12 files changed, 1319 insertions(+), 367 deletions(-) (limited to 'src/vnet/bfd') diff --git a/src/vnet/bfd/bfd.api b/src/vnet/bfd/bfd.api index f307ed2a..93bf0fb9 100644 --- a/src/vnet/bfd/bfd.api +++ b/src/vnet/bfd/bfd.api @@ -13,29 +13,43 @@ * limitations under the 
License. */ -/** \brief Configure BFD feature +/** \brief Set BFD echo source @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request - @param slow_timer - slow timer (seconds) - @param min_tx - desired min tx interval - @param min_rx - desired min rx interval - @param detect_mult - desired detection multiplier + @param sw_if_index - interface to use as echo source */ -define bfd_set_config +define bfd_udp_set_echo_source +{ + u32 client_index; + u32 context; + u32 sw_if_index; +}; + +/** \brief Set BFD feature response + @param context - sender context, to match reply w/ request + @param retval - return code for the request +*/ +define bfd_udp_set_echo_source_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Delete BFD echo source + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define bfd_udp_del_echo_source { u32 client_index; u32 context; - u32 slow_timer; - u32 min_tx; - u32 min_rx; - u8 detect_mult; }; -/** \brief Configure BFD feature response +/** \brief Delete BFD feature response @param context - sender context, to match reply w/ request @param retval - return code for the request */ -define bfd_set_config_reply +define bfd_udp_del_echo_source_reply { u32 context; i32 retval; diff --git a/src/vnet/bfd/bfd_api.c b/src/vnet/bfd/bfd_api.c index af70f0ec..6632eae4 100644 --- a/src/vnet/bfd/bfd_api.c +++ b/src/vnet/bfd/bfd_api.c @@ -54,7 +54,9 @@ _ (BFD_AUTH_DEL_KEY, bfd_auth_del_key) \ _ (BFD_AUTH_KEYS_DUMP, bfd_auth_keys_dump) \ _ (BFD_UDP_AUTH_ACTIVATE, bfd_udp_auth_activate) \ - _ (BFD_UDP_AUTH_DEACTIVATE, bfd_udp_auth_deactivate) + _ (BFD_UDP_AUTH_DEACTIVATE, bfd_udp_auth_deactivate) \ + _ (BFD_UDP_SET_ECHO_SOURCE, bfd_udp_set_echo_source) \ + _ (BFD_UDP_DEL_ECHO_SOURCE, bfd_udp_del_echo_source) pub_sub_handler (bfd_events, BFD_EVENTS); @@ -314,6 +316,33 @@ vl_api_bfd_udp_auth_deactivate_t_handler 
(vl_api_bfd_udp_auth_deactivate_t * REPLY_MACRO (VL_API_BFD_UDP_AUTH_DEACTIVATE_REPLY); } +static void +vl_api_bfd_udp_set_echo_source_t_handler (vl_api_bfd_udp_set_echo_source_t * + mp) +{ + vl_api_bfd_udp_set_echo_source_reply_t *rmp; + int rv; + + VALIDATE_SW_IF_INDEX (mp); + + rv = bfd_udp_set_echo_source (clib_net_to_host_u32 (mp->sw_if_index)); + + BAD_SW_IF_INDEX_LABEL; + REPLY_MACRO (VL_API_BFD_UDP_SET_ECHO_SOURCE_REPLY); +} + +static void +vl_api_bfd_udp_del_echo_source_t_handler (vl_api_bfd_udp_del_echo_source_t * + mp) +{ + vl_api_bfd_udp_del_echo_source_reply_t *rmp; + int rv; + + rv = bfd_udp_del_echo_source (); + + REPLY_MACRO (VL_API_BFD_UDP_DEL_ECHO_SOURCE_REPLY); +} + /* * bfd_api_hookup * Add vpe's API message handlers to the table. diff --git a/src/vnet/bfd/bfd_api.h b/src/vnet/bfd/bfd_api.h index f4486a79..63d4a62e 100644 --- a/src/vnet/bfd/bfd_api.h +++ b/src/vnet/bfd/bfd_api.h @@ -24,6 +24,17 @@ #include #include +#define foreach_bfd_transport(F) \ + F (UDP4, "ip4-rewrite") \ + F (UDP6, "ip6-rewrite") + +typedef enum +{ +#define F(t, n) BFD_TRANSPORT_##t, + foreach_bfd_transport (F) +#undef F +} bfd_transport_e; + vnet_api_error_t bfd_udp_add_session (u32 sw_if_index, const ip46_address_t * local_addr, const ip46_address_t * peer_addr, @@ -31,12 +42,11 @@ bfd_udp_add_session (u32 sw_if_index, const ip46_address_t * local_addr, u8 detect_mult, u8 is_authenticated, u32 conf_key_id, u8 bfd_key_id); -vnet_api_error_t bfd_udp_mod_session (u32 sw_if_index, - const ip46_address_t * local_addr, - const ip46_address_t * peer_addr, - u32 desired_min_tx_usec, - u32 required_min_rx_usec, - u8 detect_mult); +vnet_api_error_t +bfd_udp_mod_session (u32 sw_if_index, const ip46_address_t * local_addr, + const ip46_address_t * peer_addr, + u32 desired_min_tx_usec, u32 required_min_rx_usec, + u8 detect_mult); vnet_api_error_t bfd_udp_del_session (u32 sw_if_index, const ip46_address_t * local_addr, @@ -63,6 +73,10 @@ vnet_api_error_t bfd_udp_auth_deactivate (u32 
sw_if_index, const ip46_address_t * peer_addr, u8 is_delayed); +vnet_api_error_t bfd_udp_set_echo_source (u32 loopback_sw_if_index); + +vnet_api_error_t bfd_udp_del_echo_source (); + #endif /* __included_bfd_api_h__ */ /* diff --git a/src/vnet/bfd/bfd_debug.h b/src/vnet/bfd/bfd_debug.h index a06e934f..3017352e 100644 --- a/src/vnet/bfd/bfd_debug.h +++ b/src/vnet/bfd/bfd_debug.h @@ -20,7 +20,7 @@ #define __included_bfd_debug_h__ /* controls debug prints */ -#define BFD_DEBUG (0) +#define BFD_DEBUG (1) #if BFD_DEBUG #define BFD_DEBUG_FILE_DEF \ diff --git a/src/vnet/bfd/bfd_main.c b/src/vnet/bfd/bfd_main.c index c0fd18df..29c40458 100644 --- a/src/vnet/bfd/bfd_main.c +++ b/src/vnet/bfd/bfd_main.c @@ -17,17 +17,37 @@ * @brief BFD nodes implementation */ +#if WITH_LIBSSL > 0 +#include +#endif + +#if __SSE4_2__ +#include +#endif + #include #include #include +#include #include #include #include #include #include -#if WITH_LIBSSL > 0 -#include + +static u64 +bfd_calc_echo_checksum (u32 discriminator, u64 expire_time, u32 secret) +{ + u64 checksum = 0; +#if __SSE4_2__ + checksum = _mm_crc32_u64 (0, discriminator); + checksum = _mm_crc32_u64 (checksum, expire_time); + checksum = _mm_crc32_u64 (checksum, secret); +#else + checksum = clib_xxhash (discriminator ^ expire_time ^ secret); #endif + return checksum; +} static u64 bfd_usec_to_clocks (const bfd_main_t * bm, u64 us) @@ -35,6 +55,12 @@ bfd_usec_to_clocks (const bfd_main_t * bm, u64 us) return bm->cpu_cps * ((f64) us / USEC_PER_SECOND); } +static u32 +bfd_clocks_to_usec (const bfd_main_t * bm, u64 clocks) +{ + return (clocks / bm->cpu_cps) * USEC_PER_SECOND; +} + static vlib_node_registration_t bfd_process_node; /* set to 0 here, real values filled at startup */ @@ -81,17 +107,19 @@ bfd_set_defaults (bfd_main_t * bm, bfd_session_t * bs) bs->local_state = BFD_STATE_down; bs->local_diag = BFD_DIAG_CODE_no_diag; bs->remote_state = BFD_STATE_down; - bs->local_demand = 0; bs->remote_discr = 0; - 
bs->config_desired_min_tx_usec = BFD_DEFAULT_DESIRED_MIN_TX_US; + bs->config_desired_min_tx_usec = BFD_DEFAULT_DESIRED_MIN_TX_USEC; bs->config_desired_min_tx_clocks = bm->default_desired_min_tx_clocks; bs->effective_desired_min_tx_clocks = bm->default_desired_min_tx_clocks; bs->remote_min_rx_usec = 1; bs->remote_min_rx_clocks = bfd_usec_to_clocks (bm, bs->remote_min_rx_usec); + bs->remote_min_echo_rx_usec = 0; + bs->remote_min_echo_rx_clocks = 0; bs->remote_demand = 0; bs->auth.remote_seq_number = 0; bs->auth.remote_seq_number_known = 0; bs->auth.local_seq_number = random_u32 (&bm->random_seed); + bs->echo_secret = random_u32 (&bm->random_seed); } static void @@ -119,68 +147,90 @@ bfd_set_state (bfd_main_t * bm, bfd_session_t * bs, } } -static void -bfd_recalc_tx_interval (bfd_main_t * bm, bfd_session_t * bs) +static const char * +bfd_poll_state_string (bfd_poll_state_e state) { - if (!bs->local_demand) + switch (state) { - bs->transmit_interval_clocks = - clib_max (bs->effective_desired_min_tx_clocks, - bs->remote_min_rx_clocks); +#define F(x) \ + case BFD_POLL_##x: \ + return "BFD_POLL_" #x; + foreach_bfd_poll_state (F) +#undef F } - else + return "UNKNOWN"; +} + +static void +bfd_set_poll_state (bfd_session_t * bs, bfd_poll_state_e state) +{ + if (bs->poll_state != state) { - /* TODO */ + BFD_DBG ("Setting poll state=%s, bs_idx=%u", + bfd_poll_state_string (state), bs->bs_idx); + bs->poll_state = state; } - BFD_DBG ("Recalculated transmit interval %lu clocks/%.2fs", - bs->transmit_interval_clocks, - bs->transmit_interval_clocks / bm->cpu_cps); +} + +static void +bfd_recalc_tx_interval (bfd_main_t * bm, bfd_session_t * bs) +{ + bs->transmit_interval_clocks = + clib_max (bs->effective_desired_min_tx_clocks, bs->remote_min_rx_clocks); + BFD_DBG ("Recalculated transmit interval " BFD_CLK_FMT, + BFD_CLK_PRN (bs->transmit_interval_clocks)); +} + +static void +bfd_recalc_echo_tx_interval (bfd_main_t * bm, bfd_session_t * bs) +{ + bs->echo_transmit_interval_clocks = + 
clib_max (bs->effective_desired_min_tx_clocks, + bs->remote_min_echo_rx_clocks); + BFD_DBG ("Recalculated echo transmit interval " BFD_CLK_FMT, + BFD_CLK_PRN (bs->echo_transmit_interval_clocks)); } static void bfd_calc_next_tx (bfd_main_t * bm, bfd_session_t * bs, u64 now) { - if (!bs->local_demand) + if (bs->local_detect_mult > 1) { - if (bs->local_detect_mult > 1) - { - /* common case - 75-100% of transmit interval */ - bs->tx_timeout_clocks = bs->last_tx_clocks + - (1 - .25 * (random_f64 (&bm->random_seed))) * - bs->transmit_interval_clocks; - if (bs->tx_timeout_clocks < now) - { - /* huh, we've missed it already, transmit now */ - BFD_DBG ("Missed %lu transmit events (now is %lu, calc " - "tx_timeout is %lu)", - (now - bs->tx_timeout_clocks) / - bs->transmit_interval_clocks, - now, bs->tx_timeout_clocks); - bs->tx_timeout_clocks = now; - } - } - else + /* common case - 75-100% of transmit interval */ + bs->tx_timeout_clocks = bs->last_tx_clocks + + (1 - .25 * (random_f64 (&bm->random_seed))) * + bs->transmit_interval_clocks; + if (bs->tx_timeout_clocks < now) { - /* special case - 75-90% of transmit interval */ - bs->tx_timeout_clocks = - bs->last_tx_clocks + - (.9 - .15 * (random_f64 (&bm->random_seed))) * - bs->transmit_interval_clocks; - if (bs->tx_timeout_clocks < now) - { - /* huh, we've missed it already, transmit now */ - BFD_DBG ("Missed %lu transmit events (now is %lu, calc " - "tx_timeout is %lu)", - (now - bs->tx_timeout_clocks) / - bs->transmit_interval_clocks, - now, bs->tx_timeout_clocks); - bs->tx_timeout_clocks = now; - } + /* + * the timeout is in the past, which means that either remote + * demand mode was set or performance/clock issues ... 
+ */ + BFD_DBG ("Missed %lu transmit events (now is %lu, calc " + "tx_timeout is %lu)", + (now - bs->tx_timeout_clocks) / + bs->transmit_interval_clocks, now, bs->tx_timeout_clocks); + bs->tx_timeout_clocks = now; } } else { - /* TODO */ + /* special case - 75-90% of transmit interval */ + bs->tx_timeout_clocks = bs->last_tx_clocks + + (.9 - .15 * (random_f64 (&bm->random_seed))) * + bs->transmit_interval_clocks; + if (bs->tx_timeout_clocks < now) + { + /* + * the timeout is in the past, which means that either remote + * demand mode was set or performance/clock issues ... + */ + BFD_DBG ("Missed %lu transmit events (now is %lu, calc " + "tx_timeout is %lu)", + (now - bs->tx_timeout_clocks) / + bs->transmit_interval_clocks, now, bs->tx_timeout_clocks); + bs->tx_timeout_clocks = now; + } } if (bs->tx_timeout_clocks) { @@ -191,24 +241,33 @@ bfd_calc_next_tx (bfd_main_t * bm, bfd_session_t * bs, u64 now) } } +static void +bfd_calc_next_echo_tx (bfd_main_t * bm, bfd_session_t * bs, u64 now) +{ + bs->echo_tx_timeout_clocks = + bs->echo_last_tx_clocks + bs->echo_transmit_interval_clocks; + if (bs->echo_tx_timeout_clocks < now) + { + /* huh, we've missed it already, transmit now */ + BFD_DBG ("Missed %lu echo transmit events (now is %lu, calc tx_timeout " + "is %lu)", + (now - bs->echo_tx_timeout_clocks) / + bs->echo_transmit_interval_clocks, + now, bs->echo_tx_timeout_clocks); + bs->echo_tx_timeout_clocks = now; + } + BFD_DBG ("Next echo transmit in %lu clocks/%.02fs@%lu", + bs->echo_tx_timeout_clocks - now, + (bs->echo_tx_timeout_clocks - now) / bm->cpu_cps, + bs->echo_tx_timeout_clocks); +} + static void bfd_recalc_detection_time (bfd_main_t * bm, bfd_session_t * bs) { - if (!bs->local_demand) - { - /* asynchronous mode */ - bs->detection_time_clocks = - bs->remote_detect_mult * - clib_max (bs->effective_required_min_rx_clocks, - bs->remote_desired_min_tx_clocks); - } - else - { - /* demand mode */ - bs->detection_time_clocks = - bs->local_detect_mult * clib_max 
(bs->config_desired_min_tx_clocks, - bs->remote_min_rx_clocks); - } + bs->detection_time_clocks = + bs->remote_detect_mult * clib_max (bs->effective_required_min_rx_clocks, + bs->remote_desired_min_tx_clocks); BFD_DBG ("Recalculated detection time %lu clocks/%.2fs", bs->detection_time_clocks, bs->detection_time_clocks / bm->cpu_cps); @@ -220,25 +279,37 @@ bfd_set_timer (bfd_main_t * bm, bfd_session_t * bs, u64 now, { u64 next = 0; u64 rx_timeout = 0; + u64 tx_timeout = 0; if (BFD_STATE_up == bs->local_state) { rx_timeout = bs->last_rx_clocks + bs->detection_time_clocks; } - if (bs->tx_timeout_clocks && rx_timeout) + if (BFD_STATE_up != bs->local_state || !bs->remote_demand || + BFD_POLL_NOT_NEEDED != bs->poll_state) { - next = clib_min (bs->tx_timeout_clocks, rx_timeout); + tx_timeout = bs->tx_timeout_clocks; } - else if (bs->tx_timeout_clocks) + if (tx_timeout && rx_timeout) { - next = bs->tx_timeout_clocks; + next = clib_min (tx_timeout, rx_timeout); + } + else if (tx_timeout) + { + next = tx_timeout; } else if (rx_timeout) { next = rx_timeout; } - BFD_DBG ("bs_idx=%u, tx_timeout=%lu, rx_timeout=%lu, next=%s", bs->bs_idx, - bs->tx_timeout_clocks, rx_timeout, - next == bs->tx_timeout_clocks ? "tx" : "rx"); + if (bs->echo && next > bs->echo_tx_timeout_clocks) + { + next = bs->echo_tx_timeout_clocks; + } + BFD_DBG ("bs_idx=%u, tx_timeout=%lu, echo_tx_timeout=%lu, rx_timeout=%lu, " + "next=%s", + bs->bs_idx, tx_timeout, bs->echo_tx_timeout_clocks, rx_timeout, + next == tx_timeout + ? "tx" : (next == bs->echo_tx_timeout_clocks ? 
"echo tx" : "rx")); /* sometimes the wheel expires an event a bit sooner than requested, account for that here */ if (next && (now + bm->wheel_inaccuracy > bs->wheel_time_clocks || @@ -271,6 +342,7 @@ bfd_set_effective_desired_min_tx (bfd_main_t * bm, BFD_CLK_PRN (bs->effective_desired_min_tx_clocks)); bfd_recalc_detection_time (bm, bs); bfd_recalc_tx_interval (bm, bs); + bfd_recalc_echo_tx_interval (bm, bs); bfd_calc_next_tx (bm, bs, now); } @@ -287,25 +359,40 @@ bfd_set_effective_required_min_rx (bfd_main_t * bm, static void bfd_set_remote_required_min_rx (bfd_main_t * bm, bfd_session_t * bs, - u64 now, - u32 remote_required_min_rx_usec, - int handling_wakeup) + u64 now, u32 remote_required_min_rx_usec) { - bs->remote_min_rx_usec = remote_required_min_rx_usec; - bs->remote_min_rx_clocks = - bfd_usec_to_clocks (bm, remote_required_min_rx_usec); - BFD_DBG ("Set remote min rx to " BFD_CLK_FMT, - BFD_CLK_PRN (bs->remote_min_rx_clocks)); - bfd_recalc_detection_time (bm, bs); - bfd_recalc_tx_interval (bm, bs); - bfd_calc_next_tx (bm, bs, now); - bfd_set_timer (bm, bs, now, handling_wakeup); + if (bs->remote_min_rx_usec != remote_required_min_rx_usec) + { + bs->remote_min_rx_usec = remote_required_min_rx_usec; + bs->remote_min_rx_clocks = + bfd_usec_to_clocks (bm, remote_required_min_rx_usec); + BFD_DBG ("Set remote min rx to " BFD_CLK_FMT, + BFD_CLK_PRN (bs->remote_min_rx_clocks)); + bfd_recalc_detection_time (bm, bs); + bfd_recalc_tx_interval (bm, bs); + } +} + +static void +bfd_set_remote_required_min_echo_rx (bfd_main_t * bm, bfd_session_t * bs, + u64 now, + u32 remote_required_min_echo_rx_usec) +{ + if (bs->remote_min_echo_rx_usec != remote_required_min_echo_rx_usec) + { + bs->remote_min_echo_rx_usec = remote_required_min_echo_rx_usec; + bs->remote_min_echo_rx_clocks = + bfd_usec_to_clocks (bm, bs->remote_min_echo_rx_usec); + BFD_DBG ("Set remote min echo rx to " BFD_CLK_FMT, + BFD_CLK_PRN (bs->remote_min_echo_rx_clocks)); + bfd_recalc_echo_tx_interval (bm, bs); + 
} } void bfd_session_start (bfd_main_t * bm, bfd_session_t * bs) { - BFD_DBG ("%U", format_bfd_session, bs); + BFD_DBG ("\nStarting session: %U", format_bfd_session, bs); bfd_recalc_tx_interval (bm, bs); vlib_process_signal_event (bm->vlib_main, bm->bfd_process_node_index, BFD_EVENT_NEW_SESSION, bs->bs_idx); @@ -418,11 +505,12 @@ static void bfd_on_state_change (bfd_main_t * bm, bfd_session_t * bs, u64 now, int handling_wakeup) { - BFD_DBG ("State changed: %U", format_bfd_session, bs); + BFD_DBG ("\nState changed: %U", format_bfd_session, bs); bfd_event (bm, bs); switch (bs->local_state) { case BFD_STATE_admin_down: + bs->echo = 0; bfd_set_effective_desired_min_tx (bm, bs, now, clib_max (bs->config_desired_min_tx_clocks, @@ -432,6 +520,7 @@ bfd_on_state_change (bfd_main_t * bm, bfd_session_t * bs, u64 now, bfd_set_timer (bm, bs, now, handling_wakeup); break; case BFD_STATE_down: + bs->echo = 0; bfd_set_effective_desired_min_tx (bm, bs, now, clib_max (bs->config_desired_min_tx_clocks, @@ -441,6 +530,7 @@ bfd_on_state_change (bfd_main_t * bm, bfd_session_t * bs, u64 now, bfd_set_timer (bm, bs, now, handling_wakeup); break; case BFD_STATE_init: + bs->echo = 0; bfd_set_effective_desired_min_tx (bm, bs, now, bs->config_desired_min_tx_clocks); bfd_set_timer (bm, bs, now, handling_wakeup); @@ -448,7 +538,7 @@ bfd_on_state_change (bfd_main_t * bm, bfd_session_t * bs, u64 now, case BFD_STATE_up: bfd_set_effective_desired_min_tx (bm, bs, now, bs->config_desired_min_tx_clocks); - if (POLL_NOT_NEEDED == bs->poll_state) + if (BFD_POLL_NOT_NEEDED == bs->poll_state) { bfd_set_effective_required_min_rx (bm, bs, now, bs->config_required_min_rx_clocks); @@ -462,13 +552,14 @@ static void bfd_on_config_change (vlib_main_t * vm, vlib_node_runtime_t * rt, bfd_main_t * bm, bfd_session_t * bs, u64 now) { - if (bs->remote_demand) - { - /* TODO - initiate poll sequence here */ - } - else + /* + * if remote demand mode is set and we need to do a poll, set the next + * timeout so that the 
session wakes up immediately + */ + if (bs->remote_demand && BFD_POLL_NEEDED == bs->poll_state && + bs->poll_state_start_or_timeout_clocks < now) { - /* asynchronous - poll is part of periodic - nothing to do here */ + bs->tx_timeout_clocks = now; } bfd_recalc_detection_time (bm, bs); bfd_set_timer (bm, bs, now, 0); @@ -482,27 +573,36 @@ bfd_add_transport_layer (vlib_main_t * vm, vlib_buffer_t * b, { case BFD_TRANSPORT_UDP4: BFD_DBG ("Transport bfd via udp4, bs_idx=%u", bs->bs_idx); - bfd_add_udp4_transport (vm, b, bs); + bfd_add_udp4_transport (vm, b, bs, 0 /* is_echo */ ); break; case BFD_TRANSPORT_UDP6: BFD_DBG ("Transport bfd via udp6, bs_idx=%u", bs->bs_idx); - bfd_add_udp6_transport (vm, b, bs); + bfd_add_udp6_transport (vm, b, bs, 0 /* is_echo */ ); break; } } -static vlib_buffer_t * -bfd_create_frame_to_next_node (vlib_main_t * vm, bfd_session_t * bs) +static int +bfd_echo_add_transport_layer (vlib_main_t * vm, vlib_buffer_t * b, + bfd_session_t * bs) { - u32 bi; - if (vlib_buffer_alloc (vm, &bi, 1) != 1) + switch (bs->transport) { - clib_warning ("buffer allocation failure"); - return NULL; + case BFD_TRANSPORT_UDP4: + BFD_DBG ("Transport bfd echo via udp4, bs_idx=%u", bs->bs_idx); + return bfd_add_udp4_transport (vm, b, bs, 1 /* is_echo */ ); + break; + case BFD_TRANSPORT_UDP6: + BFD_DBG ("Transport bfd echo via udp6, bs_idx=%u", bs->bs_idx); + return bfd_add_udp6_transport (vm, b, bs, 1 /* is_echo */ ); + break; } + return 0; +} - vlib_buffer_t *b = vlib_get_buffer (vm, bi); - ASSERT (b->current_data == 0); +static void +bfd_create_frame_to_next_node (vlib_main_t * vm, bfd_session_t * bs, u32 bi) +{ vlib_frame_t *f = vlib_get_frame_to_node (vm, bfd_node_index_by_transport[bs->transport]); @@ -510,9 +610,7 @@ bfd_create_frame_to_next_node (vlib_main_t * vm, bfd_session_t * bs) u32 *to_next = vlib_frame_vector_args (f); to_next[0] = bi; f->n_vectors = 1; - vlib_put_frame_to_node (vm, bfd_node_index_by_transport[bs->transport], f); - return b; } #if 
WITH_LIBSSL > 0 @@ -583,45 +681,118 @@ bfd_add_auth_section (vlib_buffer_t * b, bfd_session_t * bs) } } +static int +bfd_is_echo_possible (bfd_session_t * bs) +{ + if (BFD_STATE_up == bs->local_state && BFD_STATE_up == bs->remote_state && + bs->remote_min_echo_rx_usec > 0) + { + switch (bs->transport) + { + case BFD_TRANSPORT_UDP4: + return bfd_udp_is_echo_available (BFD_TRANSPORT_UDP4); + case BFD_TRANSPORT_UDP6: + return bfd_udp_is_echo_available (BFD_TRANSPORT_UDP6); + } + } + return 0; +} + static void -bfd_init_control_frame (vlib_buffer_t * b, bfd_session_t * bs) +bfd_init_control_frame (bfd_main_t * bm, bfd_session_t * bs, + vlib_buffer_t * b) { bfd_pkt_t *pkt = vlib_buffer_get_current (b); - u32 bfd_length = 0; bfd_length = sizeof (bfd_pkt_t); memset (pkt, 0, sizeof (*pkt)); bfd_pkt_set_version (pkt, 1); bfd_pkt_set_diag_code (pkt, bs->local_diag); bfd_pkt_set_state (pkt, bs->local_state); - if (bs->local_demand && BFD_STATE_up == bs->local_state && - BFD_STATE_up == bs->remote_state) - { - bfd_pkt_set_demand (pkt); - } pkt->head.detect_mult = bs->local_detect_mult; pkt->head.length = clib_host_to_net_u32 (bfd_length); pkt->my_disc = bs->local_discr; pkt->your_disc = bs->remote_discr; pkt->des_min_tx = clib_host_to_net_u32 (bs->config_desired_min_tx_usec); - pkt->req_min_rx = clib_host_to_net_u32 (bs->config_required_min_rx_usec); + if (bs->echo) + { + pkt->req_min_rx = + clib_host_to_net_u32 (bfd_clocks_to_usec + (bm, bs->effective_required_min_rx_clocks)); + } + else + { + pkt->req_min_rx = + clib_host_to_net_u32 (bs->config_required_min_rx_usec); + } pkt->req_min_echo_rx = clib_host_to_net_u32 (1); b->current_length = bfd_length; } +static void +bfd_send_echo (vlib_main_t * vm, vlib_node_runtime_t * rt, + bfd_main_t * bm, bfd_session_t * bs, u64 now, + int handling_wakeup) +{ + if (!bfd_is_echo_possible (bs)) + { + BFD_DBG ("\nSwitching off echo function: %U", format_bfd_session, bs); + bs->echo = 0; + return; + } + /* sometimes the wheel expires an 
event a bit sooner than requested, account + for that here */ + if (now + bm->wheel_inaccuracy >= bs->echo_tx_timeout_clocks) + { + BFD_DBG ("\nSending echo packet: %U", format_bfd_session, bs); + u32 bi; + if (vlib_buffer_alloc (vm, &bi, 1) != 1) + { + clib_warning ("buffer allocation failure"); + return; + } + vlib_buffer_t *b = vlib_get_buffer (vm, bi); + ASSERT (b->current_data == 0); + bfd_echo_pkt_t *pkt = vlib_buffer_get_current (b); + memset (pkt, 0, sizeof (*pkt)); + pkt->discriminator = bs->local_discr; + pkt->expire_time_clocks = + now + bs->echo_transmit_interval_clocks * bs->local_detect_mult; + pkt->checksum = + bfd_calc_echo_checksum (bs->local_discr, pkt->expire_time_clocks, + bs->echo_secret); + b->current_length = sizeof (*pkt); + if (!bfd_echo_add_transport_layer (vm, b, bs)) + { + BFD_ERR ("cannot send echo packet out, turning echo off"); + bs->echo = 0; + vlib_buffer_free_one (vm, bi); + return; + } + bs->echo_last_tx_clocks = now; + bfd_calc_next_echo_tx (bm, bs, now); + bfd_create_frame_to_next_node (vm, bs, bi); + } + else + { + BFD_DBG + ("No need to send echo packet now, now is %lu, tx_timeout is %lu", + now, bs->echo_tx_timeout_clocks); + } +} + static void bfd_send_periodic (vlib_main_t * vm, vlib_node_runtime_t * rt, bfd_main_t * bm, bfd_session_t * bs, u64 now, int handling_wakeup) { - if (!bs->remote_min_rx_usec) + if (!bs->remote_min_rx_usec && BFD_POLL_NOT_NEEDED == bs->poll_state) { - BFD_DBG - ("bfd.RemoteMinRxInterval is zero, not sending periodic control " - "frame"); + BFD_DBG ("Remote min rx interval is zero, not sending periodic control " + "frame"); return; } - if (POLL_NOT_NEEDED == bs->poll_state && bs->remote_demand && + if (BFD_POLL_NOT_NEEDED == bs->poll_state && bs->remote_demand && BFD_STATE_up == bs->local_state && BFD_STATE_up == bs->remote_state) { /* @@ -630,33 +801,52 @@ bfd_send_periodic (vlib_main_t * vm, vlib_node_runtime_t * rt, * bfd.SessionState is Up, and bfd.RemoteSessionState is Up) and a Poll * Sequence 
is not being transmitted. */ - BFD_DBG ("bfd.RemoteDemand is non-zero, not sending periodic control " - "frame"); + BFD_DBG ("Remote demand is set, not sending periodic control frame"); return; } /* sometimes the wheel expires an event a bit sooner than requested, account for that here */ if (now + bm->wheel_inaccuracy >= bs->tx_timeout_clocks) { - BFD_DBG ("Send periodic control frame for bs_idx=%lu: %U", bs->bs_idx, - format_bfd_session, bs); - vlib_buffer_t *b = bfd_create_frame_to_next_node (vm, bs); - if (!b) + BFD_DBG ("\nSending periodic control frame: %U", format_bfd_session, + bs); + u32 bi; + if (vlib_buffer_alloc (vm, &bi, 1) != 1) { + clib_warning ("buffer allocation failure"); return; } - bfd_init_control_frame (b, bs); - if (POLL_NOT_NEEDED != bs->poll_state) + vlib_buffer_t *b = vlib_get_buffer (vm, bi); + ASSERT (b->current_data == 0); + bfd_init_control_frame (bm, bs, b); + switch (bs->poll_state) { - /* here we are either beginning a new poll sequence or retrying .. */ + case BFD_POLL_NEEDED: + if (now < bs->poll_state_start_or_timeout_clocks) + { + BFD_DBG ("Cannot start a poll sequence yet, need to wait " + "for " BFD_CLK_FMT, + BFD_CLK_PRN (bs->poll_state_start_or_timeout_clocks - + now)); + break; + } + bs->poll_state_start_or_timeout_clocks = now; + bfd_set_poll_state (bs, BFD_POLL_IN_PROGRESS); + /* fallthrough */ + case BFD_POLL_IN_PROGRESS: + case BFD_POLL_IN_PROGRESS_AND_QUEUED: bfd_pkt_set_poll (vlib_buffer_get_current (b)); - bs->poll_state = POLL_IN_PROGRESS; BFD_DBG ("Setting poll bit in packet, bs_idx=%u", bs->bs_idx); + break; + case BFD_POLL_NOT_NEEDED: + /* fallthrough */ + break; } bfd_add_auth_section (b, bs); bfd_add_transport_layer (vm, b, bs); bs->last_tx_clocks = now; bfd_calc_next_tx (bm, bs, now); + bfd_create_frame_to_next_node (vm, bs, bi); } else { @@ -664,15 +854,14 @@ bfd_send_periodic (vlib_main_t * vm, vlib_node_runtime_t * rt, ("No need to send control frame now, now is %lu, tx_timeout is %lu", now, 
bs->tx_timeout_clocks); } - bfd_set_timer (bm, bs, now, handling_wakeup); } void bfd_init_final_control_frame (vlib_main_t * vm, vlib_buffer_t * b, - bfd_session_t * bs) + bfd_main_t * bm, bfd_session_t * bs) { BFD_DBG ("Send final control frame for bs_idx=%lu", bs->bs_idx); - bfd_init_control_frame (b, bs); + bfd_init_control_frame (bm, bs, b); bfd_pkt_set_final (vlib_buffer_get_current (b)); bfd_add_auth_section (b, bs); bfd_add_transport_layer (vm, b, bs); @@ -681,7 +870,7 @@ bfd_init_final_control_frame (vlib_main_t * vm, vlib_buffer_t * b, * RFC allows to include changes in final frame, so if there were any * pending, we already did that, thus we can clear any pending poll needs */ - bs->poll_state = POLL_NOT_NEEDED; + bfd_set_poll_state (bs, BFD_POLL_NOT_NEEDED); } static void @@ -703,7 +892,16 @@ bfd_check_rx_timeout (bfd_main_t * bm, bfd_session_t * bs, u64 now, * since it is no longer required to maintain previous session state) * and then can transmit at its own rate. */ - bfd_set_remote_required_min_rx (bm, bs, now, 1, handling_wakeup); + bfd_set_remote_required_min_rx (bm, bs, now, 1); + } + else if (bs->echo && + bs->echo_last_rx_clocks + + bs->echo_transmit_interval_clocks * bs->local_detect_mult <= + now + bm->wheel_inaccuracy) + { + BFD_DBG ("Echo rx timeout, session goes down"); + bfd_set_diag (bs, BFD_DIAG_CODE_echo_failed); + bfd_set_state (bm, bs, BFD_STATE_down, handling_wakeup); } } @@ -721,10 +919,30 @@ bfd_on_timeout (vlib_main_t * vm, vlib_node_runtime_t * rt, bfd_main_t * bm, bfd_send_periodic (vm, rt, bm, bs, now, 1); break; case BFD_STATE_init: - /* fallthrough */ + bfd_check_rx_timeout (bm, bs, now, 1); + bfd_send_periodic (vm, rt, bm, bs, now, 1); + break; case BFD_STATE_up: bfd_check_rx_timeout (bm, bs, now, 1); + if (BFD_POLL_NOT_NEEDED == bs->poll_state && !bs->echo && + bfd_is_echo_possible (bs)) + { + /* switch on echo function as main detection method now */ + BFD_DBG ("Switching on echo function, bs_idx=%u", bs->bs_idx); + 
bs->echo = 1; + bs->echo_last_rx_clocks = now; + bs->echo_tx_timeout_clocks = now; + bfd_set_effective_required_min_rx (bm, bs, now, + clib_max + (bm->min_required_min_rx_while_echo_clocks, + bs->config_required_min_rx_clocks)); + bfd_set_poll_state (bs, BFD_POLL_NEEDED); + } bfd_send_periodic (vm, rt, bm, bs, now, 1); + if (bs->echo) + { + bfd_send_echo (vm, rt, bm, bs, now, 1); + } break; } } @@ -822,6 +1040,7 @@ bfd_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) { bfd_session_t *bs = pool_elt_at_index (bm->sessions, bs_idx); bfd_on_timeout (vm, rt, bm, bs, now); + bfd_set_timer (bm, bs, now, 1); } } if (expired) @@ -894,7 +1113,9 @@ bfd_main_init (vlib_main_t * vm) bm->cpu_cps = vm->clib_time.clocks_per_second; BFD_DBG ("cps is %.2f", bm->cpu_cps); bm->default_desired_min_tx_clocks = - bfd_usec_to_clocks (bm, BFD_DEFAULT_DESIRED_MIN_TX_US); + bfd_usec_to_clocks (bm, BFD_DEFAULT_DESIRED_MIN_TX_USEC); + bm->min_required_min_rx_while_echo_clocks = + bfd_usec_to_clocks (bm, BFD_REQUIRED_MIN_RX_USEC_WHILE_ECHO); const u64 now = clib_cpu_time_now (); timing_wheel_init (&bm->wheel, now, bm->cpu_cps); bm->wheel_inaccuracy = 2 << bm->wheel.log2_clocks_per_bin; @@ -912,14 +1133,28 @@ bfd_main_init (vlib_main_t * vm) VLIB_INIT_FUNCTION (bfd_main_init); bfd_session_t * -bfd_get_session (bfd_main_t * bm, bfd_transport_t t) +bfd_get_session (bfd_main_t * bm, bfd_transport_e t) { bfd_session_t *result; pool_get (bm->sessions, result); memset (result, 0, sizeof (*result)); result->bs_idx = result - bm->sessions; result->transport = t; - result->local_discr = random_u32 (&bm->random_seed); + const unsigned limit = 1000; + unsigned counter = 0; + do + { + result->local_discr = random_u32 (&bm->random_seed); + if (counter > limit) + { + clib_warning ("Couldn't allocate unused session discriminator even " + "after %u tries!", limit); + pool_put (bm->sessions, result); + return NULL; + } + ++counter; + } + while (hash_get (bm->session_by_disc, 
result->local_discr)); bfd_set_defaults (bm, result); hash_set (bm->session_by_disc, result->local_discr, result->bs_idx); return result; @@ -1372,29 +1607,45 @@ bfd_consume_pkt (bfd_main_t * bm, const bfd_pkt_t * pkt, u32 bs_idx) bfd_usec_to_clocks (bm, clib_net_to_host_u32 (pkt->des_min_tx)); bs->remote_detect_mult = pkt->head.detect_mult; bfd_set_remote_required_min_rx (bm, bs, now, - clib_net_to_host_u32 (pkt->req_min_rx), 0); - /* FIXME - If the Required Min Echo RX Interval field is zero, the - transmission of Echo packets, if any, MUST cease. - - If a Poll Sequence is being transmitted by the local system and - the Final (F) bit in the received packet is set, the Poll Sequence - MUST be terminated. - */ + clib_net_to_host_u32 (pkt->req_min_rx)); + bfd_set_remote_required_min_echo_rx (bm, bs, now, + clib_net_to_host_u32 + (pkt->req_min_echo_rx)); /* FIXME 6.8.2 */ /* FIXME 6.8.4 */ - if (bs->poll_state == POLL_IN_PROGRESS && bfd_pkt_get_final (pkt)) + if (bfd_pkt_get_final (pkt)) { - bs->poll_state = POLL_NOT_NEEDED; - BFD_DBG ("Poll sequence terminated, bs_idx=%u", bs->bs_idx); - if (BFD_STATE_up == bs->local_state) + if (BFD_POLL_IN_PROGRESS == bs->poll_state) { - bfd_set_effective_required_min_rx (bm, bs, now, - bs->config_required_min_rx_clocks); - bfd_recalc_detection_time (bm, bs); - bfd_set_timer (bm, bs, now, 0); + BFD_DBG ("Poll sequence terminated, bs_idx=%u", bs->bs_idx); + bfd_set_poll_state (bs, BFD_POLL_NOT_NEEDED); + if (BFD_STATE_up == bs->local_state) + { + bfd_set_effective_required_min_rx (bm, bs, now, + clib_max (bs->echo * + bm->min_required_min_rx_while_echo_clocks, + bs->config_required_min_rx_clocks)); + } + } + else if (BFD_POLL_IN_PROGRESS_AND_QUEUED == bs->poll_state) + { + /* + * next poll sequence must be delayed by at least the round trip + * time, so calculate that here + */ + BFD_DBG ("Next poll sequence can commence in " BFD_CLK_FMT, + BFD_CLK_PRN (now - + bs->poll_state_start_or_timeout_clocks)); + 
bs->poll_state_start_or_timeout_clocks = + now + (now - bs->poll_state_start_or_timeout_clocks); + BFD_DBG + ("Poll sequence terminated, but another is needed, bs_idx=%u", + bs->bs_idx); + bfd_set_poll_state (bs, BFD_POLL_NEEDED); } } + bfd_calc_next_tx (bm, bs, now); + bfd_set_timer (bm, bs, now, 0); if (BFD_STATE_admin_down == bs->local_state) { BFD_DBG ("Session is admin-down, ignoring packet, bs_idx=%u", @@ -1435,52 +1686,75 @@ bfd_consume_pkt (bfd_main_t * bm, const bfd_pkt_t * pkt, u32 bs_idx) } } -static const char * -bfd_poll_state_string (bfd_poll_state_e state) +int +bfd_consume_echo_pkt (bfd_main_t * bm, vlib_buffer_t * b) { - switch (state) + bfd_echo_pkt_t *pkt = NULL; + if (b->current_length != sizeof (*pkt)) { -#define F(x) \ - case POLL_##x: \ - return "POLL_" #x; - foreach_bfd_poll_state (F) -#undef F + return 0; } - return "UNKNOWN"; + pkt = vlib_buffer_get_current (b); + bfd_session_t *bs = bfd_find_session_by_disc (bm, pkt->discriminator); + if (!bs) + { + return 0; + } + BFD_DBG ("Scanning bfd echo packet, bs_idx=%d", bs->bs_idx); + u64 checksum = + bfd_calc_echo_checksum (bs->local_discr, pkt->expire_time_clocks, + bs->echo_secret); + if (checksum != pkt->checksum) + { + BFD_DBG ("Invalid echo packet, checksum mismatch"); + return 1; + } + u64 now = clib_cpu_time_now (); + if (pkt->expire_time_clocks < now) + { + BFD_DBG ("Stale packet received, expire time %lu < now %lu", + pkt->expire_time_clocks, now); + } + else + { + bs->echo_last_rx_clocks = now; + } + return 1; } u8 * format_bfd_session (u8 * s, va_list * args) { const bfd_session_t *bs = va_arg (*args, bfd_session_t *); - s = format (s, "BFD(%u): bfd.SessionState=%s, " - "bfd.RemoteSessionState=%s, " - "bfd.LocalDiscr=%u, " - "bfd.RemoteDiscr=%u, " - "bfd.LocalDiag=%s, " - "bfd.DesiredMinTxInterval=%u, " - "bfd.RequiredMinRxInterval=%u, " - "bfd.RequiredMinEchoRxInterval=%u, " - "bfd.RemoteMinRxInterval=%u, " - "bfd.DemandMode=%s, " - "bfd.RemoteDemandMode=%s, " - "bfd.DetectMult=%u, " 
- "Auth: {local-seq-num=%u, " - "remote-seq-num=%u, " - "is-delayed=%s, " - "curr-key=%U, " - "next-key=%U}," - "poll-state: %s", + uword indent = format_get_indent (s); + s = format (s, "bs_idx=%u local-state=%s remote-state=%s\n" + "%Ulocal-discriminator=%u remote-discriminator=%u\n" + "%Ulocal-diag=%s echo-active=%s\n" + "%Udesired-min-tx=%u required-min-rx=%u\n" + "%Urequired-min-echo-rx=%u detect-mult=%u\n" + "%Uremote-min-rx=%u remote-min-echo-rx=%u\n" + "%Uremote-demand=%s poll-state=%s\n" + "%Uauth: local-seq-num=%u remote-seq-num=%u\n" + "%U is-delayed=%s\n" + "%U curr-key=%U\n" + "%U next-key=%U", bs->bs_idx, bfd_state_string (bs->local_state), - bfd_state_string (bs->remote_state), bs->local_discr, - bs->remote_discr, bfd_diag_code_string (bs->local_diag), + bfd_state_string (bs->remote_state), format_white_space, indent, + bs->local_discr, bs->remote_discr, format_white_space, indent, + bfd_diag_code_string (bs->local_diag), + (bs->echo ? "yes" : "no"), format_white_space, indent, bs->config_desired_min_tx_usec, bs->config_required_min_rx_usec, - 1, bs->remote_min_rx_usec, (bs->local_demand ? "yes" : "no"), - (bs->remote_demand ? "yes" : "no"), bs->local_detect_mult, - bs->auth.local_seq_number, bs->auth.remote_seq_number, - (bs->auth.is_delayed ? "yes" : "no"), format_bfd_auth_key, - bs->auth.curr_key, format_bfd_auth_key, bs->auth.next_key, - bfd_poll_state_string (bs->poll_state)); + format_white_space, indent, 1, bs->local_detect_mult, + format_white_space, indent, bs->remote_min_rx_usec, + bs->remote_min_echo_rx_usec, format_white_space, indent, + (bs->remote_demand ? "yes" : "no"), + bfd_poll_state_string (bs->poll_state), format_white_space, + indent, bs->auth.local_seq_number, bs->auth.remote_seq_number, + format_white_space, indent, + (bs->auth.is_delayed ? 
"yes" : "no"), format_white_space, + indent, format_bfd_auth_key, bs->auth.curr_key, + format_white_space, indent, format_bfd_auth_key, + bs->auth.next_key); return s; } @@ -1537,7 +1811,7 @@ bfd_auth_activate (bfd_session_t * bs, u32 conf_key_id, bs->auth.is_delayed = 0; } ++key->use_count; - BFD_DBG ("Session auth modified: %U", format_bfd_session, bs); + BFD_DBG ("\nSession auth modified: %U", format_bfd_session, bs); return 0; } @@ -1571,7 +1845,7 @@ bfd_auth_deactivate (bfd_session_t * bs, u8 is_delayed) --bs->auth.next_key->use_count; bs->auth.next_key = NULL; } - BFD_DBG ("Session auth modified: %U", format_bfd_session, bs); + BFD_DBG ("\nSession auth modified: %U", format_bfd_session, bs); return 0; #else clib_warning ("SSL missing, cannot deactivate BFD authentication"); @@ -1588,10 +1862,10 @@ bfd_session_set_params (bfd_main_t * bm, bfd_session_t * bs, bs->config_desired_min_tx_usec != desired_min_tx_usec || bs->config_required_min_rx_usec != required_min_rx_usec) { - BFD_DBG ("Changing session params: %U", format_bfd_session, bs); + BFD_DBG ("\nChanging session params: %U", format_bfd_session, bs); switch (bs->poll_state) { - case POLL_NOT_NEEDED: + case BFD_POLL_NOT_NEEDED: if (BFD_STATE_up == bs->local_state || BFD_STATE_init == bs->local_state) { @@ -1599,21 +1873,26 @@ bfd_session_set_params (bfd_main_t * bm, bfd_session_t * bs, if (bs->config_desired_min_tx_usec != desired_min_tx_usec || bs->config_required_min_rx_usec != required_min_rx_usec) { - bs->poll_state = POLL_NEEDED; - BFD_DBG ("Set poll state=%s, bs_idx=%u", - bfd_poll_state_string (bs->poll_state), - bs->bs_idx); + bfd_set_poll_state (bs, BFD_POLL_NEEDED); } } break; - case POLL_NEEDED: - /* nothing to do */ + case BFD_POLL_NEEDED: + case BFD_POLL_IN_PROGRESS_AND_QUEUED: + /* + * nothing to do - will be handled in the future poll which is + * already scheduled for execution + */ break; - case POLL_IN_PROGRESS: - /* can't change params now ... 
*/ - BFD_ERR ("Poll in progress, cannot change params for session with " - "bs_idx=%u", bs->bs_idx); - return VNET_API_ERROR_BFD_EAGAIN; + case BFD_POLL_IN_PROGRESS: + /* poll sequence is not needed for detect multiplier change */ + if (bs->config_desired_min_tx_usec != desired_min_tx_usec || + bs->config_required_min_rx_usec != required_min_rx_usec) + { + BFD_DBG ("Poll in progress, queueing extra poll, bs_idx=%u", + bs->bs_idx); + bfd_set_poll_state (bs, BFD_POLL_IN_PROGRESS_AND_QUEUED); + } } bs->local_detect_mult = detect_mult; @@ -1623,7 +1902,7 @@ bfd_session_set_params (bfd_main_t * bm, bfd_session_t * bs, bs->config_required_min_rx_usec = required_min_rx_usec; bs->config_required_min_rx_clocks = bfd_usec_to_clocks (bm, required_min_rx_usec); - BFD_DBG ("Changed session params: %U", format_bfd_session, bs); + BFD_DBG ("\nChanged session params: %U", format_bfd_session, bs); vlib_process_signal_event (bm->vlib_main, bm->bfd_process_node_index, BFD_EVENT_CONFIG_CHANGED, bs->bs_idx); diff --git a/src/vnet/bfd/bfd_main.h b/src/vnet/bfd/bfd_main.h index 14a54d6f..d8063f9d 100644 --- a/src/vnet/bfd/bfd_main.h +++ b/src/vnet/bfd/bfd_main.h @@ -24,17 +24,6 @@ #include #include -#define foreach_bfd_transport(F) \ - F (UDP4, "ip4-rewrite") \ - F (UDP6, "ip6-rewrite") - -typedef enum -{ -#define F(t, n) BFD_TRANSPORT_##t, - foreach_bfd_transport (F) -#undef F -} bfd_transport_t; - #define foreach_bfd_mode(F) \ F (asynchronous) \ F (demand) @@ -64,14 +53,15 @@ typedef struct bfd_auth_type_e auth_type; } bfd_auth_key_t; -#define foreach_bfd_poll_state(F)\ - F(NOT_NEEDED)\ -F(NEEDED)\ -F(IN_PROGRESS) +#define foreach_bfd_poll_state(F) \ + F (NOT_NEEDED) \ + F (NEEDED) \ + F (IN_PROGRESS) \ + F (IN_PROGRESS_AND_QUEUED) typedef enum { -#define F(x) POLL_##x, +#define F(x) BFD_POLL_##x, foreach_bfd_poll_state (F) #undef F } bfd_poll_state_e; @@ -120,21 +110,27 @@ typedef struct bfd_session_s /* remote min rx interval (clocks) */ u64 remote_min_rx_clocks; + /* remote min echo 
rx interval (microseconds) */ + u64 remote_min_echo_rx_usec; + + /* remote min echo rx interval (clocks) */ + u64 remote_min_echo_rx_clocks; + /* remote desired min tx interval (clocks) */ u64 remote_desired_min_tx_clocks; /* configured detect multiplier */ u8 local_detect_mult; - /* 1 if in demand mode, 0 otherwise */ - u8 local_demand; - /* 1 if remote system sets demand mode, 0 otherwise */ u8 remote_demand; /* remote detect multiplier */ u8 remote_detect_mult; + /* 1 is echo function is active, 0 otherwise */ + u8 echo; + /* set to value of timer in timing wheel, 0 if never set */ u64 wheel_time_clocks; @@ -150,12 +146,33 @@ typedef struct bfd_session_s /* timestamp of last packet received */ u64 last_rx_clocks; + /* transmit interval for echo packets */ + u64 echo_transmit_interval_clocks; + + /* next time at which to transmit echo packet */ + u64 echo_tx_timeout_clocks; + + /* timestamp of last echo packet transmitted */ + u64 echo_last_tx_clocks; + + /* timestamp of last echo packet received */ + u64 echo_last_rx_clocks; + + /* secret used for calculating/checking checksum of echo packets */ + u32 echo_secret; + /* detection time */ u64 detection_time_clocks; /* state info regarding poll sequence */ bfd_poll_state_e poll_state; + /* + * helper for delayed poll sequence - marks either start of running poll + * sequence or timeout, after which we can start the next poll sequnce + */ + u64 poll_state_start_or_timeout_clocks; + /* authentication information */ struct { @@ -191,7 +208,7 @@ typedef struct bfd_session_s } auth; /* transport type for this session */ - bfd_transport_t transport; + bfd_transport_e transport; /* union of transport-specific data */ union @@ -227,6 +244,9 @@ typedef struct /* default desired min tx in clocks */ u64 default_desired_min_tx_clocks; + /* minimum required min rx while echo function is active - clocks */ + u64 min_required_min_rx_while_echo_clocks; + /* for generating random numbers */ u32 random_seed; @@ -268,36 +288,54 @@ 
enum BFD_EVENT_CONFIG_CHANGED, } bfd_process_event_e; -u8 *bfd_input_format_trace (u8 * s, va_list * args); +/* echo packet structure */ +/* *INDENT-OFF* */ +typedef CLIB_PACKED (struct { + /* local discriminator */ + u32 discriminator; + /* expire time of this packet - clocks */ + u64 expire_time_clocks; + /* checksum - based on discriminator, local secret and expire time */ + u64 checksum; +}) bfd_echo_pkt_t; +/* *INDENT-ON* */ -bfd_session_t *bfd_get_session (bfd_main_t * bm, bfd_transport_t t); +u8 *bfd_input_format_trace (u8 * s, va_list * args); +bfd_session_t *bfd_get_session (bfd_main_t * bm, bfd_transport_e t); void bfd_put_session (bfd_main_t * bm, bfd_session_t * bs); bfd_session_t *bfd_find_session_by_idx (bfd_main_t * bm, uword bs_idx); bfd_session_t *bfd_find_session_by_disc (bfd_main_t * bm, u32 disc); void bfd_session_start (bfd_main_t * bm, bfd_session_t * bs); void bfd_consume_pkt (bfd_main_t * bm, const bfd_pkt_t * bfd, u32 bs_idx); +int bfd_consume_echo_pkt (bfd_main_t * bm, vlib_buffer_t * b); int bfd_verify_pkt_common (const bfd_pkt_t * pkt); int bfd_verify_pkt_auth (const bfd_pkt_t * pkt, u16 pkt_size, bfd_session_t * bs); void bfd_event (bfd_main_t * bm, bfd_session_t * bs); void bfd_init_final_control_frame (vlib_main_t * vm, vlib_buffer_t * b, - bfd_session_t * bs); + bfd_main_t * bm, bfd_session_t * bs); u8 *format_bfd_session (u8 * s, va_list * args); void bfd_session_set_flags (bfd_session_t * bs, u8 admin_up_down); unsigned bfd_auth_type_supported (bfd_auth_type_e auth_type); vnet_api_error_t bfd_auth_activate (bfd_session_t * bs, u32 conf_key_id, u8 bfd_key_id, u8 is_delayed); vnet_api_error_t bfd_auth_deactivate (bfd_session_t * bs, u8 is_delayed); -vnet_api_error_t -bfd_session_set_params (bfd_main_t * bm, bfd_session_t * bs, - u32 desired_min_tx_usec, - u32 required_min_rx_usec, u8 detect_mult); +vnet_api_error_t bfd_session_set_params (bfd_main_t * bm, bfd_session_t * bs, + u32 desired_min_tx_usec, + u32 required_min_rx_usec, + u8 
detect_mult); #define USEC_PER_MS 1000LL #define USEC_PER_SECOND (1000 * USEC_PER_MS) /* default, slow transmission interval for BFD packets, per spec at least 1s */ -#define BFD_DEFAULT_DESIRED_MIN_TX_US USEC_PER_SECOND +#define BFD_DEFAULT_DESIRED_MIN_TX_USEC USEC_PER_SECOND + +/* + * minimum required min rx set locally when echo function is used, per spec + * should be set to at least 1s + */ +#define BFD_REQUIRED_MIN_RX_USEC_WHILE_ECHO USEC_PER_SECOND #endif /* __included_bfd_main_h__ */ diff --git a/src/vnet/bfd/bfd_udp.c b/src/vnet/bfd/bfd_udp.c index 8519009d..146faad6 100644 --- a/src/vnet/bfd/bfd_udp.c +++ b/src/vnet/bfd/bfd_udp.c @@ -27,6 +27,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -38,6 +41,12 @@ typedef struct /* hashmap - bfd session index by bfd key - used for CLI/API lookup, where * discriminator is unknown */ mhash_t bfd_session_idx_by_bfd_key; + /* convenience variable */ + vnet_main_t *vnet_main; + /* flag indicating whether echo_source_sw_if_index holds a valid value */ + int echo_source_is_set; + /* loopback interface used to get echo source ip */ + u32 echo_source_sw_if_index; } bfd_udp_main_t; static vlib_node_registration_t bfd_udp4_input_node; @@ -47,6 +56,80 @@ static vlib_node_registration_t bfd_udp_echo6_input_node; bfd_udp_main_t bfd_udp_main; +vnet_api_error_t +bfd_udp_set_echo_source (u32 sw_if_index) +{ + vnet_sw_interface_t *sw_if = + vnet_get_sw_interface_safe (bfd_udp_main.vnet_main, + bfd_udp_main.echo_source_sw_if_index); + if (sw_if) + { + bfd_udp_main.echo_source_sw_if_index = sw_if_index; + bfd_udp_main.echo_source_is_set = 1; + return 0; + } + return VNET_API_ERROR_BFD_ENOENT; +} + +vnet_api_error_t +bfd_udp_del_echo_source (u32 sw_if_index) +{ + bfd_udp_main.echo_source_sw_if_index = ~0; + bfd_udp_main.echo_source_is_set = 0; + return 0; +} + +int +bfd_udp_is_echo_available (bfd_transport_e transport) +{ + if (!bfd_udp_main.echo_source_is_set) + { + return 0; + } + /* + * 
for the echo to work, we need a loopback interface with at least one + * address with netmask length at most 31 (ip4) or 127 (ip6) so that we can + * pick an unused address from that subnet + */ + vnet_sw_interface_t *sw_if = + vnet_get_sw_interface_safe (bfd_udp_main.vnet_main, + bfd_udp_main.echo_source_sw_if_index); + if (sw_if && sw_if->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) + { + if (BFD_TRANSPORT_UDP4 == transport) + { + ip4_main_t *im = &ip4_main; + ip_interface_address_t *ia = NULL; + /* *INDENT-OFF* */ + foreach_ip_interface_address (&im->lookup_main, ia, + bfd_udp_main.echo_source_sw_if_index, + 0 /* honor unnumbered */, ({ + if (ia->address_length <= 31) + { + return 1; + } + })); + /* *INDENT-ON* */ + } + else if (BFD_TRANSPORT_UDP6 == transport) + { + ip6_main_t *im = &ip6_main; + ip_interface_address_t *ia = NULL; + /* *INDENT-OFF* */ + foreach_ip_interface_address (&im->lookup_main, ia, + bfd_udp_main.echo_source_sw_if_index, + 0 /* honor unnumbered */, ({ + if (ia->address_length <= 127) + { + return 1; + } + })); + /* *INDENT-ON* */ + } + } + return 0; +} + static u16 bfd_udp_bs_idx_to_sport (u32 bs_idx) { @@ -61,9 +144,78 @@ bfd_udp_bs_idx_to_sport (u32 bs_idx) return 49152 + bs_idx % (65535 - 49152 + 1); } -void +static void +lol () +{ +} + +int +bfd_udp_get_echo_src_ip4 (ip4_address_t * addr) +{ + if (!bfd_udp_main.echo_source_is_set) + { + BFD_ERR ("cannot find ip4 address, echo source not set"); + return 0; + } + ip_interface_address_t *ia = NULL; + ip4_main_t *im = &ip4_main; + + /* *INDENT-OFF* */ + foreach_ip_interface_address ( + &im->lookup_main, ia, bfd_udp_main.echo_source_sw_if_index, + 0 /* honor unnumbered */, ({ + ip4_address_t *x = + ip_interface_address_get_address (&im->lookup_main, ia); + if (ia->address_length <= 31) + { + addr->as_u32 = clib_host_to_net_u32 (x->as_u32); + /* + * flip the last bit to get a different address, might be network, + * we don't care ... 
+ */ + addr->as_u32 ^= 1; + addr->as_u32 = clib_net_to_host_u32 (addr->as_u32); + return 1; + } + })); + /* *INDENT-ON* */ + BFD_ERR ("cannot find ip4 address, no usable address found"); + return 0; +} + +int +bfd_udp_get_echo_src_ip6 (ip6_address_t * addr) +{ + if (!bfd_udp_main.echo_source_is_set) + { + BFD_ERR ("cannot find ip6 address, echo source not set"); + return 0; + } + ip_interface_address_t *ia = NULL; + ip6_main_t *im = &ip6_main; + + /* *INDENT-OFF* */ + foreach_ip_interface_address ( + &im->lookup_main, ia, bfd_udp_main.echo_source_sw_if_index, + 0 /* honor unnumbered */, ({ + ip6_address_t *x = + ip_interface_address_get_address (&im->lookup_main, ia); + if (ia->address_length <= 127) + { + *addr = *x; + addr->as_u8[15] ^= 1; /* flip the last bit of the address */ + lol (); + return 1; + } + })); + /* *INDENT-ON* */ + BFD_ERR ("cannot find ip6 address, no usable address found"); + return 0; +} + +int bfd_add_udp4_transport (vlib_main_t * vm, vlib_buffer_t * b, - const bfd_session_t * bs) + const bfd_session_t * bs, int is_echo) { const bfd_udp_session_t *bus = &bs->udp; const bfd_udp_key_t *key = &bus->key; @@ -83,12 +235,24 @@ bfd_add_udp4_transport (vlib_main_t * vm, vlib_buffer_t * b, headers->ip4.ip_version_and_header_length = 0x45; headers->ip4.ttl = 255; headers->ip4.protocol = IP_PROTOCOL_UDP; - headers->ip4.src_address.as_u32 = key->local_addr.ip4.as_u32; - headers->ip4.dst_address.as_u32 = key->peer_addr.ip4.as_u32; - headers->udp.src_port = clib_host_to_net_u16 (bfd_udp_bs_idx_to_sport (bs->bs_idx)); - headers->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_bfd4); + if (is_echo) + { + int rv; + if (!(rv = bfd_udp_get_echo_src_ip4 (&headers->ip4.src_address))) + { + return rv; + } + headers->ip4.dst_address.as_u32 = key->local_addr.ip4.as_u32; + headers->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_bfd_echo4); + } + else + { + headers->ip4.src_address.as_u32 = key->local_addr.ip4.as_u32; + headers->ip4.dst_address.as_u32 = 
key->peer_addr.ip4.as_u32; + headers->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_bfd4); + } /* fix ip length, checksum and udp length */ const u16 ip_length = vlib_buffer_length_in_chain (vm, b); @@ -98,11 +262,12 @@ bfd_add_udp4_transport (vlib_main_t * vm, vlib_buffer_t * b, const u16 udp_length = ip_length - (sizeof (headers->ip4)); headers->udp.length = clib_host_to_net_u16 (udp_length); + return 1; } -void +int bfd_add_udp6_transport (vlib_main_t * vm, vlib_buffer_t * b, - const bfd_session_t * bs) + const bfd_session_t * bs, int is_echo) { const bfd_udp_session_t *bus = &bs->udp; const bfd_udp_key_t *key = &bus->key; @@ -123,14 +288,28 @@ bfd_add_udp6_transport (vlib_main_t * vm, vlib_buffer_t * b, clib_host_to_net_u32 (0x6 << 28); headers->ip6.hop_limit = 255; headers->ip6.protocol = IP_PROTOCOL_UDP; - clib_memcpy (&headers->ip6.src_address, &key->local_addr.ip6, - sizeof (headers->ip6.src_address)); - clib_memcpy (&headers->ip6.dst_address, &key->peer_addr.ip6, - sizeof (headers->ip6.dst_address)); - headers->udp.src_port = clib_host_to_net_u16 (bfd_udp_bs_idx_to_sport (bs->bs_idx)); - headers->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_bfd6); + if (is_echo) + { + int rv; + if (!(rv = bfd_udp_get_echo_src_ip6 (&headers->ip6.src_address))) + { + return rv; + } + clib_memcpy (&headers->ip6.dst_address, &key->local_addr.ip6, + sizeof (headers->ip6.dst_address)); + + headers->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_bfd_echo6); + } + else + { + clib_memcpy (&headers->ip6.src_address, &key->local_addr.ip6, + sizeof (headers->ip6.src_address)); + clib_memcpy (&headers->ip6.dst_address, &key->peer_addr.ip6, + sizeof (headers->ip6.dst_address)); + headers->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_bfd6); + } /* fix ip payload length and udp length */ const u16 udp_length = @@ -147,6 +326,7 @@ bfd_add_udp6_transport (vlib_main_t * vm, vlib_buffer_t * b, { headers->udp.checksum = 0xffff; } + return 1; } static bfd_session_t * @@ -182,12 
+362,17 @@ bfd_udp_add_session_internal (bfd_udp_main_t * bum, u32 sw_if_index, bfd_session_t ** bs_out) { /* get a pool entry and if we end up not needing it, give it back */ - bfd_transport_t t = BFD_TRANSPORT_UDP4; + bfd_transport_e t = BFD_TRANSPORT_UDP4; if (!ip46_address_is_ip4 (local_addr)) { t = BFD_TRANSPORT_UDP6; } bfd_session_t *bs = bfd_get_session (bum->bfd_main, t); + if (!bs) + { + bfd_put_session (bum->bfd_main, bs); + return VNET_API_ERROR_BFD_EAGAIN; + } bfd_udp_session_t *bus = &bs->udp; memset (bus, 0, sizeof (*bus)); bfd_udp_key_t *key = &bus->key; @@ -213,6 +398,21 @@ bfd_udp_add_session_internal (bfd_udp_main_t * bum, u32 sw_if_index, BFD_DBG ("adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, VNET_LINK_IP4, %U, %d) " "returns %d", format_ip46_address, &key->peer_addr, IP46_TYPE_ANY, key->sw_if_index, bus->adj_index); + + fib_prefix_t fib_prefix; + memset (&fib_prefix, 0, sizeof (fib_prefix)); + fib_prefix.fp_len = 0; + fib_prefix.fp_proto = FIB_PROTOCOL_IP4; + fib_prefix.fp_addr = key->local_addr; + u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, 0); /* FIXME table id 0? 
*/ + dpo_id_t dpo = DPO_INVALID; + dpo_proto_t dproto; + dproto = fib_proto_to_dpo (fib_prefix.fp_proto); + receive_dpo_add_or_lock (dproto, ~0, NULL, &dpo); + fib_table_entry_special_dpo_update (fib_index, &fib_prefix, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_LOCAL, &dpo); + dpo_reset (&dpo); } else { @@ -234,7 +434,7 @@ bfd_udp_validate_api_input (u32 sw_if_index, const ip46_address_t * peer_addr) { vnet_sw_interface_t *sw_if = - vnet_get_sw_interface (vnet_get_main (), sw_if_index); + vnet_get_sw_interface_safe (bfd_udp_main.vnet_main, sw_if_index); u8 local_ip_valid = 0; ip_interface_address_t *ia = NULL; if (!sw_if) @@ -1001,7 +1201,8 @@ bfd_udp_input (vlib_main_t * vm, vlib_node_runtime_t * rt, const bfd_pkt_t *pkt = vlib_buffer_get_current (b0); if (bfd_pkt_get_poll (pkt)) { - bfd_init_final_control_frame (vm, b0, bs); + bfd_init_final_control_frame (vm, b0, bfd_udp_main.bfd_main, + bs); if (is_ipv6) { vlib_node_increment_counter (vm, bfd_udp6_input_node.index, @@ -1081,44 +1282,6 @@ VLIB_REGISTER_NODE (bfd_udp6_input_node, static) = { }; /* *INDENT-ON* */ -/** - * @brief swap the source and destination IP addresses in the packet - */ -static int -bfd_echo_address_swap (vlib_buffer_t * b, int is_ipv6) -{ - udp_header_t *dummy = NULL; - if (is_ipv6) - { - ip6_header_t *ip6 = NULL; - bfd_udp6_find_headers (b, &ip6, &dummy); - if (!ip6) - { - return 0; - } - ip6_address_t tmp = ip6->dst_address; - ip6->dst_address = ip6->src_address; - ip6->src_address = tmp; - vlib_buffer_advance (b, - (u8 *) ip6 - (u8 *) vlib_buffer_get_current (b)); - } - else - { - ip4_header_t *ip4 = NULL; - bfd_udp4_find_headers (b, &ip4, &dummy); - if (!ip4) - { - return 0; - } - ip4_address_t tmp = ip4->dst_address; - ip4->dst_address = ip4->src_address; - ip4->src_address = tmp; - vlib_buffer_advance (b, - (u8 *) ip4 - (u8 *) vlib_buffer_get_current (b)); - } - return 1; -} - /* * Process a frame of bfd echo packets * Expect 1 packet / frame @@ -1153,7 +1316,12 @@ bfd_udp_echo_input 
(vlib_main_t * vm, vlib_node_runtime_t * rt, clib_memcpy (t0->data, vlib_buffer_get_current (b0), len); } - if (bfd_echo_address_swap (b0, is_ipv6)) + if (bfd_consume_echo_pkt (bfd_udp_main.bfd_main, b0)) + { + b0->error = rt->errors[BFD_UDP_ERROR_NONE]; + next0 = BFD_UDP_INPUT_NEXT_NORMAL; + } + else { /* loop back the packet */ b0->error = rt->errors[BFD_UDP_ERROR_NONE]; @@ -1169,11 +1337,6 @@ bfd_udp_echo_input (vlib_main_t * vm, vlib_node_runtime_t * rt, } next0 = BFD_UDP_INPUT_NEXT_REPLY; } - else - { - b0->error = rt->errors[BFD_UDP_ERROR_BAD]; - next0 = BFD_UDP_INPUT_NEXT_NORMAL; - } vlib_set_next_frame_buffer (vm, rt, next0, bi0); @@ -1294,6 +1457,7 @@ bfd_udp_init (vlib_main_t * vm) mhash_init (&bfd_udp_main.bfd_session_idx_by_bfd_key, sizeof (uword), sizeof (bfd_udp_key_t)); bfd_udp_main.bfd_main = &bfd_main; + bfd_udp_main.vnet_main = vnet_get_main (); udp_register_dst_port (vm, UDP_DST_PORT_bfd4, bfd_udp4_input_node.index, 1); udp_register_dst_port (vm, UDP_DST_PORT_bfd6, bfd_udp6_input_node.index, 0); udp_register_dst_port (vm, UDP_DST_PORT_bfd_echo4, diff --git a/src/vnet/bfd/bfd_udp.h b/src/vnet/bfd/bfd_udp.h index 502e2314..ce2ee3cb 100644 --- a/src/vnet/bfd/bfd_udp.h +++ b/src/vnet/bfd/bfd_udp.h @@ -22,6 +22,7 @@ #include #include #include +#include /* *INDENT-OFF* */ typedef CLIB_PACKED (struct { @@ -49,10 +50,17 @@ typedef struct struct bfd_session_s; -void bfd_add_udp4_transport (vlib_main_t * vm, vlib_buffer_t * b, - const struct bfd_session_s *bs); -void bfd_add_udp6_transport (vlib_main_t * vm, vlib_buffer_t * b, - const struct bfd_session_s *bs); +int bfd_add_udp4_transport (vlib_main_t * vm, vlib_buffer_t * b, + const struct bfd_session_s *bs, int is_echo); +int bfd_add_udp6_transport (vlib_main_t * vm, vlib_buffer_t * b, + const struct bfd_session_s *bs, int is_echo); + +/** + * @brief check if the bfd udp layer is echo-capable at this time + * + * @return 1 if available, 0 otherwise + */ +int bfd_udp_is_echo_available (bfd_transport_e 
transport); #endif /* __included_bfd_udp_h__ */ diff --git a/test/bfd.py b/test/bfd.py index 8bd9f9a3..b467cc79 100644 --- a/test/bfd.py +++ b/test/bfd.py @@ -152,6 +152,27 @@ class BFD(Packet): bind_layers(UDP, BFD, dport=BFD.udp_dport) +class BFD_vpp_echo(Packet): + """ BFD echo packet as used by VPP (non-rfc, as rfc doesn't define one) """ + + udp_dport = 3785 #: BFD echo destination port per RFC 5881 + name = "BFD_VPP_ECHO" + + fields_desc = [ + BitField("discriminator", 0, 32), + BitField("expire_time_clocks", 0, 64), + BitField("checksum", 0, 64) + ] + + def mysummary(self): + return self.sprintf( + "BFD_VPP_ECHO(disc=%BFD_VPP_ECHO.discriminator%," + "expire_time_clocks=%BFD_VPP_ECHO.expire_time_clocks%)") + +# glue the BFD echo packet class to scapy parser +bind_layers(UDP, BFD_vpp_echo, dport=BFD_vpp_echo.udp_dport) + + class VppBFDAuthKey(VppObject): """ Represents BFD authentication key in VPP """ diff --git a/test/framework.py b/test/framework.py index 90e0574a..3bbd37d5 100644 --- a/test/framework.py +++ b/test/framework.py @@ -574,7 +574,7 @@ class VppTestCase(unittest.TestCase): def assert_equal(self, real_value, expected_value, name_or_class=None): if name_or_class is None: - self.assertEqual(real_value, expected_value, msg) + self.assertEqual(real_value, expected_value) return try: msg = "Invalid %s: %d('%s') does not match expected value %d('%s')" diff --git a/test/test_bfd.py b/test/test_bfd.py index 68baf837..ce0cca55 100644 --- a/test/test_bfd.py +++ b/test/test_bfd.py @@ -6,14 +6,14 @@ import unittest import hashlib import binascii import time -from random import randint, shuffle +from random import randint, shuffle, getrandbits from socket import AF_INET, AF_INET6 from scapy.packet import Raw from scapy.layers.l2 import Ether from scapy.layers.inet import UDP, IP from scapy.layers.inet6 import IPv6 from bfd import VppBFDAuthKey, BFD, BFDAuthType, VppBFDUDPSession, \ - BFDDiagCode, BFDState + BFDDiagCode, BFDState, BFD_vpp_echo from framework 
import VppTestCase, VppTestRunner from vpp_pg_interface import CaptureTimeoutError from util import ppp @@ -266,6 +266,7 @@ class BFDTestSession(object): self.my_discriminator = 0 self.desired_min_tx = 100000 self.required_min_rx = 100000 + self.required_min_echo_rx = None self.detect_mult = detect_mult self.diag = BFDDiagCode.no_diagnostic self.your_discriminator = None @@ -280,24 +281,27 @@ class BFDTestSession(object): self.our_seq_number += 1 def update(self, my_discriminator=None, your_discriminator=None, - desired_min_tx=None, required_min_rx=None, detect_mult=None, + desired_min_tx=None, required_min_rx=None, + required_min_echo_rx=None, detect_mult=None, diag=None, state=None, auth_type=None): """ update BFD parameters associated with session """ - if my_discriminator: + if my_discriminator is not None: self.my_discriminator = my_discriminator - if your_discriminator: + if your_discriminator is not None: self.your_discriminator = your_discriminator - if required_min_rx: + if required_min_rx is not None: self.required_min_rx = required_min_rx - if desired_min_tx: + if required_min_echo_rx is not None: + self.required_min_echo_rx = required_min_echo_rx + if desired_min_tx is not None: self.desired_min_tx = desired_min_tx - if detect_mult: + if detect_mult is not None: self.detect_mult = detect_mult - if diag: + if diag is not None: self.diag = diag - if state: + if state is not None: self.state = state - if auth_type: + if auth_type is not None: self.auth_type = auth_type def fill_packet_fields(self, packet): @@ -316,6 +320,11 @@ class BFDTestSession(object): "BFD: setting packet.required_min_rx_interval=%s", self.required_min_rx) bfd.required_min_rx_interval = self.required_min_rx + if self.required_min_echo_rx: + self.test.logger.debug( + "BFD: setting packet.required_min_echo_rx=%s", + self.required_min_echo_rx) + bfd.required_min_echo_rx_interval = self.required_min_echo_rx if self.desired_min_tx: self.test.logger.debug( "BFD: setting 
packet.desired_min_tx_interval=%s", @@ -579,6 +588,10 @@ class BFD4TestCase(VppTestCase): super(BFD4TestCase, cls).setUpClass() try: cls.create_pg_interfaces([0]) + cls.create_loopback_interfaces([0]) + cls.loopback0 = cls.lo_interfaces[0] + cls.loopback0.config_ip4() + cls.loopback0.admin_up() cls.pg0.config_ip4() cls.pg0.configure_ipv4_neighbors() cls.pg0.admin_up() @@ -646,32 +659,29 @@ class BFD4TestCase(VppTestCase): bfd_session_up(self) self.test_session.update(required_min_rx=0) self.test_session.send_packet() - cap = 2 * self.vpp_session.desired_min_tx *\ - self.test_session.detect_mult - time_mark = time.time() - count = 0 - # busy wait here, trying to collect a packet or event, vpp is not - # allowed to send packets and the session will timeout first - so the - # Up->Down event must arrive before any packets do - while time.time() < time_mark + cap / USEC_IN_SEC: + for dummy in range(self.test_session.detect_mult): + self.sleep(self.vpp_session.required_min_rx / USEC_IN_SEC, + "sleep before transmitting bfd packet") + self.test_session.send_packet() try: - p = wait_for_bfd_packet( - self, timeout=0, - pcap_time_min=time_mark - self.vpp_clock_offset) + p = wait_for_bfd_packet(self, timeout=0) self.logger.error(ppp("Received unexpected packet:", p)) - count += 1 except CaptureTimeoutError: pass - events = self.vapi.collect_events() - if len(events) > 0: - verify_event(self, events[0], BFDState.down) - break - self.assert_equal(count, 0, "number of packets received") + self.assert_equal( + len(self.vapi.collect_events()), 0, "number of bfd events") + self.test_session.update(required_min_rx=100000) + for dummy in range(3): + self.test_session.send_packet() + wait_for_bfd_packet( + self, timeout=self.test_session.required_min_rx / USEC_IN_SEC) + self.assert_equal( + len(self.vapi.collect_events()), 0, "number of bfd events") def test_conn_down(self): """ verify session goes down after inactivity """ bfd_session_up(self) - detection_time = 
self.vpp_session.detect_mult *\ + detection_time = self.test_session.detect_mult *\ self.vpp_session.required_min_rx / USEC_IN_SEC self.sleep(detection_time, "waiting for BFD session time-out") e = self.vapi.wait_for_event(1, "bfd_udp_session_details") @@ -799,7 +809,7 @@ class BFD4TestCase(VppTestCase): before = time.time() e = self.vapi.wait_for_event(1, "bfd_udp_session_details") after = time.time() - detection_time = self.vpp_session.detect_mult *\ + detection_time = self.test_session.detect_mult *\ self.vpp_session.required_min_rx / USEC_IN_SEC self.assert_in_range(after - before, 0.9 * detection_time, @@ -830,6 +840,71 @@ class BFD4TestCase(VppTestCase): self.assertNotIn("P", p.sprintf("%BFD.flags%"), "Poll bit not set in BFD packet") + def test_queued_poll(self): + """ test poll sequence queueing """ + bfd_session_up(self) + p = wait_for_bfd_packet(self) + self.vpp_session.modify_parameters( + required_min_rx=2 * self.vpp_session.required_min_rx) + p = wait_for_bfd_packet(self) + poll_sequence_start = time.time() + poll_sequence_length_min = 0.5 + send_final_after = time.time() + poll_sequence_length_min + # poll bit needs to be set + self.assertIn("P", p.sprintf("%BFD.flags%"), + "Poll bit not set in BFD packet") + self.assert_equal(p[BFD].required_min_rx_interval, + self.vpp_session.required_min_rx, + "BFD required min rx interval") + self.vpp_session.modify_parameters( + required_min_rx=2 * self.vpp_session.required_min_rx) + # 2nd poll sequence should be queued now + # don't send the reply back yet, wait for some time to emulate + # longer round-trip time + packet_count = 0 + while time.time() < send_final_after: + self.test_session.send_packet() + p = wait_for_bfd_packet(self) + self.assert_equal(len(self.vapi.collect_events()), 0, + "number of bfd events") + self.assert_equal(p[BFD].required_min_rx_interval, + self.vpp_session.required_min_rx, + "BFD required min rx interval") + packet_count += 1 + # poll bit must be set + self.assertIn("P", 
p.sprintf("%BFD.flags%"), + "Poll bit not set in BFD packet") + final = self.test_session.create_packet() + final[BFD].flags = "F" + self.test_session.send_packet(final) + # finish 1st with final + poll_sequence_length = time.time() - poll_sequence_start + # vpp must wait for some time before starting new poll sequence + poll_no_2_started = False + for dummy in range(2 * packet_count): + p = wait_for_bfd_packet(self) + self.assert_equal(len(self.vapi.collect_events()), 0, + "number of bfd events") + if "P" in p.sprintf("%BFD.flags%"): + poll_no_2_started = True + if time.time() < poll_sequence_start + poll_sequence_length: + raise Exception("VPP started 2nd poll sequence too soon") + final = self.test_session.create_packet() + final[BFD].flags = "F" + self.test_session.send_packet(final) + break + else: + self.test_session.send_packet() + self.assertTrue(poll_no_2_started, "2nd poll sequence not performed") + # finish 2nd with final + final = self.test_session.create_packet() + final[BFD].flags = "F" + self.test_session.send_packet(final) + p = wait_for_bfd_packet(self) + # poll bit must not be set + self.assertNotIn("P", p.sprintf("%BFD.flags%"), + "Poll bit set in BFD packet") + def test_no_periodic_if_remote_demand(self): """ no periodic frames outside poll sequence if remote demand set """ bfd_session_up(self) @@ -868,7 +943,7 @@ class BFD4TestCase(VppTestCase): echo_packet = (Ether(src=self.pg0.remote_mac, dst=self.pg0.local_mac) / IP(src=self.pg0.remote_ip4, - dst=self.pg0.local_ip4) / + dst=self.pg0.remote_ip4) / UDP(dport=BFD.udp_dport_echo) / Raw("this should be looped back")) for dummy in range(echo_packet_count): @@ -887,18 +962,236 @@ class BFD4TestCase(VppTestCase): self.assert_equal(self.pg0.local_mac, ether.src, "Source MAC") ip = p[IP] self.assert_equal(self.pg0.remote_ip4, ip.dst, "Destination IP") - self.assert_equal(self.pg0.local_ip4, ip.src, "Destination IP") + self.assert_equal(self.pg0.remote_ip4, ip.src, "Destination IP") udp = p[UDP] 
self.assert_equal(udp.dport, BFD.udp_dport_echo, "UDP destination port") self.assert_equal(udp.sport, udp_sport_rx, "UDP source port") udp_sport_rx += 1 - self.assertTrue(p.haslayer(Raw) and p[Raw] == echo_packet[Raw], - "Received packet is not the echo packet sent") + # need to compare the hex payload here, otherwise BFD_vpp_echo + # gets in way + self.assertEqual(str(p[UDP].payload), + str(echo_packet[UDP].payload), + "Received packet is not the echo packet sent") self.assert_equal(udp_sport_tx, udp_sport_rx, "UDP source port (== " "ECHO packet identifier for test purposes)") + def test_echo(self): + """ echo function """ + bfd_session_up(self) + self.test_session.update(required_min_echo_rx=50000) + self.test_session.send_packet() + detection_time = self.test_session.detect_mult *\ + self.vpp_session.required_min_rx / USEC_IN_SEC + # echo shouldn't work without echo source set + for dummy in range(3): + sleep = 0.75 * detection_time + self.sleep(sleep, "delay before sending bfd packet") + self.test_session.send_packet() + p = wait_for_bfd_packet( + self, pcap_time_min=time.time() - self.vpp_clock_offset) + self.assert_equal(p[BFD].required_min_rx_interval, + self.vpp_session.required_min_rx, + "BFD required min rx interval") + self.vapi.bfd_udp_set_echo_source(self.loopback0.sw_if_index) + # should be turned on - loopback echo packets + for dummy in range(3): + loop_until = time.time() + 0.75 * detection_time + while time.time() < loop_until: + p = self.pg0.wait_for_packet(1) + self.logger.debug(ppp("Got packet:", p)) + if p[UDP].dport == BFD.udp_dport_echo: + self.assert_equal( + p[IP].dst, self.pg0.local_ip4, "BFD ECHO dst IP") + self.assertNotEqual(p[IP].src, self.loopback0.local_ip4, + "BFD ECHO src IP equal to loopback IP") + self.logger.debug(ppp("Looping back packet:", p)) + self.pg0.add_stream(p) + self.pg_start() + elif p.haslayer(BFD): + self.assertGreaterEqual(p[BFD].required_min_rx_interval, + 1000000) + if "P" in p.sprintf("%BFD.flags%"): + final = 
self.test_session.create_packet() + final[BFD].flags = "F" + self.test_session.send_packet(final) + else: + raise Exception(ppp("Received unknown packet:", p)) + + self.assert_equal(len(self.vapi.collect_events()), 0, + "number of bfd events") + self.test_session.send_packet() + + def test_echo_fail(self): + """ session goes down if echo function fails """ + bfd_session_up(self) + self.test_session.update(required_min_echo_rx=50000) + self.test_session.send_packet() + detection_time = self.test_session.detect_mult *\ + self.vpp_session.required_min_rx / USEC_IN_SEC + self.vapi.bfd_udp_set_echo_source(self.loopback0.sw_if_index) + # echo function should be used now, but we will drop the echo packets + verified_diag = False + for dummy in range(3): + loop_until = time.time() + 0.75 * detection_time + while time.time() < loop_until: + p = self.pg0.wait_for_packet(1) + self.logger.debug(ppp("Got packet:", p)) + if p[UDP].dport == BFD.udp_dport_echo: + # dropped + pass + elif p.haslayer(BFD): + if "P" in p.sprintf("%BFD.flags%"): + self.assertGreaterEqual( + p[BFD].required_min_rx_interval, + 1000000) + final = self.test_session.create_packet() + final[BFD].flags = "F" + self.test_session.send_packet(final) + if p[BFD].state == BFDState.down: + self.assert_equal(p[BFD].diag, + BFDDiagCode.echo_function_failed, + BFDDiagCode) + verified_diag = True + else: + raise Exception(ppp("Received unknown packet:", p)) + self.test_session.send_packet() + events = self.vapi.collect_events() + self.assert_equal(len(events), 1, "number of bfd events") + self.assert_equal(events[0].state, BFDState.down, BFDState) + self.assertTrue(verified_diag, "Incorrect diagnostics code received") + + def test_echo_stop(self): + """ echo function stops if peer sets required min echo rx zero """ + bfd_session_up(self) + self.test_session.update(required_min_echo_rx=50000) + self.test_session.send_packet() + self.vapi.bfd_udp_set_echo_source(self.loopback0.sw_if_index) + # wait for first echo packet 
+ while True: + p = self.pg0.wait_for_packet(1) + self.logger.debug(ppp("Got packet:", p)) + if p[UDP].dport == BFD.udp_dport_echo: + self.logger.debug(ppp("Looping back packet:", p)) + self.pg0.add_stream(p) + self.pg_start() + break + elif p.haslayer(BFD): + # ignore BFD + pass + else: + raise Exception(ppp("Received unknown packet:", p)) + self.test_session.update(required_min_echo_rx=0) + self.test_session.send_packet() + # echo packets shouldn't arrive anymore + for dummy in range(5): + wait_for_bfd_packet( + self, pcap_time_min=time.time() - self.vpp_clock_offset) + self.test_session.send_packet() + events = self.vapi.collect_events() + self.assert_equal(len(events), 0, "number of bfd events") + + def test_stale_echo(self): + """ stale echo packets don't keep a session up """ + bfd_session_up(self) + self.test_session.update(required_min_echo_rx=50000) + self.vapi.bfd_udp_set_echo_source(self.loopback0.sw_if_index) + self.test_session.send_packet() + # should be turned on - loopback echo packets + echo_packet = None + timeout_at = None + timeout_ok = False + for dummy in range(10 * self.vpp_session.detect_mult): + p = self.pg0.wait_for_packet(1) + if p[UDP].dport == BFD.udp_dport_echo: + if echo_packet is None: + self.logger.debug(ppp("Got first echo packet:", p)) + echo_packet = p + timeout_at = time.time() + self.vpp_session.detect_mult * \ + self.test_session.required_min_echo_rx / USEC_IN_SEC + else: + self.logger.debug(ppp("Got followup echo packet:", p)) + self.logger.debug(ppp("Looping back first echo packet:", p)) + self.pg0.add_stream(echo_packet) + self.pg_start() + elif p.haslayer(BFD): + self.logger.debug(ppp("Got packet:", p)) + if "P" in p.sprintf("%BFD.flags%"): + final = self.test_session.create_packet() + final[BFD].flags = "F" + self.test_session.send_packet(final) + if p[BFD].state == BFDState.down: + self.assertIsNotNone( + timeout_at, + "Session went down before first echo packet received") + now = time.time() + self.assertGreaterEqual( + 
now, timeout_at, + "Session timeout at %s, but is expected at %s" % + (now, timeout_at)) + self.assert_equal(p[BFD].diag, + BFDDiagCode.echo_function_failed, + BFDDiagCode) + events = self.vapi.collect_events() + self.assert_equal(len(events), 1, "number of bfd events") + self.assert_equal(events[0].state, BFDState.down, BFDState) + timeout_ok = True + break + else: + raise Exception(ppp("Received unknown packet:", p)) + self.test_session.send_packet() + self.assertTrue(timeout_ok, "Expected timeout event didn't occur") + + def test_invalid_echo_checksum(self): + """ echo packets with invalid checksum don't keep a session up """ + bfd_session_up(self) + self.test_session.update(required_min_echo_rx=50000) + self.vapi.bfd_udp_set_echo_source(self.loopback0.sw_if_index) + self.test_session.send_packet() + # should be turned on - loopback echo packets + timeout_at = None + timeout_ok = False + for dummy in range(10 * self.vpp_session.detect_mult): + p = self.pg0.wait_for_packet(1) + if p[UDP].dport == BFD.udp_dport_echo: + self.logger.debug(ppp("Got echo packet:", p)) + if timeout_at is None: + timeout_at = time.time() + self.vpp_session.detect_mult * \ + self.test_session.required_min_echo_rx / USEC_IN_SEC + p[BFD_vpp_echo].checksum = getrandbits(64) + self.logger.debug(ppp("Looping back modified echo packet:", p)) + self.pg0.add_stream(p) + self.pg_start() + elif p.haslayer(BFD): + self.logger.debug(ppp("Got packet:", p)) + if "P" in p.sprintf("%BFD.flags%"): + final = self.test_session.create_packet() + final[BFD].flags = "F" + self.test_session.send_packet(final) + if p[BFD].state == BFDState.down: + self.assertIsNotNone( + timeout_at, + "Session went down before first echo packet received") + now = time.time() + self.assertGreaterEqual( + now, timeout_at, + "Session timeout at %s, but is expected at %s" % + (now, timeout_at)) + self.assert_equal(p[BFD].diag, + BFDDiagCode.echo_function_failed, + BFDDiagCode) + events = self.vapi.collect_events() + 
self.assert_equal(len(events), 1, "number of bfd events") + self.assert_equal(events[0].state, BFDState.down, BFDState) + timeout_ok = True + break + else: + raise Exception(ppp("Received unknown packet:", p)) + self.test_session.send_packet() + self.assertTrue(timeout_ok, "Expected timeout event didn't occur") + def test_admin_up_down(self): + """ put session admin-up and admin-down """ bfd_session_up(self) self.vpp_session.admin_down() self.pg0.enable_capture() @@ -931,6 +1224,42 @@ class BFD4TestCase(VppTestCase): e = self.vapi.wait_for_event(1, "bfd_udp_session_details") verify_event(self, e, expected_state=BFDState.up) + def test_config_change_remote_demand(self): + """ configuration change while peer in demand mode """ + bfd_session_up(self) + demand = self.test_session.create_packet() + demand[BFD].flags = "D" + self.test_session.send_packet(demand) + self.vpp_session.modify_parameters( + required_min_rx=2 * self.vpp_session.required_min_rx) + p = wait_for_bfd_packet(self) + # poll bit must be set + self.assertIn("P", p.sprintf("%BFD.flags%"), "Poll bit not set") + # terminate poll sequence + final = self.test_session.create_packet() + final[BFD].flags = "D+F" + self.test_session.send_packet(final) + # vpp should be quiet now again + transmit_time = 0.9 \ + * max(self.vpp_session.required_min_rx, + self.test_session.desired_min_tx) \ + / USEC_IN_SEC + count = 0 + for dummy in range(self.test_session.detect_mult * 2): + time.sleep(transmit_time) + self.test_session.send_packet(demand) + try: + p = wait_for_bfd_packet(self, timeout=0) + self.logger.error(ppp("Received unexpected packet:", p)) + count += 1 + except CaptureTimeoutError: + pass + events = self.vapi.collect_events() + for e in events: + self.logger.error("Received unexpected event: %s", e) + self.assert_equal(count, 0, "number of packets received") + self.assert_equal(len(events), 0, "number of events received") + class BFD6TestCase(VppTestCase): """Bidirectional Forwarding Detection (BFD) (IPv6) 
""" @@ -949,6 +1278,10 @@ class BFD6TestCase(VppTestCase): cls.pg0.configure_ipv6_neighbors() cls.pg0.admin_up() cls.pg0.resolve_ndp() + cls.create_loopback_interfaces([0]) + cls.loopback0 = cls.lo_interfaces[0] + cls.loopback0.config_ip6() + cls.loopback0.admin_up() except Exception: super(BFD6TestCase, cls).tearDownClass() @@ -1003,7 +1336,7 @@ class BFD6TestCase(VppTestCase): echo_packet = (Ether(src=self.pg0.remote_mac, dst=self.pg0.local_mac) / IPv6(src=self.pg0.remote_ip6, - dst=self.pg0.local_ip6) / + dst=self.pg0.remote_ip6) / UDP(dport=BFD.udp_dport_echo) / Raw("this should be looped back")) for dummy in range(echo_packet_count): @@ -1022,17 +1355,68 @@ class BFD6TestCase(VppTestCase): self.assert_equal(self.pg0.local_mac, ether.src, "Source MAC") ip = p[IPv6] self.assert_equal(self.pg0.remote_ip6, ip.dst, "Destination IP") - self.assert_equal(self.pg0.local_ip6, ip.src, "Destination IP") + self.assert_equal(self.pg0.remote_ip6, ip.src, "Destination IP") udp = p[UDP] self.assert_equal(udp.dport, BFD.udp_dport_echo, "UDP destination port") self.assert_equal(udp.sport, udp_sport_rx, "UDP source port") udp_sport_rx += 1 - self.assertTrue(p.haslayer(Raw) and p[Raw] == echo_packet[Raw], - "Received packet is not the echo packet sent") + # need to compare the hex payload here, otherwise BFD_vpp_echo + # gets in way + self.assertEqual(str(p[UDP].payload), + str(echo_packet[UDP].payload), + "Received packet is not the echo packet sent") + self.assert_equal(udp_sport_tx, udp_sport_rx, "UDP source port (== " + "ECHO packet identifier for test purposes)") self.assert_equal(udp_sport_tx, udp_sport_rx, "UDP source port (== " "ECHO packet identifier for test purposes)") + def test_echo(self): + """ echo function used """ + bfd_session_up(self) + self.test_session.update(required_min_echo_rx=50000) + self.test_session.send_packet() + detection_time = self.test_session.detect_mult *\ + self.vpp_session.required_min_rx / USEC_IN_SEC + # echo shouldn't work without echo 
source set + for dummy in range(3): + sleep = 0.75 * detection_time + self.sleep(sleep, "delay before sending bfd packet") + self.test_session.send_packet() + p = wait_for_bfd_packet( + self, pcap_time_min=time.time() - self.vpp_clock_offset) + self.assert_equal(p[BFD].required_min_rx_interval, + self.vpp_session.required_min_rx, + "BFD required min rx interval") + self.vapi.bfd_udp_set_echo_source(self.loopback0.sw_if_index) + # should be turned on - loopback echo packets + for dummy in range(3): + loop_until = time.time() + 0.75 * detection_time + while time.time() < loop_until: + p = self.pg0.wait_for_packet(1) + self.logger.debug(ppp("Got packet:", p)) + if p[UDP].dport == BFD.udp_dport_echo: + self.assert_equal( + p[IPv6].dst, self.pg0.local_ip6, "BFD ECHO dst IP") + self.assertNotEqual(p[IPv6].src, self.loopback0.local_ip6, + "BFD ECHO src IP equal to loopback IP") + self.logger.debug(ppp("Looping back packet:", p)) + self.pg0.add_stream(p) + self.pg_start() + elif p.haslayer(BFD): + self.assertGreaterEqual(p[BFD].required_min_rx_interval, + 1000000) + if "P" in p.sprintf("%BFD.flags%"): + final = self.test_session.create_packet() + final[BFD].flags = "F" + self.test_session.send_packet(final) + else: + raise Exception(ppp("Received unknown packet:", p)) + + self.assert_equal(len(self.vapi.collect_events()), 0, + "number of bfd events") + self.test_session.send_packet() + class BFDSHA1TestCase(VppTestCase): """Bidirectional Forwarding Detection (BFD) (SHA1 auth) """ @@ -1121,7 +1505,7 @@ class BFDSHA1TestCase(VppTestCase): self.assert_equal(self.vpp_session.state, BFDState.up, BFDState) def test_send_bad_seq_number(self): - """ session is not kept alive by msgs with bad seq numbers""" + """ session is not kept alive by msgs with bad sequence numbers""" key = self.factory.create_random_key( self, BFDAuthType.meticulous_keyed_sha1) key.add_vpp_config() @@ -1133,16 +1517,13 @@ class BFDSHA1TestCase(VppTestCase): self, self.pg0, AF_INET, sha1_key=key, 
bfd_key_id=self.vpp_session.bfd_key_id) bfd_session_up(self) - detection_time = self.vpp_session.detect_mult *\ + detection_time = self.test_session.detect_mult *\ self.vpp_session.required_min_rx / USEC_IN_SEC - session_timeout = time.time() + detection_time - while time.time() < session_timeout: - self.assert_equal(len(self.vapi.collect_events()), 0, - "number of bfd events") - wait_for_bfd_packet(self) + send_until = time.time() + 2 * detection_time + while time.time() < send_until: self.test_session.send_packet() - wait_for_bfd_packet(self) - self.test_session.send_packet() + self.sleep(0.7 * self.vpp_session.required_min_rx / USEC_IN_SEC, + "time between bfd packets") e = self.vapi.collect_events() # session should be down now, because the sequence numbers weren't # updated @@ -1250,7 +1631,7 @@ class BFDSHA1TestCase(VppTestCase): bfd_key_id=self.vpp_session.bfd_key_id, our_seq_number=0) bfd_session_up(self) # don't send any packets for 2*detection_time - detection_time = self.vpp_session.detect_mult *\ + detection_time = self.test_session.detect_mult *\ self.vpp_session.required_min_rx / USEC_IN_SEC self.sleep(detection_time, "simulating peer restart") events = self.vapi.collect_events() diff --git a/test/vpp_papi_provider.py b/test/vpp_papi_provider.py index 59e58ad0..dd9baff1 100644 --- a/test/vpp_papi_provider.py +++ b/test/vpp_papi_provider.py @@ -1117,6 +1117,10 @@ class VppPapiProvider(object): def bfd_auth_keys_dump(self): return self.api(self.papi.bfd_auth_keys_dump, {}) + def bfd_udp_set_echo_source(self, sw_if_index): + return self.api(self.papi.bfd_udp_set_echo_source, + {'sw_if_index': sw_if_index}) + def classify_add_del_table( self, is_add, -- cgit 1.2.3-korg From f61bc52eb99019da519c105c8d4ca2c433433e53 Mon Sep 17 00:00:00 2001 From: Klement Sekera Date: Mon, 27 Feb 2017 12:49:27 +0100 Subject: BFD: disable debug prints Change-Id: I356581f4bdf47b9610b9e50f4f8db9a1510872a7 Signed-off-by: Klement Sekera --- src/vnet/bfd/bfd_debug.h | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) (limited to 'src/vnet/bfd') diff --git a/src/vnet/bfd/bfd_debug.h b/src/vnet/bfd/bfd_debug.h index 3017352e..a06e934f 100644 --- a/src/vnet/bfd/bfd_debug.h +++ b/src/vnet/bfd/bfd_debug.h @@ -20,7 +20,7 @@ #define __included_bfd_debug_h__ /* controls debug prints */ -#define BFD_DEBUG (1) +#define BFD_DEBUG (0) #if BFD_DEBUG #define BFD_DEBUG_FILE_DEF \ -- cgit 1.2.3-korg From 68b0fb0c620c7451ef1a6380c43c39de6614db51 Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Tue, 28 Feb 2017 15:15:56 -0500 Subject: VPP-598: tcp stack initial commit Change-Id: I49e5ce0aae6e4ff634024387ceaf7dbc432a0351 Signed-off-by: Dave Barach Signed-off-by: Florin Coras --- src/Makefile.am | 1 + src/plugins/ioam/export-common/ioam_export.h | 2 +- src/plugins/ioam/ipfixcollector/ipfixcollector.c | 2 +- src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c | 2 +- src/plugins/snat/in2out.c | 26 +- src/plugins/snat/out2in.c | 24 +- src/scripts/vnet/tcp | 18 +- src/scripts/vnet/udp | 19 + src/scripts/vnet/uri/tcp-setup.sh | 39 + src/scripts/vnet/uri/tcp_server | 4 + src/scripts/vnet/uri/udp | 19 + src/svm.am | 10 +- src/svm/ssvm.c | 16 + src/svm/ssvm.h | 18 +- src/svm/svm_fifo.c | 568 ++++++ src/svm/svm_fifo.h | 157 ++ src/svm/svm_fifo_segment.c | 193 ++ src/svm/svm_fifo_segment.h | 89 + src/svm/test_svm_fifo1.c | 361 ++++ src/uri.am | 22 + src/uri/uri_tcp_test.c | 916 +++++++++ src/uri/uri_udp_test.c | 553 ++++++ src/uri/uri_udp_test2.c | 954 +++++++++ src/uri/uritest.c | 484 +++++ src/vlib/buffer.c | 2 +- src/vlib/buffer.h | 68 + src/vlibmemory/unix_shared_memory_queue.c | 12 +- src/vlibmemory/unix_shared_memory_queue.h | 2 +- src/vnet.am | 66 +- src/vnet/api_errno.h | 21 +- src/vnet/bfd/bfd_udp.c | 4 +- src/vnet/buffer.h | 10 + src/vnet/classify/vnet_classify.c | 4 +- src/vnet/dhcp/dhcp_proxy.h | 2 +- src/vnet/flow/flow_report.h | 2 +- src/vnet/ip/ip.h | 4 +- src/vnet/ip/ip4.h | 42 +- src/vnet/ip/ip4_forward.c | 173 +- src/vnet/ip/ip4_packet.h | 26 +- 
src/vnet/ip/ip6.h | 44 +- src/vnet/ip/ip6_packet.h | 26 +- src/vnet/ip/punt.c | 2 +- src/vnet/ip/tcp_packet.h | 141 -- src/vnet/ip/udp.h | 315 --- src/vnet/ip/udp_error.def | 21 - src/vnet/ip/udp_format.c | 91 - src/vnet/ip/udp_init.c | 71 - src/vnet/ip/udp_local.c | 645 ------ src/vnet/ip/udp_packet.h | 65 - src/vnet/ip/udp_pg.c | 237 --- src/vnet/ipsec/ikev2.c | 2 +- src/vnet/ipsec/ikev2_cli.c | 2 +- src/vnet/ipsec/ikev2_crypto.c | 2 +- src/vnet/lisp-cp/packets.c | 65 +- src/vnet/lisp-cp/packets.h | 45 - src/vnet/lisp-gpe/interface.c | 2 +- src/vnet/lisp-gpe/lisp_gpe.h | 4 +- src/vnet/lisp-gpe/lisp_gpe_adjacency.c | 2 + src/vnet/session/application.c | 343 ++++ src/vnet/session/application.h | 120 ++ src/vnet/session/application_interface.c | 459 +++++ src/vnet/session/application_interface.h | 136 ++ src/vnet/session/hashes.c | 28 + src/vnet/session/node.c | 435 ++++ src/vnet/session/session.api | 429 ++++ src/vnet/session/session.c | 1286 ++++++++++++ src/vnet/session/session.h | 380 ++++ src/vnet/session/session_api.c | 821 ++++++++ src/vnet/session/session_cli.c | 189 ++ src/vnet/session/transport.c | 64 + src/vnet/session/transport.h | 250 +++ src/vnet/tcp/tcp.c | 708 +++++++ src/vnet/tcp/tcp.h | 624 ++++++ src/vnet/tcp/tcp_error.def | 35 + src/vnet/tcp/tcp_format.c | 136 ++ src/vnet/tcp/tcp_input.c | 2316 ++++++++++++++++++++++ src/vnet/tcp/tcp_newreno.c | 93 + src/vnet/tcp/tcp_output.c | 1412 +++++++++++++ src/vnet/tcp/tcp_packet.h | 184 ++ src/vnet/tcp/tcp_pg.c | 236 +++ src/vnet/tcp/tcp_syn_filter4.c | 542 +++++ src/vnet/tcp/tcp_timer.h | 29 + src/vnet/udp/builtin_server.c | 239 +++ src/vnet/udp/udp.c | 342 ++++ src/vnet/udp/udp.h | 362 ++++ src/vnet/udp/udp_error.def | 21 + src/vnet/udp/udp_format.c | 91 + src/vnet/udp/udp_input.c | 314 +++ src/vnet/udp/udp_local.c | 666 +++++++ src/vnet/udp/udp_packet.h | 65 + src/vnet/udp/udp_pg.c | 237 +++ src/vnet/vnet_all_api_h.h | 1 + src/vnet/vxlan-gpe/vxlan_gpe.h | 2 +- src/vnet/vxlan/vxlan.h | 2 +- 
src/vpp/api/vpe.api | 1 + src/vppinfra.am | 5 + src/vppinfra/bihash_16_8.h | 103 + src/vppinfra/bihash_48_8.h | 116 ++ src/vppinfra/tw_timer_16t_1w_2048sl.c | 26 + src/vppinfra/tw_timer_16t_1w_2048sl.h | 46 + 100 files changed, 18737 insertions(+), 1874 deletions(-) create mode 100644 src/scripts/vnet/udp create mode 100755 src/scripts/vnet/uri/tcp-setup.sh create mode 100644 src/scripts/vnet/uri/tcp_server create mode 100644 src/scripts/vnet/uri/udp create mode 100644 src/svm/svm_fifo.c create mode 100644 src/svm/svm_fifo.h create mode 100644 src/svm/svm_fifo_segment.c create mode 100644 src/svm/svm_fifo_segment.h create mode 100644 src/svm/test_svm_fifo1.c create mode 100644 src/uri.am create mode 100644 src/uri/uri_tcp_test.c create mode 100644 src/uri/uri_udp_test.c create mode 100644 src/uri/uri_udp_test2.c create mode 100644 src/uri/uritest.c delete mode 100644 src/vnet/ip/tcp_packet.h delete mode 100644 src/vnet/ip/udp.h delete mode 100644 src/vnet/ip/udp_error.def delete mode 100644 src/vnet/ip/udp_format.c delete mode 100644 src/vnet/ip/udp_init.c delete mode 100644 src/vnet/ip/udp_local.c delete mode 100644 src/vnet/ip/udp_packet.h delete mode 100644 src/vnet/ip/udp_pg.c create mode 100644 src/vnet/session/application.c create mode 100644 src/vnet/session/application.h create mode 100644 src/vnet/session/application_interface.c create mode 100644 src/vnet/session/application_interface.h create mode 100644 src/vnet/session/hashes.c create mode 100644 src/vnet/session/node.c create mode 100644 src/vnet/session/session.api create mode 100644 src/vnet/session/session.c create mode 100644 src/vnet/session/session.h create mode 100644 src/vnet/session/session_api.c create mode 100644 src/vnet/session/session_cli.c create mode 100644 src/vnet/session/transport.c create mode 100644 src/vnet/session/transport.h create mode 100644 src/vnet/tcp/tcp.c create mode 100644 src/vnet/tcp/tcp.h create mode 100644 src/vnet/tcp/tcp_error.def create mode 100644 
src/vnet/tcp/tcp_format.c create mode 100644 src/vnet/tcp/tcp_input.c create mode 100644 src/vnet/tcp/tcp_newreno.c create mode 100644 src/vnet/tcp/tcp_output.c create mode 100644 src/vnet/tcp/tcp_packet.h create mode 100644 src/vnet/tcp/tcp_pg.c create mode 100644 src/vnet/tcp/tcp_syn_filter4.c create mode 100644 src/vnet/tcp/tcp_timer.h create mode 100644 src/vnet/udp/builtin_server.c create mode 100644 src/vnet/udp/udp.c create mode 100644 src/vnet/udp/udp.h create mode 100644 src/vnet/udp/udp_error.def create mode 100644 src/vnet/udp/udp_format.c create mode 100644 src/vnet/udp/udp_input.c create mode 100644 src/vnet/udp/udp_local.c create mode 100644 src/vnet/udp/udp_packet.h create mode 100644 src/vnet/udp/udp_pg.c create mode 100644 src/vppinfra/bihash_16_8.h create mode 100644 src/vppinfra/bihash_48_8.h create mode 100644 src/vppinfra/tw_timer_16t_1w_2048sl.c create mode 100644 src/vppinfra/tw_timer_16t_1w_2048sl.h (limited to 'src/vnet/bfd') diff --git a/src/Makefile.am b/src/Makefile.am index 08feb29a..641707ed 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -88,6 +88,7 @@ include vlib-api.am include vnet.am include vpp.am include vpp-api-test.am +include uri.am SUBDIRS += plugins diff --git a/src/plugins/ioam/export-common/ioam_export.h b/src/plugins/ioam/export-common/ioam_export.h index e84dab0b..dd48a93b 100644 --- a/src/plugins/ioam/export-common/ioam_export.h +++ b/src/plugins/ioam/export-common/ioam_export.h @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/plugins/ioam/ipfixcollector/ipfixcollector.c b/src/plugins/ioam/ipfixcollector/ipfixcollector.c index 4ae47edc..71b934ec 100644 --- a/src/plugins/ioam/ipfixcollector/ipfixcollector.c +++ b/src/plugins/ioam/ipfixcollector/ipfixcollector.c @@ -15,7 +15,7 @@ #include #include -#include +#include #include ipfix_collector_main_t ipfix_collector_main; diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c 
b/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c index b42c357c..f334c983 100644 --- a/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c +++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/plugins/snat/in2out.c b/src/plugins/snat/in2out.c index e30c913c..b4b7793d 100644 --- a/src/plugins/snat/in2out.c +++ b/src/plugins/snat/in2out.c @@ -689,12 +689,12 @@ snat_hairpinning (snat_main_t *sm, ip4_header_t, dst_address); ip0->checksum = ip_csum_fold (sum0); - old_dst_port0 = tcp0->ports.dst; + old_dst_port0 = tcp0->dst; if (PREDICT_TRUE(new_dst_port0 != old_dst_port0)) { if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { - tcp0->ports.dst = new_dst_port0; + tcp0->dst = new_dst_port0; sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0, ip4_header_t, dst_address); @@ -872,9 +872,9 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { - old_port0 = tcp0->ports.src; - tcp0->ports.src = s0->out2in.port; - new_port0 = tcp0->ports.src; + old_port0 = tcp0->src_port; + tcp0->src_port = s0->out2in.port; + new_port0 = tcp0->src_port; sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, @@ -1012,9 +1012,9 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP)) { - old_port1 = tcp1->ports.src; - tcp1->ports.src = s1->out2in.port; - new_port1 = tcp1->ports.src; + old_port1 = tcp1->src_port; + tcp1->src_port = s1->out2in.port; + new_port1 = tcp1->src_port; sum1 = tcp1->checksum; sum1 = ip_csum_update (sum1, old_addr1, new_addr1, @@ -1188,9 +1188,9 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { - old_port0 = tcp0->ports.src; - tcp0->ports.src = s0->out2in.port; - new_port0 = tcp0->ports.src; + old_port0 = tcp0->src_port; + tcp0->src_port = s0->out2in.port; + new_port0 = tcp0->src_port; sum0 = 
tcp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, @@ -1667,8 +1667,8 @@ snat_in2out_fast_static_map_fn (vlib_main_t * vm, { if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { - old_port0 = tcp0->ports.src; - tcp0->ports.src = new_port0; + old_port0 = tcp0->src_port; + tcp0->src_port = new_port0; sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, diff --git a/src/plugins/snat/out2in.c b/src/plugins/snat/out2in.c index 328f5ba4..3bfc0aa3 100644 --- a/src/plugins/snat/out2in.c +++ b/src/plugins/snat/out2in.c @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include #include @@ -602,9 +602,9 @@ snat_out2in_node_fn (vlib_main_t * vm, if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { - old_port0 = tcp0->ports.dst; - tcp0->ports.dst = s0->in2out.port; - new_port0 = tcp0->ports.dst; + old_port0 = tcp0->dst_port; + tcp0->dst_port = s0->in2out.port; + new_port0 = tcp0->dst_port; sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, @@ -737,9 +737,9 @@ snat_out2in_node_fn (vlib_main_t * vm, if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP)) { - old_port1 = tcp1->ports.dst; - tcp1->ports.dst = s1->in2out.port; - new_port1 = tcp1->ports.dst; + old_port1 = tcp1->dst_port; + tcp1->dst_port = s1->in2out.port; + new_port1 = tcp1->dst_port; sum1 = tcp1->checksum; sum1 = ip_csum_update (sum1, old_addr1, new_addr1, @@ -907,9 +907,9 @@ snat_out2in_node_fn (vlib_main_t * vm, if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { - old_port0 = tcp0->ports.dst; - tcp0->ports.dst = s0->in2out.port; - new_port0 = tcp0->ports.dst; + old_port0 = tcp0->dst_port; + tcp0->dst_port = s0->in2out.port; + new_port0 = tcp0->dst_port; sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, @@ -1369,8 +1369,8 @@ snat_out2in_fast_node_fn (vlib_main_t * vm, { if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { - old_port0 = tcp0->ports.dst; - tcp0->ports.dst = new_port0; + old_port0 = tcp0->dst_port; + tcp0->dst_port = 
new_port0; sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, diff --git a/src/scripts/vnet/tcp b/src/scripts/vnet/tcp index a2ee8b2d..b9c23c3a 100644 --- a/src/scripts/vnet/tcp +++ b/src/scripts/vnet/tcp @@ -1,16 +1,18 @@ +loop create +set int ip address loop0 192.168.1.1/8 +set int state loop0 up + packet-generator new { name x - limit 1 + limit 2048 node ip4-input - size 64-64 + size 100-100 + interface loop0 no-recycle data { - TCP: 1.2.3.4 -> 5.6.7.8 - TCP: 1234 -> 5678 + TCP: 192.168.1.2 -> 192.168.1.1 + TCP: 32415 -> 80 + SYN incrementing 100 } } - -tr add pg-input 100 -ip route 5.6.7.8/32 via local -ip route 1.2.3.4/32 via local diff --git a/src/scripts/vnet/udp b/src/scripts/vnet/udp new file mode 100644 index 00000000..7dda1eec --- /dev/null +++ b/src/scripts/vnet/udp @@ -0,0 +1,19 @@ +loop create +set int ip address loop0 192.168.1.1/8 +set int state loop0 up + +packet-generator new { + name udp + limit 512 + rate 1e4 + node ip4-input + size 100-100 + interface loop0 + no-recycle + data { + UDP: 192.168.1.2 - 192.168.2.255 -> 192.168.1.1 + UDP: 4321 -> 1234 + length 72 + incrementing 100 + } +} diff --git a/src/scripts/vnet/uri/tcp-setup.sh b/src/scripts/vnet/uri/tcp-setup.sh new file mode 100755 index 00000000..e0b01588 --- /dev/null +++ b/src/scripts/vnet/uri/tcp-setup.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash + +function topo_setup +{ + ip netns add vppns1 + ip link add veth_vpp1 type veth peer name vpp1 + ip link set dev vpp1 up + ip link set dev veth_vpp1 up netns vppns1 + + ip netns exec vppns1 \ + bash -c " + ip link set dev lo up + ip addr add 6.0.1.2/24 dev veth_vpp1 + " + + ethtool --offload vpp1 rx off tx off + ip netns exec vppns1 ethtool --offload veth_vpp1 rx off tx off + +} + +function topo_clean +{ + ip link del dev veth_vpp1 &> /dev/null + ip netns del vppns1 &> /dev/null +} + +if [ "$1" == "clean" ] ; then + topo_clean + exit 0 +else + topo_setup +fi + +# to test connectivity do: +# sudo ip netns exec vppns1 telnet 
6.0.1.1 1234 +# to push traffic to the server +# dd if=/dev/zero bs=1024K count=512 | nc 6.0.1.1 +# to listen for incoming connection from vpp +# nc -l 1234 diff --git a/src/scripts/vnet/uri/tcp_server b/src/scripts/vnet/uri/tcp_server new file mode 100644 index 00000000..7f5a86de --- /dev/null +++ b/src/scripts/vnet/uri/tcp_server @@ -0,0 +1,4 @@ +create host-interface name vpp1 +set int state host-vpp1 up +set int ip address host-vpp1 6.0.1.1/24 +trace add af-packet-input 10 diff --git a/src/scripts/vnet/uri/udp b/src/scripts/vnet/uri/udp new file mode 100644 index 00000000..ca13b83c --- /dev/null +++ b/src/scripts/vnet/uri/udp @@ -0,0 +1,19 @@ +loop create +set int ip address loop0 10.0.0.1/32 +set int state loop0 up + +packet-generator new { + name udp + limit 512 + rate 1e4 + node ip4-input + size 100-100 + interface loop0 + no-recycle + data { + UDP: 192.168.1.2 - 192.168.2.255 -> 192.168.1.1 + UDP: 4321 -> 1234 + length 72 + incrementing 100 + } +} diff --git a/src/svm.am b/src/svm.am index 2cd385bd..442eba8e 100644 --- a/src/svm.am +++ b/src/svm.am @@ -13,13 +13,14 @@ bin_PROGRAMS += svmtool svmdbtool -nobase_include_HEADERS += svm/svm.h svm/ssvm.h svm/svmdb.h +nobase_include_HEADERS += svm/svm.h svm/ssvm.h svm/svmdb.h \ + svm/svm_fifo.h svm/svm_fifo_segment.h lib_LTLIBRARIES += libsvm.la libsvmdb.la +libsvm_la_SOURCES = svm/svm.c svm/ssvm.c svm/svm_fifo.c svm/svm_fifo_segment.c libsvm_la_LIBADD = libvppinfra.la -lrt -lpthread libsvm_la_DEPENDENCIES = libvppinfra.la -libsvm_la_SOURCES = svm/svm.c svm/ssvm.c svmtool_SOURCES = svm/svmtool.c svmtool_LDADD = libsvm.la libvppinfra.la -lpthread -lrt @@ -31,4 +32,9 @@ libsvmdb_la_SOURCES = svm/svmdb.c svmdbtool_SOURCES = svm/svmdbtool.c svmdbtool_LDADD = libsvmdb.la libsvm.la libvppinfra.la -lpthread -lrt +noinst_PROGRAMS += test_svm_fifo1 +test_svm_fifo1_SOURCES = svm/test_svm_fifo1.c +test_svm_fifo1_LDADD = libsvm.la libvppinfra.la -lpthread -lrt +test_svm_fifo1_LDFLAGS = -static + # vi:syntax=automake diff 
--git a/src/svm/ssvm.c b/src/svm/ssvm.c index 6f409eb6..6cda1f27 100644 --- a/src/svm/ssvm.c +++ b/src/svm/ssvm.c @@ -169,6 +169,22 @@ re_map_it: return 0; } +void +ssvm_delete (ssvm_private_t * ssvm) +{ + u8 *fn; + + fn = format (0, "/dev/shm/%s%c", ssvm->name, 0); + + /* Throw away the backing file */ + if (unlink ((char *) fn) < 0) + clib_unix_warning ("unlink segment '%s'", ssvm->name); + + munmap ((void *) ssvm->requested_va, ssvm->ssvm_size); + vec_free (fn); +} + + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/svm/ssvm.h b/src/svm/ssvm.h index 9e61b9a0..bccfc164 100644 --- a/src/svm/ssvm.h +++ b/src/svm/ssvm.h @@ -38,7 +38,10 @@ #include #include -#define MMAP_PAGESIZE (4<<10) +#ifndef MMAP_PAGESIZE +#define MMAP_PAGESIZE (clib_mem_get_page_size()) +#endif + #define SSVM_N_OPAQUE 7 typedef struct @@ -125,12 +128,12 @@ ssvm_pop_heap (void *oldheap) } #define foreach_ssvm_api_error \ -_(NO_NAME, "No shared segment name", -10) \ -_(NO_SIZE, "Size not set (master)", -11) \ -_(CREATE_FAILURE, "Create failed", -12) \ -_(SET_SIZE, "Set size failed", -13) \ -_(MMAP, "mmap failed", -14) \ -_(SLAVE_TIMEOUT, "Slave map timeout", -15) +_(NO_NAME, "No shared segment name", -100) \ +_(NO_SIZE, "Size not set (master)", -101) \ +_(CREATE_FAILURE, "Create failed", -102) \ +_(SET_SIZE, "Set size failed", -103) \ +_(MMAP, "mmap failed", -104) \ +_(SLAVE_TIMEOUT, "Slave map timeout", -105) typedef enum { @@ -143,6 +146,7 @@ typedef enum int ssvm_master_init (ssvm_private_t * ssvm, u32 master_index); int ssvm_slave_init (ssvm_private_t * ssvm, int timeout_in_seconds); +void ssvm_delete (ssvm_private_t * ssvm); #endif /* __included_ssvm_h__ */ diff --git a/src/svm/svm_fifo.c b/src/svm/svm_fifo.c new file mode 100644 index 00000000..11f90193 --- /dev/null +++ b/src/svm/svm_fifo.c @@ -0,0 +1,568 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "svm_fifo.h" + +/** create an svm fifo, in the current heap. Fails vs blow up the process */ +svm_fifo_t * +svm_fifo_create (u32 data_size_in_bytes) +{ + svm_fifo_t *f; + pthread_mutexattr_t attr; + pthread_condattr_t cattr; + + f = clib_mem_alloc_aligned_or_null (sizeof (*f) + data_size_in_bytes, + CLIB_CACHE_LINE_BYTES); + if (f == 0) + return 0; + + memset (f, 0, sizeof (*f) + data_size_in_bytes); + f->nitems = data_size_in_bytes; + f->ooos_list_head = OOO_SEGMENT_INVALID_INDEX; + + memset (&attr, 0, sizeof (attr)); + memset (&cattr, 0, sizeof (cattr)); + + if (pthread_mutexattr_init (&attr)) + clib_unix_warning ("mutexattr_init"); + if (pthread_mutexattr_setpshared (&attr, PTHREAD_PROCESS_SHARED)) + clib_unix_warning ("pthread_mutexattr_setpshared"); + if (pthread_mutex_init (&f->mutex, &attr)) + clib_unix_warning ("mutex_init"); + if (pthread_mutexattr_destroy (&attr)) + clib_unix_warning ("mutexattr_destroy"); + if (pthread_condattr_init (&cattr)) + clib_unix_warning ("condattr_init"); + if (pthread_condattr_setpshared (&cattr, PTHREAD_PROCESS_SHARED)) + clib_unix_warning ("condattr_setpshared"); + if (pthread_cond_init (&f->condvar, &cattr)) + clib_unix_warning ("cond_init1"); + if (pthread_condattr_destroy (&cattr)) + clib_unix_warning ("cond_init2"); + + return (f); +} + +always_inline ooo_segment_t * +ooo_segment_new (svm_fifo_t * f, u32 start, u32 length) +{ + ooo_segment_t *s; + + 
pool_get (f->ooo_segments, s); + + s->fifo_position = start; + s->length = length; + + s->prev = s->next = OOO_SEGMENT_INVALID_INDEX; + + return s; +} + +always_inline void +ooo_segment_del (svm_fifo_t * f, u32 index) +{ + ooo_segment_t *cur, *prev = 0, *next = 0; + cur = pool_elt_at_index (f->ooo_segments, index); + + if (cur->next != OOO_SEGMENT_INVALID_INDEX) + { + next = pool_elt_at_index (f->ooo_segments, cur->next); + next->prev = cur->prev; + } + + if (cur->prev != OOO_SEGMENT_INVALID_INDEX) + { + prev = pool_elt_at_index (f->ooo_segments, cur->prev); + prev->next = cur->next; + } + else + { + f->ooos_list_head = cur->next; + } + + pool_put (f->ooo_segments, cur); +} + +/** + * Add segment to fifo's out-of-order segment list. Takes care of merging + * adjacent segments and removing overlapping ones. + */ +static void +ooo_segment_add (svm_fifo_t * f, u32 offset, u32 length) +{ + ooo_segment_t *s, *new_s, *prev, *next, *it; + u32 new_index, position, end_offset, s_sof, s_eof, s_index; + + position = (f->tail + offset) % f->nitems; + end_offset = offset + length; + + if (f->ooos_list_head == OOO_SEGMENT_INVALID_INDEX) + { + s = ooo_segment_new (f, position, length); + f->ooos_list_head = s - f->ooo_segments; + f->ooos_newest = f->ooos_list_head; + return; + } + + /* Find first segment that starts after new segment */ + s = pool_elt_at_index (f->ooo_segments, f->ooos_list_head); + while (s->next != OOO_SEGMENT_INVALID_INDEX + && ooo_segment_offset (f, s) <= offset) + s = pool_elt_at_index (f->ooo_segments, s->next); + + s_index = s - f->ooo_segments; + s_sof = ooo_segment_offset (f, s); + s_eof = ooo_segment_end_offset (f, s); + + /* No overlap, add before current segment */ + if (end_offset < s_sof) + { + new_s = ooo_segment_new (f, position, length); + new_index = new_s - f->ooo_segments; + + /* Pool might've moved, get segment again */ + s = pool_elt_at_index (f->ooo_segments, s_index); + + if (s->prev != OOO_SEGMENT_INVALID_INDEX) + { + new_s->prev = 
s->prev; + + prev = pool_elt_at_index (f->ooo_segments, new_s->prev); + prev->next = new_index; + } + else + { + /* New head */ + f->ooos_list_head = new_index; + } + + new_s->next = s - f->ooo_segments; + s->prev = new_index; + f->ooos_newest = new_index; + return; + } + /* No overlap, add after current segment */ + else if (s_eof < offset) + { + new_s = ooo_segment_new (f, position, length); + new_index = new_s - f->ooo_segments; + + /* Pool might've moved, get segment again */ + s = pool_elt_at_index (f->ooo_segments, s_index); + + if (s->next != OOO_SEGMENT_INVALID_INDEX) + { + new_s->next = s->next; + + next = pool_elt_at_index (f->ooo_segments, new_s->next); + next->prev = new_index; + } + + new_s->prev = s - f->ooo_segments; + s->next = new_index; + f->ooos_newest = new_index; + + return; + } + + /* + * Merge needed + */ + + /* Merge at head */ + if (offset <= s_sof) + { + /* If we have a previous, check if we overlap */ + if (s->prev != OOO_SEGMENT_INVALID_INDEX) + { + prev = pool_elt_at_index (f->ooo_segments, s->prev); + + /* New segment merges prev and current. Remove previous and + * update position of current. */ + if (ooo_segment_end_offset (f, prev) >= offset) + { + s->fifo_position = prev->fifo_position; + s->length = s_eof - ooo_segment_offset (f, prev); + ooo_segment_del (f, s->prev); + } + } + else + { + s->fifo_position = position; + s->length = s_eof - ooo_segment_offset (f, s); + } + + /* The new segment's tail may cover multiple smaller ones */ + if (s_eof < end_offset) + { + /* Remove segments completely covered */ + it = (s->next != OOO_SEGMENT_INVALID_INDEX) ? + pool_elt_at_index (f->ooo_segments, s->next) : 0; + while (it && ooo_segment_end_offset (f, it) < end_offset) + { + next = (it->next != OOO_SEGMENT_INVALID_INDEX) ? + pool_elt_at_index (f->ooo_segments, it->next) : 0; + ooo_segment_del (f, it - f->ooo_segments); + it = next; + } + + /* Update length. Segment's start might have changed. 
*/
+          s->length = end_offset - ooo_segment_offset (f, s);
+
+          /* If partial overlap with last, merge */
+          if (it && ooo_segment_offset (f, it) < end_offset)
+            {
+              s->length +=
+                it->length - (ooo_segment_offset (f, it) - end_offset);
+              ooo_segment_del (f, it - f->ooo_segments);
+            }
+        }
+    }
+  /* Last but overlapping previous */
+  else if (s_eof <= end_offset)
+    {
+      s->length = end_offset - ooo_segment_offset (f, s);
+    }
+  /* New segment completely covered by current one */
+  else
+    {
+      /* Do Nothing */
+    }
+
+  /* Most recently updated segment */
+  f->ooos_newest = s - f->ooo_segments;
+}
+
+/**
+ * Removes segments that can now be enqueued because the fifo's tail has
+ * advanced. Returns the number of bytes added to tail.
+ *
+ * The caller must make sure the out-of-order list is not empty: the list
+ * head is dereferenced unconditionally.
+ *
+ * @param f                 fifo whose out-of-order list is inspected
+ * @param n_bytes_enqueued  number of bytes the caller just added at the tail
+ * @return bytes taken from the segment adjacent to the tail, 0 if none
+ */
+static int
+ooo_segment_try_collect (svm_fifo_t * f, u32 n_bytes_enqueued)
+{
+  ooo_segment_t *s;
+  u32 index, bytes = 0, diff;
+
+  s = pool_elt_at_index (f->ooo_segments, f->ooos_list_head);
+
+  /* If last tail update overlaps one/multiple ooo segments, remove them */
+  diff = (f->nitems + f->tail - s->fifo_position) % f->nitems;
+  while (0 < diff && diff < n_bytes_enqueued)
+    {
+      /* Segment end is beyond the tail. Advance tail and be done */
+      if (diff < s->length)
+        {
+          /* NOTE(review): the tail moves here but `bytes` stays 0 and the
+           * segment is left in the list; confirm the caller's cursize
+           * accounting is meant to skip these bytes. */
+          f->tail += s->length - diff;
+          f->tail %= f->nitems;
+          break;
+        }
+      /* If we have next go on */
+      else if (s->next != OOO_SEGMENT_INVALID_INDEX)
+        {
+          index = s - f->ooo_segments;
+          s = pool_elt_at_index (f->ooo_segments, s->next);
+          diff = (f->nitems + f->tail - s->fifo_position) % f->nitems;
+          ooo_segment_del (f, index);
+        }
+      /* End of search */
+      else
+        {
+          break;
+        }
+    }
+
+  /* If tail is adjacent to an ooo segment, 'consume' it */
+  if (diff == 0)
+    {
+      /* Never take more than the free space computed from cursize */
+      bytes = ((f->nitems - f->cursize) >= s->length) ? s->length :
+        f->nitems - f->cursize;
+
+      f->tail += bytes;
+      f->tail %= f->nitems;
+
+      ooo_segment_del (f, s - f->ooo_segments);
+    }
+
+  return bytes;
+}
+
+/**
+ * Add up to max_bytes at the tail of the fifo.
+ *
+ * When copy_from_here is null the data is assumed to have been written in
+ * place already (zero-copy) and only the tail is advanced. If out-of-order
+ * segments exist, the ones that became contiguous with the tail are
+ * collected and counted in the return value.
+ *
+ * @param f               fifo to enqueue into
+ * @param pid             not used by this function
+ * @param max_bytes       maximum number of bytes to enqueue
+ * @param copy_from_here  source buffer, or 0 for a zero-copy enqueue
+ * @return number of bytes added to the fifo, or -2 if the fifo is full
+ */
+static int
+svm_fifo_enqueue_internal (svm_fifo_t * f,
+                           int pid, u32 max_bytes, u8 * copy_from_here)
+{
+  u32 total_copy_bytes, first_copy_bytes, second_copy_bytes;
+  u32 cursize, nitems;
+
+  if (PREDICT_FALSE (f->cursize == f->nitems))
+    return -2;                  /* fifo stuffed */
+
+  /* read cursize, which can only decrease while we're working */
+  cursize = f->cursize;
+  nitems = f->nitems;
+
+  /* Number of bytes we're going to copy */
+  total_copy_bytes = (nitems - cursize) < max_bytes ?
+    (nitems - cursize) : max_bytes;
+
+  if (PREDICT_TRUE (copy_from_here != 0))
+    {
+      /* Number of bytes in first copy segment */
+      first_copy_bytes = ((nitems - f->tail) < total_copy_bytes)
+        ? (nitems - f->tail) : total_copy_bytes;
+
+      clib_memcpy (&f->data[f->tail], copy_from_here, first_copy_bytes);
+      f->tail += first_copy_bytes;
+      f->tail = (f->tail == nitems) ? 0 : f->tail;
+
+      /* Number of bytes in second copy segment, if any */
+      second_copy_bytes = total_copy_bytes - first_copy_bytes;
+      if (second_copy_bytes)
+        {
+          clib_memcpy (&f->data[f->tail], copy_from_here + first_copy_bytes,
+                       second_copy_bytes);
+          f->tail += second_copy_bytes;
+          f->tail = (f->tail == nitems) ? 0 : f->tail;
+        }
+    }
+  else
+    {
+      /* Account for a zero-copy enqueue done elsewhere */
+      ASSERT (max_bytes <= (nitems - cursize));
+      f->tail += max_bytes;
+      f->tail = f->tail % nitems;
+      total_copy_bytes = max_bytes;
+    }
+
+  /* Any out-of-order segments to collect? */
+  if (PREDICT_FALSE (f->ooos_list_head != OOO_SEGMENT_INVALID_INDEX))
+    total_copy_bytes += ooo_segment_try_collect (f, total_copy_bytes);
+
+  /* Atomically increase the queue length */
+  __sync_fetch_and_add (&f->cursize, total_copy_bytes);
+
+  return (total_copy_bytes);
+}
+
+/**
+ * Public entry point for an in-order enqueue; see
+ * svm_fifo_enqueue_internal() for arguments and return values.
+ */
+int
+svm_fifo_enqueue_nowait (svm_fifo_t * f,
+                         int pid, u32 max_bytes, u8 * copy_from_here)
+{
+  return svm_fifo_enqueue_internal (f, pid, max_bytes, copy_from_here);
+}
+
+/** Enqueue a future segment.
+ * Two choices: either copies the entire segment, or copies nothing
+ * Returns 0 if the entire segment was copied
+ * Returns -1 if none of the segment was copied due to lack of space
+ *
+ * The segment is recorded in the out-of-order list; neither the tail nor
+ * cursize is changed here.
+ *
+ * @param f               fifo to write into
+ * @param pid             not used by this function
+ * @param offset          distance in bytes from the current tail, must be > 0
+ * @param required_bytes  length of the segment
+ * @param copy_from_here  source buffer
+ */
+
+static int
+svm_fifo_enqueue_with_offset_internal2 (svm_fifo_t * f,
+                                        int pid,
+                                        u32 offset,
+                                        u32 required_bytes,
+                                        u8 * copy_from_here)
+{
+  u32 total_copy_bytes, first_copy_bytes, second_copy_bytes;
+  u32 cursize, nitems, free_bytes;
+  u32 tail_plus_offset;
+
+  ASSERT (offset > 0);
+
+  /* read cursize, which can only decrease while we're working */
+  cursize = f->cursize;
+  nitems = f->nitems;
+
+  /* Will this request fit? Compare without adding offset and length so
+   * that large values cannot wrap around and appear to fit. */
+  free_bytes = nitems - cursize;
+  if (offset > free_bytes || required_bytes > free_bytes - offset)
+    return -1;
+
+  ooo_segment_add (f, offset, required_bytes);
+
+  /* Number of bytes we're going to copy */
+  total_copy_bytes = required_bytes;
+  tail_plus_offset = (f->tail + offset) % nitems;
+
+  /* Number of bytes in first copy segment */
+  first_copy_bytes = ((nitems - tail_plus_offset) < total_copy_bytes)
+    ? (nitems - tail_plus_offset) : total_copy_bytes;
+
+  clib_memcpy (&f->data[tail_plus_offset], copy_from_here, first_copy_bytes);
+
+  /* Number of bytes in second copy segment, if any */
+  second_copy_bytes = total_copy_bytes - first_copy_bytes;
+  if (second_copy_bytes)
+    {
+      tail_plus_offset += first_copy_bytes;
+      tail_plus_offset %= nitems;
+
+      /* A second chunk only exists when the first one ran to the end */
+      ASSERT (tail_plus_offset == 0);
+
+      clib_memcpy (&f->data[tail_plus_offset],
+                   copy_from_here + first_copy_bytes, second_copy_bytes);
+    }
+
+  return (0);
+}
+
+
+/**
+ * Public entry point for an out-of-order enqueue; see
+ * svm_fifo_enqueue_with_offset_internal2() for arguments and return values.
+ */
+int
+svm_fifo_enqueue_with_offset (svm_fifo_t * f,
+                              int pid,
+                              u32 offset,
+                              u32 required_bytes, u8 * copy_from_here)
+{
+  return svm_fifo_enqueue_with_offset_internal2
+    (f, pid, offset, required_bytes, copy_from_here);
+}
+
+
+/**
+ * Remove up to max_bytes from the head of the fifo.
+ *
+ * When copy_here is null the data is assumed to have been consumed in place
+ * (zero-copy) and only the head is advanced.
+ *
+ * @param f          fifo to dequeue from
+ * @param pid        not used by this function
+ * @param max_bytes  maximum number of bytes to dequeue
+ * @param copy_here  destination buffer, or 0 for a zero-copy dequeue
+ * @return number of bytes removed, or -2 if the fifo is empty
+ */
+static int
+svm_fifo_dequeue_internal2 (svm_fifo_t * f,
+                            int pid, u32 max_bytes, u8 * copy_here)
+{
+  u32 total_copy_bytes, first_copy_bytes, second_copy_bytes;
+  u32 cursize, nitems;
+
+  if (PREDICT_FALSE (f->cursize == 0))
+    return -2;                  /* nothing in the fifo */
+
+  /* read cursize, which can only increase while we're working */
+  cursize = f->cursize;
+  nitems = f->nitems;
+
+  /* Number of bytes we're going to copy */
+  total_copy_bytes = (cursize < max_bytes) ? cursize : max_bytes;
+
+  if (PREDICT_TRUE (copy_here != 0))
+    {
+      /* Number of bytes in first copy segment */
+      first_copy_bytes = ((nitems - f->head) < total_copy_bytes)
+        ? (nitems - f->head) : total_copy_bytes;
+      clib_memcpy (copy_here, &f->data[f->head], first_copy_bytes);
+      f->head += first_copy_bytes;
+      f->head = (f->head == nitems) ? 0 : f->head;
+
+      /* Number of bytes in second copy segment, if any */
+      second_copy_bytes = total_copy_bytes - first_copy_bytes;
+      if (second_copy_bytes)
+        {
+          clib_memcpy (copy_here + first_copy_bytes,
+                       &f->data[f->head], second_copy_bytes);
+          f->head += second_copy_bytes;
+          f->head = (f->head == nitems) ? 0 : f->head;
+        }
+    }
+  else
+    {
+      /* Account for a zero-copy dequeue done elsewhere */
+      ASSERT (max_bytes <= cursize);
+      f->head += max_bytes;
+      f->head = f->head % nitems;
+      total_copy_bytes = max_bytes;
+    }
+
+  __sync_fetch_and_sub (&f->cursize, total_copy_bytes);
+
+  return (total_copy_bytes);
+}
+
+/**
+ * Public entry point for a dequeue; see svm_fifo_dequeue_internal2() for
+ * arguments and return values.
+ */
+int
+svm_fifo_dequeue_nowait (svm_fifo_t * f,
+                         int pid, u32 max_bytes, u8 * copy_here)
+{
+  return svm_fifo_dequeue_internal2 (f, pid, max_bytes, copy_here);
+}
+
+/**
+ * Copy queued data out of the fifo without consuming it.
+ *
+ * Neither head nor cursize is modified. When copy_here is null nothing is
+ * copied and only the byte count is returned.
+ *
+ * @param f          fifo to read from
+ * @param pid        not used by this function
+ * @param offset     number of queued bytes to skip, counted from the head
+ * @param max_bytes  maximum number of bytes to copy
+ * @param copy_here  destination buffer
+ * @return number of bytes available at offset (at most max_bytes), or -2 if
+ *         the fifo is empty or offset lies beyond the queued data
+ */
+int
+svm_fifo_peek (svm_fifo_t * f, int pid, u32 offset, u32 max_bytes,
+               u8 * copy_here)
+{
+  u32 total_copy_bytes, first_copy_bytes, second_copy_bytes;
+  u32 cursize, nitems, real_head;
+
+  if (PREDICT_FALSE (f->cursize == 0))
+    return -2;                  /* nothing in the fifo */
+
+  /* read cursize, which can only increase while we're working */
+  cursize = f->cursize;
+  nitems = f->nitems;
+
+  if (PREDICT_FALSE (offset > cursize))
+    return -2;                  /* nothing queued that far in */
+
+  /* First byte to copy: head advanced by offset, wrapped at nitems.
+   * Written without forming head + offset so it cannot overflow. */
+  real_head = (offset < (nitems - f->head)) ?
+    (f->head + offset) : (offset - (nitems - f->head));
+
+  /* Number of bytes we're going to copy */
+  total_copy_bytes = ((cursize - offset) < max_bytes) ?
+    (cursize - offset) : max_bytes;
+
+  if (PREDICT_TRUE (copy_here != 0))
+    {
+      /* Number of bytes in first copy segment */
+      first_copy_bytes =
+        ((nitems - real_head) < total_copy_bytes) ?
+        (nitems - real_head) : total_copy_bytes;
+      clib_memcpy (copy_here, &f->data[real_head], first_copy_bytes);
+
+      /* Number of bytes in second copy segment, if any */
+      second_copy_bytes = total_copy_bytes - first_copy_bytes;
+      if (second_copy_bytes)
+        {
+          clib_memcpy (copy_here + first_copy_bytes, &f->data[0],
+                       second_copy_bytes);
+        }
+    }
+  return total_copy_bytes;
+}
+
+/**
+ * Discard up to max_bytes from the head of the fifo without copying them.
+ *
+ * @param f          fifo to drop data from
+ * @param pid        not used by this function
+ * @param max_bytes  maximum number of bytes to drop
+ * @return number of bytes dropped, or -2 if the fifo is empty
+ */
+int
+svm_fifo_dequeue_drop (svm_fifo_t * f, int pid, u32 max_bytes)
+{
+  u32 total_drop_bytes, first_drop_bytes, second_drop_bytes;
+  u32 cursize, nitems;
+
+  if (PREDICT_FALSE (f->cursize == 0))
+    return -2;                  /* nothing in the fifo */
+
+  /* read cursize, which can only increase while we're working */
+  cursize = f->cursize;
+  nitems = f->nitems;
+
+  /* Number of bytes we're going to drop */
+  total_drop_bytes = (cursize < max_bytes) ? cursize : max_bytes;
+
+  /* Number of bytes in first drop segment */
+  first_drop_bytes =
+    ((nitems - f->head) < total_drop_bytes) ?
+    (nitems - f->head) : total_drop_bytes;
+  f->head += first_drop_bytes;
+  f->head = (f->head == nitems) ? 0 : f->head;
+
+  /* Number of bytes in second drop segment, if any */
+  second_drop_bytes = total_drop_bytes - first_drop_bytes;
+  if (second_drop_bytes)
+    {
+      f->head += second_drop_bytes;
+      f->head = (f->head == nitems) ? 0 : f->head;
+    }
+
+  __sync_fetch_and_sub (&f->cursize, total_drop_bytes);
+
+  return total_drop_bytes;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/svm/svm_fifo.h b/src/svm/svm_fifo.h
new file mode 100644
index 00000000..70624b74
--- /dev/null
+++ b/src/svm/svm_fifo.h
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_ssvm_fifo_h__
+#define __included_ssvm_fifo_h__
+
+/* NOTE(review): the header names of the seven includes below are missing
+ * in this copy of the patch; restore them from the upstream file. */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/** Tag recorded in svm_fifo_t.tag by svm_fifo_lock(); svm_fifo_unlock()
+ * resets it to 0, i.e. SVM_FIFO_TAG_NOT_HELD. */
+typedef enum
+{
+  SVM_FIFO_TAG_NOT_HELD = 0,
+  SVM_FIFO_TAG_DEQUEUE,
+  SVM_FIFO_TAG_ENQUEUE,
+} svm_lock_tag_t;
+
+/** Out-of-order segment */
+typedef struct
+{
+  u32 next;     /**< Next linked-list element pool index */
+  u32 prev;     /**< Previous linked-list element pool index */
+
+  u32 fifo_position;    /**< Start of segment, normalized */
+  u32 length;           /**< Length of segment */
+} ooo_segment_t;
+
+/** Index value meaning "no segment" in the out-of-order linked list */
+#define OOO_SEGMENT_INVALID_INDEX ((u32)~0)
+
+/** Byte fifo; the data area starts at the `data` mark at the end */
+typedef struct
+{
+  pthread_mutex_t mutex;        /**< Taken by svm_fifo_lock() */
+  pthread_cond_t condvar;       /**< Not used by the helpers in this header */
+  u32 owner_pid;                /**< pid passed to svm_fifo_lock(), 0 when unlocked */
+  svm_lock_tag_t tag;           /**< tag passed to svm_fifo_lock(), 0 when unlocked */
+  volatile u32 cursize;         /**< Number of bytes currently queued */
+  u32 nitems;                   /**< Capacity of the data area in bytes */
+
+  /* Backpointers */
+  u32 server_session_index;
+  u32 client_session_index;
+  u8 server_thread_index;
+  u8 client_thread_index;
+    CLIB_CACHE_LINE_ALIGN_MARK (end_shared);
+  u32 head;                     /**< Index of the next byte to dequeue */
+    CLIB_CACHE_LINE_ALIGN_MARK (end_consumer);
+
+  /* producer */
+  u32 tail;                     /**< Index of the next byte to enqueue */
+
+  ooo_segment_t *ooo_segments;  /**< Pool of ooo segments */
+  u32 ooos_list_head;           /**< Head of out-of-order linked-list */
+  u32 ooos_newest;              /**< Last segment to have been updated */
+
+    CLIB_CACHE_LINE_ALIGN_MARK (data);
+} svm_fifo_t;
+
+/**
+ * Take the fifo mutex and record who holds it.
+ *
+ * @param f       fifo to lock
+ * @param pid     value stored in owner_pid
+ * @param tag     value stored in tag
+ * @param nowait  0 to block until the mutex is available, non-zero to try
+ *                once
+ * @return 0 when the lock is held, -1 if nowait was set and the mutex
+ *         could not be taken
+ */
+static inline int
+svm_fifo_lock (svm_fifo_t * f, u32 pid, u32 tag, int nowait)
+{
+  if (PREDICT_TRUE (nowait == 0))
+    pthread_mutex_lock (&f->mutex);
+  else
+    {
+      if (pthread_mutex_trylock (&f->mutex))
+        return -1;
+    }
+  f->owner_pid = pid;
+  f->tag = tag;
+  return 0;
+}
+
+/** Clear the ownership fields, then release the fifo mutex. */
+static inline void
+svm_fifo_unlock (svm_fifo_t * f)
+{
+  f->owner_pid = 0;
+  f->tag = 0;
+  /* Ownership fields are written before the mutex is released */
+  CLIB_MEMORY_BARRIER ();
+  pthread_mutex_unlock (&f->mutex);
+}
+
+/** Number of bytes that can be dequeued right now. */
+static inline u32
+svm_fifo_max_dequeue (svm_fifo_t * f)
+{
+  return f->cursize;
+}
+
+/** Number of bytes that can be enqueued right now. */
+static inline u32
+svm_fifo_max_enqueue (svm_fifo_t * f)
+{
+  return f->nitems - f->cursize;
+}
+
+/** Non-zero when the out-of-order segment list is not empty. */
+static inline u8
+svm_fifo_has_ooo_data (svm_fifo_t * f)
+{
+  return f->ooos_list_head != OOO_SEGMENT_INVALID_INDEX;
+}
+
+svm_fifo_t *svm_fifo_create (u32 data_size_in_bytes);
+
+int svm_fifo_enqueue_nowait (svm_fifo_t * f, int pid, u32 max_bytes,
+                             u8 * copy_from_here);
+
+int svm_fifo_enqueue_with_offset (svm_fifo_t * f, int pid,
+                                  u32 offset, u32 required_bytes,
+                                  u8 * copy_from_here);
+
+int svm_fifo_dequeue_nowait (svm_fifo_t * f, int pid, u32 max_bytes,
+                             u8 * copy_here);
+
+int svm_fifo_peek (svm_fifo_t * f, int pid, u32 offset, u32 max_bytes,
+                   u8 * copy_here);
+int svm_fifo_dequeue_drop (svm_fifo_t * f, int pid, u32 max_bytes);
+
+/**
+ * Most recently updated out-of-order segment.
+ *
+ * @return the segment, or 0 when ooos_newest holds
+ *         OOO_SEGMENT_INVALID_INDEX (pointer arithmetic with that index
+ *         would yield an address outside the pool)
+ */
+always_inline ooo_segment_t *
+svm_fifo_newest_ooo_segment (svm_fifo_t * f)
+{
+  if (f->ooos_newest == OOO_SEGMENT_INVALID_INDEX)
+    return 0;
+  return f->ooo_segments + f->ooos_newest;
+}
+
+/** Distance in bytes from the fifo tail to the start of segment s. */
+always_inline u32
+ooo_segment_offset (svm_fifo_t * f, ooo_segment_t * s)
+{
+  return ((f->nitems + s->fifo_position - f->tail) % f->nitems);
+}
+
+/** Distance in bytes from the fifo tail to the end of segment s,
+ * reduced modulo nitems. */
+always_inline u32
+ooo_segment_end_offset (svm_fifo_t * f, ooo_segment_t * s)
+{
+  return ((f->nitems + s->fifo_position + s->length - f->tail) % f->nitems);
+}
+
+#endif /* __included_ssvm_fifo_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/svm/svm_fifo_segment.c b/src/svm/svm_fifo_segment.c
new file mode 100644
index 00000000..acabb3bd
--- /dev/null
+++ b/src/svm/svm_fifo_segment.c
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* NOTE(review): the header name of this include is missing in this copy
+ * of the patch; restore it from the upstream file. */
+#include
+
+svm_fifo_segment_main_t svm_fifo_segment_main;
+
+/** (master) create an svm fifo segment
+ *
+ * Allocates a private segment record, creates the shared memory segment and
+ * sets up the shared fifo-segment header on the segment's heap.
+ *
+ * @param a  segment_name and segment_size are read; new_segment_index is
+ *           set on success
+ * @return 0 on success, otherwise the ssvm_master_init() error code
+ */
+int
+svm_fifo_segment_create (svm_fifo_segment_create_args_t * a)
+{
+  int rv;
+  svm_fifo_segment_private_t *s;
+  svm_fifo_segment_main_t *sm = &svm_fifo_segment_main;
+  ssvm_shared_header_t *sh;
+  svm_fifo_segment_header_t *fsh;
+  void *oldheap;
+
+  /* Allocate a fresh segment */
+  pool_get (sm->segments, s);
+  memset (s, 0, sizeof (*s));
+
+  s->ssvm.ssvm_size = a->segment_size;
+  s->ssvm.i_am_master = 1;
+  s->ssvm.my_pid = getpid ();
+  s->ssvm.name = (u8 *) a->segment_name;
+  s->ssvm.requested_va = sm->next_baseva;
+
+  rv = ssvm_master_init (&s->ssvm, s - sm->segments);
+
+  if (rv)
+    {
+      /* s is a pool element, not a vector: hand the slot back to the pool */
+      pool_put (sm->segments, s);
+      return (rv);
+    }
+
+  /* Note: requested_va updated due to seg base addr randomization */
+  sm->next_baseva = s->ssvm.requested_va + a->segment_size;
+
+  sh = s->ssvm.sh;
+  oldheap = ssvm_push_heap (sh);
+
+  /* Set up svm_fifo_segment shared header */
+  fsh = clib_mem_alloc (sizeof (*fsh));
+  memset (fsh, 0, sizeof (*fsh));
+  sh->opaque[0] = fsh;
+  s->h = fsh;
+  fsh->segment_name = format (0, "%s%c", a->segment_name, 0);
+
+  /* Avoid vec_add1(...) failure when adding a fifo, etc. */
+  vec_validate (fsh->fifos, 64);
+  _vec_len (fsh->fifos) = 0;
+
+  ssvm_pop_heap (oldheap);
+
+  sh->ready = 1;
+  a->new_segment_index = s - sm->segments;
+  return (0);
+}
+
+/** (slave) attach to an svm fifo segment
+ *
+ * @param a  segment_name and segment_size are read; new_segment_index is
+ *           set on success
+ * @return 0 on success, otherwise the ssvm_slave_init() error code
+ */
+int
+svm_fifo_segment_attach (svm_fifo_segment_create_args_t * a)
+{
+  int rv;
+  svm_fifo_segment_private_t *s;
+  svm_fifo_segment_main_t *sm = &svm_fifo_segment_main;
+  ssvm_shared_header_t *sh;
+  svm_fifo_segment_header_t *fsh;
+
+  /* Allocate a fresh segment */
+  pool_get (sm->segments, s);
+
+  memset (s, 0, sizeof (*s));
+
+  s->ssvm.ssvm_size = a->segment_size;
+  s->ssvm.my_pid = getpid ();
+  s->ssvm.name = (u8 *) a->segment_name;
+  s->ssvm.requested_va = sm->next_baseva;
+
+  rv = ssvm_slave_init (&s->ssvm, sm->timeout_in_seconds);
+
+  if (rv)
+    {
+      /* s is a pool element, not a vector: hand the slot back to the pool */
+      pool_put (sm->segments, s);
+      return (rv);
+    }
+
+  /* Fish the segment header */
+  sh = s->ssvm.sh;
+  fsh = (svm_fifo_segment_header_t *) sh->opaque[0];
+  s->h = fsh;
+
+  a->new_segment_index = s - sm->segments;
+  return (0);
+}
+
+/** Delete the shared memory segment and release its private record. */
+void
+svm_fifo_segment_delete (svm_fifo_segment_private_t * s)
+{
+  svm_fifo_segment_main_t *sm = &svm_fifo_segment_main;
+  ssvm_delete (&s->ssvm);
+  pool_put (sm->segments, s);
+}
+
+/**
+ * Create a fifo on the segment's heap and record it in the segment's fifo
+ * table.
+ *
+ * @param s                   segment to allocate from
+ * @param data_size_in_bytes  passed to svm_fifo_create()
+ * @return the new fifo, or 0 if svm_fifo_create() failed
+ */
+svm_fifo_t *
+svm_fifo_segment_alloc_fifo (svm_fifo_segment_private_t * s,
+                             u32 data_size_in_bytes)
+{
+  ssvm_shared_header_t *sh;
+  svm_fifo_segment_header_t *fsh;
+  svm_fifo_t *f;
+  void *oldheap;
+
+  sh = s->ssvm.sh;
+  fsh = (svm_fifo_segment_header_t *) sh->opaque[0];
+  oldheap = ssvm_push_heap (sh);
+
+  /* Note: this can fail, in which case: create another segment */
+  f = svm_fifo_create (data_size_in_bytes);
+  if (f == 0)
+    {
+      ssvm_pop_heap (oldheap);
+      return (0);
+    }
+
+  vec_add1 (fsh->fifos, f);
+
+  ssvm_pop_heap (oldheap);
+  return (f);
+}
+
+/**
+ * Remove a fifo from the segment's fifo table and free it on the segment's
+ * heap.
+ *
+ * NOTE(review): a fifo that is not in the table is still freed after the
+ * warning; confirm that is intended.
+ */
+void
+svm_fifo_segment_free_fifo (svm_fifo_segment_private_t * s, svm_fifo_t * f)
+{
+  ssvm_shared_header_t *sh;
+  svm_fifo_segment_header_t *fsh;
+  void *oldheap;
+  int i;
+
+  sh = s->ssvm.sh;
+  fsh = (svm_fifo_segment_header_t *) sh->opaque[0];
+  oldheap = ssvm_push_heap (sh);
+
+  for (i = 0; i < vec_len (fsh->fifos); i++)
+    {
+      if (fsh->fifos[i] == f)
+        {
+          vec_delete (fsh->fifos, 1, i);
+          goto found;
+        }
+    }
+  clib_warning ("fifo 0x%llx not found in fifo table...", f);
+
+found:
+  clib_mem_free (f);
+  ssvm_pop_heap (oldheap);
+}
+
+/**
+ * Set the base address for the next segment and the attach timeout.
+ */
+void
+svm_fifo_segment_init (u64 baseva, u32 timeout_in_seconds)
+{
+  svm_fifo_segment_main_t *sm = &svm_fifo_segment_main;
+
+  sm->next_baseva = baseva;
+  sm->timeout_in_seconds = timeout_in_seconds;
+}
+
+/** Index of segment s in the segment pool. */
+u32
+svm_fifo_segment_index (svm_fifo_segment_private_t * s)
+{
+  return s - svm_fifo_segment_main.segments;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/svm/svm_fifo_segment.h b/src/svm/svm_fifo_segment.h
new file mode 100644
index 00000000..793fa7c8
--- /dev/null
+++ b/src/svm/svm_fifo_segment.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_ssvm_fifo_segment_h__
+#define __included_ssvm_fifo_segment_h__
+
+#include "svm_fifo.h"
+#include "ssvm.h"
+
+/** Fifo-segment header; svm_fifo_segment_create() allocates it on the
+ * segment heap and publishes it through the shared header's opaque[0] */
+typedef struct
+{
+  volatile svm_fifo_t **fifos;  /**< Vector of fifos allocated in the segment */
+  u8 *segment_name;             /**< Copy of the segment name */
+} svm_fifo_segment_header_t;
+
+/** Per-process record of a created or attached segment */
+typedef struct
+{
+  ssvm_private_t ssvm;                  /**< Underlying ssvm segment state */
+  svm_fifo_segment_header_t *h;         /**< Fifo-segment header of the segment */
+} svm_fifo_segment_private_t;
+
+/** Process-wide fifo segment state */
+typedef struct
+{
+  /** Pool of segments */
+  svm_fifo_segment_private_t *segments;
+  /** Base address requested for the next segment */
+  u64 next_baseva;
+  /** Timeout handed to ssvm_slave_init() on attach */
+  u32 timeout_in_seconds;
+} svm_fifo_segment_main_t;
+
+extern svm_fifo_segment_main_t svm_fifo_segment_main;
+
+/** Arguments of svm_fifo_segment_create() and svm_fifo_segment_attach() */
+typedef struct
+{
+  char *segment_name;           /**< in: name of the segment */
+  u32 segment_size;             /**< in: size of the segment */
+  u32 new_segment_index;        /**< out: index of the segment in the pool */
+} svm_fifo_segment_create_args_t;
+
+/** Look up a segment by the index reported in new_segment_index. */
+static inline svm_fifo_segment_private_t *
+svm_fifo_get_segment (u32 segment_index)
+{
+  return vec_elt_at_index (svm_fifo_segment_main.segments, segment_index);
+}
+
+/* Error list, expanded below as _(name, description, code) */
+#define foreach_ssvm_fifo_segment_api_error \
+_(OUT_OF_SPACE, "Out of space in segment", -200)
+
+typedef enum
+{
+#define _(n,s,c) SSVM_FIFO_SEGMENT_API_ERROR_##n = c,
+  foreach_ssvm_fifo_segment_api_error
+#undef _
+} ssvm_fifo_segment_api_error_enum_t;
+
+int svm_fifo_segment_create (svm_fifo_segment_create_args_t * a);
+int svm_fifo_segment_attach (svm_fifo_segment_create_args_t * a);
+void svm_fifo_segment_delete (svm_fifo_segment_private_t * s);
+
+svm_fifo_t *svm_fifo_segment_alloc_fifo (svm_fifo_segment_private_t * s,
+                                         u32 data_size_in_bytes);
+void svm_fifo_segment_free_fifo (svm_fifo_segment_private_t * s,
+                                 svm_fifo_t * f);
+
+void svm_fifo_segment_init (u64 baseva, u32 timeout_in_seconds);
+
+u32 svm_fifo_segment_index (svm_fifo_segment_private_t * s);
+
+#endif /* __included_ssvm_fifo_segment_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/svm/test_svm_fifo1.c b/src/svm/test_svm_fifo1.c
new file mode 100644
index 00000000..355653df
--- /dev/null
+++ b/src/svm/test_svm_fifo1.c
@@ -0,0 +1,361 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "svm_fifo_segment.h"
+
+/**
+ * Default test: fill and drain a 4096 byte fifo twice with a short string
+ * and compare the last dequeued copy with the original.
+ *
+ * The outcome, success included, is reported through the returned
+ * clib_error_t.
+ */
+clib_error_t *
+hello_world (int verbose)
+{
+  svm_fifo_segment_create_args_t _a, *a = &_a;
+  svm_fifo_segment_private_t *sp;
+  svm_fifo_t *f;
+  int rv;
+  u8 *test_data;
+  u8 *retrieved_data = 0;
+  clib_error_t *error = 0;
+  int pid = getpid ();
+
+  memset (a, 0, sizeof (*a));
+
+  a->segment_name = "fifo-test1";
+  a->segment_size = 256 << 10;
+
+  rv = svm_fifo_segment_create (a);
+
+  if (rv)
+    return clib_error_return (0, "svm_fifo_segment_create returned %d", rv);
+
+  sp = svm_fifo_get_segment (a->new_segment_index);
+
+  f = svm_fifo_segment_alloc_fifo (sp, 4096);
+
+  if (f == 0)
+    return clib_error_return (0, "svm_fifo_segment_alloc_fifo failed");
+
+  test_data = format (0, "Hello world%c", 0);
+  vec_validate (retrieved_data, vec_len (test_data) - 1);
+
+  while (svm_fifo_max_enqueue (f) >= vec_len (test_data))
+    svm_fifo_enqueue_nowait (f, pid, vec_len (test_data), test_data);
+
+  while (svm_fifo_max_dequeue (f) >= vec_len (test_data))
+    svm_fifo_dequeue_nowait (f, pid, vec_len (retrieved_data),
+                             retrieved_data);
+
+  /* Second pass: head and tail now start away from index 0 */
+  while (svm_fifo_max_enqueue (f) >= vec_len (test_data))
+    svm_fifo_enqueue_nowait (f, pid, vec_len (test_data), test_data);
+
+  while (svm_fifo_max_dequeue (f) >= vec_len (test_data))
+    svm_fifo_dequeue_nowait (f, pid, vec_len (retrieved_data),
+                             retrieved_data);
+
+  if (!memcmp (retrieved_data, test_data, vec_len (test_data)))
+    error = clib_error_return (0, "data test OK, got '%s'", retrieved_data);
+  else
+    error = clib_error_return (0, "data test FAIL!");
+
+  svm_fifo_segment_free_fifo (sp, f);
+
+  return error;
+}
+
+/**
+ * "master" test: create the segment and one 4096 byte fifo, then attempt
+ * 1000 enqueues of the test string without waiting for a reader.
+ */
+clib_error_t *
+master (int verbose)
+{
+  svm_fifo_segment_create_args_t _a, *a = &_a;
+  svm_fifo_segment_private_t *sp;
+  svm_fifo_t *f;
+  int rv;
+  u8 *test_data;
+  int i;
+  int pid = getpid ();
+
+  memset (a, 0, sizeof (*a));
+
+  a->segment_name = "fifo-test1";
+  a->segment_size = 256 << 10;
+
+  rv = svm_fifo_segment_create (a);
+
+  if (rv)
+    return clib_error_return (0, "svm_fifo_segment_create returned %d", rv);
+
+  sp = svm_fifo_get_segment (a->new_segment_index);
+
+  f = svm_fifo_segment_alloc_fifo (sp, 4096);
+
+  if (f == 0)
+    return clib_error_return (0, "svm_fifo_segment_alloc_fifo failed");
+
+  test_data = format (0, "Hello world%c", 0);
+
+  for (i = 0; i < 1000; i++)
+    svm_fifo_enqueue_nowait (f, pid, vec_len (test_data), test_data);
+
+  return clib_error_return (0, "master (enqueue) done");
+}
+
+/**
+ * "mempig" test: allocate 4096 byte fifos until allocation fails (at most
+ * 1000), free them all, and repeat once, printing the count each time.
+ */
+clib_error_t *
+mempig (int verbose)
+{
+  svm_fifo_segment_create_args_t _a, *a = &_a;
+  svm_fifo_segment_private_t *sp;
+  svm_fifo_t *f;
+  svm_fifo_t **flist = 0;
+  int rv;
+  int i;
+
+  memset (a, 0, sizeof (*a));
+
+  a->segment_name = "fifo-test1";
+  a->segment_size = 256 << 10;
+
+  rv = svm_fifo_segment_create (a);
+
+  if (rv)
+    return clib_error_return (0, "svm_fifo_segment_create returned %d", rv);
+
+  sp = svm_fifo_get_segment (a->new_segment_index);
+
+  for (i = 0; i < 1000; i++)
+    {
+      f = svm_fifo_segment_alloc_fifo (sp, 4096);
+      if (f == 0)
+        break;
+      vec_add1 (flist, f);
+    }
+
+  fformat (stdout, "Try #1: created %d fifos...\n", vec_len (flist));
+  for (i = 0; i < vec_len (flist); i++)
+    {
+      f = flist[i];
+      svm_fifo_segment_free_fifo (sp, f);
+    }
+
+  _vec_len (flist) = 0;
+
+  for (i = 0; i < 1000; i++)
+    {
+      f = svm_fifo_segment_alloc_fifo (sp, 4096);
+      if (f == 0)
+        break;
+      vec_add1 (flist, f);
+    }
+
+  fformat (stdout, "Try #2: created %d fifos...\n", vec_len (flist));
+  for (i = 0; i < vec_len (flist); i++)
+    {
+      f = flist[i];
+      svm_fifo_segment_free_fifo (sp, f);
+    }
+
+  return 0;
+}
+
+/**
+ * "offset" test: enqueue three 4096 byte blocks as first, third
+ * (out of order) and second, then check that one dequeue returns them in
+ * order. Every fifo call is checked so that a failing call is reported as
+ * such rather than as a data mismatch.
+ */
+clib_error_t *
+offset (int verbose)
+{
+  svm_fifo_segment_create_args_t _a, *a = &_a;
+  svm_fifo_segment_private_t *sp;
+  svm_fifo_t *f;
+  int rv;
+  u32 *test_data = 0;
+  u32 *recovered_data = 0;
+  int i;
+  int pid = getpid ();
+
+  memset (a, 0, sizeof (*a));
+
+  a->segment_name = "fifo-test1";
+  a->segment_size = 256 << 10;
+
+  rv = svm_fifo_segment_create (a);
+
+  if (rv)
+    return clib_error_return (0, "svm_fifo_segment_create returned %d", rv);
+
+  sp = svm_fifo_get_segment (a->new_segment_index);
+
+  f = svm_fifo_segment_alloc_fifo (sp, 200 << 10);
+
+  if (f == 0)
+    return clib_error_return (0, "svm_fifo_segment_alloc_fifo failed");
+
+  for (i = 0; i < (3 * 1024); i++)
+    vec_add1 (test_data, i);
+
+  /* Enqueue the first 1024 u32's */
+  rv = svm_fifo_enqueue_nowait (f, pid, 4096 /* bytes to enqueue */ ,
+                                (u8 *) test_data);
+  if (rv < 0)
+    return clib_error_return (0, "svm_fifo_enqueue_nowait returned %d", rv);
+
+  /* Enqueue the third 1024 u32's 4096 bytes ahead of the current tail */
+  rv = svm_fifo_enqueue_with_offset (f, pid, 4096, 4096,
+                                     (u8 *) & test_data[2048]);
+  if (rv)
+    return clib_error_return (0, "svm_fifo_enqueue_with_offset returned %d",
+                              rv);
+
+  /* Enqueue the second 1024 u32's at the current tail */
+  rv = svm_fifo_enqueue_nowait (f, pid, 4096 /* bytes to enqueue */ ,
+                                (u8 *) & test_data[1024]);
+  if (rv < 0)
+    return clib_error_return (0, "svm_fifo_enqueue_nowait returned %d", rv);
+
+  vec_validate (recovered_data, (3 * 1024) - 1);
+
+  rv = svm_fifo_dequeue_nowait (f, pid, 3 * 4096, (u8 *) recovered_data);
+  if (rv != 3 * 4096)
+    return clib_error_return (0, "svm_fifo_dequeue_nowait returned %d", rv);
+
+  for (i = 0; i < (3 * 1024); i++)
+    {
+      if (recovered_data[i] != test_data[i])
+        {
+          clib_warning ("[%d] expected %d recovered %d", i,
+                        test_data[i], recovered_data[i]);
+          return clib_error_return (0, "offset test FAILED");
+        }
+    }
+
+  return clib_error_return (0, "offset test OK");
+}
+
+/**
+ * "slave" test: attach to the segment created by the master, take its
+ * first fifo and dequeue the test string 1000 times, comparing each copy.
+ */
+clib_error_t *
+slave (int verbose)
+{
+  svm_fifo_segment_create_args_t _a, *a = &_a;
+  svm_fifo_segment_private_t *sp;
+  svm_fifo_segment_header_t *fsh;
+  svm_fifo_t *f;
+  ssvm_shared_header_t *sh;
+  int rv;
+  u8 *test_data;
+  u8 *retrieved_data = 0;
+  int pid = getpid ();
+  int i;
+
+  memset (a, 0, sizeof (*a));
+
+  a->segment_name = "fifo-test1";
+
+  rv = svm_fifo_segment_attach (a);
+
+  if (rv)
+    return clib_error_return (0, "svm_fifo_segment_attach returned %d", rv);
+
+  sp = svm_fifo_get_segment (a->new_segment_index);
+  sh = sp->ssvm.sh;
+  fsh = (svm_fifo_segment_header_t *) sh->opaque[0];
+
+  /* might wanna wait.. */
+  if (vec_len (fsh->fifos) == 0)
+    return clib_error_return (0, "no fifo allocated in segment");
+  f = (svm_fifo_t *) fsh->fifos[0];
+
+  /* Lazy bastards united */
+  test_data = format (0, "Hello world%c", 0);
+  vec_validate (retrieved_data, vec_len (test_data) - 1);
+
+  for (i = 0; i < 1000; i++)
+    {
+      svm_fifo_dequeue_nowait (f, pid, vec_len (retrieved_data),
+                               retrieved_data);
+      if (memcmp (retrieved_data, test_data, vec_len (retrieved_data)))
+        return clib_error_return (0, "retrieved data incorrect, '%s'",
+                                  retrieved_data);
+    }
+
+  return clib_error_return (0, "slave (dequeue) done");
+}
+
+
+/**
+ * Parse the command line, run the selected test and report its result.
+ *
+ * Recognized words: "verbose [n]", "master", "slave", "mempig", "offset";
+ * with no test word the hello_world test runs.
+ *
+ * @return always 0; the result is printed with clib_error_report()
+ */
+int
+test_ssvm_fifo1 (unformat_input_t * input)
+{
+  clib_error_t *error = 0;
+  int verbose = 0;
+  int test_id = 0;
+
+  svm_fifo_segment_init (0x200000000ULL, 20);
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "verbose %d", &verbose))
+        ;
+      else if (unformat (input, "verbose"))
+        verbose = 1;
+      else if (unformat (input, "master"))
+        test_id = 1;
+      else if (unformat (input, "slave"))
+        test_id = 2;
+      else if (unformat (input, "mempig"))
+        test_id = 3;
+      else if (unformat (input, "offset"))
+        test_id = 4;
+      else
+        {
+          error = clib_error_create ("unknown input `%U'\n",
+                                     format_unformat_error, input);
+          goto out;
+        }
+    }
+
+  switch (test_id)
+    {
+    case 0:
+      error = hello_world (verbose);
+      break;
+
+    case 1:
+      error = master (verbose);
+      break;
+
+    case 2:
+      error = slave (verbose);
+      break;
+
+    case 3:
+      error = mempig (verbose);
+      break;
+
+    case 4:
+      error = offset (verbose);
+      break;
+
+    default:
+      error = clib_error_return (0, "test id %d unknown", test_id);
+      break;
+    }
+
+out:
+  if (error)
+    clib_error_report (error);
+
+  return 0;
+}
+
+
+
+/** Program entry: hand the command line to test_ssvm_fifo1(). */
+int
+main (int argc, char *argv[])
+{
+  unformat_input_t i;
+  int r;
+
+  unformat_init_command_line (&i, argv);
+  r = test_ssvm_fifo1 (&i);
+  unformat_free (&i);
+  return r;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/uri.am b/src/uri.am
new file mode 100644
index 00000000..8cdd77c6
--- /dev/null
+++ b/src/uri.am
@@ -0,0 +1,22 @@
+# Copyright (c) 2016 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+noinst_PROGRAMS += uri_udp_test2 uri_tcp_test
+
+uri_udp_test2_SOURCES = uri/uri_udp_test2.c
+uri_udp_test2_LDADD = libvlibmemoryclient.la libvlibapi.la libsvm.la \
+  libvppinfra.la -lpthread -lm -lrt
+
+uri_tcp_test_SOURCES = uri/uri_tcp_test.c
+uri_tcp_test_LDADD = libvlibmemoryclient.la libvlibapi.la libsvm.la \
+  libvppinfra.la -lpthread -lm -lrt
diff --git a/src/uri/uri_tcp_test.c b/src/uri/uri_tcp_test.c
new file mode 100644
index 00000000..ed5a37d8
--- /dev/null
+++ b/src/uri/uri_tcp_test.c
@@ -0,0 +1,916 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "../vnet/session/application_interface.h" + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +/* declare message handlers for each api */ + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) +#define vl_printfun +#include +#undef vl_printfun + +/* Satisfy external references when not linking with -lvlib */ +vlib_main_t vlib_global_main; +vlib_main_t **vlib_mains; + +typedef struct +{ + svm_fifo_t * server_rx_fifo; + svm_fifo_t * server_tx_fifo; + + u32 vpp_session_index; + u32 vpp_session_thread; +} session_t; + +typedef enum +{ + STATE_START, + STATE_READY, + STATE_DISCONNECTING, + STATE_FAILED +} connection_state_t; + +typedef struct +{ + /* vpe input queue */ + unix_shared_memory_queue_t *vl_input_queue; + + /* API client handle */ + u32 my_client_index; + + /* The URI we're playing with */ + u8 * uri; + + /* Session pool */ + session_t * sessions; + + /* Hash table for disconnect processing */ + uword * session_index_by_vpp_handles; + + /* intermediate rx buffer */ + u8 * rx_buf; + + /* URI for slave's connect */ + u8 * connect_uri; + + u32 connected_session_index; + + int i_am_master; + + /* drop all packets */ + int drop_packets; + + /* Our event queue */ + unix_shared_memory_queue_t * our_event_queue; + + /* $$$ single thread only for the moment */ + unix_shared_memory_queue_t * vpp_event_queue; 
+ + pid_t my_pid; + + /* For deadman timers */ + clib_time_t clib_time; + + /* State of the connection, shared between msg RX thread and main thread */ + volatile connection_state_t state; + + /* Signal variables */ + volatile int time_to_stop; + volatile int time_to_print_stats; + + u32 configured_segment_size; + + /* VNET_API_ERROR_FOO -> "Foo" hash table */ + uword * error_string_by_error_number; + + /* convenience */ + svm_fifo_segment_main_t * segment_main; + + u8 *connect_test_data; +} uri_tcp_test_main_t; + +uri_tcp_test_main_t uri_tcp_test_main; + +#if CLIB_DEBUG > 0 +#define NITER 10000 +#else +#define NITER 4000000 +#endif + +int +wait_for_state_change (uri_tcp_test_main_t * utm, connection_state_t state) +{ +#if CLIB_DEBUG > 0 +#define TIMEOUT 600.0 +#else +#define TIMEOUT 600.0 +#endif + + f64 timeout = clib_time_now (&utm->clib_time) + TIMEOUT; + + while (clib_time_now (&utm->clib_time) < timeout) + { + if (utm->state == state) + return 0; + if (utm->state == STATE_FAILED) + return -1; + } + clib_warning ("timeout waiting for STATE_READY"); + return -1; +} + +static void +init_error_string_table (uri_tcp_test_main_t * utm) +{ + utm->error_string_by_error_number = hash_create (0, sizeof (uword)); + +#define _(n,v,s) hash_set (utm->error_string_by_error_number, -v, s); + foreach_vnet_api_error; +#undef _ + + hash_set (utm->error_string_by_error_number, 99, "Misc"); +} + +static void +stop_signal (int signum) +{ + uri_tcp_test_main_t *um = &uri_tcp_test_main; + + um->time_to_stop = 1; +} + +static void +stats_signal (int signum) +{ + uri_tcp_test_main_t *um = &uri_tcp_test_main; + + um->time_to_print_stats = 1; +} + +static clib_error_t * +setup_signal_handlers (void) +{ + signal (SIGINT, stats_signal); + signal (SIGQUIT, stop_signal); + signal (SIGTERM, stop_signal); + + return 0; +} + +void +vlib_cli_output (struct vlib_main_t *vm, char *fmt, ...) 
+{ + clib_warning ("BUG"); +} + +int +connect_to_vpp (char *name) +{ + uri_tcp_test_main_t *utm = &uri_tcp_test_main; + api_main_t *am = &api_main; + + if (vl_client_connect_to_vlib ("/vpe-api", name, 32) < 0) + return -1; + + utm->vl_input_queue = am->shmem_hdr->vl_input_queue; + utm->my_client_index = am->my_client_index; + + return 0; +} + +static void +vl_api_map_another_segment_t_handler (vl_api_map_another_segment_t *mp) +{ + svm_fifo_segment_create_args_t _a, *a = &_a; + int rv; + + a->segment_name = (char *) mp->segment_name; + a->segment_size = mp->segment_size; + /* Attach to the segment vpp created */ + rv = svm_fifo_segment_attach (a); + if (rv) + { + clib_warning ("svm_fifo_segment_attach ('%s') failed", + mp->segment_name); + return; + } + clib_warning ("Mapped new segment '%s' size %d", mp->segment_name, + mp->segment_size); +} + +static void +vl_api_disconnect_session_t_handler (vl_api_disconnect_session_t * mp) +{ + uri_tcp_test_main_t *utm = &uri_tcp_test_main; + session_t * session; + vl_api_disconnect_session_reply_t * rmp; + uword * p; + int rv = 0; + u64 key; + + key = (((u64)mp->session_thread_index) << 32) | (u64)mp->session_index; + + p = hash_get (utm->session_index_by_vpp_handles, key); + + if (p) + { + session = pool_elt_at_index (utm->sessions, p[0]); + hash_unset (utm->session_index_by_vpp_handles, key); + pool_put (utm->sessions, session); + } + else + { + clib_warning ("couldn't find session key %llx", key); + rv = -11; + } + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + + rmp->_vl_msg_id = ntohs (VL_API_DISCONNECT_SESSION_REPLY); + rmp->retval = rv; + rmp->session_index = mp->session_index; + rmp->session_thread_index = mp->session_thread_index; + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *)&rmp); +} + +static void +vl_api_reset_session_t_handler (vl_api_reset_session_t * mp) +{ + uri_tcp_test_main_t *utm = &uri_tcp_test_main; + session_t * session; + vl_api_reset_session_reply_t * rmp; + uword 
* p; + int rv = 0; + u64 key; + + key = (((u64)mp->session_thread_index) << 32) | (u64)mp->session_index; + + p = hash_get(utm->session_index_by_vpp_handles, key); + + if (p) + { + session = pool_elt_at_index(utm->sessions, p[0]); + hash_unset(utm->session_index_by_vpp_handles, key); + pool_put(utm->sessions, session); + } + else + { + clib_warning("couldn't find session key %llx", key); + rv = -11; + } + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + /* rmp is a vl_api_reset_session_reply_t, so it must carry the reset reply id */ + rmp->_vl_msg_id = ntohs (VL_API_RESET_SESSION_REPLY); + rmp->retval = rv; + rmp->session_index = mp->session_index; + rmp->session_thread_index = mp->session_thread_index; + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *)&rmp); +} + +void +handle_fifo_event_connect_rx (uri_tcp_test_main_t *utm, session_fifo_event_t * e) +{ + svm_fifo_t * rx_fifo; + int n_read, bytes; + + rx_fifo = e->fifo; + + bytes = e->enqueue_length; + do + { + n_read = svm_fifo_dequeue_nowait (rx_fifo, 0, vec_len(utm->rx_buf), + utm->rx_buf); + if (n_read > 0) + bytes -= n_read; + } + while (n_read < 0 || bytes > 0); + + // bytes_to_read = svm_fifo_max_dequeue (rx_fifo); + // + // bytes_to_read = vec_len(utm->rx_buf) > bytes_to_read ? 
+ // bytes_to_read : vec_len(utm->rx_buf); + // + // buffer_offset = 0; + // while (bytes_to_read > 0) + // { + // rv = svm_fifo_dequeue_nowait2 (rx_fifo, mypid, + // bytes_to_read, + // utm->rx_buf + buffer_offset); + // if (rv > 0) + // { + // bytes_to_read -= rv; + // buffer_offset += rv; + // bytes_received += rv; + // } + // } + + + // while (bytes_received < bytes_sent) + // { + // rv = svm_fifo_dequeue_nowait2 (rx_fifo, mypid, + // vec_len (utm->rx_buf), + // utm->rx_buf); + // if (rv > 0) + // { + //#if CLIB_DEBUG > 0 + // int j; + // for (j = 0; j < rv; j++) + // { + // if (utm->rx_buf[j] != ((bytes_received + j) & 0xff)) + // { + // clib_warning ("error at byte %lld, 0x%x not 0x%x", + // bytes_received + j, + // utm->rx_buf[j], + // ((bytes_received + j )&0xff)); + // } + // } + //#endif + // bytes_received += (u64) rv; + // } + // } +} + +void +handle_connect_event_queue (uri_tcp_test_main_t * utm) +{ + session_fifo_event_t _e, *e = &_e;; + + unix_shared_memory_queue_sub (utm->our_event_queue, (u8 *) e, 0 /* nowait */); + switch (e->event_type) + { + case FIFO_EVENT_SERVER_RX: + handle_fifo_event_connect_rx (utm, e); + break; + + case FIFO_EVENT_SERVER_EXIT: + return; + + default: + clib_warning("unknown event type %d", e->event_type); + break; + } +} + +void +uri_tcp_connect_send (uri_tcp_test_main_t *utm) +{ + u8 *test_data = utm->connect_test_data; + u64 bytes_sent = 0; + int rv; + int mypid = getpid(); + session_t * session; + svm_fifo_t *tx_fifo; + int buffer_offset, bytes_to_send = 0; + session_fifo_event_t evt; + static int serial_number = 0; + int i; + u32 max_chunk = 64 << 10, write; + + session = pool_elt_at_index (utm->sessions, utm->connected_session_index); + tx_fifo = session->server_tx_fifo; + + vec_validate (utm->rx_buf, vec_len (test_data) - 1); + + for (i = 0; i < 10; i++) + { + bytes_to_send = vec_len (test_data); + buffer_offset = 0; + while (bytes_to_send > 0) + { + write = bytes_to_send > max_chunk ? 
max_chunk : bytes_to_send; + rv = svm_fifo_enqueue_nowait (tx_fifo, mypid, write, + test_data + buffer_offset); + + if (rv > 0) + { + bytes_to_send -= rv; + buffer_offset += rv; + bytes_sent += rv; + + /* Fabricate TX event, send to vpp */ + evt.fifo = tx_fifo; + evt.event_type = FIFO_EVENT_SERVER_TX; + /* $$$$ for event logging */ + evt.enqueue_length = rv; + evt.event_id = serial_number++; + + unix_shared_memory_queue_add (utm->vpp_event_queue, (u8 *) &evt, + 0 /* do wait for mutex */); + } + } + } +} + +static void +uri_tcp_client_test (uri_tcp_test_main_t * utm) +{ + vl_api_connect_uri_t * cmp; + vl_api_disconnect_session_t *dmp; + session_t *connected_session; + int i; + + cmp = vl_msg_api_alloc (sizeof (*cmp)); + memset (cmp, 0, sizeof (*cmp)); + + cmp->_vl_msg_id = ntohs (VL_API_CONNECT_URI); + cmp->client_index = utm->my_client_index; + cmp->context = ntohl(0xfeedface); + memcpy (cmp->uri, utm->connect_uri, vec_len (utm->connect_uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *)&cmp); + + if (wait_for_state_change (utm, STATE_READY)) + { + return; + } + + /* Init test data */ + vec_validate (utm->connect_test_data, 64 * 1024 - 1); + for (i = 0; i < vec_len (utm->connect_test_data); i++) + utm->connect_test_data[i] = i & 0xff; + + /* Start reader thread */ + /* handle_connect_event_queue (utm); */ + + /* Start send */ + uri_tcp_connect_send (utm); + + /* Disconnect */ + connected_session = pool_elt_at_index(utm->sessions, + utm->connected_session_index); + dmp = vl_msg_api_alloc (sizeof (*dmp)); + memset (dmp, 0, sizeof (*dmp)); + dmp->_vl_msg_id = ntohs (VL_API_DISCONNECT_SESSION); + dmp->client_index = utm->my_client_index; + dmp->session_index = connected_session->vpp_session_index; + dmp->session_thread_index = connected_session->vpp_session_thread; + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *)&dmp); +} + +void +handle_fifo_event_server_rx (uri_tcp_test_main_t *utm, session_fifo_event_t * e) +{ + svm_fifo_t * rx_fifo, * tx_fifo; + int 
n_read; + + session_fifo_event_t evt; + unix_shared_memory_queue_t *q; + int rv, bytes; + + rx_fifo = e->fifo; + tx_fifo = utm->sessions[rx_fifo->client_session_index].server_tx_fifo; + + bytes = e->enqueue_length; + do + { + n_read = svm_fifo_dequeue_nowait (rx_fifo, 0, vec_len(utm->rx_buf), + utm->rx_buf); + + /* Reflect if a non-drop session */ + if (!utm->drop_packets && n_read > 0) + { + do + { + rv = svm_fifo_enqueue_nowait (tx_fifo, 0, n_read, utm->rx_buf); + } + while (rv == -2); + + /* Fabricate TX event, send to vpp */ + evt.fifo = tx_fifo; + evt.event_type = FIFO_EVENT_SERVER_TX; + /* $$$$ for event logging */ + evt.enqueue_length = n_read; + evt.event_id = e->event_id; + q = utm->vpp_event_queue; + unix_shared_memory_queue_add (q, (u8 *) &evt, 0 /* do wait for mutex */); + } + + if (n_read > 0) + bytes -= n_read; + } + while (n_read < 0 || bytes > 0); +} + +void +handle_event_queue (uri_tcp_test_main_t * utm) +{ + session_fifo_event_t _e, *e = &_e;; + + while (1) + { + unix_shared_memory_queue_sub (utm->our_event_queue, (u8 *)e, + 0 /* nowait */); + switch (e->event_type) + { + case FIFO_EVENT_SERVER_RX: + handle_fifo_event_server_rx (utm, e); + break; + + case FIFO_EVENT_SERVER_EXIT: + return; + + default: + clib_warning ("unknown event type %d", e->event_type); + break; + } + if (PREDICT_FALSE(utm->time_to_stop == 1)) + break; + if (PREDICT_FALSE(utm->time_to_print_stats == 1)) + { + utm->time_to_print_stats = 0; + fformat(stdout, "%d connections\n", pool_elts (utm->sessions)); + } + } +} + +static void +vl_api_bind_uri_reply_t_handler (vl_api_bind_uri_reply_t * mp) +{ + uri_tcp_test_main_t *utm = &uri_tcp_test_main; + svm_fifo_segment_create_args_t _a, *a = &_a; + int rv; + + if (mp->retval) + { + clib_warning("bind failed: %d", mp->retval); + return; + } + + if (mp->segment_name_length == 0) + { + clib_warning("segment_name_length zero"); + return; + } + + a->segment_name = (char *) mp->segment_name; + a->segment_size = mp->segment_size; + + 
ASSERT(mp->server_event_queue_address); + + /* Attach to the segment vpp created */ + rv = svm_fifo_segment_attach (a); + if (rv) + { + clib_warning("svm_fifo_segment_attach ('%s') failed", mp->segment_name); + return; + } + + utm->our_event_queue = + (unix_shared_memory_queue_t *) mp->server_event_queue_address; + + utm->state = STATE_READY; +} + +static void +vl_api_connect_uri_reply_t_handler (vl_api_connect_uri_reply_t * mp) +{ + uri_tcp_test_main_t *utm = &uri_tcp_test_main; + svm_fifo_segment_create_args_t _a, *a = &_a; + session_t *session; + u32 session_index; + svm_fifo_t *rx_fifo, *tx_fifo; + int rv; + + if (mp->retval) + { + clib_warning ("connection failed with code: %d", mp->retval); + utm->state = STATE_FAILED; + return; + } + /* + * Attatch to segment + */ + + if (mp->segment_name_length == 0) + { + clib_warning ("segment_name_length zero"); + utm->state = STATE_FAILED; + return; + } + + a->segment_name = (char *) mp->segment_name; + a->segment_size = mp->segment_size; + + ASSERT(mp->client_event_queue_address); + + /* Attach to the segment vpp created */ + rv = svm_fifo_segment_attach (a); + if (rv) + { + clib_warning ("svm_fifo_segment_attach ('%s') failed", + mp->segment_name); + return; + } + + /* + * Save the queues + */ + + utm->our_event_queue = (unix_shared_memory_queue_t *) + mp->client_event_queue_address; + + utm->vpp_event_queue = (unix_shared_memory_queue_t *) + mp->vpp_event_queue_address; + + /* + * Setup session + */ + + pool_get (utm->sessions, session); + session_index = session - utm->sessions; + + rx_fifo = (svm_fifo_t *)mp->server_rx_fifo; + rx_fifo->client_session_index = session_index; + tx_fifo = (svm_fifo_t *)mp->server_tx_fifo; + tx_fifo->client_session_index = session_index; + + session->server_rx_fifo = rx_fifo; + session->server_tx_fifo = tx_fifo; + session->vpp_session_index = mp->session_index; + session->vpp_session_thread = mp->session_thread_index; + + /* Save handle */ + utm->connected_session_index = session_index; 
+ + utm->state = STATE_READY; +} + +void +uri_tcp_bind (uri_tcp_test_main_t *utm) +{ + vl_api_bind_uri_t * bmp; + u32 fifo_size = 3 << 20; + bmp = vl_msg_api_alloc (sizeof (*bmp)); + memset (bmp, 0, sizeof (*bmp)); + + bmp->_vl_msg_id = ntohs (VL_API_BIND_URI); + bmp->client_index = utm->my_client_index; + bmp->context = ntohl(0xfeedface); + bmp->initial_segment_size = 256<<20; /* size of initial segment */ + bmp->options[SESSION_OPTIONS_FLAGS] = + SESSION_OPTIONS_FLAGS_USE_FIFO | SESSION_OPTIONS_FLAGS_ADD_SEGMENT; + bmp->options[SESSION_OPTIONS_RX_FIFO_SIZE] = fifo_size; + bmp->options[SESSION_OPTIONS_TX_FIFO_SIZE] = fifo_size; + bmp->options[SESSION_OPTIONS_ADD_SEGMENT_SIZE] = 128<<20; + memcpy (bmp->uri, utm->uri, vec_len (utm->uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *)&bmp); +} + +static void +vl_api_unbind_uri_reply_t_handler (vl_api_unbind_uri_reply_t *mp) +{ + uri_tcp_test_main_t *utm = &uri_tcp_test_main; + + if (mp->retval != 0) + clib_warning ("returned %d", ntohl(mp->retval)); + + utm->state = STATE_START; +} + +void +uri_tcp_unbind (uri_tcp_test_main_t *utm) +{ + vl_api_unbind_uri_t * ump; + + ump = vl_msg_api_alloc (sizeof (*ump)); + memset (ump, 0, sizeof (*ump)); + + ump->_vl_msg_id = ntohs (VL_API_UNBIND_URI); + ump->client_index = utm->my_client_index; + memcpy (ump->uri, utm->uri, vec_len (utm->uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *)&ump); +} + +static void +vl_api_accept_session_t_handler (vl_api_accept_session_t * mp) +{ + uri_tcp_test_main_t *utm = &uri_tcp_test_main; + vl_api_accept_session_reply_t *rmp; + svm_fifo_t * rx_fifo, * tx_fifo; + session_t * session; + static f64 start_time; + u64 key; + u32 session_index; + + if (start_time == 0.0) + start_time = clib_time_now (&utm->clib_time); + + utm->vpp_event_queue = (unix_shared_memory_queue_t *) + mp->vpp_event_queue_address; + + /* Allocate local session and set it up */ + pool_get (utm->sessions, session); + session_index = session - utm->sessions; + + 
rx_fifo = (svm_fifo_t *)mp->server_rx_fifo; + rx_fifo->client_session_index = session_index; + tx_fifo = (svm_fifo_t *)mp->server_tx_fifo; + tx_fifo->client_session_index = session_index; + + session->server_rx_fifo = rx_fifo; + session->server_tx_fifo = tx_fifo; + + /* Add it to lookup table */ + key = (((u64)mp->session_thread_index) << 32) | (u64)mp->session_index; + hash_set (utm->session_index_by_vpp_handles, key, session_index); + + utm->state = STATE_READY; + + /* Stats printing */ + if (pool_elts (utm->sessions) && (pool_elts(utm->sessions) % 20000) == 0) + { + f64 now = clib_time_now (&utm->clib_time); + fformat (stdout, "%d active sessions in %.2f seconds, %.2f/sec...\n", + pool_elts(utm->sessions), now - start_time, + (f64)pool_elts(utm->sessions) / (now - start_time)); + } + + /* Send accept reply to vpp */ + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_ACCEPT_SESSION_REPLY); + rmp->session_type = mp->session_type; + rmp->session_index = mp->session_index; + rmp->session_thread_index = mp->session_thread_index; + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *)&rmp); +} + +void +uri_tcp_server_test (uri_tcp_test_main_t * utm) +{ + + /* Bind to uri */ + uri_tcp_bind (utm); + + if (wait_for_state_change (utm, STATE_READY)) + { + clib_warning ("timeout waiting for STATE_READY"); + return; + } + + /* Enter handle event loop */ + handle_event_queue (utm); + + /* Cleanup */ + uri_tcp_unbind (utm); + + if (wait_for_state_change (utm, STATE_START)) + { + clib_warning ("timeout waiting for STATE_START"); + return; + } + + fformat (stdout, "Test complete...\n"); +} + +#define foreach_uri_msg \ +_(BIND_URI_REPLY, bind_uri_reply) \ +_(UNBIND_URI_REPLY, unbind_uri_reply) \ +_(ACCEPT_SESSION, accept_session) \ +_(CONNECT_URI_REPLY, connect_uri_reply) \ +_(DISCONNECT_SESSION, disconnect_session) \ +_(RESET_SESSION, reset_session) \ +_(MAP_ANOTHER_SEGMENT, map_another_segment) + +void +uri_api_hookup 
(uri_tcp_test_main_t * utm) +{ +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_uri_msg; +#undef _ +} + +/* + * Entry point: set up the heap and test state, parse the command line + * (chroot prefix, uri, segment-size, master, slave, drop), then connect + * to vpp and run the client or server test. + */ +int +main (int argc, char **argv) +{ + uri_tcp_test_main_t *utm = &uri_tcp_test_main; + unformat_input_t _argv, *a = &_argv; + u8 *chroot_prefix; + u8 *heap; + u8 * bind_name = (u8 *) "tcp://0.0.0.0/1234"; + u32 tmp; + mheap_t *h; + session_t * session; + int i; + int i_am_master = 1, drop_packets = 0; + + clib_mem_init (0, 256 << 20); + + heap = clib_mem_get_per_cpu_heap (); + h = mheap_header (heap); + + /* make the main heap thread-safe */ + h->flags |= MHEAP_FLAG_THREAD_SAFE; + + vec_validate (utm->rx_buf, 65536); + + utm->session_index_by_vpp_handles = + hash_create (0, sizeof(uword)); + + utm->my_pid = getpid(); + utm->configured_segment_size = 1<<20; + + clib_time_init (&utm->clib_time); + init_error_string_table (utm); + svm_fifo_segment_init(0x200000000ULL, 20); + unformat_init_command_line (a, argv); + + while (unformat_check_input (a) != UNFORMAT_END_OF_INPUT) + { + if (unformat (a, "chroot prefix %s", &chroot_prefix)) + { + vl_set_memory_root_path ((char *) chroot_prefix); + } + else if (unformat (a, "uri %s", &bind_name)) + ; + else if (unformat (a, "segment-size %dM", &tmp)) + utm->configured_segment_size = tmp<<20; + else if (unformat (a, "segment-size %dG", &tmp)) + utm->configured_segment_size = tmp<<30; + else if (unformat (a, "master")) + i_am_master = 1; + else if (unformat (a, "slave")) + i_am_master = 0; + else if (unformat (a, "drop")) + drop_packets = 1; + else + { + /* the %s conversion consumes the program name */ + fformat (stderr, "%s: usage [master|slave]\n", argv[0]); + exit (1); + } + } + + utm->uri = format (0, "%s%c", bind_name, 0); + utm->i_am_master = i_am_master; + utm->segment_main = &svm_fifo_segment_main; + utm->drop_packets = drop_packets; + + utm->connect_uri = format (0, "tcp://6.0.1.2/1234%c", 0); + + setup_signal_handlers(); + 
uri_api_hookup (utm); + + if (connect_to_vpp (i_am_master? "uri_tcp_server":"uri_tcp_client") < 0) + { + svm_region_exit (); + fformat (stderr, "Couldn't connect to vpe, exiting...\n"); + exit (1); + } + + if (i_am_master == 0) + { + uri_tcp_client_test (utm); + exit (0); + } + + /* $$$$ hack preallocation */ + for (i = 0; i < 200000; i++) + { + pool_get (utm->sessions, session); + memset (session, 0, sizeof (*session)); + } + for (i = 0; i < 200000; i++) + pool_put_index (utm->sessions, i); + + uri_tcp_server_test (utm); + + vl_client_disconnect_from_vlib (); + exit (0); +} diff --git a/src/uri/uri_udp_test.c b/src/uri/uri_udp_test.c new file mode 100644 index 00000000..6f5284c9 --- /dev/null +++ b/src/uri/uri_udp_test.c @@ -0,0 +1,553 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +/* declare message handlers for each api */ + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) 
+#define vl_printfun +#include +#undef vl_printfun + +/* Satisfy external references when not linking with -lvlib */ +vlib_main_t vlib_global_main; +vlib_main_t **vlib_mains; + +typedef enum +{ + STATE_START, + STATE_READY, + STATE_DISCONNECTING, +} connection_state_t; + +typedef struct +{ + svm_fifo_t *server_rx_fifo; + svm_fifo_t *server_tx_fifo; +} session_t; + +typedef struct +{ + /* vpe input queue */ + unix_shared_memory_queue_t *vl_input_queue; + + /* API client handle */ + u32 my_client_index; + + /* The URI we're playing with */ + u8 *uri; + + /* Session pool */ + session_t *sessions; + + /* Hash table for disconnect processing */ + uword *session_index_by_vpp_handles; + + /* fifo segment */ + svm_fifo_segment_private_t *seg; + + /* intermediate rx buffer */ + u8 *rx_buf; + + /* Our event queue */ + unix_shared_memory_queue_t *our_event_queue; + + /* $$$ single thread only for the moment */ + unix_shared_memory_queue_t *vpp_event_queue; + + /* For deadman timers */ + clib_time_t clib_time; + + /* State of the connection, shared between msg RX thread and main thread */ + volatile connection_state_t state; + + volatile int time_to_stop; + volatile int time_to_print_stats; + + /* VNET_API_ERROR_FOO -> "Foo" hash table */ + uword *error_string_by_error_number; +} uri_udp_test_main_t; + +#if CLIB_DEBUG > 0 +#define NITER 1000 +#else +#define NITER 1000000 +#endif + +uri_udp_test_main_t uri_udp_test_main; + +static void +stop_signal (int signum) +{ + uri_udp_test_main_t *um = &uri_udp_test_main; + + um->time_to_stop = 1; +} + +static void +stats_signal (int signum) +{ + uri_udp_test_main_t *um = &uri_udp_test_main; + + um->time_to_print_stats = 1; +} + +static clib_error_t * +setup_signal_handlers (void) +{ + signal (SIGINT, stats_signal); + signal (SIGQUIT, stop_signal); + signal (SIGTERM, stop_signal); + + return 0; +} + +u8 * +format_api_error (u8 * s, va_list * args) +{ + uri_udp_test_main_t *utm = va_arg (*args, uri_udp_test_main_t *); + i32 error = va_arg 
(*args, u32); + uword *p; + + p = hash_get (utm->error_string_by_error_number, -error); + + if (p) + s = format (s, "%s", p[0]); + else + s = format (s, "%d", error); + return s; +} + +int +wait_for_state_change (uri_udp_test_main_t * utm, connection_state_t state) +{ + f64 timeout = clib_time_now (&utm->clib_time) + 5.0; + + while (clib_time_now (&utm->clib_time) < timeout) + { + if (utm->state == state) + return 0; + } + return -1; +} + +static void +vl_api_bind_uri_reply_t_handler (vl_api_bind_uri_reply_t * mp) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + svm_fifo_segment_create_args_t _a, *a = &_a; + int rv; + + if (mp->segment_name_length == 0) + { + clib_warning ("segment_name_length zero"); + return; + } + + a->segment_name = (char *) mp->segment_name; + + /* Attach to the segment vpp created */ + rv = svm_fifo_segment_attach (a); + if (rv) + { + clib_warning ("sm_fifo_segment_create ('%s') failed", mp->segment_name); + return; + } + + utm->our_event_queue = (unix_shared_memory_queue_t *) + mp->server_event_queue_address; + + utm->state = STATE_READY; +} + +static void +vl_api_unbind_uri_reply_t_handler (vl_api_unbind_uri_reply_t * mp) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + + if (mp->retval != 0) + clib_warning ("returned %d", ntohl (mp->retval)); + + utm->state = STATE_START; +} + +static void +vl_api_accept_session_t_handler (vl_api_accept_session_t * mp) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + vl_api_accept_session_reply_t *rmp; + svm_fifo_t *rx_fifo, *tx_fifo; + session_t *session; + static f64 start_time; + u64 key; + + if (start_time == 0.0) + start_time = clib_time_now (&utm->clib_time); + + utm->vpp_event_queue = (unix_shared_memory_queue_t *) + mp->vpp_event_queue_address; + + pool_get (utm->sessions, session); + + rx_fifo = (svm_fifo_t *) mp->server_rx_fifo; + rx_fifo->client_session_index = session - utm->sessions; + tx_fifo = (svm_fifo_t *) mp->server_tx_fifo; + tx_fifo->client_session_index = session - 
utm->sessions; + + session->server_rx_fifo = rx_fifo; + session->server_tx_fifo = tx_fifo; + + key = (((u64) mp->session_thread_index) << 32) | (u64) mp->session_index; + + hash_set (utm->session_index_by_vpp_handles, key, session - utm->sessions); + + utm->state = STATE_READY; + + if (pool_elts (utm->sessions) && (pool_elts (utm->sessions) % 20000) == 0) + { + f64 now = clib_time_now (&utm->clib_time); + fformat (stdout, "%d active sessions in %.2f seconds, %.2f/sec...\n", + pool_elts (utm->sessions), now - start_time, + (f64) pool_elts (utm->sessions) / (now - start_time)); + } + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_ACCEPT_SESSION_REPLY); + rmp->session_type = mp->session_type; + rmp->session_index = mp->session_index; + rmp->session_thread_index = mp->session_thread_index; + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & rmp); +} + +static void +vl_api_disconnect_session_t_handler (vl_api_disconnect_session_t * mp) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + session_t *session; + vl_api_disconnect_session_reply_t *rmp; + uword *p; + int rv = 0; + u64 key; + + key = (((u64) mp->session_thread_index) << 32) | (u64) mp->session_index; + + p = hash_get (utm->session_index_by_vpp_handles, key); + + if (p) + { + session = pool_elt_at_index (utm->sessions, p[0]); + hash_unset (utm->session_index_by_vpp_handles, key); + pool_put (utm->sessions, session); + } + else + { + clib_warning ("couldn't find session key %llx", key); + rv = -11; + } + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_DISCONNECT_SESSION_REPLY); + rmp->retval = rv; + rmp->session_index = mp->session_index; + rmp->session_thread_index = mp->session_thread_index; + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & rmp); +} + +#define foreach_uri_msg \ +_(BIND_URI_REPLY, bind_uri_reply) \ +_(UNBIND_URI_REPLY, unbind_uri_reply) \ +_(ACCEPT_SESSION, 
accept_session) \ +_(DISCONNECT_SESSION, disconnect_session) + +void +uri_api_hookup (uri_udp_test_main_t * utm) +{ +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_uri_msg; +#undef _ + +} + + +int +connect_to_vpp (char *name) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + api_main_t *am = &api_main; + + if (vl_client_connect_to_vlib ("/vpe-api", name, 32) < 0) + return -1; + + utm->vl_input_queue = am->shmem_hdr->vl_input_queue; + utm->my_client_index = am->my_client_index; + + return 0; +} + +void +vlib_cli_output (struct vlib_main_t *vm, char *fmt, ...) +{ + clib_warning ("BUG"); +} + +static void +init_error_string_table (uri_udp_test_main_t * utm) +{ + utm->error_string_by_error_number = hash_create (0, sizeof (uword)); + +#define _(n,v,s) hash_set (utm->error_string_by_error_number, -v, s); + foreach_vnet_api_error; +#undef _ + + hash_set (utm->error_string_by_error_number, 99, "Misc"); +} + +void +handle_fifo_event_server_rx (uri_udp_test_main_t * utm, + session_fifo_event_t * e) +{ + svm_fifo_t *rx_fifo, *tx_fifo; + int nbytes; + + session_fifo_event_t evt; + unix_shared_memory_queue_t *q; + int rv; + + rx_fifo = e->fifo; + tx_fifo = utm->sessions[rx_fifo->client_session_index].server_tx_fifo; + + do + { + nbytes = svm_fifo_dequeue_nowait (rx_fifo, 0, + vec_len (utm->rx_buf), utm->rx_buf); + } + while (nbytes <= 0); + do + { + rv = svm_fifo_enqueue_nowait (tx_fifo, 0, nbytes, utm->rx_buf); + } + while (rv == -2); + + /* Fabricate TX event, send to vpp */ + evt.fifo = tx_fifo; + evt.event_type = FIFO_EVENT_SERVER_TX; + /* $$$$ for event logging */ + evt.enqueue_length = nbytes; + evt.event_id = e->event_id; + q = utm->vpp_event_queue; + unix_shared_memory_queue_add (q, (u8 *) & evt, 0 /* do wait for mutex */ ); +} + +void +handle_event_queue (uri_udp_test_main_t * utm) +{ + session_fifo_event_t _e, 
*e = &_e;; + + while (1) + { + unix_shared_memory_queue_sub (utm->our_event_queue, (u8 *) e, + 0 /* nowait */ ); + switch (e->event_type) + { + case FIFO_EVENT_SERVER_RX: + handle_fifo_event_server_rx (utm, e); + break; + + case FIFO_EVENT_SERVER_EXIT: + return; + + default: + clib_warning ("unknown event type %d", e->event_type); + break; + } + if (PREDICT_FALSE (utm->time_to_stop == 1)) + break; + if (PREDICT_FALSE (utm->time_to_print_stats == 1)) + { + utm->time_to_print_stats = 0; + fformat (stdout, "%d connections\n", pool_elts (utm->sessions)); + } + } +} + +/* + * Run the UDP test: send a bind request for utm->uri, wait for STATE_READY, + * service the event queue until it returns, then send an unbind request + * and wait for STATE_START. + */ +void +uri_udp_test (uri_udp_test_main_t * utm) +{ + vl_api_bind_uri_t *bmp; + vl_api_unbind_uri_t *ump; + + bmp = vl_msg_api_alloc (sizeof (*bmp)); + memset (bmp, 0, sizeof (*bmp)); + + bmp->_vl_msg_id = ntohs (VL_API_BIND_URI); + bmp->client_index = utm->my_client_index; + bmp->context = ntohl (0xfeedface); + /* unsigned operand: 2 << 30 does not fit in a signed int */ + bmp->segment_size = 2U << 30; + memcpy (bmp->uri, utm->uri, vec_len (utm->uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & bmp); + + if (wait_for_state_change (utm, STATE_READY)) + { + clib_warning ("timeout waiting for STATE_READY"); + return; + } + + handle_event_queue (utm); + + ump = vl_msg_api_alloc (sizeof (*ump)); + memset (ump, 0, sizeof (*ump)); + + ump->_vl_msg_id = ntohs (VL_API_UNBIND_URI); + ump->client_index = utm->my_client_index; + memcpy (ump->uri, utm->uri, vec_len (utm->uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & ump); + + if (wait_for_state_change (utm, STATE_START)) + { + clib_warning ("timeout waiting for STATE_START"); + return; + } + + fformat (stdout, "Test complete...\n"); +} + +int +main (int argc, char **argv) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + unformat_input_t _argv, *a = &_argv; + u8 *chroot_prefix; + u8 *heap; + u8 *bind_name = (u8 *) "udp4:1234"; + mheap_t *h; + session_t *session; + int i; + + clib_mem_init (0, 256 << 20); + + heap = clib_mem_get_per_cpu_heap (); + h = mheap_header (heap); + + /* make the main heap 
thread-safe */ + h->flags |= MHEAP_FLAG_THREAD_SAFE; + + vec_validate (utm->rx_buf, 8192); + + utm->session_index_by_vpp_handles = hash_create (0, sizeof (uword)); + + clib_time_init (&utm->clib_time); + init_error_string_table (utm); + svm_fifo_segment_init (0x200000000ULL, 20); + unformat_init_command_line (a, argv); + + while (unformat_check_input (a) != UNFORMAT_END_OF_INPUT) + { + if (unformat (a, "chroot prefix %s", &chroot_prefix)) + { + vl_set_memory_root_path ((char *) chroot_prefix); + } + else if (unformat (a, "uri %s", &bind_name)) + ; + else + { + /* the %s conversion consumes the program name */ + fformat (stderr, "%s: usage [master|slave]\n", argv[0]); + exit (1); + } + } + + utm->uri = format (0, "%s%c", bind_name, 0); + + setup_signal_handlers (); + + uri_api_hookup (utm); + + if (connect_to_vpp ("uri_udp_test") < 0) + { + svm_region_exit (); + fformat (stderr, "Couldn't connect to vpe, exiting...\n"); + exit (1); + } + + /* $$$$ hack preallocation */ + for (i = 0; i < 200000; i++) + { + pool_get (utm->sessions, session); + memset (session, 0, sizeof (*session)); + } + for (i = 0; i < 200000; i++) + pool_put_index (utm->sessions, i); + + uri_udp_test (utm); + + vl_client_disconnect_from_vlib (); + exit (0); +} + +#undef vl_api_version +#define vl_api_version(n,v) static u32 vpe_api_version = v; +#include +#undef vl_api_version + +void +vl_client_add_api_signatures (vl_api_memclnt_create_t * mp) +{ + /* + * Send the main API signature in slot 0. This bit of code must + * match the checks in ../vpe/api/api.c: vl_msg_api_version_check(). + */ + mp->api_versions[0] = clib_host_to_net_u32 (vpe_api_version); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/uri/uri_udp_test2.c b/src/uri/uri_udp_test2.c new file mode 100644 index 00000000..ddfffaa6 --- /dev/null +++ b/src/uri/uri_udp_test2.c @@ -0,0 +1,954 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../vnet/session/application_interface.h" + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +/* declare message handlers for each api */ + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) 
+#define vl_printfun +#include +#undef vl_printfun + +/* Satisfy external references when not linking with -lvlib */ +vlib_main_t vlib_global_main; +vlib_main_t **vlib_mains; + +typedef enum +{ + STATE_START, + STATE_READY, + STATE_DISCONNECTING, +} connection_state_t; + +typedef struct +{ + svm_fifo_t *server_rx_fifo; + svm_fifo_t *server_tx_fifo; +} session_t; + +typedef struct +{ + /* vpe input queue */ + unix_shared_memory_queue_t *vl_input_queue; + + /* API client handle */ + u32 my_client_index; + + /* The URI we're playing with */ + u8 *uri; + + /* Session pool */ + session_t *sessions; + + /* Hash table for disconnect processing */ + uword *session_index_by_vpp_handles; + + /* fifo segment */ + svm_fifo_segment_private_t *seg; + + /* intermediate rx buffer */ + u8 *rx_buf; + + /* URI for connect */ + u8 *connect_uri; + + int i_am_master; + + /* Our event queue */ + unix_shared_memory_queue_t *our_event_queue; + + /* $$$ single thread only for the moment */ + unix_shared_memory_queue_t *vpp_event_queue; + + /* $$$$ hack: cut-through session index */ + volatile u32 cut_through_session_index; + + /* unique segment name counter */ + u32 unique_segment_index; + + pid_t my_pid; + + /* pthread handle */ + pthread_t cut_through_thread_handle; + + /* For deadman timers */ + clib_time_t clib_time; + + /* State of the connection, shared between msg RX thread and main thread */ + volatile connection_state_t state; + + volatile int time_to_stop; + volatile int time_to_print_stats; + + u32 configured_segment_size; + + /* VNET_API_ERROR_FOO -> "Foo" hash table */ + uword *error_string_by_error_number; + + /* convenience */ + svm_fifo_segment_main_t *segment_main; + +} uri_udp_test_main_t; + +#if CLIB_DEBUG > 0 +#define NITER 10000 +#else +#define NITER 4000000 +#endif + +uri_udp_test_main_t uri_udp_test_main; + +static void +stop_signal (int signum) +{ + uri_udp_test_main_t *um = &uri_udp_test_main; + + um->time_to_stop = 1; +} + +static void +stats_signal (int signum) +{ 
+ uri_udp_test_main_t *um = &uri_udp_test_main; + + um->time_to_print_stats = 1; +} + +static clib_error_t * +setup_signal_handlers (void) +{ + signal (SIGINT, stats_signal); + signal (SIGQUIT, stop_signal); + signal (SIGTERM, stop_signal); + + return 0; +} + +u8 * +format_api_error (u8 * s, va_list * args) +{ + uri_udp_test_main_t *utm = va_arg (*args, uri_udp_test_main_t *); + i32 error = va_arg (*args, u32); + uword *p; + + p = hash_get (utm->error_string_by_error_number, -error); + + if (p) + s = format (s, "%s", p[0]); + else + s = format (s, "%d", error); + return s; +} + +int +wait_for_state_change (uri_udp_test_main_t * utm, connection_state_t state) +{ +#if CLIB_DEBUG > 0 +#define TIMEOUT 600.0 +#else +#define TIMEOUT 600.0 +#endif + + f64 timeout = clib_time_now (&utm->clib_time) + TIMEOUT; + + while (clib_time_now (&utm->clib_time) < timeout) + { + if (utm->state == state) + return 0; + } + return -1; +} + +u64 server_bytes_received, server_bytes_sent; + +static void * +cut_through_thread_fn (void *arg) +{ + session_t *s; + svm_fifo_t *rx_fifo; + svm_fifo_t *tx_fifo; + u8 *my_copy_buffer = 0; + uri_udp_test_main_t *utm = &uri_udp_test_main; + i32 actual_transfer; + int rv; + u32 buffer_offset; + + while (utm->cut_through_session_index == ~0) + ; + + s = pool_elt_at_index (utm->sessions, utm->cut_through_session_index); + + rx_fifo = s->server_rx_fifo; + tx_fifo = s->server_tx_fifo; + + vec_validate (my_copy_buffer, 64 * 1024 - 1); + + while (true) + { + /* We read from the tx fifo and write to the rx fifo */ + do + { + actual_transfer = svm_fifo_dequeue_nowait (tx_fifo, 0, + vec_len (my_copy_buffer), + my_copy_buffer); + } + while (actual_transfer <= 0); + + server_bytes_received += actual_transfer; + + buffer_offset = 0; + while (actual_transfer > 0) + { + rv = svm_fifo_enqueue_nowait (rx_fifo, 0, actual_transfer, + my_copy_buffer + buffer_offset); + if (rv > 0) + { + actual_transfer -= rv; + buffer_offset += rv; + server_bytes_sent += rv; + } + + } + 
if (PREDICT_FALSE (utm->time_to_stop)) + break; + } + + pthread_exit (0); +} + +static void +uri_udp_slave_test (uri_udp_test_main_t * utm) +{ + vl_api_connect_uri_t *cmp; + int i; + u8 *test_data = 0; + u64 bytes_received = 0, bytes_sent = 0; + i32 bytes_to_read; + int rv; + int mypid = getpid (); + f64 before, after, delta, bytes_per_second; + session_t *session; + svm_fifo_t *rx_fifo, *tx_fifo; + int buffer_offset, bytes_to_send = 0; + + vec_validate (test_data, 64 * 1024 - 1); + for (i = 0; i < vec_len (test_data); i++) + test_data[i] = i & 0xff; + + cmp = vl_msg_api_alloc (sizeof (*cmp)); + memset (cmp, 0, sizeof (*cmp)); + + cmp->_vl_msg_id = ntohs (VL_API_CONNECT_URI); + cmp->client_index = utm->my_client_index; + cmp->context = ntohl (0xfeedface); + memcpy (cmp->uri, utm->connect_uri, vec_len (utm->connect_uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & cmp); + + if (wait_for_state_change (utm, STATE_READY)) + { + clib_warning ("timeout waiting for STATE_READY"); + return; + } + + session = pool_elt_at_index (utm->sessions, utm->cut_through_session_index); + rx_fifo = session->server_rx_fifo; + tx_fifo = session->server_tx_fifo; + + before = clib_time_now (&utm->clib_time); + + vec_validate (utm->rx_buf, vec_len (test_data) - 1); + + for (i = 0; i < NITER; i++) + { + bytes_to_send = vec_len (test_data); + buffer_offset = 0; + while (bytes_to_send > 0) + { + rv = svm_fifo_enqueue_nowait (tx_fifo, mypid, + bytes_to_send, + test_data + buffer_offset); + + if (rv > 0) + { + bytes_to_send -= rv; + buffer_offset += rv; + bytes_sent += rv; + } + } + + bytes_to_read = svm_fifo_max_dequeue (rx_fifo); + + bytes_to_read = vec_len (utm->rx_buf) > bytes_to_read ? 
+ bytes_to_read : vec_len (utm->rx_buf); + + buffer_offset = 0; + while (bytes_to_read > 0) + { + rv = svm_fifo_dequeue_nowait (rx_fifo, mypid, + bytes_to_read, + utm->rx_buf + buffer_offset); + if (rv > 0) + { + bytes_to_read -= rv; + buffer_offset += rv; + bytes_received += rv; + } + } + } + while (bytes_received < bytes_sent) + { + rv = svm_fifo_dequeue_nowait (rx_fifo, mypid, + vec_len (utm->rx_buf), utm->rx_buf); + if (rv > 0) + { +#if CLIB_DEBUG > 0 + int j; + for (j = 0; j < rv; j++) + { + if (utm->rx_buf[j] != ((bytes_received + j) & 0xff)) + { + clib_warning ("error at byte %lld, 0x%x not 0x%x", + bytes_received + j, + utm->rx_buf[j], + ((bytes_received + j) & 0xff)); + } + } +#endif + bytes_received += (u64) rv; + } + } + + after = clib_time_now (&utm->clib_time); + delta = after - before; + bytes_per_second = 0.0; + + if (delta > 0.0) + bytes_per_second = (f64) bytes_received / delta; + + fformat (stdout, + "Done: %lld recv bytes in %.2f seconds, %.2f bytes/sec...\n\n", + bytes_received, delta, bytes_per_second); + fformat (stdout, + "Done: %lld sent bytes in %.2f seconds, %.2f bytes/sec...\n\n", + bytes_sent, delta, bytes_per_second); + fformat (stdout, + "client -> server -> client round trip: %.2f Gbit/sec \n\n", + (bytes_per_second * 8.0) / 1e9); +} + +static void +vl_api_bind_uri_reply_t_handler (vl_api_bind_uri_reply_t * mp) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + svm_fifo_segment_create_args_t _a, *a = &_a; + int rv; + + if (mp->segment_name_length == 0) + { + clib_warning ("segment_name_length zero"); + return; + } + + a->segment_name = (char *) mp->segment_name; + a->segment_size = mp->segment_size; + + ASSERT (mp->server_event_queue_address); + + /* Attach to the segment vpp created */ + rv = svm_fifo_segment_attach (a); + if (rv) + { + clib_warning ("svm_fifo_segment_attach ('%s') failed", + mp->segment_name); + return; + } + + utm->our_event_queue = (unix_shared_memory_queue_t *) + mp->server_event_queue_address; + + utm->state 
= STATE_READY; +} + +static void +vl_api_map_another_segment_t_handler (vl_api_map_another_segment_t * mp) +{ + svm_fifo_segment_create_args_t _a, *a = &_a; + int rv; + + a->segment_name = (char *) mp->segment_name; + a->segment_size = mp->segment_size; + /* Attach to the segment vpp created */ + rv = svm_fifo_segment_attach (a); + if (rv) + { + clib_warning ("svm_fifo_segment_attach ('%s') failed", + mp->segment_name); + return; + } + clib_warning ("Mapped new segment '%s' size %d", mp->segment_name, + mp->segment_size); +} + +static void +vl_api_connect_uri_t_handler (vl_api_connect_uri_t * mp) +{ + u32 segment_index; + uri_udp_test_main_t *utm = &uri_udp_test_main; + svm_fifo_segment_main_t *sm = &svm_fifo_segment_main; + svm_fifo_segment_create_args_t _a, *a = &_a; + svm_fifo_segment_private_t *seg; + unix_shared_memory_queue_t *client_q; + vl_api_connect_uri_reply_t *rmp; + session_t *session; + int rv = 0; + + /* Create the segment */ + a->segment_name = (char *) format (0, "%d:segment%d%c", utm->my_pid, + utm->unique_segment_index++, 0); + a->segment_size = utm->configured_segment_size; + + rv = svm_fifo_segment_create (a); + if (rv) + { + clib_warning ("sm_fifo_segment_create ('%s') failed", a->segment_name); + rv = VNET_API_ERROR_URI_FIFO_CREATE_FAILED; + goto send_reply; + } + + vec_add2 (utm->seg, seg, 1); + + segment_index = vec_len (sm->segments) - 1; + + memcpy (seg, sm->segments + segment_index, sizeof (utm->seg[0])); + + pool_get (utm->sessions, session); + + /* + * By construction the master's idea of the rx fifo ends up in + * fsh->fifos[0], and the master's idea of the tx fifo ends up in + * fsh->fifos[1]. 
+ */ + session->server_rx_fifo = svm_fifo_segment_alloc_fifo (utm->seg, + 128 * 1024); + ASSERT (session->server_rx_fifo); + + session->server_tx_fifo = svm_fifo_segment_alloc_fifo (utm->seg, + 128 * 1024); + ASSERT (session->server_tx_fifo); + + session->server_rx_fifo->server_session_index = session - utm->sessions; + session->server_tx_fifo->server_session_index = session - utm->sessions; + utm->cut_through_session_index = session - utm->sessions; + + rv = pthread_create (&utm->cut_through_thread_handle, + NULL /*attr */ , cut_through_thread_fn, 0); + if (rv) + { + clib_warning ("pthread_create returned %d", rv); + rv = VNET_API_ERROR_SYSCALL_ERROR_1; + } + +send_reply: + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + + rmp->_vl_msg_id = ntohs (VL_API_CONNECT_URI_REPLY); + rmp->context = mp->context; + rmp->retval = ntohl (rv); + rmp->segment_name_length = vec_len (a->segment_name); + memcpy (rmp->segment_name, a->segment_name, vec_len (a->segment_name)); + + vec_free (a->segment_name); + + client_q = (unix_shared_memory_queue_t *) mp->client_queue_address; + vl_msg_api_send_shmem (client_q, (u8 *) & rmp); +} + +static void +vl_api_unbind_uri_reply_t_handler (vl_api_unbind_uri_reply_t * mp) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + + if (mp->retval != 0) + clib_warning ("returned %d", ntohl (mp->retval)); + + utm->state = STATE_START; +} + +static void +vl_api_accept_session_t_handler (vl_api_accept_session_t * mp) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + vl_api_accept_session_reply_t *rmp; + svm_fifo_t *rx_fifo, *tx_fifo; + session_t *session; + static f64 start_time; + u64 key; + + if (start_time == 0.0) + start_time = clib_time_now (&utm->clib_time); + + utm->vpp_event_queue = (unix_shared_memory_queue_t *) + mp->vpp_event_queue_address; + + pool_get (utm->sessions, session); + + rx_fifo = (svm_fifo_t *) mp->server_rx_fifo; + rx_fifo->client_session_index = session - utm->sessions; + tx_fifo = (svm_fifo_t 
*) mp->server_tx_fifo; + tx_fifo->client_session_index = session - utm->sessions; + + session->server_rx_fifo = rx_fifo; + session->server_tx_fifo = tx_fifo; + + key = (((u64) mp->session_thread_index) << 32) | (u64) mp->session_index; + + hash_set (utm->session_index_by_vpp_handles, key, session - utm->sessions); + + utm->state = STATE_READY; + + if (pool_elts (utm->sessions) && (pool_elts (utm->sessions) % 20000) == 0) + { + f64 now = clib_time_now (&utm->clib_time); + fformat (stdout, "%d active sessions in %.2f seconds, %.2f/sec...\n", + pool_elts (utm->sessions), now - start_time, + (f64) pool_elts (utm->sessions) / (now - start_time)); + } + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_ACCEPT_SESSION_REPLY); + rmp->session_type = mp->session_type; + rmp->session_index = mp->session_index; + rmp->session_thread_index = mp->session_thread_index; + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & rmp); +} + +static void +vl_api_disconnect_session_t_handler (vl_api_disconnect_session_t * mp) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + session_t *session; + vl_api_disconnect_session_reply_t *rmp; + uword *p; + int rv = 0; + u64 key; + + key = (((u64) mp->session_thread_index) << 32) | (u64) mp->session_index; + + p = hash_get (utm->session_index_by_vpp_handles, key); + + if (p) + { + session = pool_elt_at_index (utm->sessions, p[0]); + hash_unset (utm->session_index_by_vpp_handles, key); + pool_put (utm->sessions, session); + } + else + { + clib_warning ("couldn't find session key %llx", key); + rv = -11; + } + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_DISCONNECT_SESSION_REPLY); + rmp->retval = rv; + rmp->session_index = mp->session_index; + rmp->session_thread_index = mp->session_thread_index; + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & rmp); +} + +static void +vl_api_connect_uri_reply_t_handler 
(vl_api_connect_uri_reply_t * mp) +{ /* Handles the connect-uri reply: attaches to the fifo segment named in the message and records fsh->fifos[0] / fsh->fifos[1] as the new session's rx / tx fifos. */ + svm_fifo_segment_main_t *sm = &svm_fifo_segment_main; + uri_udp_test_main_t *utm = &uri_udp_test_main; + svm_fifo_segment_create_args_t _a, *a = &_a; + ssvm_shared_header_t *sh; + svm_fifo_segment_private_t *seg; + svm_fifo_segment_header_t *fsh; + session_t *session; + u32 segment_index; + int rv; + + ASSERT (utm->i_am_master == 0); + + if (mp->segment_name_length == 0) + { + clib_warning ("segment_name_length zero"); + return; + } + + memset (a, 0, sizeof (*a)); + + a->segment_name = (char *) mp->segment_name; + + sleep (1); + + rv = svm_fifo_segment_attach (a); + if (rv) + { + clib_warning ("svm_fifo_segment_attach ('%s') failed", mp->segment_name); /* segment_name is used as a C string above, so print it with %s rather than the vector format %v */ + return; + } + + segment_index = vec_len (sm->segments) - 1; + + vec_add2 (utm->seg, seg, 1); + + memcpy (seg, sm->segments + segment_index, sizeof (*seg)); + sh = seg->ssvm.sh; + fsh = (svm_fifo_segment_header_t *) sh->opaque[0]; + + while (vec_len (fsh->fifos) < 2) + sleep (1); + + pool_get (utm->sessions, session); + utm->cut_through_session_index = session - utm->sessions; + + session->server_rx_fifo = (svm_fifo_t *) fsh->fifos[0]; + ASSERT (session->server_rx_fifo); + session->server_tx_fifo = (svm_fifo_t *) fsh->fifos[1]; + ASSERT (session->server_tx_fifo); + + /* security: could unlink /dev/shm/segment_name> here, maybe */ + + utm->state = STATE_READY; +} + +#define foreach_uri_msg \ +_(BIND_URI_REPLY, bind_uri_reply) \ +_(CONNECT_URI, connect_uri) \ +_(CONNECT_URI_REPLY, connect_uri_reply) \ +_(UNBIND_URI_REPLY, unbind_uri_reply) \ +_(ACCEPT_SESSION, accept_session) \ +_(DISCONNECT_SESSION, disconnect_session) \ +_(MAP_ANOTHER_SEGMENT, map_another_segment) + +void +uri_api_hookup (uri_udp_test_main_t * utm) +{ +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_uri_msg; +#undef _ + +} + + +int +connect_to_vpp (char *name) 
+{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + api_main_t *am = &api_main; + + if (vl_client_connect_to_vlib ("/vpe-api", name, 32) < 0) + return -1; + + utm->vl_input_queue = am->shmem_hdr->vl_input_queue; + utm->my_client_index = am->my_client_index; + + return 0; +} + +void +vlib_cli_output (struct vlib_main_t *vm, char *fmt, ...) +{ + clib_warning ("BUG"); +} + +static void +init_error_string_table (uri_udp_test_main_t * utm) +{ + utm->error_string_by_error_number = hash_create (0, sizeof (uword)); + +#define _(n,v,s) hash_set (utm->error_string_by_error_number, -v, s); + foreach_vnet_api_error; +#undef _ + + hash_set (utm->error_string_by_error_number, 99, "Misc"); +} + +void +handle_fifo_event_server_rx (uri_udp_test_main_t * utm, + session_fifo_event_t * e) +{ + svm_fifo_t *rx_fifo, *tx_fifo; + int nbytes; + + session_fifo_event_t evt; + unix_shared_memory_queue_t *q; + int rv; + + rx_fifo = e->fifo; + tx_fifo = utm->sessions[rx_fifo->client_session_index].server_tx_fifo; + + do + { + nbytes = svm_fifo_dequeue_nowait (rx_fifo, 0, + vec_len (utm->rx_buf), utm->rx_buf); + } + while (nbytes <= 0); + do + { + rv = svm_fifo_enqueue_nowait (tx_fifo, 0, nbytes, utm->rx_buf); + } + while (rv == -2); + + /* Fabricate TX event, send to vpp */ + evt.fifo = tx_fifo; + evt.event_type = FIFO_EVENT_SERVER_TX; + /* $$$$ for event logging */ + evt.enqueue_length = nbytes; + evt.event_id = e->event_id; + q = utm->vpp_event_queue; + unix_shared_memory_queue_add (q, (u8 *) & evt, 0 /* do wait for mutex */ ); +} + +void +handle_event_queue (uri_udp_test_main_t * utm) +{ + session_fifo_event_t _e, *e = &_e;; + + while (1) + { + unix_shared_memory_queue_sub (utm->our_event_queue, (u8 *) e, + 0 /* nowait */ ); + switch (e->event_type) + { + case FIFO_EVENT_SERVER_RX: + handle_fifo_event_server_rx (utm, e); + break; + + case FIFO_EVENT_SERVER_EXIT: + return; + + default: + clib_warning ("unknown event type %d", e->event_type); + break; + } + if (PREDICT_FALSE 
(utm->time_to_stop == 1)) + break; + if (PREDICT_FALSE (utm->time_to_print_stats == 1)) + { + utm->time_to_print_stats = 0; + fformat (stdout, "%d connections\n", pool_elts (utm->sessions)); + } + } +} + +void +uri_udp_test (uri_udp_test_main_t * utm) +{ + vl_api_bind_uri_t *bmp; + vl_api_unbind_uri_t *ump; + + bmp = vl_msg_api_alloc (sizeof (*bmp)); + memset (bmp, 0, sizeof (*bmp)); + + bmp->_vl_msg_id = ntohs (VL_API_BIND_URI); + bmp->client_index = utm->my_client_index; + bmp->context = ntohl (0xfeedface); + bmp->initial_segment_size = 256 << 20; /* size of initial segment */ + bmp->options[SESSION_OPTIONS_FLAGS] = + SESSION_OPTIONS_FLAGS_USE_FIFO | SESSION_OPTIONS_FLAGS_ADD_SEGMENT; + bmp->options[SESSION_OPTIONS_RX_FIFO_SIZE] = 16 << 10; + bmp->options[SESSION_OPTIONS_TX_FIFO_SIZE] = 16 << 10; + bmp->options[SESSION_OPTIONS_ADD_SEGMENT_SIZE] = 128 << 20; + memcpy (bmp->uri, utm->uri, vec_len (utm->uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & bmp); + + if (wait_for_state_change (utm, STATE_READY)) + { + clib_warning ("timeout waiting for STATE_READY"); + return; + } + + handle_event_queue (utm); + + ump = vl_msg_api_alloc (sizeof (*ump)); + memset (ump, 0, sizeof (*ump)); + + ump->_vl_msg_id = ntohs (VL_API_UNBIND_URI); + ump->client_index = utm->my_client_index; + memcpy (ump->uri, utm->uri, vec_len (utm->uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & ump); + + if (wait_for_state_change (utm, STATE_START)) + { + clib_warning ("timeout waiting for STATE_START"); + return; + } + + fformat (stdout, "Test complete...\n"); +} + +int +main (int argc, char **argv) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + unformat_input_t _argv, *a = &_argv; + u8 *chroot_prefix; + u8 *heap; + u8 *bind_name = (u8 *) "udp://0.0.0.0/1234"; + u32 tmp; + mheap_t *h; + session_t *session; + int i; + int i_am_master = 1; + + clib_mem_init (0, 256 << 20); + + heap = clib_mem_get_per_cpu_heap (); + h = mheap_header (heap); + + /* make the main heap 
thread-safe */ + h->flags |= MHEAP_FLAG_THREAD_SAFE; + + vec_validate (utm->rx_buf, 8192); + + utm->session_index_by_vpp_handles = hash_create (0, sizeof (uword)); + + utm->my_pid = getpid (); + utm->configured_segment_size = 1 << 20; + + clib_time_init (&utm->clib_time); + init_error_string_table (utm); + svm_fifo_segment_init (0x200000000ULL, 20); + unformat_init_command_line (a, argv); + + while (unformat_check_input (a) != UNFORMAT_END_OF_INPUT) + { + if (unformat (a, "chroot prefix %s", &chroot_prefix)) + { + vl_set_memory_root_path ((char *) chroot_prefix); + } + else if (unformat (a, "uri %s", &bind_name)) + ; + else if (unformat (a, "segment-size %dM", &tmp)) + utm->configured_segment_size = tmp << 20; + else if (unformat (a, "segment-size %dG", &tmp)) + utm->configured_segment_size = tmp << 30; + else if (unformat (a, "master")) + i_am_master = 1; + else if (unformat (a, "slave")) + i_am_master = 0; + else + { + fformat (stderr, "%s: usage [master|slave]\n", argv[0]); /* the %s conversion needs the program name; it was previously called with no argument */ + exit (1); + } + } + + utm->cut_through_session_index = ~0; + utm->uri = format (0, "%s%c", bind_name, 0); + utm->i_am_master = i_am_master; + utm->segment_main = &svm_fifo_segment_main; + + utm->connect_uri = format (0, "udp://10.0.0.1/1234%c", 0); + + setup_signal_handlers (); + + uri_api_hookup (utm); + + if (connect_to_vpp (i_am_master ? 
"uri_udp_master" : "uri_udp_slave") < 0) + { + svm_region_exit (); + fformat (stderr, "Couldn't connect to vpe, exiting...\n"); + exit (1); + } + + if (i_am_master == 0) + { + uri_udp_slave_test (utm); + exit (0); + } + + /* $$$$ hack preallocation */ + for (i = 0; i < 200000; i++) + { + pool_get (utm->sessions, session); + memset (session, 0, sizeof (*session)); + } + for (i = 0; i < 200000; i++) + pool_put_index (utm->sessions, i); + + uri_udp_test (utm); + + vl_client_disconnect_from_vlib (); + exit (0); +} + +#undef vl_api_version +#define vl_api_version(n,v) static u32 vpe_api_version = v; +#include +#undef vl_api_version + +void +vl_client_add_api_signatures (vl_api_memclnt_create_t * mp) +{ + /* + * Send the main API signature in slot 0. This bit of code must + * match the checks in ../vpe/api/api.c: vl_msg_api_version_check(). + */ + mp->api_versions[0] = clib_host_to_net_u32 (vpe_api_version); +} + +u32 +vl (void *p) +{ + return vec_len (p); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/uri/uritest.c b/src/uri/uritest.c new file mode 100644 index 00000000..edcdb3ad --- /dev/null +++ b/src/uri/uritest.c @@ -0,0 +1,484 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +/* declare message handlers for each api */ + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) +#define vl_printfun +#include +#undef vl_printfun + +typedef enum +{ + STATE_START, + STATE_READY, + STATE_DISCONNECTING, +} connection_state_t; + +typedef struct +{ + /* vpe input queue */ + unix_shared_memory_queue_t *vl_input_queue; + + /* API client handle */ + u32 my_client_index; + + /* role */ + int i_am_master; + + /* The URI we're playing with */ + u8 *uri; + + /* fifo segment */ + svm_fifo_segment_private_t *seg; + + svm_fifo_t *rx_fifo; + svm_fifo_t *tx_fifo; + + /* For deadman timers */ + clib_time_t clib_time; + + /* State of the connection, shared between msg RX thread and main thread */ + volatile connection_state_t state; + + /* VNET_API_ERROR_FOO -> "Foo" hash table */ + uword *error_string_by_error_number; +} uritest_main_t; + +#if CLIB_DEBUG > 0 +#define NITER 1000 +#else +#define NITER 1000000 +#endif + +uritest_main_t uritest_main; + +u8 * +format_api_error (u8 * s, va_list * args) +{ + uritest_main_t *utm = va_arg (*args, uritest_main_t *); + i32 error = va_arg (*args, u32); + uword *p; + + p = hash_get (utm->error_string_by_error_number, -error); + + if (p) + s = format (s, "%s", p[0]); + else + s = format (s, "%d", error); + return s; +} + +int +wait_for_state_change (uritest_main_t * utm, connection_state_t state) +{ + f64 timeout = clib_time_now (&utm->clib_time) + 1.0; + + while (clib_time_now (&utm->clib_time) < timeout) + { + if (utm->state == state) + return 0; + } + return -1; +} + +static void +vl_api_bind_uri_reply_t_handler (vl_api_bind_uri_reply_t * mp) +{ + uritest_main_t *utm = 
&uritest_main; + svm_fifo_segment_create_args_t _a, *a = &_a; + int rv; + + ASSERT (utm->i_am_master); + + if (mp->segment_name_length == 0) + { + clib_warning ("segment_name_length zero"); + return; + } + + a->segment_name = (char *) mp->segment_name; + a->segment_size = mp->segment_size; + + /* Create the segment */ + rv = svm_fifo_segment_create (a); + if (rv) + { + clib_warning ("sm_fifo_segment_create ('%s') failed", mp->segment_name); + return; + } + + vec_validate (utm->seg, 0); + + memcpy (utm->seg, a->rv, sizeof (*utm->seg)); + + /* + * By construction the master's idea of the rx fifo ends up in + * fsh->fifos[0], and the master's idea of the tx fifo ends up in + * fsh->fifos[1]. + */ + utm->rx_fifo = svm_fifo_segment_alloc_fifo (utm->seg, 10240); + ASSERT (utm->rx_fifo); + + utm->tx_fifo = svm_fifo_segment_alloc_fifo (utm->seg, 10240); + ASSERT (utm->tx_fifo); + + utm->state = STATE_READY; +} + +static void +vl_api_connect_uri_reply_t_handler (vl_api_connect_uri_reply_t * mp) +{ + uritest_main_t *utm = &uritest_main; + svm_fifo_segment_create_args_t _a, *a = &_a; + ssvm_shared_header_t *sh; + svm_fifo_segment_header_t *fsh; + int rv; + + ASSERT (utm->i_am_master == 0); + + if (mp->segment_name_length == 0) + { + clib_warning ("segment_name_length zero"); + return; + } + + memset (a, 0, sizeof (*a)); + + a->segment_name = (char *) mp->segment_name; + + rv = svm_fifo_segment_attach (a); + if (rv) + { + clib_warning ("sm_fifo_segment_create ('%s') failed", mp->segment_name); + return; + } + + vec_validate (utm->seg, 0); + + memcpy (utm->seg, a->rv, sizeof (*utm->seg)); + sh = utm->seg->ssvm.sh; + fsh = (svm_fifo_segment_header_t *) sh->opaque[0]; + + while (vec_len (fsh->fifos) < 2) + sleep (1); + + utm->rx_fifo = (svm_fifo_t *) fsh->fifos[1]; + ASSERT (utm->rx_fifo); + utm->tx_fifo = (svm_fifo_t *) fsh->fifos[0]; + ASSERT (utm->tx_fifo); + + /* security: could unlink /dev/shm/segment_name> here, maybe */ + + utm->state = STATE_READY; +} + +static void 
+vl_api_unbind_uri_reply_t_handler (vl_api_unbind_uri_reply_t * mp) +{ + uritest_main_t *utm = &uritest_main; + + if (mp->retval != 0) + clib_warning ("returned %d", ntohl (mp->retval)); + + utm->state = STATE_START; +} + +#define foreach_uri_msg \ +_(BIND_URI_REPLY, bind_uri_reply) \ +_(CONNECT_URI_REPLY, connect_uri_reply) \ +_(UNBIND_URI_REPLY, unbind_uri_reply) + +void +uri_api_hookup (uritest_main_t * utm) +{ +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_uri_msg; +#undef _ + +} + + +int +connect_to_vpp (char *name) +{ + uritest_main_t *utm = &uritest_main; + api_main_t *am = &api_main; + + if (vl_client_connect_to_vlib ("/vpe-api", name, 32) < 0) + return -1; + + utm->vl_input_queue = am->shmem_hdr->vl_input_queue; + utm->my_client_index = am->my_client_index; + + return 0; +} + +void +vlib_cli_output (struct vlib_main_t *vm, char *fmt, ...) 
+{ + clib_warning ("BUG"); +} + +static void +init_error_string_table (uritest_main_t * utm) +{ + utm->error_string_by_error_number = hash_create (0, sizeof (uword)); + +#define _(n,v,s) hash_set (utm->error_string_by_error_number, -v, s); + foreach_vnet_api_error; +#undef _ + + hash_set (utm->error_string_by_error_number, 99, "Misc"); +} + +void +uritest_master (uritest_main_t * utm) +{ /* Master role: binds utm->uri, enqueues NITER copies of a 2048-byte pattern on tx_fifo, reads a one-byte reply from rx_fifo, then unbinds and waits for STATE_START. */ + vl_api_bind_uri_t *bmp; + vl_api_unbind_uri_t *ump; + int i; + u8 *test_data = 0; + u8 *reply = 0; + u32 reply_len; + int mypid = getpid (); + + for (i = 0; i < 2048; i++) + vec_add1 (test_data, 'a' + (i % 32)); + + bmp = vl_msg_api_alloc (sizeof (*bmp)); + memset (bmp, 0, sizeof (*bmp)); + + bmp->_vl_msg_id = ntohs (VL_API_BIND_URI); + bmp->client_index = utm->my_client_index; + bmp->context = ntohl (0xfeedface); + bmp->segment_size = 256 << 10; + memcpy (bmp->uri, utm->uri, vec_len (utm->uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & bmp); + + if (wait_for_state_change (utm, STATE_READY)) + { + clib_warning ("timeout waiting for STATE_READY"); + return; + } + + for (i = 0; i < NITER; i++) + svm_fifo_enqueue (utm->tx_fifo, mypid, vec_len (test_data), test_data); + + vec_validate (reply, 0); + + reply_len = svm_fifo_dequeue (utm->rx_fifo, mypid, vec_len (reply), reply); + + if (reply_len != 1) + clib_warning ("reply length %d", reply_len); + + if (reply[0] == 1) + fformat (stdout, "Test OK..."); + + ump = vl_msg_api_alloc (sizeof (*ump)); + memset (ump, 0, sizeof (*ump)); + + ump->_vl_msg_id = ntohs (VL_API_UNBIND_URI); + ump->client_index = utm->my_client_index; + memcpy (ump->uri, utm->uri, vec_len (utm->uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & ump); + + if (wait_for_state_change (utm, STATE_START)) + { + clib_warning ("timeout waiting for STATE_START"); /* report the state actually being waited for */ + return; + } + + fformat (stdout, "Master done...\n"); +} + +void +uritest_slave (uritest_main_t * utm) +{ + vl_api_connect_uri_t *cmp; + int i, j; + u8 *test_data = 0; + u8 *reply = 0; +
u32 bytes_received = 0; + u32 actual_bytes; + int mypid = getpid (); + u8 ok; + f64 before, after, delta, bytes_per_second; + + vec_validate (test_data, 4095); + + cmp = vl_msg_api_alloc (sizeof (*cmp)); + memset (cmp, 0, sizeof (*cmp)); + + cmp->_vl_msg_id = ntohs (VL_API_CONNECT_URI); + cmp->client_index = utm->my_client_index; + cmp->context = ntohl (0xfeedface); + memcpy (cmp->uri, utm->uri, vec_len (utm->uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & cmp); + + if (wait_for_state_change (utm, STATE_READY)) + { + clib_warning ("timeout waiting for STATE_READY"); + return; + } + + ok = 1; + before = clib_time_now (&utm->clib_time); + for (i = 0; i < NITER; i++) + { + actual_bytes = svm_fifo_dequeue (utm->rx_fifo, mypid, + vec_len (test_data), test_data); + j = 0; + while (j < actual_bytes) + { + if (test_data[j] != ('a' + (bytes_received % 32))) + ok = 0; + bytes_received++; + j++; + } + if (bytes_received == NITER * 2048) + break; + } + + vec_add1 (reply, ok); + + svm_fifo_enqueue (utm->tx_fifo, mypid, vec_len (reply), reply); + after = clib_time_now (&utm->clib_time); + delta = after - before; + bytes_per_second = 0.0; + + if (delta > 0.0) + bytes_per_second = (f64) bytes_received / delta; + + fformat (stdout, + "Slave done, %d bytes in %.2f seconds, %.2f bytes/sec...\n", + bytes_received, delta, bytes_per_second); +} + +int +main (int argc, char **argv) +{ + uritest_main_t *utm = &uritest_main; + unformat_input_t _argv, *a = &_argv; + u8 *chroot_prefix; + u8 *heap; + char *bind_name = "fifo:uritest"; + mheap_t *h; + int i_am_master = 0; + + clib_mem_init (0, 128 << 20); + + heap = clib_mem_get_per_cpu_heap (); + h = mheap_header (heap); + + /* make the main heap thread-safe */ + h->flags |= MHEAP_FLAG_THREAD_SAFE; + + clib_time_init (&utm->clib_time); + init_error_string_table (utm); + svm_fifo_segment_init (0x200000000ULL, 20); + unformat_init_command_line (a, argv); + + utm->uri = format (0, "%s%c", bind_name, 0); + + while 
(unformat_check_input (a) != UNFORMAT_END_OF_INPUT) + { + if (unformat (a, "master")) + i_am_master = 1; + else if (unformat (a, "slave")) + i_am_master = 0; + else if (unformat (a, "chroot prefix %s", &chroot_prefix)) + { + vl_set_memory_root_path ((char *) chroot_prefix); + } + else + { + fformat (stderr, "%s: usage [master|slave]\n", argv[0]); /* the %s conversion needs the program name; it was previously called with no argument */ + exit (1); + } + } + + uri_api_hookup (utm); + + if (connect_to_vpp (i_am_master ? "uritest_master" : "uritest_slave") < 0) + { + svm_region_exit (); + fformat (stderr, "Couldn't connect to vpe, exiting...\n"); + exit (1); + } + + utm->i_am_master = i_am_master; + + if (i_am_master) + uritest_master (utm); + else + uritest_slave (utm); + + vl_client_disconnect_from_vlib (); + exit (0); +} + +#undef vl_api_version +#define vl_api_version(n,v) static u32 vpe_api_version = v; +#include +#undef vl_api_version + +void +vl_client_add_api_signatures (vl_api_memclnt_create_t * mp) +{ + /* + * Send the main API signature in slot 0. This bit of code must + * match the checks in ../vpe/api/api.c: vl_msg_api_version_check(). + */ + mp->api_versions[0] = clib_host_to_net_u32 (vpe_api_version); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vlib/buffer.c b/src/vlib/buffer.c index 4f5eb09d..9f26bec7 100644 --- a/src/vlib/buffer.c +++ b/src/vlib/buffer.c @@ -360,7 +360,7 @@ vlib_buffer_create_free_list_helper (vlib_main_t * vm, memset (f, 0, sizeof (f[0])); f->index = f - bm->buffer_free_list_pool; f->n_data_bytes = vlib_buffer_round_size (n_data_bytes); - f->min_n_buffers_each_physmem_alloc = 16; + f->min_n_buffers_each_physmem_alloc = VLIB_FRAME_SIZE; f->name = clib_mem_is_heap_object (name) ? name : format (0, "%s", name); /* Setup free buffer template. 
*/ diff --git a/src/vlib/buffer.h b/src/vlib/buffer.h index 1f723f3b..69c8c7cc 100644 --- a/src/vlib/buffer.h +++ b/src/vlib/buffer.h @@ -240,6 +240,74 @@ vlib_get_buffer_opaque2 (vlib_buffer_t * b) return (void *) b->opaque2; } +/** \brief Get pointer to the end of buffer's data + * @param b pointer to the buffer + * @return pointer to tail of packet's data + */ +always_inline u8 * +vlib_buffer_get_tail (vlib_buffer_t * b) +{ + return b->data + b->current_data + b->current_length; +} + +/** \brief Append uninitialized data to buffer + * @param b pointer to the buffer + * @param size number of uninitialized bytes + * @return pointer to beginning of uninitialized data + */ +always_inline void * +vlib_buffer_put_uninit (vlib_buffer_t * b, u8 size) +{ + void *p = vlib_buffer_get_tail (b); + /* XXX make sure there's enough space */ + b->current_length += size; + return p; +} + +/** \brief Prepend uninitialized data to buffer + * @param b pointer to the buffer + * @param size number of uninitialized bytes + * @return pointer to beginning of uninitialized data + */ +always_inline void * +vlib_buffer_push_uninit (vlib_buffer_t * b, u8 size) +{ + ASSERT (b->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= size); + b->current_data -= size; + b->current_length += size; + + return vlib_buffer_get_current (b); +} + +/** \brief Make head room, typically for packet headers + * @param b pointer to the buffer + * @param size number of head room bytes + * @return pointer to start of buffer (current data) + */ +always_inline void * +vlib_buffer_make_headroom (vlib_buffer_t * b, u8 size) +{ + ASSERT (b->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= size); + b->current_data += size; + return vlib_buffer_get_current (b); +} + +/** \brief Retrieve bytes from buffer head + * @param b pointer to the buffer + * @param size number of bytes to pull + * @return pointer to start of buffer (current data) + */ +always_inline void * +vlib_buffer_pull (vlib_buffer_t * b, u8 size) +{ + if 
(b->current_length + VLIB_BUFFER_PRE_DATA_SIZE < size) + return 0; + + void *data = vlib_buffer_get_current (b); + vlib_buffer_advance (b, size); + return data; +} + /* Forward declaration. */ struct vlib_main_t; diff --git a/src/vlibmemory/unix_shared_memory_queue.c b/src/vlibmemory/unix_shared_memory_queue.c index 25d28910..e86edec3 100644 --- a/src/vlibmemory/unix_shared_memory_queue.c +++ b/src/vlibmemory/unix_shared_memory_queue.c @@ -33,18 +33,13 @@ * nels = number of elements on the queue * elsize = element size, presumably 4 and cacheline-size will * be popular choices. - * coid = consumer coid, from ChannelCreate * pid = consumer pid - * pulse_code = pulse code consumer expects - * pulse_value = pulse value consumer expects - * consumer_prio = consumer's priority, so pulses won't change - * the consumer's priority. * * The idea is to call this function in the queue consumer, * and e-mail the queue pointer to the producer(s). * - * The spp process / main thread allocates one of these - * at startup; its main input queue. The spp main input queue + * The vpp process / main thread allocates one of these + * at startup; its main input queue. The vpp main input queue * has a pointer to it in the shared memory segment header. * * You probably want to be on an svm data heap before calling this @@ -70,7 +65,7 @@ unix_shared_memory_queue_init (int nels, q->signal_when_queue_non_empty = signal_when_queue_non_empty; memset (&attr, 0, sizeof (attr)); - memset (&cattr, 0, sizeof (attr)); + memset (&cattr, 0, sizeof (cattr)); if (pthread_mutexattr_init (&attr)) clib_unix_warning ("mutexattr_init"); @@ -277,6 +272,7 @@ unix_shared_memory_queue_sub (unix_shared_memory_queue_t * q, clib_memcpy (elem, headp, q->elsize); q->head++; + /* $$$$ JFC shouldn't this be == 0? 
*/ if (q->cursize == q->maxsize) need_broadcast = 1; diff --git a/src/vlibmemory/unix_shared_memory_queue.h b/src/vlibmemory/unix_shared_memory_queue.h index f758f17c..13800065 100644 --- a/src/vlibmemory/unix_shared_memory_queue.h +++ b/src/vlibmemory/unix_shared_memory_queue.h @@ -29,7 +29,7 @@ typedef struct _unix_shared_memory_queue pthread_cond_t condvar; /* 8 bytes */ int head; int tail; - int cursize; + volatile int cursize; int maxsize; int elsize; int consumer_pid; diff --git a/src/vnet.am b/src/vnet.am index 64484e18..923f61d8 100644 --- a/src/vnet.am +++ b/src/vnet.am @@ -324,11 +324,7 @@ libvnet_la_SOURCES += \ vnet/ip/ip_input_acl.c \ vnet/ip/lookup.c \ vnet/ip/ping.c \ - vnet/ip/punt.c \ - vnet/ip/udp_format.c \ - vnet/ip/udp_init.c \ - vnet/ip/udp_local.c \ - vnet/ip/udp_pg.c + vnet/ip/punt.c nobase_include_HEADERS += \ vnet/ip/format.h \ @@ -354,11 +350,7 @@ nobase_include_HEADERS += \ vnet/ip/ports.def \ vnet/ip/protocols.def \ vnet/ip/punt_error.def \ - vnet/ip/punt.h \ - vnet/ip/tcp_packet.h \ - vnet/ip/udp_error.def \ - vnet/ip/udp.h \ - vnet/ip/udp_packet.h + vnet/ip/punt.h API_FILES += vnet/ip/ip.api @@ -473,6 +465,38 @@ test_map_LDADD = libvnet.la libvppinfra.la libvlib.la \ test_map_LDFLAGS = -static endif +######################################## +# Layer 4 protocol: tcp +######################################## +libvnet_la_SOURCES += \ + vnet/tcp/tcp_format.c \ + vnet/tcp/tcp_pg.c \ + vnet/tcp/tcp_syn_filter4.c \ + vnet/tcp/tcp_output.c \ + vnet/tcp/tcp_input.c \ + vnet/tcp/tcp_newreno.c \ + vnet/tcp/tcp.c + +nobase_include_HEADERS += \ + vnet/tcp/tcp_packet.h \ + vnet/tcp/tcp_timer.h \ + vnet/tcp/tcp.h + +######################################## +# Layer 4 protocol: udp +######################################## +libvnet_la_SOURCES += \ + vnet/udp/udp.c \ + vnet/udp/udp_input.c \ + vnet/udp/builtin_server.c \ + vnet/udp/udp_format.c \ + vnet/udp/udp_local.c \ + vnet/udp/udp_pg.c + +nobase_include_HEADERS += \ + vnet/udp/udp_error.def \ + 
vnet/udp/udp.h \ + vnet/udp/udp_packet.h ######################################## # Tunnel protocol: gre @@ -833,6 +857,28 @@ libvnet_la_SOURCES += \ nobase_include_HEADERS += \ vnet/devices/ssvm/ssvm_eth.h +######################################## +# session management +######################################## + +libvnet_la_SOURCES += \ + vnet/session/session.c \ + vnet/session/node.c \ + vnet/session/transport.c \ + vnet/session/application.c \ + vnet/session/session_cli.c \ + vnet/session/hashes.c \ + vnet/session/application_interface.c \ + vnet/session/session_api.c + +nobase_include_HEADERS += \ + vnet/session/session.h \ + vnet/session/application.h \ + vnet/session/transport.h \ + vnet/session/application_interface.h + +API_FILES += vnet/session/session.api + ######################################## # Linux packet interface ######################################## diff --git a/src/vnet/api_errno.h b/src/vnet/api_errno.h index 8680ef7c..861a5767 100644 --- a/src/vnet/api_errno.h +++ b/src/vnet/api_errno.h @@ -91,14 +91,19 @@ _(INVALID_ADDRESS_FAMILY, -97, "Invalid address family") \ _(INVALID_SUB_SW_IF_INDEX, -98, "Invalid sub-interface sw_if_index") \ _(TABLE_TOO_BIG, -99, "Table too big") \ _(CANNOT_ENABLE_DISABLE_FEATURE, -100, "Cannot enable/disable feature") \ -_(BFD_EEXIST, -101, "Duplicate BFD object") \ -_(BFD_ENOENT, -102, "No such BFD object") \ -_(BFD_EINUSE, -103, "BFD object in use") \ -_(BFD_NOTSUPP, -104, "BFD feature not supported") \ -_(LISP_RLOC_LOCAL, -105, "RLOC address is local") \ -_(BFD_EAGAIN, -106, "BFD object cannot be manipulated at this time") \ -_(INVALID_GPE_MODE, -107, "Invalid GPE mode") \ -_(LISP_GPE_ENTRIES_PRESENT, -108, "LISP GPE entries are present") +_(BFD_EEXIST, -101, "Duplicate BFD object") \ +_(BFD_ENOENT, -102, "No such BFD object") \ +_(BFD_EINUSE, -103, "BFD object in use") \ +_(BFD_NOTSUPP, -104, "BFD feature not supported") \ +_(ADDRESS_IN_USE, -105, "Address in use") \ +_(ADDRESS_NOT_IN_USE, -106, "Address not
in use") \ +_(QUEUE_FULL, -107, "Queue full") \ +_(UNKNOWN_URI_TYPE, -108, "Unknown URI type") \ +_(URI_FIFO_CREATE_FAILED, -109, "URI FIFO segment create failed") \ +_(LISP_RLOC_LOCAL, -110, "RLOC address is local") \ +_(BFD_EAGAIN, -111, "BFD object cannot be manipulated at this time") \ +_(INVALID_GPE_MODE, -112, "Invalid GPE mode") \ +_(LISP_GPE_ENTRIES_PRESENT, -113, "LISP GPE entries are present") typedef enum { diff --git a/src/vnet/bfd/bfd_udp.c b/src/vnet/bfd/bfd_udp.c index 146faad6..cf05089b 100644 --- a/src/vnet/bfd/bfd_udp.c +++ b/src/vnet/bfd/bfd_udp.c @@ -18,12 +18,12 @@ #include #include #include -#include +#include +#include #include #include #include #include -#include #include #include #include diff --git a/src/vnet/buffer.h b/src/vnet/buffer.h index f1cc6371..3de01f2a 100644 --- a/src/vnet/buffer.h +++ b/src/vnet/buffer.h @@ -277,6 +277,16 @@ typedef struct u16 buffer_advance; } device_input_feat; + /* TCP */ + struct + { + u32 connection_index; + u32 seq_number; + u32 seq_end; + u32 ack_number; + u8 flags; + } tcp; + u32 unused[6]; }; } vnet_buffer_opaque_t; diff --git a/src/vnet/classify/vnet_classify.c b/src/vnet/classify/vnet_classify.c index 6093e2ac..b651a1f1 100644 --- a/src/vnet/classify/vnet_classify.c +++ b/src/vnet/classify/vnet_classify.c @@ -695,8 +695,8 @@ int vnet_classify_add_del_table (vnet_classify_main_t * cm, } #define foreach_tcp_proto_field \ -_(src_port) \ -_(dst_port) +_(src) \ +_(dst) #define foreach_udp_proto_field \ _(src_port) \ diff --git a/src/vnet/dhcp/dhcp_proxy.h b/src/vnet/dhcp/dhcp_proxy.h index c0d79c41..4586d883 100644 --- a/src/vnet/dhcp/dhcp_proxy.h +++ b/src/vnet/dhcp/dhcp_proxy.h @@ -26,7 +26,7 @@ #include #include #include -#include +#include typedef enum { #define dhcp_proxy_error(n,s) DHCP_PROXY_ERROR_##n, diff --git a/src/vnet/flow/flow_report.h b/src/vnet/flow/flow_report.h index 4e764377..e8ed3818 100644 --- a/src/vnet/flow/flow_report.h +++ b/src/vnet/flow/flow_report.h @@ -23,7 +23,7 @@ #include 
#include #include -#include +#include #include #include #include diff --git a/src/vnet/ip/ip.h b/src/vnet/ip/ip.h index 02a1a963..70b4ccd8 100644 --- a/src/vnet/ip/ip.h +++ b/src/vnet/ip/ip.h @@ -50,8 +50,8 @@ #include #include -#include -#include +#include +#include #include #include diff --git a/src/vnet/ip/ip4.h b/src/vnet/ip/ip4.h index b184fbae..4e075d0f 100644 --- a/src/vnet/ip/ip4.h +++ b/src/vnet/ip/ip4.h @@ -309,8 +309,8 @@ ip4_compute_flow_hash (const ip4_header_t * ip, b = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ? t1 : t2; b ^= (flow_hash_config & IP_FLOW_HASH_PROTO) ? ip->protocol : 0; - t1 = is_tcp_udp ? tcp->ports.src : 0; - t2 = is_tcp_udp ? tcp->ports.dst : 0; + t1 = is_tcp_udp ? tcp->src : 0; + t2 = is_tcp_udp ? tcp->dst : 0; t1 = (flow_hash_config & IP_FLOW_HASH_SRC_PORT) ? t1 : 0; t2 = (flow_hash_config & IP_FLOW_HASH_DST_PORT) ? t2 : 0; @@ -334,6 +334,44 @@ u8 *format_ip4_forward_next_trace (u8 * s, va_list * args); u32 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0); +#define IP_DF 0x4000 /* don't fragment */ + +/** + * Push IPv4 header to buffer + * + * This does not support fragmentation. 
+ * + * @param vm - vlib_main + * @param b - buffer to write the header to + * @param src - source IP + * @param dst - destination IP + * @param proto - payload proto + * + * @return - pointer to start of IP header + */ +always_inline void * +vlib_buffer_push_ip4 (vlib_main_t * vm, vlib_buffer_t * b, + ip4_address_t * src, ip4_address_t * dst, int proto) +{ + ip4_header_t *ih; + + /* make some room */ + ih = vlib_buffer_push_uninit (b, sizeof (ip4_header_t)); + + ih->ip_version_and_header_length = 0x45; + ih->tos = 0; + ih->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b)); + + /* No fragments */ + ih->flags_and_fragment_offset = clib_host_to_net_u16 (IP_DF); + ih->ttl = 255; + ih->protocol = proto; + ih->src_address.as_u32 = src->as_u32; + ih->dst_address.as_u32 = dst->as_u32; + + ih->checksum = ip4_header_checksum (ih); + return ih; +} #endif /* included_ip_ip4_h */ /* diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index 8081b34b..66d91ab6 100644 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -1478,8 +1478,18 @@ ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0) return p0->flags; } -static uword -ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) +/* *INDENT-OFF* */ +VNET_FEATURE_ARC_INIT (ip4_local) = +{ + .arc_name = "ip4-local", + .start_nodes = VNET_FEATURES ("ip4-local"), +}; +/* *INDENT-ON* */ + +static inline uword +ip4_local_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, int head_of_feature_arc) { ip4_main_t *im = &ip4_main; ip_lookup_main_t *lm = &im->lookup_main; @@ -1487,6 +1497,7 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) u32 *from, *to_next, n_left_from, n_left_to_next; vlib_node_runtime_t *error_node = vlib_node_get_runtime (vm, ip4_input_node.index); + u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index; from = vlib_frame_vector_args (frame); n_left_from =
frame->n_vectors; @@ -1513,7 +1524,7 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) i32 len_diff0, len_diff1; u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0; u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1; - u8 enqueue_code; + u32 sw_if_index0, sw_if_index1; pi0 = to_next[0] = from[0]; pi1 = to_next[1] = from[1]; @@ -1522,6 +1533,8 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) to_next += 2; n_left_to_next -= 2; + next0 = next1 = IP_LOCAL_NEXT_DROP; + p0 = vlib_get_buffer (vm, pi0); p1 = vlib_get_buffer (vm, pi1); @@ -1531,14 +1544,18 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data; vnet_buffer (p1)->ip.start_of_ip_header = p1->current_data; - fib_index0 = vec_elt (im->fib_index_by_sw_if_index, - vnet_buffer (p0)->sw_if_index[VLIB_RX]); + sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; + sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX]; + + fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0); + fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1); + + fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0); fib_index0 = (vnet_buffer (p0)->sw_if_index[VLIB_TX] == (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX]; - fib_index1 = vec_elt (im->fib_index_by_sw_if_index, - vnet_buffer (p1)->sw_if_index[VLIB_RX]); + fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1); fib_index1 = (vnet_buffer (p1)->sw_if_index[VLIB_TX] == (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX]; @@ -1557,6 +1574,13 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) until support of IP frag reassembly is implemented */ proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol; proto1 = ip4_is_fragment (ip1) ? 
0xfe : ip1->protocol; + + if (head_of_feature_arc == 0) + { + error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL; + goto skip_checks; + } + is_udp0 = proto0 == IP_PROTOCOL_UDP; is_udp1 = proto1 == IP_PROTOCOL_UDP; is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP; @@ -1686,6 +1710,7 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) next0 = lm->local_next_by_ip_protocol[proto0]; next1 = lm->local_next_by_ip_protocol[proto1]; + skip_checks: next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0; next1 = @@ -1694,44 +1719,17 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) p0->error = error0 ? error_node->errors[error0] : 0; p1->error = error1 ? error_node->errors[error1] : 0; - enqueue_code = (next0 != next_index) + 2 * (next1 != next_index); - - if (PREDICT_FALSE (enqueue_code != 0)) + if (head_of_feature_arc) { - switch (enqueue_code) - { - case 1: - /* A B A */ - to_next[-2] = pi1; - to_next -= 1; - n_left_to_next += 1; - vlib_set_next_frame_buffer (vm, node, next0, pi0); - break; - - case 2: - /* A A B */ - to_next -= 1; - n_left_to_next += 1; - vlib_set_next_frame_buffer (vm, node, next1, pi1); - break; - - case 3: - /* A B B or A B C */ - to_next -= 2; - n_left_to_next += 2; - vlib_set_next_frame_buffer (vm, node, next0, pi0); - vlib_set_next_frame_buffer (vm, node, next1, pi1); - if (next0 == next1) - { - vlib_put_next_frame (vm, node, next_index, - n_left_to_next); - next_index = next1; - vlib_get_next_frame (vm, node, next_index, to_next, - n_left_to_next); - } - break; - } + if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL)) + vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0); + if (PREDICT_TRUE (error1 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL)) + vnet_feature_arc_start (arc_index, sw_if_index1, &next1, p1); } + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, + n_left_to_next, pi0, pi1, + next0, next1); } while (n_left_from > 0 && n_left_to_next 
> 0) @@ -1746,6 +1744,7 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0; load_balance_t *lb0; const dpo_id_t *dpo0; + u32 sw_if_index0; pi0 = to_next[0] = from[0]; from += 1; @@ -1753,14 +1752,18 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) to_next += 1; n_left_to_next -= 1; + next0 = IP_LOCAL_NEXT_DROP; + p0 = vlib_get_buffer (vm, pi0); ip0 = vlib_buffer_get_current (p0); vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data; - fib_index0 = vec_elt (im->fib_index_by_sw_if_index, - vnet_buffer (p0)->sw_if_index[VLIB_RX]); + sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; + + fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0); + fib_index0 = (vnet_buffer (p0)->sw_if_index[VLIB_TX] == (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX]; @@ -1775,6 +1778,13 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) /* Treat IP frag packets as "experimental" protocol for now until support of IP frag reassembly is implemented */ proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol; + + if (head_of_feature_arc == 0) + { + error0 = IP4_ERROR_UNKNOWN_PROTOCOL; + goto skip_check; + } + is_udp0 = proto0 == IP_PROTOCOL_UDP; is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP; @@ -1847,6 +1857,8 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) ip0->dst_address.as_u32 != 0xFFFFFFFF) ? IP4_ERROR_SRC_LOOKUP_MISS : error0); + skip_check: + next0 = lm->local_next_by_ip_protocol[proto0]; next0 = @@ -1854,18 +1866,15 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) p0->error = error0 ? 
error_node->errors[error0] : 0; - if (PREDICT_FALSE (next0 != next_index)) + if (head_of_feature_arc) { - n_left_to_next += 1; - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - - next_index = next0; - vlib_get_next_frame (vm, node, next_index, to_next, - n_left_to_next); - to_next[0] = pi0; - to_next += 1; - n_left_to_next -= 1; + if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL)) + vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0); } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, pi0, next0); + } vlib_put_next_frame (vm, node, next_index, n_left_to_next); @@ -1874,21 +1883,57 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) return frame->n_vectors; } +static uword +ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ ); +} + +/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_local_node) = { - .function = ip4_local,.name = "ip4-local",.vector_size = - sizeof (u32),.format_trace = - format_ip4_forward_next_trace,.n_next_nodes = - IP_LOCAL_N_NEXT,.next_nodes = + .function = ip4_local, + .name = "ip4-local", + .vector_size = sizeof (u32), + .format_trace = format_ip4_forward_next_trace, + .n_next_nodes = IP_LOCAL_N_NEXT, + .next_nodes = { - [IP_LOCAL_NEXT_DROP] = "error-drop", - [IP_LOCAL_NEXT_PUNT] = "error-punt", - [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup", - [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",} -,}; + [IP_LOCAL_NEXT_DROP] = "error-drop", + [IP_LOCAL_NEXT_PUNT] = "error-punt", + [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup", + [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",}, +}; +/* *INDENT-ON* */ VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local); +static uword +ip4_local_end_of_arc (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ ); +} + +/* *INDENT-OFF* */ 
+VLIB_REGISTER_NODE (ip4_local_end_of_arc_node,static) = { + .function = ip4_local_end_of_arc, + .name = "ip4-local-end-of-arc", + .vector_size = sizeof (u32), + + .format_trace = format_ip4_forward_next_trace, + .sibling_of = "ip4-local", +}; + +VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_end_of_arc_node, ip4_local_end_of_arc) + +VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = { + .arc_name = "ip4-local", + .node_name = "ip4-local-end-of-arc", + .runs_before = 0, /* not before any other features */ +}; +/* *INDENT-ON* */ + void ip4_register_protocol (u32 protocol, u32 node_index) { diff --git a/src/vnet/ip/ip4_packet.h b/src/vnet/ip/ip4_packet.h index 8da788b4..b2c1fcd4 100644 --- a/src/vnet/ip/ip4_packet.h +++ b/src/vnet/ip/ip4_packet.h @@ -41,7 +41,7 @@ #define included_ip4_packet_h #include /* for ip_csum_t */ -#include /* for tcp_header_t */ +#include /* for tcp_header_t */ #include /* for clib_net_to_host_u16 */ /* IP4 address which can be accessed either as 4 bytes @@ -342,10 +342,10 @@ ip4_tcp_reply_x1 (ip4_header_t * ip0, tcp_header_t * tcp0) ip0->src_address.data_u32 = dst0; ip0->dst_address.data_u32 = src0; - src0 = tcp0->ports.src; - dst0 = tcp0->ports.dst; - tcp0->ports.src = dst0; - tcp0->ports.dst = src0; + src0 = tcp0->src; + dst0 = tcp0->dst; + tcp0->src = dst0; + tcp0->dst = src0; } always_inline void @@ -363,14 +363,14 @@ ip4_tcp_reply_x2 (ip4_header_t * ip0, ip4_header_t * ip1, ip0->dst_address.data_u32 = src0; ip1->dst_address.data_u32 = src1; - src0 = tcp0->ports.src; - src1 = tcp1->ports.src; - dst0 = tcp0->ports.dst; - dst1 = tcp1->ports.dst; - tcp0->ports.src = dst0; - tcp1->ports.src = dst1; - tcp0->ports.dst = src0; - tcp1->ports.dst = src1; + src0 = tcp0->src; + src1 = tcp1->src; + dst0 = tcp0->dst; + dst1 = tcp1->dst; + tcp0->src = dst0; + tcp1->src = dst1; + tcp0->dst = src0; + tcp1->dst = src1; } #endif /* included_ip4_packet_h */ diff --git a/src/vnet/ip/ip6.h b/src/vnet/ip/ip6.h index 5456f0f2..2615fbfa 100644 --- a/src/vnet/ip/ip6.h 
+++ b/src/vnet/ip/ip6.h @@ -461,8 +461,8 @@ ip6_compute_flow_hash (const ip6_header_t * ip, b = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ? t1 : t2; b ^= (flow_hash_config & IP_FLOW_HASH_PROTO) ? ip->protocol : 0; - t1 = is_tcp_udp ? tcp->ports.src : 0; - t2 = is_tcp_udp ? tcp->ports.dst : 0; + t1 = is_tcp_udp ? tcp->src : 0; + t2 = is_tcp_udp ? tcp->dst : 0; t1 = (flow_hash_config & IP_FLOW_HASH_SRC_PORT) ? t1 : 0; t2 = (flow_hash_config & IP_FLOW_HASH_DST_PORT) ? t2 : 0; @@ -497,6 +497,46 @@ int ip6_hbh_register_option (u8 option, int ip6_hbh_unregister_option (u8 option); void ip6_hbh_set_next_override (uword next); +/** + * Push IPv6 header to buffer + * + * @param vm - vlib_main + * @param b - buffer to write the header to + * @param src - source IP + * @param dst - destination IP + * @param proto - payload proto + * + * @return - pointer to start of IP header + */ +always_inline void * +vlib_buffer_push_ip6 (vlib_main_t * vm, vlib_buffer_t * b, + ip6_address_t * src, ip6_address_t * dst, int proto) +{ + ip6_header_t *ip6h; + u16 payload_length; + + /* make some room */ + ip6h = vlib_buffer_push_uninit (b, sizeof (ip6_header_t)); + + ip6h->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 (0x6 << 28); + + /* calculate ip6 payload length */ + payload_length = vlib_buffer_length_in_chain (vm, b); + payload_length -= sizeof (*ip6h); + + ip6h->payload_length = clib_host_to_net_u16 (payload_length); + + ip6h->hop_limit = 0xff; + ip6h->protocol = proto; + clib_memcpy (ip6h->src_address.as_u8, src->as_u8, + sizeof (ip6h->src_address)); + clib_memcpy (ip6h->dst_address.as_u8, dst->as_u8, + sizeof (ip6h->src_address)); + + return ip6h; +} + #endif /* included_ip_ip6_h */ /* diff --git a/src/vnet/ip/ip6_packet.h b/src/vnet/ip/ip6_packet.h index 1e551c8b..4fd14b96 100644 --- a/src/vnet/ip/ip6_packet.h +++ b/src/vnet/ip/ip6_packet.h @@ -40,7 +40,7 @@ #ifndef included_ip6_packet_h #define included_ip6_packet_h -#include +#include #include typedef
union @@ -373,10 +373,10 @@ ip6_tcp_reply_x1 (ip6_header_t * ip0, tcp_header_t * tcp0) { u16 src0, dst0; - src0 = tcp0->ports.src; - dst0 = tcp0->ports.dst; - tcp0->ports.src = dst0; - tcp0->ports.dst = src0; + src0 = tcp0->src; + dst0 = tcp0->dst; + tcp0->src = dst0; + tcp0->dst = src0; } } @@ -400,14 +400,14 @@ ip6_tcp_reply_x2 (ip6_header_t * ip0, ip6_header_t * ip1, { u16 src0, dst0, src1, dst1; - src0 = tcp0->ports.src; - src1 = tcp1->ports.src; - dst0 = tcp0->ports.dst; - dst1 = tcp1->ports.dst; - tcp0->ports.src = dst0; - tcp1->ports.src = dst1; - tcp0->ports.dst = src0; - tcp1->ports.dst = src1; + src0 = tcp0->src; + src1 = tcp1->src; + dst0 = tcp0->dst; + dst1 = tcp1->dst; + tcp0->src = dst0; + tcp1->src = dst1; + tcp0->dst = src0; + tcp1->dst = src1; } } diff --git a/src/vnet/ip/punt.c b/src/vnet/ip/punt.c index 9c735128..48558401 100644 --- a/src/vnet/ip/punt.c +++ b/src/vnet/ip/punt.c @@ -23,7 +23,7 @@ */ #include #include -#include +#include #include #define foreach_punt_next \ diff --git a/src/vnet/ip/tcp_packet.h b/src/vnet/ip/tcp_packet.h deleted file mode 100644 index 93f73e01..00000000 --- a/src/vnet/ip/tcp_packet.h +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -/* - * ip4/tcp_packet.h: TCP packet format (see RFC 793) - * - * Copyright (c) 2008 Eliot Dresselhaus - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef included_tcp_packet_h -#define included_tcp_packet_h - -/* TCP flags bit 0 first. */ -#define foreach_tcp_flag \ - _ (FIN) \ - _ (SYN) \ - _ (RST) \ - _ (PSH) \ - _ (ACK) \ - _ (URG) \ - _ (ECE) \ - _ (CWR) - -enum -{ -#define _(f) TCP_FLAG_BIT_##f, - foreach_tcp_flag -#undef _ - TCP_N_FLAG_BITS, - -#define _(f) TCP_FLAG_##f = 1 << TCP_FLAG_BIT_##f, - foreach_tcp_flag -#undef _ -}; - -typedef struct -{ - /* Source and destination port. */ - union - { - union - { - struct - { - u16 src, dst; - }; - u32 src_and_dst; - } ports; - struct - { - u16 src_port, dst_port; - }; - }; - - /* Sequence and acknowledgment number. */ - u32 seq_number, ack_number; - - /* Size of TCP header in 32-bit units plus 4 reserved bits. 
*/ - u8 tcp_header_u32s_and_reserved; - - /* see foreach_tcp_flag for enumation of tcp flags. */ - u8 flags; - - /* Current window advertised by sender. - This is the number of bytes sender is willing to receive - right now. */ - u16 window; - - /* Checksum of TCP pseudo header and data. */ - u16 checksum; - - u16 urgent_pointer; -} tcp_header_t; - -always_inline int -tcp_header_bytes (tcp_header_t * t) -{ - return (t->tcp_header_u32s_and_reserved >> 4) * sizeof (u32); -} - -/* TCP options. */ -typedef enum tcp_option_type -{ - TCP_OPTION_END = 0, - TCP_OPTION_NOP = 1, - TCP_OPTION_MSS = 2, - TCP_OPTION_WINDOW_SCALE = 3, - TCP_OPTION_SACK_PERMITTED = 4, - TCP_OPTION_SACK_BLOCK = 5, - TCP_OPTION_TIME_STAMP = 8, -} tcp_option_type_t; - -/* All except NOP and END have 1 byte length field. */ -typedef struct -{ - tcp_option_type_t type:8; - - /* Length of this option in bytes. */ - u8 length; -} tcp_option_with_length_t; - -#endif /* included_tcp_packet_h */ - - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/ip/udp.h b/src/vnet/ip/udp.h deleted file mode 100644 index bad58b5d..00000000 --- a/src/vnet/ip/udp.h +++ /dev/null @@ -1,315 +0,0 @@ -/* - * ip/udp.h: udp protocol - * - * Copyright (c) 2013 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef included_udp_h -#define included_udp_h - -#include -#include -#include -#include -#include -#include -#include - -typedef enum -{ -#define udp_error(n,s) UDP_ERROR_##n, -#include -#undef udp_error - UDP_N_ERROR, -} udp_error_t; - -#define foreach_udp4_dst_port \ -_ (67, dhcp_to_server) \ -_ (68, dhcp_to_client) \ -_ (500, ikev2) \ -_ (3784, bfd4) \ -_ (3785, bfd_echo4) \ -_ (4341, lisp_gpe) \ -_ (4342, lisp_cp) \ -_ (4739, ipfix) \ -_ (4789, vxlan) \ -_ (4789, vxlan6) \ -_ (4790, vxlan_gpe) \ -_ (6633, vpath_3) - - -#define foreach_udp6_dst_port \ -_ (547, dhcpv6_to_server) \ -_ (546, dhcpv6_to_client) \ -_ (3784, bfd6) \ -_ (3785, bfd_echo6) \ -_ (4341, lisp_gpe6) \ -_ (4342, lisp_cp6) \ -_ (4790, vxlan6_gpe) \ -_ (6633, vpath6_3) - -typedef enum -{ -#define _(n,f) UDP_DST_PORT_##f = n, - foreach_udp4_dst_port foreach_udp6_dst_port -#undef _ -} udp_dst_port_t; - -typedef enum -{ -#define _(n,f) UDP6_DST_PORT_##f = n, - foreach_udp6_dst_port -#undef _ -} udp6_dst_port_t; - -typedef struct -{ - /* Name (a c string). */ - char *name; - - /* GRE protocol type in host byte order. */ - udp_dst_port_t dst_port; - - /* Node which handles this type. */ - u32 node_index; - - /* Next index for this type. */ - u32 next_index; -} udp_dst_port_info_t; - -typedef enum -{ - UDP_IP6 = 0, - UDP_IP4, /* the code is full of is_ip4... */ - N_UDP_AF, -} udp_af_t; - -typedef struct -{ - udp_dst_port_info_t *dst_port_infos[N_UDP_AF]; - - /* Hash tables mapping name/protocol to protocol info index. */ - uword *dst_port_info_by_name[N_UDP_AF]; - uword *dst_port_info_by_dst_port[N_UDP_AF]; - - /* convenience */ - vlib_main_t *vlib_main; -} udp_main_t; - -always_inline udp_dst_port_info_t * -udp_get_dst_port_info (udp_main_t * um, udp_dst_port_t dst_port, u8 is_ip4) -{ - uword *p = hash_get (um->dst_port_info_by_dst_port[is_ip4], dst_port); - return p ? 
vec_elt_at_index (um->dst_port_infos[is_ip4], p[0]) : 0; -} - -format_function_t format_udp_header; -format_function_t format_udp_rx_trace; - -unformat_function_t unformat_udp_header; - -void udp_register_dst_port (vlib_main_t * vm, - udp_dst_port_t dst_port, - u32 node_index, u8 is_ip4); - -void udp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add); - -always_inline void -ip_udp_fixup_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 is_ip4) -{ - u16 new_l0; - udp_header_t *udp0; - - if (is_ip4) - { - ip4_header_t *ip0; - ip_csum_t sum0; - u16 old_l0 = 0; - - ip0 = vlib_buffer_get_current (b0); - - /* fix the ing outer-IP checksum */ - sum0 = ip0->checksum; - /* old_l0 always 0, see the rewrite setup */ - new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); - - sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, - length /* changed member */ ); - ip0->checksum = ip_csum_fold (sum0); - ip0->length = new_l0; - - /* Fix UDP length */ - udp0 = (udp_header_t *) (ip0 + 1); - new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - - sizeof (*ip0)); - udp0->length = new_l0; - } - else - { - ip6_header_t *ip0; - int bogus0; - - ip0 = vlib_buffer_get_current (b0); - - new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - - sizeof (*ip0)); - ip0->payload_length = new_l0; - - /* Fix UDP length */ - udp0 = (udp_header_t *) (ip0 + 1); - udp0->length = new_l0; - - udp0->checksum = - ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip0, &bogus0); - ASSERT (bogus0 == 0); - - if (udp0->checksum == 0) - udp0->checksum = 0xffff; - } -} - -always_inline void -ip_udp_encap_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 * ec0, word ec_len, - u8 is_ip4) -{ - vlib_buffer_advance (b0, -ec_len); - - if (is_ip4) - { - ip4_header_t *ip0; - - ip0 = vlib_buffer_get_current (b0); - - /* Apply the encap string. 
*/ - clib_memcpy (ip0, ec0, ec_len); - ip_udp_fixup_one (vm, b0, 1); - } - else - { - ip6_header_t *ip0; - - ip0 = vlib_buffer_get_current (b0); - - /* Apply the encap string. */ - clib_memcpy (ip0, ec0, ec_len); - ip_udp_fixup_one (vm, b0, 0); - } -} - -always_inline void -ip_udp_encap_two (vlib_main_t * vm, vlib_buffer_t * b0, vlib_buffer_t * b1, - u8 * ec0, u8 * ec1, word ec_len, u8 is_v4) -{ - u16 new_l0, new_l1; - udp_header_t *udp0, *udp1; - - ASSERT (_vec_len (ec0) == _vec_len (ec1)); - - vlib_buffer_advance (b0, -ec_len); - vlib_buffer_advance (b1, -ec_len); - - if (is_v4) - { - ip4_header_t *ip0, *ip1; - ip_csum_t sum0, sum1; - u16 old_l0 = 0, old_l1 = 0; - - ip0 = vlib_buffer_get_current (b0); - ip1 = vlib_buffer_get_current (b1); - - /* Apply the encap string */ - clib_memcpy (ip0, ec0, ec_len); - clib_memcpy (ip1, ec1, ec_len); - - /* fix the ing outer-IP checksum */ - sum0 = ip0->checksum; - sum1 = ip1->checksum; - - /* old_l0 always 0, see the rewrite setup */ - new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); - new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1)); - - sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, - length /* changed member */ ); - sum1 = ip_csum_update (sum1, old_l1, new_l1, ip4_header_t, - length /* changed member */ ); - - ip0->checksum = ip_csum_fold (sum0); - ip1->checksum = ip_csum_fold (sum1); - - ip0->length = new_l0; - ip1->length = new_l1; - - /* Fix UDP length */ - udp0 = (udp_header_t *) (ip0 + 1); - udp1 = (udp_header_t *) (ip1 + 1); - - new_l0 = - clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - - sizeof (*ip0)); - new_l1 = - clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1) - - sizeof (*ip1)); - udp0->length = new_l0; - udp1->length = new_l1; - } - else - { - ip6_header_t *ip0, *ip1; - int bogus0, bogus1; - - ip0 = vlib_buffer_get_current (b0); - ip1 = vlib_buffer_get_current (b1); - - /* Apply the encap string. 
*/ - clib_memcpy (ip0, ec0, ec_len); - clib_memcpy (ip1, ec1, ec_len); - - new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - - sizeof (*ip0)); - new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1) - - sizeof (*ip1)); - ip0->payload_length = new_l0; - ip1->payload_length = new_l1; - - /* Fix UDP length */ - udp0 = (udp_header_t *) (ip0 + 1); - udp1 = (udp_header_t *) (ip1 + 1); - - udp0->length = new_l0; - udp1->length = new_l1; - - udp0->checksum = - ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip0, &bogus0); - udp1->checksum = - ip6_tcp_udp_icmp_compute_checksum (vm, b1, ip1, &bogus1); - ASSERT (bogus0 == 0); - ASSERT (bogus1 == 0); - - if (udp0->checksum == 0) - udp0->checksum = 0xffff; - if (udp1->checksum == 0) - udp1->checksum = 0xffff; - } -} - -#endif /* included_udp_h */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/ip/udp_error.def b/src/vnet/ip/udp_error.def deleted file mode 100644 index bfdae0ac..00000000 --- a/src/vnet/ip/udp_error.def +++ /dev/null @@ -1,21 +0,0 @@ -/* - * udp_error.def: udp errors - * - * Copyright (c) 2013-2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -udp_error (NONE, "no error") -udp_error (NO_LISTENER, "no listener for dst port") -udp_error (LENGTH_ERROR, "UDP packets with length errors") -udp_error (PUNT, "no listener punt") diff --git a/src/vnet/ip/udp_format.c b/src/vnet/ip/udp_format.c deleted file mode 100644 index abdf561e..00000000 --- a/src/vnet/ip/udp_format.c +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - * ip/udp_format.c: udp formatting - * - * Copyright (c) 2008 Eliot Dresselhaus - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include - -/* Format UDP header. */ -u8 * -format_udp_header (u8 * s, va_list * args) -{ - udp_header_t *udp = va_arg (*args, udp_header_t *); - u32 max_header_bytes = va_arg (*args, u32); - uword indent; - u32 header_bytes = sizeof (udp[0]); - - /* Nothing to do. */ - if (max_header_bytes < sizeof (udp[0])) - return format (s, "UDP header truncated"); - - indent = format_get_indent (s); - indent += 2; - - s = format (s, "UDP: %d -> %d", - clib_net_to_host_u16 (udp->src_port), - clib_net_to_host_u16 (udp->dst_port)); - - s = format (s, "\n%Ulength %d, checksum 0x%04x", - format_white_space, indent, - clib_net_to_host_u16 (udp->length), - clib_net_to_host_u16 (udp->checksum)); - - /* Recurse into next protocol layer. */ - if (max_header_bytes != 0 && header_bytes < max_header_bytes) - { - ip_main_t *im = &ip_main; - tcp_udp_port_info_t *pi; - - pi = ip_get_tcp_udp_port_info (im, udp->dst_port); - - if (pi && pi->format_header) - s = format (s, "\n%U%U", - format_white_space, indent - 2, pi->format_header, - /* next protocol header */ (udp + 1), - max_header_bytes - sizeof (udp[0])); - } - - return s; -} - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/ip/udp_init.c b/src/vnet/ip/udp_init.c deleted file mode 100644 index 1241ca4a..00000000 --- a/src/vnet/ip/udp_init.c +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - * ip/udp_init.c: udp initialization - * - * Copyright (c) 2008 Eliot Dresselhaus - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#include - -clib_error_t * -udp_init (vlib_main_t * vm) -{ - ip_main_t *im = &ip_main; - ip_protocol_info_t *pi; - clib_error_t *error; - - error = vlib_call_init_function (vm, ip_main_init); - - if (!error) - { - pi = ip_get_protocol_info (im, IP_PROTOCOL_UDP); - if (pi == 0) - return clib_error_return (0, "UDP protocol info AWOL"); - pi->format_header = format_udp_header; - pi->unformat_pg_edit = unformat_pg_udp_header; - } - - return 0; -} - -VLIB_INIT_FUNCTION (udp_init); - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/ip/udp_local.c b/src/vnet/ip/udp_local.c deleted file mode 100644 index 13ab6e4f..00000000 --- a/src/vnet/ip/udp_local.c +++ /dev/null @@ -1,645 +0,0 @@ -/* - * node.c: udp packet processing - * - * Copyright (c) 2013 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include - -udp_main_t udp_main; - -#define foreach_udp_input_next \ - _ (PUNT, "error-punt") \ - _ (DROP, "error-drop") \ - _ (ICMP4_ERROR, "ip4-icmp-error") \ - _ (ICMP6_ERROR, "ip6-icmp-error") - -typedef enum -{ -#define _(s,n) UDP_INPUT_NEXT_##s, - foreach_udp_input_next -#undef _ - UDP_INPUT_N_NEXT, -} udp_input_next_t; - -typedef struct -{ - u16 src_port; - u16 dst_port; - u8 bound; -} udp_rx_trace_t; - -u8 * -format_udp_rx_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - udp_rx_trace_t *t = va_arg (*args, udp_rx_trace_t *); - - s = format (s, "UDP: src-port %d dst-port %d%s", - clib_net_to_host_u16 (t->src_port), - clib_net_to_host_u16 (t->dst_port), - t->bound ? "" : " (no listener)"); - return s; -} - -typedef struct -{ - /* Sparse vector mapping udp dst_port in network byte order - to next index. */ - u16 *next_by_dst_port; - u8 punt_unknown; -} udp_input_runtime_t; - -vlib_node_registration_t udp4_input_node; -vlib_node_registration_t udp6_input_node; - -always_inline uword -udp46_input_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * from_frame, int is_ip4) -{ - udp_input_runtime_t *rt = is_ip4 ? 
- (void *) vlib_node_get_runtime_data (vm, udp4_input_node.index) - : (void *) vlib_node_get_runtime_data (vm, udp6_input_node.index); - __attribute__ ((unused)) u32 n_left_from, next_index, *from, *to_next; - word n_no_listener = 0; - u8 punt_unknown = rt->punt_unknown; - - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - - next_index = node->cached_next_index; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from >= 4 && n_left_to_next >= 2) - { - u32 bi0, bi1; - vlib_buffer_t *b0, *b1; - udp_header_t *h0 = 0, *h1 = 0; - u32 i0, i1, dst_port0, dst_port1; - u32 advance0, advance1; - u32 error0, next0, error1, next1; - - /* Prefetch next iteration. */ - { - vlib_buffer_t *p2, *p3; - - p2 = vlib_get_buffer (vm, from[2]); - p3 = vlib_get_buffer (vm, from[3]); - - vlib_prefetch_buffer_header (p2, LOAD); - vlib_prefetch_buffer_header (p3, LOAD); - - CLIB_PREFETCH (p2->data, sizeof (h0[0]), LOAD); - CLIB_PREFETCH (p3->data, sizeof (h1[0]), LOAD); - } - - bi0 = from[0]; - bi1 = from[1]; - to_next[0] = bi0; - to_next[1] = bi1; - from += 2; - to_next += 2; - n_left_to_next -= 2; - n_left_from -= 2; - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - - /* ip4/6_local hands us the ip header, not the udp header */ - if (is_ip4) - { - advance0 = sizeof (ip4_header_t); - advance1 = sizeof (ip4_header_t); - } - else - { - advance0 = sizeof (ip6_header_t); - advance1 = sizeof (ip6_header_t); - } - - if (PREDICT_FALSE (b0->current_length < advance0 + sizeof (*h0))) - { - error0 = UDP_ERROR_LENGTH_ERROR; - next0 = UDP_INPUT_NEXT_DROP; - } - else - { - vlib_buffer_advance (b0, advance0); - h0 = vlib_buffer_get_current (b0); - error0 = next0 = 0; - if (PREDICT_FALSE (clib_net_to_host_u16 (h0->length) > - vlib_buffer_length_in_chain (vm, b0))) - { - error0 = UDP_ERROR_LENGTH_ERROR; - next0 = UDP_INPUT_NEXT_DROP; - } - } - - if 
(PREDICT_FALSE (b1->current_length < advance1 + sizeof (*h1))) - { - error1 = UDP_ERROR_LENGTH_ERROR; - next1 = UDP_INPUT_NEXT_DROP; - } - else - { - vlib_buffer_advance (b1, advance1); - h1 = vlib_buffer_get_current (b1); - error1 = next1 = 0; - if (PREDICT_FALSE (clib_net_to_host_u16 (h1->length) > - vlib_buffer_length_in_chain (vm, b1))) - { - error1 = UDP_ERROR_LENGTH_ERROR; - next1 = UDP_INPUT_NEXT_DROP; - } - } - - /* Index sparse array with network byte order. */ - dst_port0 = (error0 == 0) ? h0->dst_port : 0; - dst_port1 = (error1 == 0) ? h1->dst_port : 0; - sparse_vec_index2 (rt->next_by_dst_port, dst_port0, dst_port1, - &i0, &i1); - next0 = (error0 == 0) ? vec_elt (rt->next_by_dst_port, i0) : next0; - next1 = (error1 == 0) ? vec_elt (rt->next_by_dst_port, i1) : next1; - - if (PREDICT_FALSE (i0 == SPARSE_VEC_INVALID_INDEX)) - { - // move the pointer back so icmp-error can find the - // ip packet header - vlib_buffer_advance (b0, -(word) advance0); - - if (PREDICT_FALSE (punt_unknown)) - { - b0->error = node->errors[UDP_ERROR_PUNT]; - next0 = UDP_INPUT_NEXT_PUNT; - } - else if (is_ip4) - { - icmp4_error_set_vnet_buffer (b0, - ICMP4_destination_unreachable, - ICMP4_destination_unreachable_port_unreachable, - 0); - next0 = UDP_INPUT_NEXT_ICMP4_ERROR; - n_no_listener++; - } - else - { - icmp6_error_set_vnet_buffer (b0, - ICMP6_destination_unreachable, - ICMP6_destination_unreachable_port_unreachable, - 0); - next0 = UDP_INPUT_NEXT_ICMP6_ERROR; - n_no_listener++; - } - } - else - { - b0->error = node->errors[UDP_ERROR_NONE]; - // advance to the payload - vlib_buffer_advance (b0, sizeof (*h0)); - } - - if (PREDICT_FALSE (i1 == SPARSE_VEC_INVALID_INDEX)) - { - // move the pointer back so icmp-error can find the - // ip packet header - vlib_buffer_advance (b1, -(word) advance1); - - if (PREDICT_FALSE (punt_unknown)) - { - b1->error = node->errors[UDP_ERROR_PUNT]; - next1 = UDP_INPUT_NEXT_PUNT; - } - else if (is_ip4) - { - icmp4_error_set_vnet_buffer (b1, - 
ICMP4_destination_unreachable, - ICMP4_destination_unreachable_port_unreachable, - 0); - next1 = UDP_INPUT_NEXT_ICMP4_ERROR; - n_no_listener++; - } - else - { - icmp6_error_set_vnet_buffer (b1, - ICMP6_destination_unreachable, - ICMP6_destination_unreachable_port_unreachable, - 0); - next1 = UDP_INPUT_NEXT_ICMP6_ERROR; - n_no_listener++; - } - } - else - { - b1->error = node->errors[UDP_ERROR_NONE]; - // advance to the payload - vlib_buffer_advance (b1, sizeof (*h1)); - } - - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - udp_rx_trace_t *tr = vlib_add_trace (vm, node, - b0, sizeof (*tr)); - if (b0->error != node->errors[UDP_ERROR_LENGTH_ERROR]) - { - tr->src_port = h0 ? h0->src_port : 0; - tr->dst_port = h0 ? h0->dst_port : 0; - tr->bound = (next0 != UDP_INPUT_NEXT_ICMP4_ERROR && - next0 != UDP_INPUT_NEXT_ICMP6_ERROR); - } - } - if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) - { - udp_rx_trace_t *tr = vlib_add_trace (vm, node, - b1, sizeof (*tr)); - if (b1->error != node->errors[UDP_ERROR_LENGTH_ERROR]) - { - tr->src_port = h1 ? h1->src_port : 0; - tr->dst_port = h1 ? 
h1->dst_port : 0; - tr->bound = (next1 != UDP_INPUT_NEXT_ICMP4_ERROR && - next1 != UDP_INPUT_NEXT_ICMP6_ERROR); - } - } - - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, - to_next, n_left_to_next, - bi0, bi1, next0, next1); - } - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0; - vlib_buffer_t *b0; - udp_header_t *h0 = 0; - u32 i0, next0; - u32 advance0; - - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - - /* ip4/6_local hands us the ip header, not the udp header */ - if (is_ip4) - advance0 = sizeof (ip4_header_t); - else - advance0 = sizeof (ip6_header_t); - - if (PREDICT_FALSE (b0->current_length < advance0 + sizeof (*h0))) - { - b0->error = node->errors[UDP_ERROR_LENGTH_ERROR]; - next0 = UDP_INPUT_NEXT_DROP; - goto trace_x1; - } - - vlib_buffer_advance (b0, advance0); - - h0 = vlib_buffer_get_current (b0); - - if (PREDICT_TRUE (clib_net_to_host_u16 (h0->length) <= - vlib_buffer_length_in_chain (vm, b0))) - { - i0 = sparse_vec_index (rt->next_by_dst_port, h0->dst_port); - next0 = vec_elt (rt->next_by_dst_port, i0); - - if (PREDICT_FALSE (i0 == SPARSE_VEC_INVALID_INDEX)) - { - // move the pointer back so icmp-error can find the - // ip packet header - vlib_buffer_advance (b0, -(word) advance0); - - if (PREDICT_FALSE (punt_unknown)) - { - b0->error = node->errors[UDP_ERROR_PUNT]; - next0 = UDP_INPUT_NEXT_PUNT; - } - else if (is_ip4) - { - icmp4_error_set_vnet_buffer (b0, - ICMP4_destination_unreachable, - ICMP4_destination_unreachable_port_unreachable, - 0); - next0 = UDP_INPUT_NEXT_ICMP4_ERROR; - n_no_listener++; - } - else - { - icmp6_error_set_vnet_buffer (b0, - ICMP6_destination_unreachable, - ICMP6_destination_unreachable_port_unreachable, - 0); - next0 = UDP_INPUT_NEXT_ICMP6_ERROR; - n_no_listener++; - } - } - else - { - b0->error = node->errors[UDP_ERROR_NONE]; - // advance to the payload - vlib_buffer_advance (b0, sizeof (*h0)); - } - } - 
else - { - b0->error = node->errors[UDP_ERROR_LENGTH_ERROR]; - next0 = UDP_INPUT_NEXT_DROP; - } - - trace_x1: - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - udp_rx_trace_t *tr = vlib_add_trace (vm, node, - b0, sizeof (*tr)); - if (b0->error != node->errors[UDP_ERROR_LENGTH_ERROR]) - { - tr->src_port = h0->src_port; - tr->dst_port = h0->dst_port; - tr->bound = (next0 != UDP_INPUT_NEXT_ICMP4_ERROR && - next0 != UDP_INPUT_NEXT_ICMP6_ERROR); - } - } - - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - } - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - vlib_error_count (vm, node->node_index, UDP_ERROR_NO_LISTENER, - n_no_listener); - return from_frame->n_vectors; -} - -static char *udp_error_strings[] = { -#define udp_error(n,s) s, -#include "udp_error.def" -#undef udp_error -}; - -static uword -udp4_input (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * from_frame) -{ - return udp46_input_inline (vm, node, from_frame, 1 /* is_ip4 */ ); -} - -static uword -udp6_input (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * from_frame) -{ - return udp46_input_inline (vm, node, from_frame, 0 /* is_ip4 */ ); -} - - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (udp4_input_node) = { - .function = udp4_input, - .name = "ip4-udp-lookup", - /* Takes a vector of packets. 
*/ - .vector_size = sizeof (u32), - - .runtime_data_bytes = sizeof (udp_input_runtime_t), - - .n_errors = UDP_N_ERROR, - .error_strings = udp_error_strings, - - .n_next_nodes = UDP_INPUT_N_NEXT, - .next_nodes = { -#define _(s,n) [UDP_INPUT_NEXT_##s] = n, - foreach_udp_input_next -#undef _ - }, - - .format_buffer = format_udp_header, - .format_trace = format_udp_rx_trace, - .unformat_buffer = unformat_udp_header, -}; -/* *INDENT-ON* */ - -VLIB_NODE_FUNCTION_MULTIARCH (udp4_input_node, udp4_input); - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (udp6_input_node) = { - .function = udp6_input, - .name = "ip6-udp-lookup", - /* Takes a vector of packets. */ - .vector_size = sizeof (u32), - - .runtime_data_bytes = sizeof (udp_input_runtime_t), - - .n_errors = UDP_N_ERROR, - .error_strings = udp_error_strings, - - .n_next_nodes = UDP_INPUT_N_NEXT, - .next_nodes = { -#define _(s,n) [UDP_INPUT_NEXT_##s] = n, - foreach_udp_input_next -#undef _ - }, - - .format_buffer = format_udp_header, - .format_trace = format_udp_rx_trace, - .unformat_buffer = unformat_udp_header, -}; -/* *INDENT-ON* */ - -VLIB_NODE_FUNCTION_MULTIARCH (udp6_input_node, udp6_input); - -static void -add_dst_port (udp_main_t * um, - udp_dst_port_t dst_port, char *dst_port_name, u8 is_ip4) -{ - udp_dst_port_info_t *pi; - u32 i; - - vec_add2 (um->dst_port_infos[is_ip4], pi, 1); - i = pi - um->dst_port_infos[is_ip4]; - - pi->name = dst_port_name; - pi->dst_port = dst_port; - pi->next_index = pi->node_index = ~0; - - hash_set (um->dst_port_info_by_dst_port[is_ip4], dst_port, i); - - if (pi->name) - hash_set_mem (um->dst_port_info_by_name[is_ip4], pi->name, i); -} - -void -udp_register_dst_port (vlib_main_t * vm, - udp_dst_port_t dst_port, u32 node_index, u8 is_ip4) -{ - udp_main_t *um = &udp_main; - udp_dst_port_info_t *pi; - udp_input_runtime_t *rt; - u16 *n; - - { - clib_error_t *error = vlib_call_init_function (vm, udp_local_init); - if (error) - clib_error_report (error); - } - - pi = udp_get_dst_port_info (um, 
dst_port, is_ip4); - if (!pi) - { - add_dst_port (um, dst_port, 0, is_ip4); - pi = udp_get_dst_port_info (um, dst_port, is_ip4); - ASSERT (pi); - } - - pi->node_index = node_index; - pi->next_index = vlib_node_add_next (vm, - is_ip4 ? udp4_input_node.index - : udp6_input_node.index, node_index); - - /* Setup udp protocol -> next index sparse vector mapping. */ - rt = vlib_node_get_runtime_data - (vm, is_ip4 ? udp4_input_node.index : udp6_input_node.index); - n = sparse_vec_validate (rt->next_by_dst_port, - clib_host_to_net_u16 (dst_port)); - n[0] = pi->next_index; -} - -void -udp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add) -{ - udp_input_runtime_t *rt; - - { - clib_error_t *error = vlib_call_init_function (vm, udp_local_init); - if (error) - clib_error_report (error); - } - - rt = vlib_node_get_runtime_data - (vm, is_ip4 ? udp4_input_node.index : udp6_input_node.index); - - rt->punt_unknown = is_add; -} - -/* Parse a UDP header. */ -uword -unformat_udp_header (unformat_input_t * input, va_list * args) -{ - u8 **result = va_arg (*args, u8 **); - udp_header_t *udp; - __attribute__ ((unused)) int old_length; - u16 src_port, dst_port; - - /* Allocate space for IP header. 
*/ - { - void *p; - - old_length = vec_len (*result); - vec_add2 (*result, p, sizeof (ip4_header_t)); - udp = p; - } - - memset (udp, 0, sizeof (udp[0])); - if (unformat (input, "src-port %d dst-port %d", &src_port, &dst_port)) - { - udp->src_port = clib_host_to_net_u16 (src_port); - udp->dst_port = clib_host_to_net_u16 (dst_port); - return 1; - } - return 0; -} - -static void -udp_setup_node (vlib_main_t * vm, u32 node_index) -{ - vlib_node_t *n = vlib_get_node (vm, node_index); - pg_node_t *pn = pg_get_node (node_index); - - n->format_buffer = format_udp_header; - n->unformat_buffer = unformat_udp_header; - pn->unformat_edit = unformat_pg_udp_header; -} - -clib_error_t * -udp_local_init (vlib_main_t * vm) -{ - udp_input_runtime_t *rt; - udp_main_t *um = &udp_main; - int i; - - { - clib_error_t *error; - error = vlib_call_init_function (vm, udp_init); - if (error) - clib_error_report (error); - } - - - for (i = 0; i < 2; i++) - { - um->dst_port_info_by_name[i] = hash_create_string (0, sizeof (uword)); - um->dst_port_info_by_dst_port[i] = hash_create (0, sizeof (uword)); - } - - udp_setup_node (vm, udp4_input_node.index); - udp_setup_node (vm, udp6_input_node.index); - - rt = vlib_node_get_runtime_data (vm, udp4_input_node.index); - - rt->next_by_dst_port = sparse_vec_new - ( /* elt bytes */ sizeof (rt->next_by_dst_port[0]), - /* bits in index */ BITS (((udp_header_t *) 0)->dst_port)); - - rt->punt_unknown = 0; - -#define _(n,s) add_dst_port (um, UDP_DST_PORT_##s, #s, 1 /* is_ip4 */); - foreach_udp4_dst_port -#undef _ - rt = vlib_node_get_runtime_data (vm, udp6_input_node.index); - - rt->next_by_dst_port = sparse_vec_new - ( /* elt bytes */ sizeof (rt->next_by_dst_port[0]), - /* bits in index */ BITS (((udp_header_t *) 0)->dst_port)); - - rt->punt_unknown = 0; - -#define _(n,s) add_dst_port (um, UDP_DST_PORT_##s, #s, 0 /* is_ip4 */); - foreach_udp6_dst_port -#undef _ - ip4_register_protocol (IP_PROTOCOL_UDP, udp4_input_node.index); - /* Note: ip6 differs from ip4, 
UDP is hotwired to ip6-udp-lookup */ - return 0; -} - -VLIB_INIT_FUNCTION (udp_local_init); - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/ip/udp_packet.h b/src/vnet/ip/udp_packet.h deleted file mode 100644 index beea3059..00000000 --- a/src/vnet/ip/udp_packet.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - * ip4/udp_packet.h: UDP packet format - * - * Copyright (c) 2008 Eliot Dresselhaus - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef included_udp_packet_h -#define included_udp_packet_h - -typedef struct -{ - /* Source and destination port. */ - u16 src_port, dst_port; - - /* Length of UDP header plus payload. */ - u16 length; - - /* Checksum of UDP pseudo-header and data or - zero if checksum is disabled. */ - u16 checksum; -} udp_header_t; - -#endif /* included_udp_packet_h */ - - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/ip/udp_pg.c b/src/vnet/ip/udp_pg.c deleted file mode 100644 index c9d8d38c..00000000 --- a/src/vnet/ip/udp_pg.c +++ /dev/null @@ -1,237 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -/* - * ip/udp_pg: UDP packet-generator interface - * - * Copyright (c) 2008 Eliot Dresselhaus - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#include -#include /* for unformat_udp_udp_port */ - -#define UDP_PG_EDIT_LENGTH (1 << 0) -#define UDP_PG_EDIT_CHECKSUM (1 << 1) - -always_inline void -udp_pg_edit_function_inline (pg_main_t * pg, - pg_stream_t * s, - pg_edit_group_t * g, - u32 * packets, u32 n_packets, u32 flags) -{ - vlib_main_t *vm = vlib_get_main (); - u32 ip_offset, udp_offset; - - udp_offset = g->start_byte_offset; - ip_offset = (g - 1)->start_byte_offset; - - while (n_packets >= 1) - { - vlib_buffer_t *p0; - ip4_header_t *ip0; - udp_header_t *udp0; - u32 udp_len0; - - p0 = vlib_get_buffer (vm, packets[0]); - n_packets -= 1; - packets += 1; - - ip0 = (void *) (p0->data + ip_offset); - udp0 = (void *) (p0->data + udp_offset); - udp_len0 = clib_net_to_host_u16 (ip0->length) - sizeof (ip0[0]); - - if (flags & UDP_PG_EDIT_LENGTH) - udp0->length = - clib_net_to_host_u16 (vlib_buffer_length_in_chain (vm, p0) - - ip_offset); - - /* Initialize checksum with header. */ - if (flags & UDP_PG_EDIT_CHECKSUM) - { - ip_csum_t sum0; - - sum0 = clib_mem_unaligned (&ip0->src_address, u64); - - sum0 = ip_csum_with_carry - (sum0, clib_host_to_net_u32 (udp_len0 + (ip0->protocol << 16))); - - /* Invalidate possibly old checksum. */ - udp0->checksum = 0; - - sum0 = - ip_incremental_checksum_buffer (vm, p0, udp_offset, udp_len0, - sum0); - - sum0 = ~ip_csum_fold (sum0); - - /* Zero checksum means checksumming disabled. */ - sum0 = sum0 != 0 ? 
sum0 : 0xffff; - - udp0->checksum = sum0; - } - } -} - -static void -udp_pg_edit_function (pg_main_t * pg, - pg_stream_t * s, - pg_edit_group_t * g, u32 * packets, u32 n_packets) -{ - switch (g->edit_function_opaque) - { - case UDP_PG_EDIT_LENGTH: - udp_pg_edit_function_inline (pg, s, g, packets, n_packets, - UDP_PG_EDIT_LENGTH); - break; - - case UDP_PG_EDIT_CHECKSUM: - udp_pg_edit_function_inline (pg, s, g, packets, n_packets, - UDP_PG_EDIT_CHECKSUM); - break; - - case UDP_PG_EDIT_CHECKSUM | UDP_PG_EDIT_LENGTH: - udp_pg_edit_function_inline (pg, s, g, packets, n_packets, - UDP_PG_EDIT_CHECKSUM | UDP_PG_EDIT_LENGTH); - break; - - default: - ASSERT (0); - break; - } -} - -typedef struct -{ - pg_edit_t src_port, dst_port; - pg_edit_t length; - pg_edit_t checksum; -} pg_udp_header_t; - -static inline void -pg_udp_header_init (pg_udp_header_t * p) -{ - /* Initialize fields that are not bit fields in the IP header. */ -#define _(f) pg_edit_init (&p->f, udp_header_t, f); - _(src_port); - _(dst_port); - _(length); - _(checksum); -#undef _ -} - -uword -unformat_pg_udp_header (unformat_input_t * input, va_list * args) -{ - pg_stream_t *s = va_arg (*args, pg_stream_t *); - pg_udp_header_t *p; - u32 group_index; - - p = pg_create_edit_group (s, sizeof (p[0]), sizeof (udp_header_t), - &group_index); - pg_udp_header_init (p); - - /* Defaults. */ - p->checksum.type = PG_EDIT_UNSPECIFIED; - p->length.type = PG_EDIT_UNSPECIFIED; - - if (!unformat (input, "UDP: %U -> %U", - unformat_pg_edit, - unformat_tcp_udp_port, &p->src_port, - unformat_pg_edit, unformat_tcp_udp_port, &p->dst_port)) - goto error; - - /* Parse options. */ - while (1) - { - if (unformat (input, "length %U", - unformat_pg_edit, unformat_pg_number, &p->length)) - ; - - else if (unformat (input, "checksum %U", - unformat_pg_edit, unformat_pg_number, &p->checksum)) - ; - - /* Can't parse input: try next protocol level. 
*/ - else - break; - } - - { - ip_main_t *im = &ip_main; - u16 dst_port; - tcp_udp_port_info_t *pi; - - pi = 0; - if (p->dst_port.type == PG_EDIT_FIXED) - { - dst_port = pg_edit_get_value (&p->dst_port, PG_EDIT_LO); - pi = ip_get_tcp_udp_port_info (im, dst_port); - } - - if (pi && pi->unformat_pg_edit - && unformat_user (input, pi->unformat_pg_edit, s)) - ; - - else if (!unformat_user (input, unformat_pg_payload, s)) - goto error; - - p = pg_get_edit_group (s, group_index); - if (p->checksum.type == PG_EDIT_UNSPECIFIED - || p->length.type == PG_EDIT_UNSPECIFIED) - { - pg_edit_group_t *g = pg_stream_get_group (s, group_index); - g->edit_function = udp_pg_edit_function; - g->edit_function_opaque = 0; - if (p->checksum.type == PG_EDIT_UNSPECIFIED) - g->edit_function_opaque |= UDP_PG_EDIT_CHECKSUM; - if (p->length.type == PG_EDIT_UNSPECIFIED) - g->edit_function_opaque |= UDP_PG_EDIT_LENGTH; - } - - return 1; - } - -error: - /* Free up any edits we may have added. */ - pg_free_edit_group (s); - return 0; -} - - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/ipsec/ikev2.c b/src/vnet/ipsec/ikev2.c index 09209334..2c1074d8 100644 --- a/src/vnet/ipsec/ikev2.c +++ b/src/vnet/ipsec/ikev2.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/vnet/ipsec/ikev2_cli.c b/src/vnet/ipsec/ikev2_cli.c index 5c88d8d4..05ed4e60 100644 --- a/src/vnet/ipsec/ikev2_cli.c +++ b/src/vnet/ipsec/ikev2_cli.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/vnet/ipsec/ikev2_crypto.c b/src/vnet/ipsec/ikev2_crypto.c index c201d3eb..ca56158f 100644 --- a/src/vnet/ipsec/ikev2_crypto.c +++ b/src/vnet/ipsec/ikev2_crypto.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/vnet/lisp-cp/packets.c b/src/vnet/lisp-cp/packets.c index 3a4f421b..f24024f1 
100644 --- a/src/vnet/lisp-cp/packets.c +++ b/src/vnet/lisp-cp/packets.c @@ -15,7 +15,7 @@ #include #include -#include +#include /* Returns IP ID for the packet */ /* static u16 ip_id = 0; @@ -141,61 +141,6 @@ pkt_push_udp (vlib_main_t * vm, vlib_buffer_t * b, u16 sp, u16 dp) return uh; } -void * -pkt_push_ipv4 (vlib_main_t * vm, vlib_buffer_t * b, ip4_address_t * src, - ip4_address_t * dst, int proto) -{ - ip4_header_t *ih; - - /* make some room */ - ih = vlib_buffer_push_uninit (b, sizeof (ip4_header_t)); - - ih->ip_version_and_header_length = 0x45; - ih->tos = 0; - ih->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b)); - - /* iph->fragment_id = clib_host_to_net_u16(get_IP_ID ()); */ - - /* TODO: decide if we allow fragments in case of control */ - ih->flags_and_fragment_offset = clib_host_to_net_u16 (IP_DF); - ih->ttl = 255; - ih->protocol = proto; - ih->src_address.as_u32 = src->as_u32; - ih->dst_address.as_u32 = dst->as_u32; - - ih->checksum = ip4_header_checksum (ih); - return ih; -} - -void * -pkt_push_ipv6 (vlib_main_t * vm, vlib_buffer_t * b, ip6_address_t * src, - ip6_address_t * dst, int proto) -{ - ip6_header_t *ip6h; - u16 payload_length; - - /* make some room */ - ip6h = vlib_buffer_push_uninit (b, sizeof (ip6_header_t)); - - ip6h->ip_version_traffic_class_and_flow_label = - clib_host_to_net_u32 (0x6 << 28); - - /* calculate ip6 payload length */ - payload_length = vlib_buffer_length_in_chain (vm, b); - payload_length -= sizeof (*ip6h); - - ip6h->payload_length = clib_host_to_net_u16 (payload_length); - - ip6h->hop_limit = 0xff; - ip6h->protocol = proto; - clib_memcpy (ip6h->src_address.as_u8, src->as_u8, - sizeof (ip6h->src_address)); - clib_memcpy (ip6h->dst_address.as_u8, dst->as_u8, - sizeof (ip6h->src_address)); - - return ip6h; -} - void * pkt_push_ip (vlib_main_t * vm, vlib_buffer_t * b, ip_address_t * src, ip_address_t * dst, u32 proto) @@ -210,12 +155,12 @@ pkt_push_ip (vlib_main_t * vm, vlib_buffer_t * b, ip_address_t * 
src, switch (ip_addr_version (src)) { case IP4: - return pkt_push_ipv4 (vm, b, &ip_addr_v4 (src), &ip_addr_v4 (dst), - proto); + return vlib_buffer_push_ip4 (vm, b, &ip_addr_v4 (src), + &ip_addr_v4 (dst), proto); break; case IP6: - return pkt_push_ipv6 (vm, b, &ip_addr_v6 (src), &ip_addr_v6 (dst), - proto); + return vlib_buffer_push_ip6 (vm, b, &ip_addr_v6 (src), + &ip_addr_v6 (dst), proto); break; } diff --git a/src/vnet/lisp-cp/packets.h b/src/vnet/lisp-cp/packets.h index 212a1d78..f6da3bf4 100644 --- a/src/vnet/lisp-cp/packets.h +++ b/src/vnet/lisp-cp/packets.h @@ -26,51 +26,6 @@ void *pkt_push_udp_and_ip (vlib_main_t * vm, vlib_buffer_t * b, u16 sp, void *pkt_push_ecm_hdr (vlib_buffer_t * b); -always_inline u8 * -vlib_buffer_get_tail (vlib_buffer_t * b) -{ - return b->data + b->current_data + b->current_length; -} - -always_inline void * -vlib_buffer_put_uninit (vlib_buffer_t * b, u8 size) -{ - /* XXX should make sure there's enough space! */ - void *p = vlib_buffer_get_tail (b); - b->current_length += size; - return p; -} - -always_inline void * -vlib_buffer_push_uninit (vlib_buffer_t * b, u8 size) -{ - /* XXX should make sure there's enough space! */ - ASSERT (b->current_data >= size); - b->current_data -= size; - b->current_length += size; - - return vlib_buffer_get_current (b); -} - -always_inline void * -vlib_buffer_make_headroom (vlib_buffer_t * b, u8 size) -{ - /* XXX should make sure there's enough space! 
*/ - b->current_data += size; - return vlib_buffer_get_current (b); -} - -always_inline void * -vlib_buffer_pull (vlib_buffer_t * b, u8 size) -{ - if (b->current_length < size) - return 0; - - void *data = vlib_buffer_get_current (b); - vlib_buffer_advance (b, size); - return data; -} - /* *INDENT-ON* */ /* diff --git a/src/vnet/lisp-gpe/interface.c b/src/vnet/lisp-gpe/interface.c index 13359277..292c7e6a 100644 --- a/src/vnet/lisp-gpe/interface.c +++ b/src/vnet/lisp-gpe/interface.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/vnet/lisp-gpe/lisp_gpe.h b/src/vnet/lisp-gpe/lisp_gpe.h index c898a7da..b5a50ec6 100644 --- a/src/vnet/lisp-gpe/lisp_gpe.h +++ b/src/vnet/lisp-gpe/lisp_gpe.h @@ -27,10 +27,12 @@ #include #include #include -#include +#include #include #include #include +#include +#include /** IP4-UDP-LISP encap header */ /* *INDENT-OFF* */ diff --git a/src/vnet/lisp-gpe/lisp_gpe_adjacency.c b/src/vnet/lisp-gpe/lisp_gpe_adjacency.c index 65006b81..dbcf7134 100644 --- a/src/vnet/lisp-gpe/lisp_gpe_adjacency.c +++ b/src/vnet/lisp-gpe/lisp_gpe_adjacency.c @@ -25,6 +25,8 @@ #include #include #include +#include +#include /** * Memory pool of all adjacencies diff --git a/src/vnet/session/application.c b/src/vnet/session/application.c new file mode 100644 index 00000000..a561e7d1 --- /dev/null +++ b/src/vnet/session/application.c @@ -0,0 +1,343 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +/* + * Pool from which we allocate all applications + */ +static application_t *app_pool; + +/* + * Hash table of apps by api client index + */ +static uword *app_by_api_client_index; + +int +application_api_queue_is_full (application_t * app) +{ + unix_shared_memory_queue_t *q; + + /* builtin servers are always OK */ + if (app->api_client_index == ~0) + return 0; + + q = vl_api_client_index_to_input_queue (app->api_client_index); + if (!q) + return 1; + + if (q->cursize == q->maxsize) + return 1; + return 0; +} + +static void +application_table_add (application_t * app) +{ + hash_set (app_by_api_client_index, app->api_client_index, app->index); +} + +static void +application_table_del (application_t * app) +{ + hash_unset (app_by_api_client_index, app->api_client_index); +} + +application_t * +application_lookup (u32 api_client_index) +{ + uword *p; + p = hash_get (app_by_api_client_index, api_client_index); + if (p) + return application_get (p[0]); + + return 0; +} + +void +application_del (application_t * app) +{ + session_manager_main_t *smm = vnet_get_session_manager_main (); + api_main_t *am = &api_main; + void *oldheap; + session_manager_t *sm; + + if (app->mode == APP_SERVER) + { + sm = session_manager_get (app->session_manager_index); + session_manager_del (smm, sm); + } + + /* Free the event fifo in the /vpe-api shared-memory segment */ + oldheap = svm_push_data_heap (am->vlib_rp); + if (app->event_queue) + unix_shared_memory_queue_free (app->event_queue); + svm_pop_heap (oldheap); + + application_table_del (app); + + pool_put (app_pool, app); +} + +application_t * +application_new (application_type_t type, session_type_t sst, + u32 api_client_index, u32 flags, session_cb_vft_t * cb_fns) +{ + session_manager_main_t *smm = vnet_get_session_manager_main (); + api_main_t *am = &api_main; + application_t *app; + void 
*oldheap; + session_manager_t *sm; + + pool_get (app_pool, app); + memset (app, 0, sizeof (*app)); + + /* Allocate event fifo in the /vpe-api shared-memory segment */ + oldheap = svm_push_data_heap (am->vlib_rp); + + /* Allocate server event queue */ + app->event_queue = + unix_shared_memory_queue_init (128 /* nels $$$$ config */ , + sizeof (session_fifo_event_t), + 0 /* consumer pid */ , + 0 + /* (do not) signal when queue non-empty */ + ); + + svm_pop_heap (oldheap); + + /* If a server, allocate session manager */ + if (type == APP_SERVER) + { + pool_get (smm->session_managers, sm); + memset (sm, 0, sizeof (*sm)); + + app->session_manager_index = sm - smm->session_managers; + } + else if (type == APP_CLIENT) + { + /* Allocate connect session manager if needed */ + if (smm->connect_manager_index[sst] == INVALID_INDEX) + connects_session_manager_init (smm, sst); + app->session_manager_index = smm->connect_manager_index[sst]; + } + + app->mode = type; + app->index = application_get_index (app); + app->session_type = sst; + app->api_client_index = api_client_index; + app->flags = flags; + app->cb_fns = *cb_fns; + + /* Add app to lookup by api_client_index table */ + application_table_add (app); + + return app; +} + +application_t * +application_get (u32 index) +{ + return pool_elt_at_index (app_pool, index); +} + +u32 +application_get_index (application_t * app) +{ + return app - app_pool; +} + +int +application_server_init (application_t * server, u32 segment_size, + u32 add_segment_size, u32 rx_fifo_size, + u32 tx_fifo_size, u8 ** segment_name) +{ + session_manager_main_t *smm = vnet_get_session_manager_main (); + session_manager_t *sm; + int rv; + + sm = session_manager_get (server->session_manager_index); + + /* Add first segment */ + if ((rv = session_manager_add_first_segment (smm, sm, segment_size, + segment_name))) + { + return rv; + } + + /* Setup session manager */ + sm->add_segment_size = add_segment_size; + sm->rx_fifo_size = rx_fifo_size; + 
sm->tx_fifo_size = tx_fifo_size; + sm->add_segment = sm->add_segment_size != 0; + return 0; +} + +u8 * +format_application_server (u8 * s, va_list * args) +{ + application_t *srv = va_arg (*args, application_t *); + int verbose = va_arg (*args, int); + vl_api_registration_t *regp; + stream_session_t *listener; + u8 *server_name, *str, *seg_name; + u32 segment_size; + + if (srv == 0) + { + if (verbose) + s = format (s, "%-40s%-20s%-15s%-15s%-10s", "Connection", "Server", + "Segment", "API Client", "Cookie"); + else + s = format (s, "%-40s%-20s", "Connection", "Server"); + + return s; + } + + regp = vl_api_client_index_to_registration (srv->api_client_index); + if (!regp) + server_name = format (0, "%s%c", regp->name, 0); + else + server_name = regp->name; + + listener = stream_session_listener_get (srv->session_type, + srv->session_index); + str = format (0, "%U", format_stream_session, listener, verbose); + + session_manager_get_segment_info (listener->server_segment_index, &seg_name, + &segment_size); + if (verbose) + { + s = format (s, "%-40s%-20s%-20s%-10d%-10d", str, server_name, + seg_name, srv->api_client_index, srv->accept_cookie); + } + else + s = format (s, "%-40s%-20s", str, server_name); + return s; +} + +u8 * +format_application_client (u8 * s, va_list * args) +{ + application_t *client = va_arg (*args, application_t *); + int verbose = va_arg (*args, int); + stream_session_t *session; + u8 *str, *seg_name; + u32 segment_size; + + if (client == 0) + { + if (verbose) + s = + format (s, "%-40s%-20s%-10s", "Connection", "Segment", + "API Client"); + else + s = format (s, "%-40s", "Connection"); + + return s; + } + + session = stream_session_get (client->session_index, client->thread_index); + str = format (0, "%U", format_stream_session, session, verbose); + + session_manager_get_segment_info (session->server_segment_index, &seg_name, + &segment_size); + if (verbose) + { + s = format (s, "%-40s%-20s%-10d%", str, seg_name, + client->api_client_index); + } + 
else + s = format (s, "%-40s", str); + return s; +} + +static clib_error_t * +show_app_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + application_t *app; + int do_server = 0; + int do_client = 0; + int verbose = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "server")) + do_server = 1; + else if (unformat (input, "client")) + do_client = 1; + else if (unformat (input, "verbose")) + verbose = 1; + else + break; + } + + if (do_server) + { + if (pool_elts (app_pool)) + { + vlib_cli_output (vm, "%U", format_application_server, + 0 /* header */ , + verbose); + /* *INDENT-OFF* */ + pool_foreach (app, app_pool, + ({ + if (app->mode == APP_SERVER) + vlib_cli_output (vm, "%U", format_application_server, app, + verbose); + })); + /* *INDENT-ON* */ + } + else + vlib_cli_output (vm, "No active server bindings"); + } + + if (do_client) + { + if (pool_elts (app_pool)) + { + vlib_cli_output (vm, "%U", format_application_client, + 0 /* header */ , + verbose); + /* *INDENT-OFF* */ + pool_foreach (app, app_pool, + ({ + if (app->mode == APP_CLIENT) + vlib_cli_output (vm, "%U", format_application_client, app, + verbose); + })); + /* *INDENT-ON* */ + } + else + vlib_cli_output (vm, "No active server bindings"); + } + + return 0; +} + +VLIB_CLI_COMMAND (show_app_command, static) = +{ +.path = "show app",.short_help = + "show app [server|client] [verbose]",.function = show_app_command_fn,}; + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/session/application.h b/src/vnet/session/application.h new file mode 100644 index 00000000..027d6967 --- /dev/null +++ b/src/vnet/session/application.h @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_VNET_SESSION_APPLICATION_H_
+#define SRC_VNET_SESSION_APPLICATION_H_
+
+#include
+#include
+
+/** Role of an application: a server gets its own session manager, a
+    client uses the per-session-type connect manager (see application_new) */
+typedef enum
+{
+  APP_SERVER,
+  APP_CLIENT
+} application_type_t;
+
+/** Callbacks through which the session layer notifies an application */
+typedef struct _stream_session_cb_vft
+{
+  /** Notify server of new segment */
+  int (*add_segment_callback) (u32 api_client_index, const u8 * seg_name,
+                               u32 seg_size);
+
+  /** Notify server of newly accepted session */
+  int (*session_accept_callback) (stream_session_t * new_session);
+
+  /** Connection request callback */
+  int (*session_connected_callback) (u32 api_client_index,
+                                     stream_session_t * s, u8 code);
+
+  /** Notify app that session is closing */
+  void (*session_disconnect_callback) (stream_session_t * s);
+
+  /** Notify app that session was reset */
+  void (*session_reset_callback) (stream_session_t * s);
+
+  /** Direct RX callback, for built-in servers */
+  int (*builtin_server_rx_callback) (stream_session_t * session);
+
+  /** Redirect connection to local server */
+  int (*redirect_connect_callback) (u32 api_client_index, void *mp);
+} session_cb_vft_t;
+
+typedef struct _application
+{
+  /** Index in server pool */
+  u32 index;
+
+  /** Flags */
+  u32 flags;
+
+  /** Binary API connection index, ~0 if internal */
+  u32 api_client_index;
+
+  /** API context; stored by vnet_connect_i from the connect request */
+  u32 api_context;
+
+  /** Application listens for events on this svm queue */
+  unix_shared_memory_queue_t *event_queue;
+
+  /** Stream session type */
+  u8 session_type;
+
+  /** Stream server mode: accept or connect (an application_type_t) */
+  u8 mode;
+
+  /** Index of the session manager used by this app (set in
+      application_new) */
+  u32 session_manager_index;
+
+  /*
+   * Bind/Listen specific
+   */
+
+  /** Accept cookie, for multiple session flavors ($$$ maybe) */
+  u32 accept_cookie;
+
+  /** Index of the listen session or connect session */
+  u32 session_index;
+
+  /** Session thread index for client connect sessions */
+  u32 thread_index;
+
+  /*
+   * Callbacks: shoulder-taps for the server/client
+   */
+  session_cb_vft_t cb_fns;
+} application_t;
+
+/* Application lifecycle and lookup (implemented in application.c) */
+application_t *application_new (application_type_t type, session_type_t sst,
+                                u32 api_client_index, u32 flags,
+                                session_cb_vft_t * cb_fns);
+void application_del (application_t * app);
+application_t *application_get (u32 index);
+application_t *application_lookup (u32 api_client_index);
+u32 application_get_index (application_t * app);
+
+/* Returns 0 on success, otherwise the error from adding the first segment */
+int
+application_server_init (application_t * server, u32 segment_size,
+                         u32 add_segment_size, u32 rx_fifo_size,
+                         u32 tx_fifo_size, u8 ** segment_name);
+/* Returns 1 if the app's API input queue is missing or full, else 0 */
+int application_api_queue_is_full (application_t * app);
+
+#endif /* SRC_VNET_SESSION_APPLICATION_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/session/application_interface.c b/src/vnet/session/application_interface.c
new file mode 100644
index 00000000..0ea77fd8
--- /dev/null
+++ b/src/vnet/session/application_interface.c
@@ -0,0 +1,459 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +#include + +#include +#include +#include +#include + +/** @file + VPP's application/session API bind/unbind/connect/disconnect calls +*/ + +static u8 +ip_is_zero (ip46_address_t * ip46_address, u8 is_ip4) +{ + if (is_ip4) + return (ip46_address->ip4.as_u32 == 0); + else + return (ip46_address->as_u64[0] == 0 && ip46_address->as_u64[1] == 0); +} + +static u8 +ip_is_local (ip46_address_t * ip46_address, u8 is_ip4) +{ + fib_node_index_t fei; + fib_entry_flag_t flags; + fib_prefix_t prefix; + + /* Check if requester is local */ + if (is_ip4) + { + prefix.fp_len = 32; + prefix.fp_proto = FIB_PROTOCOL_IP4; + } + else + { + prefix.fp_len = 128; + prefix.fp_proto = FIB_PROTOCOL_IP6; + } + + clib_memcpy (&prefix.fp_addr, ip46_address, sizeof (ip46_address)); + fei = fib_table_lookup (0, &prefix); + flags = fib_entry_get_flags (fei); + + return (flags & FIB_ENTRY_FLAG_LOCAL); +} + +int +api_parse_session_handle (u64 handle, u32 * session_index, u32 * thread_index) +{ + session_manager_main_t *smm = vnet_get_session_manager_main (); + stream_session_t *pool; + + *thread_index = handle & 0xFFFFFFFF; + *session_index = handle >> 32; + + if (*thread_index >= vec_len (smm->sessions)) + return VNET_API_ERROR_INVALID_VALUE; + + pool = smm->sessions[*thread_index]; + + if (pool_is_free_index (pool, *session_index)) + return VNET_API_ERROR_INVALID_VALUE_2; + + return 0; +} + +int +vnet_bind_i (u32 api_client_index, ip46_address_t * ip46, u16 port_host_order, + session_type_t sst, u64 * options, session_cb_vft_t * cb_fns, + application_t ** app, u32 * len_seg_name, char *seg_name) +{ + u8 *segment_name = 0; + application_t *server = 0; + stream_session_t *listener; + u8 is_ip4; + + listener = + stream_session_lookup_listener (ip46, + clib_host_to_net_u16 (port_host_order), + sst); + + if (listener) + return VNET_API_ERROR_ADDRESS_IN_USE; + + if (application_lookup (api_client_index)) + { + clib_warning ("Only one bind supported for now"); + return VNET_API_ERROR_ADDRESS_IN_USE; 
+ } + + is_ip4 = SESSION_TYPE_IP4_UDP == sst || SESSION_TYPE_IP4_TCP == sst; + if (!ip_is_zero (ip46, is_ip4) && !ip_is_local (ip46, is_ip4)) + return VNET_API_ERROR_INVALID_VALUE; + + /* Allocate and initialize stream server */ + server = application_new (APP_SERVER, sst, api_client_index, + options[SESSION_OPTIONS_FLAGS], cb_fns); + + application_server_init (server, options[SESSION_OPTIONS_SEGMENT_SIZE], + options[SESSION_OPTIONS_ADD_SEGMENT_SIZE], + options[SESSION_OPTIONS_RX_FIFO_SIZE], + options[SESSION_OPTIONS_TX_FIFO_SIZE], + &segment_name); + + /* Setup listen path down to transport */ + stream_session_start_listen (server->index, ip46, port_host_order); + + /* + * Return values + */ + + ASSERT (vec_len (segment_name) <= 128); + *len_seg_name = vec_len (segment_name); + memcpy (seg_name, segment_name, *len_seg_name); + *app = server; + + return 0; +} + +int +vnet_unbind_i (u32 api_client_index) +{ + application_t *server; + + /* + * Find the stream_server_t corresponding to the api client + */ + server = application_lookup (api_client_index); + if (!server) + return VNET_API_ERROR_INVALID_VALUE_2; + + /* Clear the listener */ + stream_session_stop_listen (server->index); + application_del (server); + + return 0; +} + +int +vnet_connect_i (u32 api_client_index, u32 api_context, session_type_t sst, + ip46_address_t * ip46, u16 port, u64 * options, void *mp, + session_cb_vft_t * cb_fns) +{ + stream_session_t *listener; + application_t *server, *app; + + /* + * Figure out if connecting to a local server + */ + listener = stream_session_lookup_listener (ip46, + clib_host_to_net_u16 (port), + sst); + if (listener) + { + server = application_get (listener->app_index); + + /* + * Server is willing to have a direct fifo connection created + * instead of going through the state machine, etc. + */ + if (server->flags & SESSION_OPTIONS_FLAGS_USE_FIFO) + return server->cb_fns. 
+ redirect_connect_callback (server->api_client_index, mp); + } + + /* Create client app */ + app = application_new (APP_CLIENT, sst, api_client_index, + options[SESSION_OPTIONS_FLAGS], cb_fns); + + app->api_context = api_context; + + /* + * Not connecting to a local server. Create regular session + */ + stream_session_open (sst, ip46, port, app->index); + + return 0; +} + +/** + * unformat a vnet URI + * + * fifo://name + * tcp://ip46-addr:port + * udp://ip46-addr:port + * + * u8 ip46_address[16]; + * u16 port_in_host_byte_order; + * stream_session_type_t sst; + * u8 *fifo_name; + * + * if (unformat (input, "%U", unformat_vnet_uri, &ip46_address, + * &sst, &port, &fifo_name)) + * etc... + * + */ +uword +unformat_vnet_uri (unformat_input_t * input, va_list * args) +{ + ip46_address_t *address = va_arg (*args, ip46_address_t *); + session_type_t *sst = va_arg (*args, session_type_t *); + u16 *port = va_arg (*args, u16 *); + + if (unformat (input, "tcp://%U/%d", unformat_ip4_address, &address->ip4, + port)) + { + *sst = SESSION_TYPE_IP4_TCP; + return 1; + } + if (unformat (input, "udp://%U/%d", unformat_ip4_address, &address->ip4, + port)) + { + *sst = SESSION_TYPE_IP4_UDP; + return 1; + } + if (unformat (input, "udp://%U/%d", unformat_ip6_address, &address->ip6, + port)) + { + *sst = SESSION_TYPE_IP6_UDP; + return 1; + } + if (unformat (input, "tcp://%U/%d", unformat_ip6_address, &address->ip6, + port)) + { + *sst = SESSION_TYPE_IP6_TCP; + return 1; + } + + return 0; +} + +int +parse_uri (char *uri, session_type_t * sst, ip46_address_t * addr, + u16 * port_number_host_byte_order) +{ + unformat_input_t _input, *input = &_input; + + /* Make sure */ + uri = (char *) format (0, "%s%c", uri, 0); + + /* Parse uri */ + unformat_init_string (input, uri, strlen (uri)); + if (!unformat (input, "%U", unformat_vnet_uri, addr, sst, + port_number_host_byte_order)) + { + unformat_free (input); + return VNET_API_ERROR_INVALID_VALUE; + } + unformat_free (input); + + return 0; +} + 
+int +vnet_bind_uri (vnet_bind_args_t * a) +{ + application_t *server = 0; + u16 port_host_order; + session_type_t sst = SESSION_N_TYPES; + ip46_address_t ip46; + int rv; + + memset (&ip46, 0, sizeof (ip46)); + rv = parse_uri (a->uri, &sst, &ip46, &port_host_order); + if (rv) + return rv; + + if ((rv = vnet_bind_i (a->api_client_index, &ip46, port_host_order, sst, + a->options, a->session_cb_vft, &server, + &a->segment_name_length, a->segment_name))) + return rv; + + a->server_event_queue_address = (u64) server->event_queue; + return 0; +} + +session_type_t +session_type_from_proto_and_ip (session_api_proto_t proto, u8 is_ip4) +{ + if (proto == SESSION_PROTO_TCP) + { + if (is_ip4) + return SESSION_TYPE_IP4_TCP; + else + return SESSION_TYPE_IP6_TCP; + } + else + { + if (is_ip4) + return SESSION_TYPE_IP4_UDP; + else + return SESSION_TYPE_IP6_UDP; + } + + return SESSION_N_TYPES; +} + +int +vnet_unbind_uri (char *uri, u32 api_client_index) +{ + u16 port_number_host_byte_order; + session_type_t sst = SESSION_N_TYPES; + ip46_address_t ip46_address; + stream_session_t *listener; + int rv; + + rv = parse_uri (uri, &sst, &ip46_address, &port_number_host_byte_order); + if (rv) + return rv; + + listener = + stream_session_lookup_listener (&ip46_address, + clib_host_to_net_u16 + (port_number_host_byte_order), sst); + + if (!listener) + return VNET_API_ERROR_ADDRESS_NOT_IN_USE; + + /* External client? 
*/ + if (api_client_index != ~0) + { + ASSERT (vl_api_client_index_to_registration (api_client_index)); + } + + return vnet_unbind_i (api_client_index); +} + +int +vnet_connect_uri (vnet_connect_args_t * a) +{ + ip46_address_t ip46_address; + u16 port; + session_type_t sst; + application_t *app; + int rv; + + app = application_lookup (a->api_client_index); + if (app) + { + clib_warning ("Already have a connect from this app"); + return VNET_API_ERROR_INVALID_VALUE_2; + } + + /* Parse uri */ + rv = parse_uri (a->uri, &sst, &ip46_address, &port); + if (rv) + return rv; + + return vnet_connect_i (a->api_client_index, a->api_context, sst, + &ip46_address, port, a->options, a->mp, + a->session_cb_vft); +} + +int +vnet_disconnect_session (u32 client_index, u32 session_index, + u32 thread_index) +{ + stream_session_t *session; + + session = stream_session_get (session_index, thread_index); + stream_session_disconnect (session); + + return 0; +} + + +int +vnet_bind (vnet_bind_args_t * a) +{ + application_t *server = 0; + session_type_t sst = SESSION_N_TYPES; + int rv; + + sst = session_type_from_proto_and_ip (a->proto, a->tep.is_ip4); + if ((rv = vnet_bind_i (a->api_client_index, &a->tep.ip, a->tep.port, sst, + a->options, a->session_cb_vft, &server, + &a->segment_name_length, a->segment_name))) + return rv; + + a->server_event_queue_address = (u64) server->event_queue; + a->handle = (u64) a->tep.vrf << 32 | (u64) server->session_index; + return 0; +} + +int +vnet_unbind (vnet_unbind_args_t * a) +{ + application_t *server; + + if (a->api_client_index != ~0) + { + ASSERT (vl_api_client_index_to_registration (a->api_client_index)); + } + + /* Make sure this is the right one */ + server = application_lookup (a->api_client_index); + ASSERT (server->session_index == (0xFFFFFFFF & a->handle)); + + /* TODO use handle to disambiguate namespaces/vrfs */ + return vnet_unbind_i (a->api_client_index); +} + +int +vnet_connect (vnet_connect_args_t * a) +{ + session_type_t sst; + 
application_t *app; + + app = application_lookup (a->api_client_index); + if (app) + { + clib_warning ("Already have a connect from this app"); + return VNET_API_ERROR_INVALID_VALUE_2; + } + + sst = session_type_from_proto_and_ip (a->proto, a->tep.is_ip4); + return vnet_connect_i (a->api_client_index, a->api_context, sst, &a->tep.ip, + a->tep.port, a->options, a->mp, a->session_cb_vft); +} + +int +vnet_disconnect (vnet_disconnect_args_t * a) +{ + stream_session_t *session; + u32 session_index, thread_index; + + if (api_parse_session_handle (a->handle, &session_index, &thread_index)) + { + clib_warning ("Invalid handle"); + return -1; + } + + session = stream_session_get (session_index, thread_index); + stream_session_disconnect (session); + + return 0; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/session/application_interface.h b/src/vnet/session/application_interface.h new file mode 100644 index 00000000..8d87c067 --- /dev/null +++ b/src/vnet/session/application_interface.h @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __included_uri_h__ +#define __included_uri_h__ + +#include +#include +#include +#include +#include +#include + +typedef enum _session_api_proto +{ + SESSION_PROTO_TCP, + SESSION_PROTO_UDP +} session_api_proto_t; + +typedef struct _vnet_bind_args_t +{ + union + { + char *uri; + struct + { + transport_endpoint_t tep; + session_api_proto_t proto; + }; + }; + + u32 api_client_index; + u64 *options; + session_cb_vft_t *session_cb_vft; + + /* + * Results + */ + char *segment_name; + u32 segment_name_length; + u64 server_event_queue_address; + u64 handle; +} vnet_bind_args_t; + +typedef struct _vnet_unbind_args_t +{ + union + { + char *uri; + u64 handle; + }; + u32 api_client_index; +} vnet_unbind_args_t; + +typedef struct _vnet_connect_args +{ + union + { + char *uri; + struct + { + transport_endpoint_t tep; + session_api_proto_t proto; + }; + }; + u32 api_client_index; + u32 api_context; + u64 *options; + session_cb_vft_t *session_cb_vft; + + /* Used for redirects */ + void *mp; +} vnet_connect_args_t; + +typedef struct _vnet_disconnect_args_t +{ + u64 handle; + u32 api_client_index; +} vnet_disconnect_args_t; + +/* Bind / connect options */ +typedef enum +{ + SESSION_OPTIONS_FLAGS, + SESSION_OPTIONS_SEGMENT_SIZE, + SESSION_OPTIONS_ADD_SEGMENT_SIZE, + SESSION_OPTIONS_RX_FIFO_SIZE, + SESSION_OPTIONS_TX_FIFO_SIZE, + SESSION_OPTIONS_ACCEPT_COOKIE, + SESSION_OPTIONS_N_OPTIONS +} session_options_index_t; + +/** Server can handle delegated connect requests from local clients */ +#define SESSION_OPTIONS_FLAGS_USE_FIFO (1<<0) + +/** Server wants vpp to add segments when out of memory for fifos */ +#define SESSION_OPTIONS_FLAGS_ADD_SEGMENT (1<<1) + +#define VNET_CONNECT_REDIRECTED 123 + +int vnet_bind_uri (vnet_bind_args_t *); +int vnet_unbind_uri (char *uri, u32 api_client_index); +int vnet_connect_uri (vnet_connect_args_t * a); +int +vnet_disconnect_session (u32 client_index, u32 session_index, + u32 thread_index); + +int vnet_bind (vnet_bind_args_t * a); +int 
vnet_connect (vnet_connect_args_t * a); +int vnet_unbind (vnet_unbind_args_t * a); +int vnet_disconnect (vnet_disconnect_args_t * a); + +int +api_parse_session_handle (u64 handle, u32 * session_index, + u32 * thread_index); + +#endif /* __included_uri_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/session/hashes.c b/src/vnet/session/hashes.c new file mode 100644 index 00000000..1808dd73 --- /dev/null +++ b/src/vnet/session/hashes.c @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** Generate typed init functions for multiple hash table styles... */ + +#include +#include + +#include + +#undef __included_bihash_template_h__ + +#include +#include + +#include diff --git a/src/vnet/session/node.c b/src/vnet/session/node.c new file mode 100644 index 00000000..e467f4e9 --- /dev/null +++ b/src/vnet/session/node.c @@ -0,0 +1,435 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include +#include +#include + +vlib_node_registration_t session_queue_node; + +typedef struct +{ + u32 session_index; + u32 server_thread_index; +} session_queue_trace_t; + +/* packet trace format function */ +static u8 * +format_session_queue_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + session_queue_trace_t *t = va_arg (*args, session_queue_trace_t *); + + s = format (s, "SESSION_QUEUE: session index %d, server thread index %d", + t->session_index, t->server_thread_index); + return s; +} + +vlib_node_registration_t session_queue_node; + +#define foreach_session_queue_error \ +_(TX, "Packets transmitted") \ +_(TIMER, "Timer events") + +typedef enum +{ +#define _(sym,str) SESSION_QUEUE_ERROR_##sym, + foreach_session_queue_error +#undef _ + SESSION_QUEUE_N_ERROR, +} session_queue_error_t; + +static char *session_queue_error_strings[] = { +#define _(sym,string) string, + foreach_session_queue_error +#undef _ +}; + +static u32 session_type_to_next[] = { + SESSION_QUEUE_NEXT_TCP_IP4_OUTPUT, + SESSION_QUEUE_NEXT_IP4_LOOKUP, + SESSION_QUEUE_NEXT_TCP_IP6_OUTPUT, + SESSION_QUEUE_NEXT_IP6_LOOKUP, +}; + +always_inline int +session_fifo_rx_i (vlib_main_t * vm, vlib_node_runtime_t * node, + session_manager_main_t * smm, session_fifo_event_t * e0, + stream_session_t * s0, u32 thread_index, int *n_tx_packets, + u8 peek_data) +{ + u32 
n_trace = vlib_get_trace_count (vm, node); + u32 left_to_snd0, max_len_to_snd0, len_to_deq0, n_bufs, snd_space0; + u32 n_frame_bytes, n_frames_per_evt; + transport_connection_t *tc0; + transport_proto_vft_t *transport_vft; + u32 next_index, next0, *to_next, n_left_to_next, bi0; + vlib_buffer_t *b0; + u32 rx_offset; + u16 snd_mss0; + u8 *data0; + int i; + + next_index = next0 = session_type_to_next[s0->session_type]; + + transport_vft = session_get_transport_vft (s0->session_type); + tc0 = transport_vft->get_connection (s0->connection_index, thread_index); + + /* Make sure we have space to send and there's something to dequeue */ + snd_space0 = transport_vft->send_space (tc0); + snd_mss0 = transport_vft->send_mss (tc0); + + if (snd_space0 == 0 || svm_fifo_max_dequeue (s0->server_tx_fifo) == 0 + || snd_mss0 == 0) + return 0; + + ASSERT (e0->enqueue_length > 0); + + /* Ensure we're not writing more than transport window allows */ + max_len_to_snd0 = clib_min (e0->enqueue_length, snd_space0); + + if (peek_data) + { + /* Offset in rx fifo from where to peek data */ + rx_offset = transport_vft->rx_fifo_offset (tc0); + } + + /* TODO check if transport is willing to send len_to_snd0 + * bytes (Nagle) */ + + n_frame_bytes = snd_mss0 * VLIB_FRAME_SIZE; + n_frames_per_evt = ceil ((double) max_len_to_snd0 / n_frame_bytes); + + n_bufs = vec_len (smm->tx_buffers[thread_index]); + left_to_snd0 = max_len_to_snd0; + for (i = 0; i < n_frames_per_evt; i++) + { + /* Make sure we have at least one full frame of buffers ready */ + if (PREDICT_FALSE (n_bufs < VLIB_FRAME_SIZE)) + { + vec_validate (smm->tx_buffers[thread_index], + n_bufs + VLIB_FRAME_SIZE - 1); + n_bufs += + vlib_buffer_alloc (vm, &smm->tx_buffers[thread_index][n_bufs], + VLIB_FRAME_SIZE); + + /* buffer shortage + * XXX 0.9 because when debugging we might not get a full frame */ + if (PREDICT_FALSE (n_bufs < 0.9 * VLIB_FRAME_SIZE)) + { + /* Keep track of how much we've dequeued and exit */ + e0->enqueue_length -= 
max_len_to_snd0 - left_to_snd0; + return -1; + } + + _vec_len (smm->tx_buffers[thread_index]) = n_bufs; + } + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + while (left_to_snd0 && n_left_to_next) + { + /* Get free buffer */ + n_bufs--; + bi0 = smm->tx_buffers[thread_index][n_bufs]; + _vec_len (smm->tx_buffers[thread_index]) = n_bufs; + + b0 = vlib_get_buffer (vm, bi0); + b0->error = 0; + b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID + | VNET_BUFFER_LOCALLY_ORIGINATED; + b0->current_data = 0; + + /* RX on the local interface. tx in default fib */ + vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + + /* usual speculation, or the enqueue_x1 macro will barf */ + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); + if (PREDICT_FALSE (n_trace > 0)) + { + session_queue_trace_t *t0; + vlib_trace_buffer (vm, node, next_index, b0, + 1 /* follow_chain */ ); + vlib_set_trace_count (vm, node, --n_trace); + t0 = vlib_add_trace (vm, node, b0, sizeof (*t0)); + t0->session_index = s0->session_index; + t0->server_thread_index = s0->thread_index; + } + + if (1) + { + ELOG_TYPE_DECLARE (e) = + { + .format = "evt-dequeue: id %d length %d",.format_args = + "i4i4",}; + struct + { + u32 data[2]; + } *ed; + ed = ELOG_DATA (&vm->elog_main, e); + ed->data[0] = e0->event_id; + ed->data[1] = e0->enqueue_length; + } + + len_to_deq0 = (left_to_snd0 < snd_mss0) ? 
left_to_snd0 : snd_mss0; + + /* Make room for headers */ + data0 = vlib_buffer_make_headroom (b0, MAX_HDRS_LEN); + + /* Dequeue the data + * TODO 1) peek instead of dequeue + * 2) buffer chains */ + if (peek_data) + { + int n_bytes_read; + n_bytes_read = svm_fifo_peek (s0->server_tx_fifo, s0->pid, + rx_offset, len_to_deq0, data0); + if (n_bytes_read < 0) + goto dequeue_fail; + + /* Keep track of progress locally, transport is also supposed to + * increment it independently when pushing header */ + rx_offset += n_bytes_read; + } + else + { + if (svm_fifo_dequeue_nowait (s0->server_tx_fifo, s0->pid, + len_to_deq0, data0) < 0) + goto dequeue_fail; + } + + b0->current_length = len_to_deq0; + + /* Ask transport to push header */ + transport_vft->push_header (tc0, b0); + + left_to_snd0 -= len_to_deq0; + *n_tx_packets = *n_tx_packets + 1; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + /* If we couldn't dequeue all bytes store progress */ + if (max_len_to_snd0 < e0->enqueue_length) + { + e0->enqueue_length -= max_len_to_snd0; + vec_add1 (smm->evts_partially_read[thread_index], *e0); + } + return 0; + +dequeue_fail: + /* Can't read from fifo. 
Store event rx progress, save as partially read, + * return buff to free list and return */ + e0->enqueue_length -= max_len_to_snd0 - left_to_snd0; + vec_add1 (smm->evts_partially_read[thread_index], *e0); + + to_next -= 1; + n_left_to_next += 1; + _vec_len (smm->tx_buffers[thread_index]) += 1; + + clib_warning ("dequeue fail"); + return 0; +} + +int +session_fifo_rx_peek (vlib_main_t * vm, vlib_node_runtime_t * node, + session_manager_main_t * smm, session_fifo_event_t * e0, + stream_session_t * s0, u32 thread_index, int *n_tx_pkts) +{ + return session_fifo_rx_i (vm, node, smm, e0, s0, thread_index, n_tx_pkts, + 1); +} + +int +session_fifo_rx_dequeue (vlib_main_t * vm, vlib_node_runtime_t * node, + session_manager_main_t * smm, + session_fifo_event_t * e0, stream_session_t * s0, + u32 thread_index, int *n_tx_pkts) +{ + return session_fifo_rx_i (vm, node, smm, e0, s0, thread_index, n_tx_pkts, + 0); +} + +static uword +session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + session_manager_main_t *smm = vnet_get_session_manager_main (); + session_fifo_event_t *my_fifo_events, *e; + u32 n_to_dequeue; + unix_shared_memory_queue_t *q; + int n_tx_packets = 0; + u32 my_thread_index = vm->cpu_index; + int i, rv; + + /* + * Update TCP time + */ + tcp_update_time (vlib_time_now (vm), my_thread_index); + + /* + * Get vpp queue events + */ + q = smm->vpp_event_queues[my_thread_index]; + if (PREDICT_FALSE (q == 0)) + return 0; + + /* min number of events we can dequeue without blocking */ + n_to_dequeue = q->cursize; + if (n_to_dequeue == 0) + return 0; + + my_fifo_events = smm->fifo_events[my_thread_index]; + + /* If we didn't manage to process previous events try going + * over them again without dequeuing new ones. 
+ * XXX: Block senders to sessions that can't keep up */ + if (vec_len (my_fifo_events) >= 100) + goto skip_dequeue; + + /* See you in the next life, don't be late */ + if (pthread_mutex_trylock (&q->mutex)) + return 0; + + for (i = 0; i < n_to_dequeue; i++) + { + vec_add2 (my_fifo_events, e, 1); + unix_shared_memory_queue_sub_raw (q, (u8 *) e); + } + + /* The other side of the connection is not polling */ + if (q->cursize < (q->maxsize / 8)) + (void) pthread_cond_broadcast (&q->condvar); + pthread_mutex_unlock (&q->mutex); + + smm->fifo_events[my_thread_index] = my_fifo_events; + +skip_dequeue: + + for (i = 0; i < n_to_dequeue; i++) + { + svm_fifo_t *f0; /* $$$ prefetch 1 ahead maybe */ + stream_session_t *s0; + u32 server_session_index0, server_thread_index0; + session_fifo_event_t *e0; + + e0 = &my_fifo_events[i]; + f0 = e0->fifo; + server_session_index0 = f0->server_session_index; + server_thread_index0 = f0->server_thread_index; + + /* $$$ add multiple event queues, per vpp worker thread */ + ASSERT (server_thread_index0 == my_thread_index); + + s0 = pool_elt_at_index (smm->sessions[my_thread_index], + server_session_index0); + + ASSERT (s0->thread_index == my_thread_index); + + switch (e0->event_type) + { + case FIFO_EVENT_SERVER_TX: + /* Spray packets in per session type frames, since they go to + * different nodes */ + rv = (smm->session_rx_fns[s0->session_type]) (vm, node, smm, e0, s0, + my_thread_index, + &n_tx_packets); + if (rv < 0) + goto done; + + break; + + default: + clib_warning ("unhandled event type %d", e0->event_type); + } + } + +done: + + /* Couldn't process all events. 
Probably out of buffers */ + if (PREDICT_FALSE (i < n_to_dequeue)) + { + session_fifo_event_t *partially_read = + smm->evts_partially_read[my_thread_index]; + vec_add (partially_read, &my_fifo_events[i], n_to_dequeue - i); + vec_free (my_fifo_events); + smm->fifo_events[my_thread_index] = partially_read; + smm->evts_partially_read[my_thread_index] = 0; + } + else + { + vec_free (smm->fifo_events[my_thread_index]); + smm->fifo_events[my_thread_index] = + smm->evts_partially_read[my_thread_index]; + smm->evts_partially_read[my_thread_index] = 0; + } + + vlib_node_increment_counter (vm, session_queue_node.index, + SESSION_QUEUE_ERROR_TX, n_tx_packets); + + return n_tx_packets; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (session_queue_node) = +{ + .function = session_queue_node_fn, + .name = "session-queue", + .format_trace = format_session_queue_trace, + .type = VLIB_NODE_TYPE_INPUT, + .n_errors = ARRAY_LEN (session_queue_error_strings), + .error_strings = session_queue_error_strings, + .n_next_nodes = SESSION_QUEUE_N_NEXT, + /* .state = VLIB_NODE_STATE_DISABLED, enable on-demand? */ + /* edit / add dispositions here */ + .next_nodes = + { + [SESSION_QUEUE_NEXT_DROP] = "error-drop", + [SESSION_QUEUE_NEXT_IP4_LOOKUP] = "ip4-lookup", + [SESSION_QUEUE_NEXT_IP6_LOOKUP] = "ip6-lookup", + [SESSION_QUEUE_NEXT_TCP_IP4_OUTPUT] = "tcp4-output", + [SESSION_QUEUE_NEXT_TCP_IP6_OUTPUT] = "tcp6-output", + }, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/session/session.api b/src/vnet/session/session.api new file mode 100644 index 00000000..a7b28c1d --- /dev/null +++ b/src/vnet/session/session.api @@ -0,0 +1,429 @@ +/* + * Copyright (c) 2015-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + /** \brief Bind to a given URI + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param accept_cookie - sender accept cookie, to identify this bind flavor + @param uri - a URI, e.g. "tcp://0.0.0.0/0/80" [ipv4] + "tcp://::/0/80" [ipv6] etc. + @param options - socket options, fifo sizes, etc. +*/ +define bind_uri { + u32 client_index; + u32 context; + u32 accept_cookie; + u32 initial_segment_size; + u8 uri[128]; + u64 options[16]; +}; + +/** \brief Unbind a given URI + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param uri - a URI, e.g. "tcp://0.0.0.0/0/80" [ipv4] + "tcp://::/0/80" [ipv6], etc. + @param options - socket options, fifo sizes, etc. +*/ +define unbind_uri { + u32 client_index; + u32 context; + u8 uri[128]; +}; + +/** \brief Connect to a given URI + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param client_queue_address - client's API queue address, used for redirects + @param uri - a URI, e.g. "tcp4://0.0.0.0/0/80" [ipv4] + "tcp6://::/0/80" [ipv6], etc. + @param options - socket options, fifo sizes, etc. 
+*/ +define connect_uri { + u32 client_index; + u32 context; + u8 uri[128]; + u64 client_queue_address; + u64 options[16]; +}; + +/** \brief Bind reply + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param server_event_queue_address - vpp event queue address or 0 if this + connection shouldn't send events + @param segment_name_length - length of segment name + @param segment_name - name of segment client needs to attach to +*/ +define bind_uri_reply { + u32 context; + i32 retval; + u64 server_event_queue_address; + u8 segment_name_length; + u32 segment_size; + u8 segment_name[128]; +}; + +/** \brief unbind reply + @param context - sender context, to match reply w/ request + @param retval - return code for the request +*/ +define unbind_uri_reply { + u32 context; + i32 retval; +}; + +/** \brief vpp->client, connect reply + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param server_rx_fifo - rx (vpp -> vpp-client) fifo address + @param server_tx_fifo - tx (vpp-client -> vpp) fifo address + @param session_index - session index + @param session_thread_index - session thread index + @param session_type - session type + @param vpp_event_queue_address - vpp's event queue address + @param client_event_queue_address - client's event queue address + @param segment_name_length - non-zero if the client needs to attach to + the fifo segment + @param segment_name - set if the client needs to attach to the segment +*/ +define connect_uri_reply { + u32 context; + i32 retval; + u64 server_rx_fifo; + u64 server_tx_fifo; + u32 session_index; + u32 session_thread_index; + u8 session_type; + u64 client_event_queue_address; + u64 vpp_event_queue_address; + u32 segment_size; + u8 segment_name_length; + u8 segment_name[128]; +}; + +/** \brief vpp->client, please map an additional shared memory segment + @param context - sender context, to match reply w/ request + 
@param segment_name - +*/ +define map_another_segment { + u32 client_index; + u32 context; + u32 segment_size; + u8 segment_name[128]; +}; + +/** \brief client->vpp + @param context - sender context, to match reply w/ request + @param retval - return code for the request +*/ +define map_another_segment_reply { + u32 context; + i32 retval; +}; + +/** \brief vpp->client, accept this session + @param context - sender context, to match reply w/ request + @param accept_cookie - tells client which bind flavor just occurred + @param rx_fifo_address - rx (vpp -> vpp-client) fifo address + @param tx_fifo_address - tx (vpp-client -> vpp) fifo address + @param session_index - index of new session + @param session_thread_index - thread index of new session + @param vpp_event_queue_address - vpp's event queue address + @param session_type - type of session + +*/ +define accept_session { + u32 client_index; + u32 context; + u32 accept_cookie; + u64 server_rx_fifo; + u64 server_tx_fifo; + u32 session_index; + u32 session_thread_index; + u64 vpp_event_queue_address; + u8 session_type; +}; + +/** \brief client->vpp, reply to an accept message + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param session_index - session index from accept_session / connect_reply + @param session_thread_index - thread index from accept_session / + connect_reply +*/ +define accept_session_reply { + u32 context; + i32 retval; + u8 session_type; + u8 session_thread_index; + u32 session_index; +}; + +/** \brief bidirectional disconnect API + @param client_index - opaque cookie to identify the sender + client to vpp direction only + @param context - sender context, to match reply w/ request + @param session_index - cookie #1 from accept_session / connect_reply + @param session_thread_index - cookie #2 +*/ +define disconnect_session { + u32 client_index; + u32 context; + u32 session_index; + u32 session_thread_index; +}; + +/** \brief 
bidirectional disconnect reply API + @param client_index - opaque cookie to identify the sender + client to vpp direction only + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param session_index - session index from accept_session / connect_reply + @param session_thread_index - thread index from accept_session / + connect_reply +*/ +define disconnect_session_reply { + u32 client_index; + u32 context; + i32 retval; + u32 session_index; + u32 session_thread_index; +}; + +/** \brief vpp->client reset session API + @param client_index - opaque cookie to identify the sender + client to vpp direction only + @param context - sender context, to match reply w/ request + @param session_index - session index from accept_session / connect_reply + @param session_thread_index - thread index from accept_session / + connect_reply +*/ +define reset_session { + u32 client_index; + u32 context; + u32 session_index; + u32 session_thread_index; +}; + +/** \brief client->vpp reset session reply + @param client_index - opaque cookie to identify the sender + client to vpp direction only + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param session_index - session index from accept_session / connect_reply + @param session_thread_index - thread index from accept_session / + connect_reply +*/ +define reset_session_reply { + u32 client_index; + u32 context; + i32 retval; + u32 session_index; + u32 session_thread_index; +}; + +/** \brief Bind to an ip:port pair for a given transport protocol + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param vrf - bind namespace + @param is_ip4 - flag that is 1 if ip address family is IPv4 + @param ip - ip address + @param port - port + @param proto - protocol 0 - TCP 1 - UDP + @param options - socket options, fifo sizes, etc. 
+*/ +define bind_sock { + u32 client_index; + u32 context; + u32 vrf; + u8 is_ip4; + u8 ip[16]; + u16 port; + u8 proto; + u64 options[16]; +}; + +/** \brief Unbind + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param handle - bind handle obtained from bind reply +*/ +define unbind_sock { + u32 client_index; + u32 context; + u64 handle; +}; + +/** \brief Connect to a remote peer + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param vrf - connection namespace + @param is_ip4 - flag that is 1 if ip address family is IPv4 + @param ip - ip address + @param port - port + @param proto - protocol 0 - TCP 1 - UDP + @param client_queue_address - client's API queue address. Non-zero when + used to perform redirects + @param options - socket options, fifo sizes, etc. +*/ +define connect_sock { + u32 client_index; + u32 context; + u32 vrf; + u8 is_ip4; + u8 ip[16]; + u16 port; + u8 proto; + u64 client_queue_address; + u64 options[16]; +}; + +/** \brief Bind reply + @param context - sender context, to match reply w/ request + @param handle - bind handle + @param retval - return code for the request + @param event_queue_address - vpp event queue address or 0 if this + connection shouldn't send events + @param segment_name_length - length of segment name + @param segment_name - name of segment client needs to attach to +*/ +define bind_sock_reply { + u32 context; + u64 handle; + i32 retval; + u64 server_event_queue_address; + u32 segment_size; + u8 segment_name_length; + u8 segment_name[128]; +}; + +/** \brief unbind reply + @param context - sender context, to match reply w/ request + @param retval - return code for the request +*/ +define unbind_sock_reply { + u32 context; + i32 retval; +}; + +/** \brief vpp/server->client, connect reply + @param context - sender context, to match reply w/ request + @param retval - return code 
for the request + @param handle - connection handle + @param server_rx_fifo - rx (vpp -> vpp-client) fifo address + @param server_tx_fifo - tx (vpp-client -> vpp) fifo address + @param vpp_event_queue_address - vpp's event queue address + @param client_event_queue_address - client's event queue address + @param segment_name_length - non-zero if the client needs to attach to + the fifo segment + @param segment_name - set if the client needs to attach to the segment +*/ +define connect_sock_reply { + u32 context; + i32 retval; + u64 handle; + u64 server_rx_fifo; + u64 server_tx_fifo; + u64 client_event_queue_address; + u64 vpp_event_queue_address; + u32 segment_size; + u8 segment_name_length; + u8 segment_name[128]; +}; + +/** \brief bidirectional disconnect API + @param client_index - opaque cookie to identify the sender + client to vpp direction only + @param context - sender context, to match reply w/ request + @param handle - session handle obtained through accept/connect +*/ +define disconnect_sock { + u32 client_index; + u32 context; + u64 handle; +}; + +/** \brief bidirectional disconnect reply API + @param client_index - opaque cookie to identify the sender + client to vpp direction only + @param context - sender context, to match reply w/ request + @param handle - session handle obtained through accept/connect +*/ +define disconnect_sock_reply { + u32 client_index; + u32 context; + i32 retval; + u64 handle; +}; + +/** \brief vpp->client, accept this session + @param context - sender context, to match reply w/ request + @param accept_cookie - tells client which bind flavor just occurred + @param handle - session handle obtained through accept/connect + @param server_rx_fifo - rx (vpp -> vpp-client) fifo address + @param server_tx_fifo - tx (vpp-client -> vpp) fifo address + @param vpp_event_queue_address - vpp's event queue address +*/ +define accept_sock { + u32 client_index; + u32 context; + u32 accept_cookie; + u64 handle; + u64 server_rx_fifo; + 
u64 server_tx_fifo; + u64 vpp_event_queue_address; +}; + +/** \brief client->vpp, reply to an accept message + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param handle - session handle obtained through accept/connect +*/ +define accept_sock_reply { + u32 context; + i32 retval; + u64 handle; +}; + +/** \brief vpp->client reset session API + @param client_index - opaque cookie to identify the sender + client to vpp direction only + @param context - sender context, to match reply w/ request + @param handle - session handle obtained through accept/connect +*/ +define reset_sock { + u32 client_index; + u32 context; + u64 handle; +}; + +/** \brief client->vpp reset session reply + @param client_index - opaque cookie to identify the sender + client to vpp direction only + @param context - sender context, to match reply w/ request + @param handle - session handle obtained through accept/connect +*/ +define reset_sock_reply { + u32 client_index; + u32 context; + i32 retval; + u64 handle; +}; +/* + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ \ No newline at end of file diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c new file mode 100644 index 00000000..539da613 --- /dev/null +++ b/src/vnet/session/session.c @@ -0,0 +1,1286 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/** + * @file + * @brief Session and session manager + */ + +#include +#include +#include +#include +#include + +/** + * Per-type vector of transport protocol virtual function tables + */ +static transport_proto_vft_t *tp_vfts; + +session_manager_main_t session_manager_main; + +/* + * Session lookup key; (src-ip, dst-ip, src-port, dst-port, session-type) + * Value: (owner thread index << 32 | session_index); + */ +static void +stream_session_table_add_for_tc (u8 sst, transport_connection_t * tc, + u64 value) +{ + session_manager_main_t *smm = &session_manager_main; + session_kv4_t kv4; + session_kv6_t kv6; + + switch (sst) + { + case SESSION_TYPE_IP4_UDP: + case SESSION_TYPE_IP4_TCP: + make_v4_ss_kv_from_tc (&kv4, tc); + kv4.value = value; + clib_bihash_add_del_16_8 (&smm->v4_session_hash, &kv4, 1 /* is_add */ ); + break; + case SESSION_TYPE_IP6_UDP: + case SESSION_TYPE_IP6_TCP: + make_v6_ss_kv_from_tc (&kv6, tc); + kv6.value = value; + clib_bihash_add_del_48_8 (&smm->v6_session_hash, &kv6, 1 /* is_add */ ); + break; + default: + clib_warning ("Session type not supported"); + ASSERT (0); + } +} + +void +stream_session_table_add (session_manager_main_t * smm, stream_session_t * s, + u64 value) +{ + transport_connection_t *tc; + + tc = tp_vfts[s->session_type].get_connection (s->connection_index, + s->thread_index); + stream_session_table_add_for_tc (s->session_type, tc, value); +} + +static void +stream_session_half_open_table_add (u8 sst, transport_connection_t * tc, + u64 value) +{ + session_manager_main_t *smm = &session_manager_main; + session_kv4_t kv4; + session_kv6_t kv6; + + switch (sst) + { + case SESSION_TYPE_IP4_UDP: + case SESSION_TYPE_IP4_TCP: + make_v4_ss_kv_from_tc (&kv4, tc); + kv4.value = value; + clib_bihash_add_del_16_8 (&smm->v4_half_open_hash, &kv4, + 1 /* is_add */ ); + break; + case SESSION_TYPE_IP6_UDP: + case SESSION_TYPE_IP6_TCP: + make_v6_ss_kv_from_tc (&kv6, tc); + kv6.value = value; + clib_bihash_add_del_48_8 
(&smm->v6_half_open_hash, &kv6, + 1 /* is_add */ ); + break; + default: + clib_warning ("Session type not supported"); + ASSERT (0); + } +} + +static int +stream_session_table_del_for_tc (session_manager_main_t * smm, u8 sst, + transport_connection_t * tc) +{ + session_kv4_t kv4; + session_kv6_t kv6; + + switch (sst) + { + case SESSION_TYPE_IP4_UDP: + case SESSION_TYPE_IP4_TCP: + make_v4_ss_kv_from_tc (&kv4, tc); + return clib_bihash_add_del_16_8 (&smm->v4_session_hash, &kv4, + 0 /* is_add */ ); + break; + case SESSION_TYPE_IP6_UDP: + case SESSION_TYPE_IP6_TCP: + make_v6_ss_kv_from_tc (&kv6, tc); + return clib_bihash_add_del_48_8 (&smm->v6_session_hash, &kv6, + 0 /* is_add */ ); + break; + default: + clib_warning ("Session type not supported"); + ASSERT (0); + } + + return 0; +} + +static int +stream_session_table_del (session_manager_main_t * smm, stream_session_t * s) +{ + transport_connection_t *ts; + + ts = tp_vfts[s->session_type].get_connection (s->connection_index, + s->thread_index); + return stream_session_table_del_for_tc (smm, s->session_type, ts); +} + +static void +stream_session_half_open_table_del (session_manager_main_t * smm, u8 sst, + transport_connection_t * tc) +{ + session_kv4_t kv4; + session_kv6_t kv6; + + switch (sst) + { + case SESSION_TYPE_IP4_UDP: + case SESSION_TYPE_IP4_TCP: + make_v4_ss_kv_from_tc (&kv4, tc); + clib_bihash_add_del_16_8 (&smm->v4_half_open_hash, &kv4, + 0 /* is_add */ ); + break; + case SESSION_TYPE_IP6_UDP: + case SESSION_TYPE_IP6_TCP: + make_v6_ss_kv_from_tc (&kv6, tc); + clib_bihash_add_del_48_8 (&smm->v6_half_open_hash, &kv6, + 0 /* is_add */ ); + break; + default: + clib_warning ("Session type not supported"); + ASSERT (0); + } +} + +stream_session_t * +stream_session_lookup_listener4 (ip4_address_t * lcl, u16 lcl_port, u8 proto) +{ + session_manager_main_t *smm = &session_manager_main; + session_kv4_t kv4; + int rv; + + make_v4_listener_kv (&kv4, lcl, lcl_port, proto); + rv = clib_bihash_search_inline_16_8 
(&smm->v4_session_hash, &kv4); + if (rv == 0) + return pool_elt_at_index (smm->listen_sessions[proto], (u32) kv4.value); + + /* Zero out the lcl ip */ + kv4.key[0] = 0; + rv = clib_bihash_search_inline_16_8 (&smm->v4_session_hash, &kv4); + if (rv == 0) + return pool_elt_at_index (smm->listen_sessions[proto], kv4.value); + + return 0; +} + +/** Looks up a session based on the 5-tuple passed as argument. + * + * First it tries to find an established session, if this fails, it tries + * finding a listener session if this fails, it tries a lookup with a + * wildcarded local source (listener bound to all interfaces) + */ +stream_session_t * +stream_session_lookup4 (ip4_address_t * lcl, ip4_address_t * rmt, + u16 lcl_port, u16 rmt_port, u8 proto, + u32 my_thread_index) +{ + session_manager_main_t *smm = &session_manager_main; + session_kv4_t kv4; + int rv; + + /* Lookup session amongst established ones */ + make_v4_ss_kv (&kv4, lcl, rmt, lcl_port, rmt_port, proto); + rv = clib_bihash_search_inline_16_8 (&smm->v4_session_hash, &kv4); + if (rv == 0) + return stream_session_get_tsi (kv4.value, my_thread_index); + + /* If nothing is found, check if any listener is available */ + return stream_session_lookup_listener4 (lcl, lcl_port, proto); +} + +stream_session_t * +stream_session_lookup_listener6 (ip6_address_t * lcl, u16 lcl_port, u8 proto) +{ + session_manager_main_t *smm = &session_manager_main; + session_kv6_t kv6; + int rv; + + make_v6_listener_kv (&kv6, lcl, lcl_port, proto); + rv = clib_bihash_search_inline_48_8 (&smm->v6_session_hash, &kv6); + if (rv == 0) + return pool_elt_at_index (smm->listen_sessions[proto], kv6.value); + + /* Zero out the lcl ip */ + kv6.key[0] = kv6.key[1] = 0; + rv = clib_bihash_search_inline_48_8 (&smm->v6_session_hash, &kv6); + if (rv == 0) + return pool_elt_at_index (smm->listen_sessions[proto], kv6.value); + + return 0; +} + +/* Looks up a session based on the 5-tuple passed as argument. 
+ * First it tries to find an established session, if this fails, it tries + * finding a listener session if this fails, it tries a lookup with a + * wildcarded local source (listener bound to all interfaces) */ +stream_session_t * +stream_session_lookup6 (ip6_address_t * lcl, ip6_address_t * rmt, + u16 lcl_port, u16 rmt_port, u8 proto, + u32 my_thread_index) +{ + session_manager_main_t *smm = vnet_get_session_manager_main (); + session_kv6_t kv6; + int rv; + + make_v6_ss_kv (&kv6, lcl, rmt, lcl_port, rmt_port, proto); + rv = clib_bihash_search_inline_48_8 (&smm->v6_session_hash, &kv6); + if (rv == 0) + return stream_session_get_tsi (kv6.value, my_thread_index); + + /* If nothing is found, check if any listener is available */ + return stream_session_lookup_listener6 (lcl, lcl_port, proto); +} + +stream_session_t * +stream_session_lookup_listener (ip46_address_t * lcl, u16 lcl_port, u8 proto) +{ + switch (proto) + { + case SESSION_TYPE_IP4_UDP: + case SESSION_TYPE_IP4_TCP: + return stream_session_lookup_listener4 (&lcl->ip4, lcl_port, proto); + break; + case SESSION_TYPE_IP6_UDP: + case SESSION_TYPE_IP6_TCP: + return stream_session_lookup_listener6 (&lcl->ip6, lcl_port, proto); + break; + } + return 0; +} + +static u64 +stream_session_half_open_lookup (session_manager_main_t * smm, + ip46_address_t * lcl, ip46_address_t * rmt, + u16 lcl_port, u16 rmt_port, u8 proto) +{ + session_kv4_t kv4; + session_kv6_t kv6; + int rv; + + switch (proto) + { + case SESSION_TYPE_IP4_UDP: + case SESSION_TYPE_IP4_TCP: + make_v4_ss_kv (&kv4, &lcl->ip4, &rmt->ip4, lcl_port, rmt_port, proto); + rv = clib_bihash_search_inline_16_8 (&smm->v4_half_open_hash, &kv4); + + if (rv == 0) + return kv4.value; + + return (u64) ~ 0; + break; + case SESSION_TYPE_IP6_UDP: + case SESSION_TYPE_IP6_TCP: + make_v6_ss_kv (&kv6, &lcl->ip6, &rmt->ip6, lcl_port, rmt_port, proto); + rv = clib_bihash_search_inline_48_8 (&smm->v6_half_open_hash, &kv6); + + if (rv == 0) + return kv6.value; + + return (u64) ~ 0; + 
break; + } + return 0; +} + +transport_connection_t * +stream_session_lookup_transport4 (session_manager_main_t * smm, + ip4_address_t * lcl, ip4_address_t * rmt, + u16 lcl_port, u16 rmt_port, u8 proto, + u32 my_thread_index) +{ + session_kv4_t kv4; + stream_session_t *s; + int rv; + + /* Lookup session amongst established ones */ + make_v4_ss_kv (&kv4, lcl, rmt, lcl_port, rmt_port, proto); + rv = clib_bihash_search_inline_16_8 (&smm->v4_session_hash, &kv4); + if (rv == 0) + { + s = stream_session_get_tsi (kv4.value, my_thread_index); + + return tp_vfts[s->session_type].get_connection (s->connection_index, + my_thread_index); + } + + /* If nothing is found, check if any listener is available */ + s = stream_session_lookup_listener4 (lcl, lcl_port, proto); + if (s) + return tp_vfts[s->session_type].get_listener (s->connection_index); + + /* Finally, try half-open connections */ + rv = clib_bihash_search_inline_16_8 (&smm->v4_half_open_hash, &kv4); + if (rv == 0) + return tp_vfts[proto].get_half_open (kv4.value & 0xFFFFFFFF); + + return 0; +} + +transport_connection_t * +stream_session_lookup_transport6 (session_manager_main_t * smm, + ip6_address_t * lcl, ip6_address_t * rmt, + u16 lcl_port, u16 rmt_port, u8 proto, + u32 my_thread_index) +{ + stream_session_t *s; + session_kv6_t kv6; + int rv; + + make_v6_ss_kv (&kv6, lcl, rmt, lcl_port, rmt_port, proto); + rv = clib_bihash_search_inline_48_8 (&smm->v6_session_hash, &kv6); + if (rv == 0) + { + s = stream_session_get_tsi (kv6.value, my_thread_index); + + return tp_vfts[s->session_type].get_connection (s->connection_index, + my_thread_index); + } + + /* If nothing is found, check if any listener is available */ + s = stream_session_lookup_listener6 (lcl, lcl_port, proto); + if (s) + return tp_vfts[s->session_type].get_listener (s->connection_index); + + /* Finally, try half-open connections */ + rv = clib_bihash_search_inline_48_8 (&smm->v6_half_open_hash, &kv6); + if (rv == 0) + return 
tp_vfts[s->session_type].get_half_open (kv6.value & 0xFFFFFFFF); + + return 0; +} + +/** + * Allocate vpp event queue (once) per worker thread + */ +void +vpp_session_event_queue_allocate (session_manager_main_t * smm, + u32 thread_index) +{ + api_main_t *am = &api_main; + void *oldheap; + + if (smm->vpp_event_queues[thread_index] == 0) + { + /* Allocate event fifo in the /vpe-api shared-memory segment */ + oldheap = svm_push_data_heap (am->vlib_rp); + + smm->vpp_event_queues[thread_index] = + unix_shared_memory_queue_init (2048 /* nels $$$$ config */ , + sizeof (session_fifo_event_t), + 0 /* consumer pid */ , + 0 + /* (do not) send signal when queue non-empty */ + ); + + svm_pop_heap (oldheap); + } +} + +void +session_manager_get_segment_info (u32 index, u8 ** name, u32 * size) +{ + svm_fifo_segment_private_t *s; + s = svm_fifo_get_segment (index); + *name = s->h->segment_name; + *size = s->ssvm.ssvm_size; +} + +always_inline int +session_manager_add_segment_i (session_manager_main_t * smm, + session_manager_t * sm, + u32 segment_size, u8 * segment_name) +{ + svm_fifo_segment_create_args_t _ca, *ca = &_ca; + int rv; + + memset (ca, 0, sizeof (*ca)); + + ca->segment_name = (char *) segment_name; + ca->segment_size = segment_size; + + rv = svm_fifo_segment_create (ca); + if (rv) + { + clib_warning ("svm_fifo_segment_create ('%s', %d) failed", + ca->segment_name, ca->segment_size); + vec_free (segment_name); + return -1; + } + + vec_add1 (sm->segment_indices, ca->new_segment_index); + + return 0; +} + +static int +session_manager_add_segment (session_manager_main_t * smm, + session_manager_t * sm) +{ + u8 *segment_name; + svm_fifo_segment_create_args_t _ca, *ca = &_ca; + u32 add_segment_size; + u32 default_segment_size = 128 << 10; + + memset (ca, 0, sizeof (*ca)); + segment_name = format (0, "%d-%d%c", getpid (), + smm->unique_segment_name_counter++, 0); + add_segment_size = + sm->add_segment_size ? 
sm->add_segment_size : default_segment_size; + + return session_manager_add_segment_i (smm, sm, add_segment_size, + segment_name); +} + +int +session_manager_add_first_segment (session_manager_main_t * smm, + session_manager_t * sm, u32 segment_size, + u8 ** segment_name) +{ + svm_fifo_segment_create_args_t _ca, *ca = &_ca; + memset (ca, 0, sizeof (*ca)); + *segment_name = format (0, "%d-%d%c", getpid (), + smm->unique_segment_name_counter++, 0); + return session_manager_add_segment_i (smm, sm, segment_size, *segment_name); +} + +void +session_manager_del (session_manager_main_t * smm, session_manager_t * sm) +{ + u32 *deleted_sessions = 0; + u32 *deleted_thread_indices = 0; + int i, j; + + /* Across all fifo segments used by the server */ + for (j = 0; j < vec_len (sm->segment_indices); j++) + { + svm_fifo_segment_private_t *fifo_segment; + svm_fifo_t **fifos; + /* Vector of fifos allocated in the segment */ + fifo_segment = svm_fifo_get_segment (sm->segment_indices[j]); + fifos = (svm_fifo_t **) fifo_segment->h->fifos; + + /* + * Remove any residual sessions from the session lookup table + * Don't bother deleting the individual fifos, we're going to + * throw away the fifo segment in a minute. + */ + for (i = 0; i < vec_len (fifos); i++) + { + svm_fifo_t *fifo; + u32 session_index, thread_index; + stream_session_t *session; + + fifo = fifos[i]; + session_index = fifo->server_session_index; + thread_index = fifo->server_thread_index; + + session = pool_elt_at_index (smm->sessions[thread_index], + session_index); + + /* Add to the deleted_sessions vector (once!) 
*/ + if (!session->is_deleted) + { + session->is_deleted = 1; + vec_add1 (deleted_sessions, + session - smm->sessions[thread_index]); + vec_add1 (deleted_thread_indices, thread_index); + } + } + + for (i = 0; i < vec_len (deleted_sessions); i++) + { + stream_session_t *session; + + session = + pool_elt_at_index (smm->sessions[deleted_thread_indices[i]], + deleted_sessions[i]); + + /* Instead of directly removing the session call disconnect */ + stream_session_disconnect (session); + + /* + stream_session_table_del (smm, session); + pool_put(smm->sessions[deleted_thread_indices[i]], session); + */ + } + + vec_reset_length (deleted_sessions); + vec_reset_length (deleted_thread_indices); + + /* Instead of removing the segment, test when removing the session if + * the segment can be removed + */ + /* svm_fifo_segment_delete (fifo_segment); */ + } + + vec_free (deleted_sessions); + vec_free (deleted_thread_indices); +} + +int +session_manager_allocate_session_fifos (session_manager_main_t * smm, + session_manager_t * sm, + svm_fifo_t ** server_rx_fifo, + svm_fifo_t ** server_tx_fifo, + u32 * fifo_segment_index, + u8 * added_a_segment) +{ + svm_fifo_segment_private_t *fifo_segment; + u32 fifo_size, default_fifo_size = 8192 /* TODO config */ ; + int i; + + *added_a_segment = 0; + + /* Allocate svm fifos */ + ASSERT (vec_len (sm->segment_indices)); + +again: + for (i = 0; i < vec_len (sm->segment_indices); i++) + { + *fifo_segment_index = sm->segment_indices[i]; + fifo_segment = svm_fifo_get_segment (*fifo_segment_index); + + fifo_size = sm->rx_fifo_size; + fifo_size = (fifo_size == 0) ? default_fifo_size : fifo_size; + *server_rx_fifo = svm_fifo_segment_alloc_fifo (fifo_segment, fifo_size); + + fifo_size = sm->tx_fifo_size; + fifo_size = (fifo_size == 0) ? default_fifo_size : fifo_size; + *server_tx_fifo = svm_fifo_segment_alloc_fifo (fifo_segment, fifo_size); + + if (*server_rx_fifo == 0) + { + /* This would be very odd, but handle it... 
*/ + if (*server_tx_fifo != 0) + { + svm_fifo_segment_free_fifo (fifo_segment, *server_tx_fifo); + *server_tx_fifo = 0; + } + continue; + } + if (*server_tx_fifo == 0) + { + if (*server_rx_fifo != 0) + { + svm_fifo_segment_free_fifo (fifo_segment, *server_rx_fifo); + *server_rx_fifo = 0; + } + continue; + } + break; + } + + /* See if we're supposed to create another segment */ + if (*server_rx_fifo == 0) + { + if (sm->add_segment) + { + if (*added_a_segment) + { + clib_warning ("added a segment, still cant allocate a fifo"); + return SESSION_ERROR_NEW_SEG_NO_SPACE; + } + + if (session_manager_add_segment (smm, sm)) + return VNET_API_ERROR_URI_FIFO_CREATE_FAILED; + + *added_a_segment = 1; + goto again; + } + else + return SESSION_ERROR_NO_SPACE; + } + return 0; +} + +int +stream_session_create_i (session_manager_main_t * smm, application_t * app, + transport_connection_t * tc, + stream_session_t ** ret_s) +{ + int rv; + svm_fifo_t *server_rx_fifo = 0, *server_tx_fifo = 0; + u32 fifo_segment_index; + u32 pool_index, seg_size; + stream_session_t *s; + u64 value; + u32 thread_index = tc->thread_index; + session_manager_t *sm; + u8 segment_added; + u8 *seg_name; + + sm = session_manager_get (app->session_manager_index); + + /* Check the API queue */ + if (app->mode == APP_SERVER && application_api_queue_is_full (app)) + return SESSION_ERROR_API_QUEUE_FULL; + + if ((rv = session_manager_allocate_session_fifos (smm, sm, &server_rx_fifo, + &server_tx_fifo, + &fifo_segment_index, + &segment_added))) + return rv; + + if (segment_added && app->mode == APP_SERVER) + { + /* Send an API message to the external server, to map new segment */ + ASSERT (app->cb_fns.add_segment_callback); + + session_manager_get_segment_info (fifo_segment_index, &seg_name, + &seg_size); + if (app->cb_fns.add_segment_callback (app->api_client_index, seg_name, + seg_size)) + return VNET_API_ERROR_URI_FIFO_CREATE_FAILED; + } + + /* Create the session */ + pool_get (smm->sessions[thread_index], s); + 
memset (s, 0, sizeof (*s)); + + /* Initialize backpointers */ + pool_index = s - smm->sessions[thread_index]; + server_rx_fifo->server_session_index = pool_index; + server_rx_fifo->server_thread_index = thread_index; + + server_tx_fifo->server_session_index = pool_index; + server_tx_fifo->server_thread_index = thread_index; + + s->server_rx_fifo = server_rx_fifo; + s->server_tx_fifo = server_tx_fifo; + + /* Initialize state machine, such as it is... */ + s->session_type = app->session_type; + s->session_state = SESSION_STATE_CONNECTING; + s->app_index = application_get_index (app); + s->server_segment_index = fifo_segment_index; + s->thread_index = thread_index; + s->session_index = pool_index; + + /* Attach transport to session */ + s->connection_index = tc->c_index; + + /* Attach session to transport */ + tc->s_index = s->session_index; + + /* Add to the main lookup table */ + value = (((u64) thread_index) << 32) | (u64) s->session_index; + stream_session_table_add_for_tc (app->session_type, tc, value); + + *ret_s = s; + + return 0; +} + +/* + * Enqueue data for delivery to session peer. Does not notify peer of enqueue + * event but on request can queue notification events for later delivery by + * calling stream_server_flush_enqueue_events(). + * + * @param tc Transport connection which is to be enqueued data + * @param data Data to be enqueued + * @param len Length of data to be enqueued + * @param queue_event Flag to indicate if peer is to be notified or if event + * is to be queued. The former is useful when more data is + * enqueued and only one event is to be generated. + * @return Number of bytes enqueued or a negative value if enqueueing failed. + */ +int +stream_session_enqueue_data (transport_connection_t * tc, u8 * data, u16 len, + u8 queue_event) +{ + stream_session_t *s; + int enqueued; + + s = stream_session_get (tc->s_index, tc->thread_index); + + /* Make sure there's enough space left. 
We might've filled the pipes */ + if (PREDICT_FALSE (len > svm_fifo_max_enqueue (s->server_rx_fifo))) + return -1; + + enqueued = svm_fifo_enqueue_nowait (s->server_rx_fifo, s->pid, len, data); + + if (queue_event) + { + /* Queue RX event on this fifo. Eventually these will need to be flushed + * by calling stream_server_flush_enqueue_events () */ + session_manager_main_t *smm = vnet_get_session_manager_main (); + u32 thread_index = s->thread_index; + u32 my_enqueue_epoch = smm->current_enqueue_epoch[thread_index]; + + if (s->enqueue_epoch != my_enqueue_epoch) + { + s->enqueue_epoch = my_enqueue_epoch; + vec_add1 (smm->session_indices_to_enqueue_by_thread[thread_index], + s - smm->sessions[thread_index]); + } + } + + return enqueued; +} + +/** Check if we have space in rx fifo to push more bytes */ +u8 +stream_session_no_space (transport_connection_t * tc, u32 thread_index, + u16 data_len) +{ + stream_session_t *s = stream_session_get (tc->c_index, thread_index); + + if (PREDICT_FALSE (s->session_state != SESSION_STATE_READY)) + return 1; + + if (data_len > svm_fifo_max_enqueue (s->server_rx_fifo)) + return 1; + + return 0; +} + +u32 +stream_session_peek_bytes (transport_connection_t * tc, u8 * buffer, + u32 offset, u32 max_bytes) +{ + stream_session_t *s = stream_session_get (tc->s_index, tc->thread_index); + return svm_fifo_peek (s->server_tx_fifo, s->pid, offset, max_bytes, buffer); +} + +u32 +stream_session_dequeue_drop (transport_connection_t * tc, u32 max_bytes) +{ + stream_session_t *s = stream_session_get (tc->s_index, tc->thread_index); + return svm_fifo_dequeue_drop (s->server_tx_fifo, s->pid, max_bytes); +} + +/** + * Notify session peer that new data has been enqueued. + * + * @param s Stream session for which the event is to be generated. + * @param block Flag to indicate if call should block if event queue is full. + * + * @return 0 on succes or negative number if failed to send notification. 
+ */ +static int +stream_session_enqueue_notify (stream_session_t * s, u8 block) +{ + application_t *app; + session_fifo_event_t evt; + unix_shared_memory_queue_t *q; + static u32 serial_number; + + if (PREDICT_FALSE (s->session_state == SESSION_STATE_CLOSED)) + return 0; + + /* Get session's server */ + app = application_get (s->app_index); + + /* Fabricate event */ + evt.fifo = s->server_rx_fifo; + evt.event_type = FIFO_EVENT_SERVER_RX; + evt.event_id = serial_number++; + evt.enqueue_length = svm_fifo_max_dequeue (s->server_rx_fifo); + + /* Add event to server's event queue */ + q = app->event_queue; + + /* Based on request block (or not) for lack of space */ + if (block || PREDICT_TRUE (q->cursize < q->maxsize)) + unix_shared_memory_queue_add (app->event_queue, (u8 *) & evt, + 0 /* do wait for mutex */ ); + else + return -1; + + if (1) + { + ELOG_TYPE_DECLARE (e) = + { + .format = "evt-enqueue: id %d length %d",.format_args = "i4i4",}; + struct + { + u32 data[2]; + } *ed; + ed = ELOG_DATA (&vlib_global_main.elog_main, e); + ed->data[0] = evt.event_id; + ed->data[1] = evt.enqueue_length; + } + + return 0; +} + +/** + * Flushes queue of sessions that are to be notified of new data + * enqueued events. + * + * @param thread_index Thread index for which the flush is to be performed. + * @return 0 on success or a positive number indicating the number of + * failures due to API queue being full. 
+ */ +int +session_manager_flush_enqueue_events (u32 thread_index) +{ + session_manager_main_t *smm = &session_manager_main; + u32 *session_indices_to_enqueue; + int i, errors = 0; + + session_indices_to_enqueue = + smm->session_indices_to_enqueue_by_thread[thread_index]; + + for (i = 0; i < vec_len (session_indices_to_enqueue); i++) + { + stream_session_t *s0; + + /* Get session */ + s0 = stream_session_get (session_indices_to_enqueue[i], thread_index); + if (stream_session_enqueue_notify (s0, 0 /* don't block */ )) + { + errors++; + } + } + + vec_reset_length (session_indices_to_enqueue); + + smm->session_indices_to_enqueue_by_thread[thread_index] = + session_indices_to_enqueue; + + /* Increment enqueue epoch for next round */ + smm->current_enqueue_epoch[thread_index]++; + + return errors; +} + +/* + * Start listening on server's ip/port pair for requested transport. + * + * Creates a 'dummy' stream session with state LISTENING to be used in session + * lookups, prior to establishing connection. Requests transport to build + * it's own specific listening connection. 
+ */ +int +stream_session_start_listen (u32 server_index, ip46_address_t * ip, u16 port) +{ + session_manager_main_t *smm = &session_manager_main; + stream_session_t *s; + transport_connection_t *tc; + application_t *srv; + u32 tci; + + srv = application_get (server_index); + + pool_get (smm->listen_sessions[srv->session_type], s); + memset (s, 0, sizeof (*s)); + + s->session_type = srv->session_type; + s->session_state = SESSION_STATE_LISTENING; + s->session_index = s - smm->listen_sessions[srv->session_type]; + s->app_index = srv->index; + + /* Transport bind/listen */ + tci = tp_vfts[srv->session_type].bind (smm->vlib_main, s->session_index, ip, + port); + + /* Attach transport to session */ + s->connection_index = tci; + tc = tp_vfts[srv->session_type].get_listener (tci); + + srv->session_index = s->session_index; + + /* Add to the main lookup table */ + stream_session_table_add_for_tc (s->session_type, tc, s->session_index); + + return 0; +} + +void +stream_session_stop_listen (u32 server_index) +{ + session_manager_main_t *smm = &session_manager_main; + stream_session_t *listener; + transport_connection_t *tc; + application_t *srv; + + srv = application_get (server_index); + listener = pool_elt_at_index (smm->listen_sessions[srv->session_type], + srv->session_index); + + tc = tp_vfts[srv->session_type].get_listener (listener->connection_index); + stream_session_table_del_for_tc (smm, listener->session_type, tc); + + tp_vfts[srv->session_type].unbind (smm->vlib_main, + listener->connection_index); + pool_put (smm->listen_sessions[srv->session_type], listener); +} + +int +connect_server_add_segment_cb (application_t * ss, char *segment_name, + u32 segment_size) +{ + /* Does exactly nothing, but die */ + ASSERT (0); + return 0; +} + +void +connects_session_manager_init (session_manager_main_t * smm, u8 session_type) +{ + session_manager_t *sm; + u32 connect_fifo_size = 8 << 10; /* Config? 
*/ + u32 default_segment_size = 1 << 20; + + pool_get (smm->session_managers, sm); + memset (sm, 0, sizeof (*sm)); + + sm->add_segment_size = default_segment_size; + sm->rx_fifo_size = connect_fifo_size; + sm->tx_fifo_size = connect_fifo_size; + sm->add_segment = 1; + + session_manager_add_segment (smm, sm); + smm->connect_manager_index[session_type] = sm - smm->session_managers; +} + +void +stream_session_connect_notify (transport_connection_t * tc, u8 sst, + u8 is_fail) +{ + session_manager_main_t *smm = &session_manager_main; + application_t *app; + stream_session_t *new_s = 0; + u64 value; + + value = stream_session_half_open_lookup (smm, &tc->lcl_ip, &tc->rmt_ip, + tc->lcl_port, tc->rmt_port, + tc->proto); + if (value == HALF_OPEN_LOOKUP_INVALID_VALUE) + { + clib_warning ("This can't be good!"); + return; + } + + app = application_get (value >> 32); + + if (!is_fail) + { + /* Create new session (server segments are allocated if needed) */ + if (stream_session_create_i (smm, app, tc, &new_s)) + return; + + app->session_index = stream_session_get_index (new_s); + app->thread_index = new_s->thread_index; + + /* Allocate vpp event queue for this thread if needed */ + vpp_session_event_queue_allocate (smm, tc->thread_index); + } + + /* Notify client */ + app->cb_fns.session_connected_callback (app->api_client_index, new_s, + is_fail); + + /* Cleanup session lookup */ + stream_session_half_open_table_del (smm, sst, tc); +} + +void +stream_session_accept_notify (transport_connection_t * tc) +{ + application_t *server; + stream_session_t *s; + + s = stream_session_get (tc->s_index, tc->thread_index); + server = application_get (s->app_index); + server->cb_fns.session_accept_callback (s); +} + +/** + * Notification from transport that connection is being closed. + * + * A disconnect is sent to application but state is not removed. Once + * disconnect is acknowledged by application, session disconnect is called. 
+ * Ultimately this leads to close being called on transport (passive close). + */ +void +stream_session_disconnect_notify (transport_connection_t * tc) +{ + application_t *server; + stream_session_t *s; + + s = stream_session_get (tc->s_index, tc->thread_index); + server = application_get (s->app_index); + server->cb_fns.session_disconnect_callback (s); +} + +/** + * Cleans up session and associated app if needed. + */ +void +stream_session_delete (stream_session_t * s) +{ + session_manager_main_t *smm = vnet_get_session_manager_main (); + svm_fifo_segment_private_t *fifo_segment; + application_t *app; + int rv; + + /* delete from the main lookup table */ + rv = stream_session_table_del (smm, s); + + if (rv) + clib_warning ("hash delete error, rv %d", rv); + + /* Cleanup fifo segments */ + fifo_segment = svm_fifo_get_segment (s->server_segment_index); + svm_fifo_segment_free_fifo (fifo_segment, s->server_rx_fifo); + svm_fifo_segment_free_fifo (fifo_segment, s->server_tx_fifo); + + /* Cleanup app if client */ + app = application_get (s->app_index); + if (app->mode == APP_CLIENT) + { + application_del (app); + } + else if (app->mode == APP_SERVER) + { + session_manager_t *sm; + svm_fifo_segment_private_t *fifo_segment; + svm_fifo_t **fifos; + u32 fifo_index; + + sm = session_manager_get (app->session_manager_index); + + /* Delete fifo */ + fifo_segment = svm_fifo_get_segment (s->server_segment_index); + fifos = (svm_fifo_t **) fifo_segment->h->fifos; + + fifo_index = svm_fifo_segment_index (fifo_segment); + + /* Remove segment only if it holds no fifos and not the first */ + if (sm->segment_indices[0] != fifo_index && vec_len (fifos) == 0) + svm_fifo_segment_delete (fifo_segment); + } + + pool_put (smm->sessions[s->thread_index], s); +} + +/** + * Notification from transport that connection is being deleted + * + * This should be called only on previously fully established sessions. 
For + * instance failed connects should call stream_session_connect_notify and + * indicate that the connect has failed. + */ +void +stream_session_delete_notify (transport_connection_t * tc) +{ + stream_session_t *s; + + s = stream_session_get_if_valid (tc->s_index, tc->thread_index); + if (!s) + { + clib_warning ("Surprised!"); + return; + } + stream_session_delete (s); +} + +/** + * Notify application that connection has been reset. + */ +void +stream_session_reset_notify (transport_connection_t * tc) +{ + stream_session_t *s; + application_t *app; + s = stream_session_get (tc->s_index, tc->thread_index); + + app = application_get (s->app_index); + app->cb_fns.session_reset_callback (s); +} + +/** + * Accept a stream session. Optionally ping the server by callback. + */ +int +stream_session_accept (transport_connection_t * tc, u32 listener_index, + u8 sst, u8 notify) +{ + session_manager_main_t *smm = &session_manager_main; + application_t *server; + stream_session_t *s, *listener; + + int rv; + + /* Find the server */ + listener = pool_elt_at_index (smm->listen_sessions[sst], listener_index); + server = application_get (listener->app_index); + + if ((rv = stream_session_create_i (smm, server, tc, &s))) + return rv; + + /* Allocate vpp event queue for this thread if needed */ + vpp_session_event_queue_allocate (smm, tc->thread_index); + + /* Shoulder-tap the server */ + if (notify) + { + server->cb_fns.session_accept_callback (s); + } + + return 0; +} + +void +stream_session_open (u8 sst, ip46_address_t * addr, u16 port_host_byte_order, + u32 app_index) +{ + transport_connection_t *tc; + u32 tci; + u64 value; + + /* Ask transport to open connection */ + tci = tp_vfts[sst].open (addr, port_host_byte_order); + + /* Get transport connection */ + tc = tp_vfts[sst].get_half_open (tci); + + /* Store api_client_index and transport connection index */ + value = (((u64) app_index) << 32) | (u64) tc->c_index; + + /* Add to the half-open lookup table */ + 
stream_session_half_open_table_add (sst, tc, value); +} + +/** + * Disconnect session and propagate to transport. This should eventually + * result in a delete notification that allows us to cleanup session state. + * Called for both active/passive disconnects. + */ +void +stream_session_disconnect (stream_session_t * s) +{ + tp_vfts[s->session_type].close (s->connection_index, s->thread_index); + s->session_state = SESSION_STATE_CLOSED; +} + +/** + * Cleanup transport and session state. + */ +void +stream_session_cleanup (stream_session_t * s) +{ + tp_vfts[s->session_type].cleanup (s->connection_index, s->thread_index); + stream_session_delete (s); +} + +void +session_register_transport (u8 type, const transport_proto_vft_t * vft) +{ + session_manager_main_t *smm = vnet_get_session_manager_main (); + + vec_validate (tp_vfts, type); + tp_vfts[type] = *vft; + + /* If an offset function is provided, then peek instead of dequeue */ + smm->session_rx_fns[type] = + (vft->rx_fifo_offset) ? session_fifo_rx_peek : session_fifo_rx_dequeue; +} + +transport_proto_vft_t * +session_get_transport_vft (u8 type) +{ + if (type >= vec_len (tp_vfts)) + return 0; + return &tp_vfts[type]; +} + +static clib_error_t * +session_manager_main_init (vlib_main_t * vm) +{ + u32 num_threads; + vlib_thread_main_t *vtm = vlib_get_thread_main (); + session_manager_main_t *smm = &session_manager_main; + int i; + + smm->vlib_main = vm; + smm->vnet_main = vnet_get_main (); + + num_threads = 1 /* main thread */ + vtm->n_threads; + + if (num_threads < 1) + return clib_error_return (0, "n_thread_stacks not set"); + + /* $$$ config parameters */ + svm_fifo_segment_init (0x200000000ULL /* first segment base VA */ , + 20 /* timeout in seconds */ ); + + /* configure per-thread ** vectors */ + vec_validate (smm->sessions, num_threads - 1); + vec_validate (smm->session_indices_to_enqueue_by_thread, num_threads - 1); + vec_validate (smm->tx_buffers, num_threads - 1); + vec_validate (smm->fifo_events, 
num_threads - 1); + vec_validate (smm->evts_partially_read, num_threads - 1); + vec_validate (smm->current_enqueue_epoch, num_threads - 1); + vec_validate (smm->vpp_event_queues, num_threads - 1); + + /* $$$$ preallocate hack config parameter */ + for (i = 0; i < 200000; i++) + { + stream_session_t *ss; + pool_get (smm->sessions[0], ss); + memset (ss, 0, sizeof (*ss)); + } + + for (i = 0; i < 200000; i++) + pool_put_index (smm->sessions[0], i); + + clib_bihash_init_16_8 (&smm->v4_session_hash, "v4 session table", + 200000 /* $$$$ config parameter nbuckets */ , + (64 << 20) /*$$$ config parameter table size */ ); + clib_bihash_init_48_8 (&smm->v6_session_hash, "v6 session table", + 200000 /* $$$$ config parameter nbuckets */ , + (64 << 20) /*$$$ config parameter table size */ ); + + clib_bihash_init_16_8 (&smm->v4_half_open_hash, "v4 half-open table", + 200000 /* $$$$ config parameter nbuckets */ , + (64 << 20) /*$$$ config parameter table size */ ); + clib_bihash_init_48_8 (&smm->v6_half_open_hash, "v6 half-open table", + 200000 /* $$$$ config parameter nbuckets */ , + (64 << 20) /*$$$ config parameter table size */ ); + + for (i = 0; i < SESSION_N_TYPES; i++) + smm->connect_manager_index[i] = INVALID_INDEX; + + return 0; +} + +VLIB_INIT_FUNCTION (session_manager_main_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/session/session.h b/src/vnet/session/session.h new file mode 100644 index 00000000..cf14cca9 --- /dev/null +++ b/src/vnet/session/session.h @@ -0,0 +1,380 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef __included_session_h__
#define __included_session_h__

/* NOTE(review): the header names of the five includes below were lost when
 * this patch was extracted; restore them from the original commit. */
#include
#include
#include
#include
#include

/* Value returned by a half-open lookup that found nothing (all ones) */
#define HALF_OPEN_LOOKUP_INVALID_VALUE ((u64)~0)
/* Marker for an index that has not been set (all ones) */
#define INVALID_INDEX ((u32)~0)

/* TODO decide how much since we have pre-data as well */
#define MAX_HDRS_LEN    100	/* Max number of bytes for headers */

/* Types of events carried in a session_fifo_event_t */
typedef enum
{
  FIFO_EVENT_SERVER_RX,
  FIFO_EVENT_SERVER_TX,
  FIFO_EVENT_TIMEOUT,
  FIFO_EVENT_SERVER_EXIT,
} fifo_event_type_t;

/* X-macro list of session errors: _(symbol, description). Expanded below
 * into the SESSION_ERROR_* enumerators. */
#define foreach_session_input_error                                    	\
_(NO_SESSION, "No session drops")                                       \
_(NO_LISTENER, "No listener for dst port drops")                        \
_(ENQUEUED, "Packets pushed into rx fifo")                              \
_(NOT_READY, "Session not ready packets")                               \
_(FIFO_FULL, "Packets dropped for lack of rx fifo space")               \
_(EVENT_FIFO_FULL, "Events not sent for lack of event fifo space")      \
_(API_QUEUE_FULL, "Sessions not created for lack of API queue space")   \
_(NEW_SEG_NO_SPACE, "Created segment, couldn't allocate a fifo pair")   \
_(NO_SPACE, "Couldn't allocate a fifo pair")

/* SESSION_ERROR_<symbol> for every entry of foreach_session_input_error,
 * followed by the entry count */
typedef enum
{
#define _(sym,str) SESSION_ERROR_##sym,
  foreach_session_input_error
#undef _
    SESSION_N_ERROR,
} session_error_t;

/* Event queue input node static next indices */
typedef enum
{
  SESSION_QUEUE_NEXT_DROP,
  SESSION_QUEUE_NEXT_TCP_IP4_OUTPUT,
  SESSION_QUEUE_NEXT_IP4_LOOKUP,
  SESSION_QUEUE_NEXT_TCP_IP6_OUTPUT,
  SESSION_QUEUE_NEXT_IP6_LOOKUP,
  SESSION_QUEUE_N_NEXT,
} session_queue_next_t;

/* X-macro list of session types: _(UPPER_CASE_NAME, lower_case_name) */
#define foreach_session_type                    \
  _(IP4_TCP, ip4_tcp)                           \
  _(IP4_UDP, ip4_udp)                           \
  _(IP6_TCP, ip6_tcp)                           \
  _(IP6_UDP, ip6_udp)

+/** One SESSION_TYPE_<A> enumerator per foreach_session_type entry;
+ * SESSION_N_TYPES is the number of entries */
+typedef enum
+{
+#define _(A, a) SESSION_TYPE_##A,
+  foreach_session_type
+#undef _
+    SESSION_N_TYPES,
+} session_type_t;
+
+/*
+ * Application session state
+ */
+typedef enum
+{
+  SESSION_STATE_LISTENING,
+  SESSION_STATE_CONNECTING,
+  SESSION_STATE_READY,
+  SESSION_STATE_CLOSED,
+  SESSION_STATE_N_STATES,
+} stream_session_state_t;
+
+/** Packed fifo event record */
+typedef CLIB_PACKED (struct
+  {
+    svm_fifo_t * fifo;
+    u8 event_type;
+    /* $$$$ for event logging */
+    u16 event_id;
+    u32 enqueue_length;
+  }) session_fifo_event_t;
+
+typedef struct _stream_session_t
+{
+  /** Type */
+  u8 session_type;
+
+  /** State */
+  u8 session_state;
+
+  /** Session index in per_thread pool */
+  u32 session_index;
+
+  /** Transport specific */
+  u32 connection_index;
+
+  u8 thread_index;
+
+  /** Application specific */
+  u32 pid;
+
+  /** fifo pointers. Once allocated, these do not move */
+  svm_fifo_t *server_rx_fifo;
+  svm_fifo_t *server_tx_fifo;
+
+  /** To avoid n**2 "one event per frame" check */
+  u8 enqueue_epoch;
+
+  /** used during unbind processing */
+  u8 is_deleted;
+
+  /** stream server pool index */
+  u32 app_index;
+
+  /** svm segment index */
+  u32 server_segment_index;
+} stream_session_t;
+
+typedef struct _session_manager
+{
+  /** segments mapped by this server */
+  u32 *segment_indices;
+
+  /** Session fifo sizes. They are provided for binds and take default
+   * values for connects */
+  u32 rx_fifo_size;
+  u32 tx_fifo_size;
+
+  /** Configured additional segment size */
+  u32 add_segment_size;
+
+  /** Flag that indicates if additional segments should be created */
+  u8 add_segment;
+} session_manager_t;
+
+/* Forward definition */
+typedef struct _session_manager_main session_manager_main_t;
+
+/** Signature shared by the per-transport rx functions declared below */
+typedef int
+  (session_fifo_rx_fn) (vlib_main_t * vm, vlib_node_runtime_t * node,
+                        session_manager_main_t * smm,
+                        session_fifo_event_t * e0, stream_session_t * s0,
+                        u32 thread_index, int *n_tx_pkts);
+
+extern session_fifo_rx_fn session_fifo_rx_peek;
+extern session_fifo_rx_fn session_fifo_rx_dequeue;
+
+struct _session_manager_main
+{
+  /** Lookup tables for established sessions and listeners */
+  clib_bihash_16_8_t v4_session_hash;
+  clib_bihash_48_8_t v6_session_hash;
+
+  /** Lookup tables for half-open sessions */
+  clib_bihash_16_8_t v4_half_open_hash;
+  clib_bihash_48_8_t v6_half_open_hash;
+
+  /** Per worker thread session pools */
+  stream_session_t **sessions;
+
+  /** Pool of listen sessions. Same type as stream sessions to ease lookups */
+  stream_session_t *listen_sessions[SESSION_N_TYPES];
+
+  /** Sparse vector to map dst port to stream server  */
+  u16 *stream_server_by_dst_port[SESSION_N_TYPES];
+
+  /** per-worker enqueue epoch counters */
+  u8 *current_enqueue_epoch;
+
+  /** Per-worker thread vector of sessions to enqueue */
+  u32 **session_indices_to_enqueue_by_thread;
+
+  /** per-worker tx buffer free lists */
+  u32 **tx_buffers;
+
+  /** Per worker-thread vector of partially read events */
+  session_fifo_event_t **evts_partially_read;
+
+  /** per-worker active event vectors */
+  session_fifo_event_t **fifo_events;
+
+  /** vpp fifo event queue */
+  unix_shared_memory_queue_t **vpp_event_queues;
+
+  /** Unique segment name counter */
+  u32 unique_segment_name_counter;
+
+  /* Connection manager used by incoming connects */
+  u32 connect_manager_index[SESSION_N_TYPES];
+
+  session_manager_t *session_managers;
+
+  /** Per transport rx function that can either dequeue or peek */
+  session_fifo_rx_fn *session_rx_fns[SESSION_N_TYPES];
+
+  /* Convenience */
+  vlib_main_t *vlib_main;
+  vnet_main_t *vnet_main;
+};
+
+extern session_manager_main_t session_manager_main;
+
+/*
+ * Session manager function
+ */
+
+/** Return the address of the global session_manager_main */
+always_inline session_manager_main_t *
+vnet_get_session_manager_main ()
+{
+  return &session_manager_main;
+}
+
+/** Return the session manager stored at pool index @c index */
+always_inline session_manager_t *
+session_manager_get (u32 index)
+{
+  return pool_elt_at_index (session_manager_main.session_managers, index);
+}
+
+/** Return the vpp event queue of thread @c thread_index (no bounds check) */
+always_inline unix_shared_memory_queue_t *
+session_manager_get_vpp_event_queue (u32 thread_index)
+{
+  return session_manager_main.vpp_event_queues[thread_index];
+}
+
+/** Return the session manager registered in connect_manager_index for
+ * @c session_type */
+always_inline session_manager_t *
+connects_session_manager_get (session_manager_main_t * smm,
+                              session_type_t session_type)
+{
+  return pool_elt_at_index (smm->session_managers,
+                            smm->connect_manager_index[session_type]);
+}
+
+void session_manager_get_segment_info (u32 index, u8 ** name, u32 * size);
+int session_manager_flush_enqueue_events (u32 thread_index);
+int
+session_manager_add_first_segment (session_manager_main_t * smm,
+                                   session_manager_t * sm, u32 segment_size,
+                                   u8 ** segment_name);
+void
+session_manager_del (session_manager_main_t * smm, session_manager_t * sm);
+void
+connects_session_manager_init (session_manager_main_t * smm, u8 session_type);
+
+/*
+ * Stream session functions
+ */
+
+stream_session_t *stream_session_lookup_listener4 (ip4_address_t * lcl,
+                                                   u16 lcl_port, u8 proto);
+stream_session_t *stream_session_lookup4 (ip4_address_t * lcl,
+                                          ip4_address_t * rmt, u16 lcl_port,
+                                          u16 rmt_port, u8 proto,
+                                          u32 thread_index);
+stream_session_t *stream_session_lookup_listener6 (ip6_address_t * lcl,
+                                                   u16 lcl_port, u8 proto);
+/* The protocol parameter is named like in the sibling prototypes */
+stream_session_t *stream_session_lookup6 (ip6_address_t * lcl,
+                                          ip6_address_t * rmt, u16 lcl_port,
+                                          u16 rmt_port, u8 proto,
+                                          u32 thread_index);
+transport_connection_t
+  * stream_session_lookup_transport4 (session_manager_main_t * smm,
+                                      ip4_address_t * lcl,
+                                      ip4_address_t * rmt, u16 lcl_port,
+                                      u16 rmt_port, u8 proto,
+                                      u32 thread_index);
+transport_connection_t
+  * stream_session_lookup_transport6 (session_manager_main_t * smm,
+                                      ip6_address_t * lcl,
+                                      ip6_address_t * rmt, u16 lcl_port,
+                                      u16 rmt_port, u8 proto,
+                                      u32 thread_index);
+stream_session_t *stream_session_lookup_listener (ip46_address_t * lcl,
+                                                  u16 lcl_port, u8 proto);
+
+/** Get a session from a combined value: the upper 32 bits must equal
+ * @c thread_index (asserted), the lower 32 bits are the pool index */
+always_inline stream_session_t *
+stream_session_get_tsi (u64 ti_and_si, u32 thread_index)
+{
+  ASSERT ((u32) (ti_and_si >> 32) == thread_index);
+  return pool_elt_at_index (session_manager_main.sessions[thread_index],
+                            ti_and_si & 0xFFFFFFFFULL);
+}
+
+/** Get session @c si from the pool of thread @c thread_index; neither
+ * argument is validated here */
+always_inline stream_session_t *
+stream_session_get (u64 si, u32 thread_index)
+{
+  return pool_elt_at_index (session_manager_main.sessions[thread_index], si);
+}
+
+/** Like stream_session_get, but returns 0 when the thread index is out of
+ * range or the pool slot is free */
+always_inline stream_session_t *
+stream_session_get_if_valid (u64 si, u32 thread_index)
+{
+  if (thread_index >= vec_len (session_manager_main.sessions))
+    return 0;
+
+  if (pool_is_free_index (session_manager_main.sessions[thread_index], si))
+    return 0;
+
+  return pool_elt_at_index (session_manager_main.sessions[thread_index], si);
+}
+
+/** Get listen session @c si from the listen pool of session type @c sst */
+always_inline stream_session_t *
+stream_session_listener_get (u8 sst, u64 si)
+{
+  return pool_elt_at_index (session_manager_main.listen_sessions[sst], si);
+}
+
+/** Pool index of @c s: relative to the listen pool of its type when it is
+ * in LISTENING state, relative to its thread's session pool otherwise */
+always_inline u32
+stream_session_get_index (stream_session_t * s)
+{
+  if (s->session_state == SESSION_STATE_LISTENING)
+    return s - session_manager_main.listen_sessions[s->session_type];
+
+  return s - session_manager_main.sessions[s->thread_index];
+}
+
+/** Result of svm_fifo_max_enqueue on the rx fifo of the session that
+ * @c tc refers to */
+always_inline u32
+stream_session_max_enqueue (transport_connection_t * tc)
+{
+  stream_session_t *s = stream_session_get (tc->s_index, tc->thread_index);
+  return svm_fifo_max_enqueue (s->server_rx_fifo);
+}
+
+int
+stream_session_enqueue_data (transport_connection_t * tc, u8 * data, u16 len,
+                             u8 queue_event);
+u32
+stream_session_peek_bytes (transport_connection_t * tc, u8 * buffer,
+                           u32 offset, u32 max_bytes);
+u32 stream_session_dequeue_drop (transport_connection_t * tc, u32 max_bytes);
+
+void
+stream_session_connect_notify (transport_connection_t * tc, u8 sst,
+                               u8 is_fail);
+void stream_session_accept_notify (transport_connection_t * tc);
+void stream_session_disconnect_notify (transport_connection_t * tc);
+void stream_session_delete_notify (transport_connection_t * tc);
+void stream_session_reset_notify (transport_connection_t * tc);
+int
+stream_session_accept (transport_connection_t * tc, u32 listener_index,
+                       u8 sst, u8 notify);
+void stream_session_open (u8 sst, ip46_address_t * addr,
+                          u16 port_host_byte_order, u32 api_client_index);
+void stream_session_disconnect (stream_session_t * s);
+void stream_session_cleanup (stream_session_t * s);
+int
+stream_session_start_listen (u32 server_index, ip46_address_t * ip, u16 port);
+void stream_session_stop_listen (u32 server_index);
+
+u8 *format_stream_session (u8 * s, va_list * args);
+
+void session_register_transport (u8 type, const transport_proto_vft_t * vft);
+transport_proto_vft_t *session_get_transport_vft (u8 type);
+
+#endif /* __included_session_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/session/session_api.c b/src/vnet/session/session_api.c
new file mode 100644
index 00000000..9d068684
--- /dev/null
+++ b/src/vnet/session/session_api.c
@@ -0,0 +1,821 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+#include
+
+#include
+#include "application_interface.h"
+
+#define vl_typedefs		/* define message structures */
+#include
+#undef vl_typedefs
+
+#define vl_endianfun		/* define message structures */
+#include
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include
+#undef vl_printfun
+
+#include
+
+#define foreach_session_api_msg                                         \
+_(MAP_ANOTHER_SEGMENT_REPLY, map_another_segment_reply)                 \
+_(BIND_URI, bind_uri)                                                   \
+_(UNBIND_URI, unbind_uri)                                               \
+_(CONNECT_URI, connect_uri)                                             \
+_(DISCONNECT_SESSION, disconnect_session)                               \
+_(DISCONNECT_SESSION_REPLY, disconnect_session_reply)                   \
+_(ACCEPT_SESSION_REPLY, accept_session_reply)                           \
+_(RESET_SESSION_REPLY, reset_session_reply)                             \
+_(BIND_SOCK, bind_sock)                                                 \
+_(UNBIND_SOCK, unbind_sock)                                             \
+_(CONNECT_SOCK, connect_sock)                                           \
+_(DISCONNECT_SOCK, disconnect_sock)                                     \
+_(DISCONNECT_SOCK_REPLY, disconnect_sock_reply)                         \
+_(ACCEPT_SOCK_REPLY, accept_sock_reply)                                 \
+_(RESET_SOCK_REPLY, reset_sock_reply)                                   \
+
+/**
+ * Send a MAP_ANOTHER_SEGMENT message to an API client.
+ *
+ * @return 0 on success, -1 if the client has no input queue.
+ */
+static int
+send_add_segment_callback (u32 api_client_index, const u8 * segment_name,
+                           u32 segment_size)
+{
+  vl_api_map_another_segment_t *mp;
+  unix_shared_memory_queue_t *q;
+
+  q = vl_api_client_index_to_input_queue (api_client_index);
+
+  if (!q)
+    return -1;
+
+  mp = vl_msg_api_alloc (sizeof (*mp));
+  memset (mp, 0, sizeof (*mp));
+  mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_MAP_ANOTHER_SEGMENT);
+  mp->segment_size = segment_size;
+  /* The message was zeroed above and at most size - 1 bytes are copied,
+   * so the name is always NUL terminated */
+  strncpy ((char *) mp->segment_name, (char *) segment_name,
+           sizeof (mp->segment_name) - 1);
+
+  vl_msg_api_send_shmem (q, (u8 *) & mp);
+
+  return 0;
+}
+
+/**
+ * Send an ACCEPT_SESSION message for session @c s to the application that
+ * owns it (uri flavour of the API).
+ *
+ * @return 0 on success, -1 if the application has no input queue.
+ */
+static int
+send_session_accept_uri_callback (stream_session_t * s)
+{
+  vl_api_accept_session_t *mp;
+  unix_shared_memory_queue_t *q, *vpp_queue;
+  application_t *server = application_get (s->app_index);
+
+  q = vl_api_client_index_to_input_queue (server->api_client_index);
+  vpp_queue = session_manager_get_vpp_event_queue (s->thread_index);
+
+  if (!q)
+    return -1;
+
+  mp = vl_msg_api_alloc (sizeof (*mp));
+  /* Zero the message so fields not set below are not sent as garbage */
+  memset (mp, 0, sizeof (*mp));
+  mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_ACCEPT_SESSION);
+
+  /* Note: session_type is the first octet in all types of sessions */
+
+  mp->accept_cookie = server->accept_cookie;
+  mp->server_rx_fifo = (u64) s->server_rx_fifo;
+  mp->server_tx_fifo = (u64) s->server_tx_fifo;
+  mp->session_thread_index = s->thread_index;
+  mp->session_index = s->session_index;
+  mp->session_type = s->session_type;
+  mp->vpp_event_queue_address = (u64) vpp_queue;
+  vl_msg_api_send_shmem (q, (u8 *) & mp);
+
+  return 0;
+}
+
+/**
+ * Send a DISCONNECT_SESSION message for session @c s to the application
+ * that owns it. Does nothing if the application has no input queue.
+ */
+static void
+send_session_disconnect_uri_callback (stream_session_t * s)
+{
+  vl_api_disconnect_session_t *mp;
+  unix_shared_memory_queue_t *q;
+  application_t *app = application_get (s->app_index);
+
+  q = vl_api_client_index_to_input_queue (app->api_client_index);
+
+  if (!q)
+    return;
+
+  mp = vl_msg_api_alloc (sizeof (*mp));
+  memset (mp, 0, sizeof (*mp));
+  mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_DISCONNECT_SESSION);
+
+  mp->session_thread_index = s->thread_index;
+  mp->session_index = s->session_index;
+  vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+/**
+ * Send a CONNECT_URI_REPLY to the client identified by @c api_client_index.
+ *
+ * On success (@c is_fail == 0) the reply carries the fifo, event queue and
+ * segment details of session @c s. On failure only context and retval are
+ * meaningful and the client application is deleted afterwards.
+ *
+ * @return 0 on success, -1 if the application or its queue is not found.
+ */
+static int
+send_session_connected_uri_callback (u32 api_client_index,
+                                     stream_session_t * s, u8 is_fail)
+{
+  vl_api_connect_uri_reply_t *mp;
+  unix_shared_memory_queue_t *q;
+  application_t *app = application_lookup (api_client_index);
+  u8 *seg_name;
+  unix_shared_memory_queue_t *vpp_queue;
+
+  /* Guard against a failed lookup before dereferencing app */
+  if (!app)
+    return -1;
+
+  q = vl_api_client_index_to_input_queue (app->api_client_index);
+
+  if (!q)
+    return -1;
+
+  mp = vl_msg_api_alloc (sizeof (*mp));
+  /* Zero the message: on failure most fields are never written */
+  memset (mp, 0, sizeof (*mp));
+  mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_CONNECT_URI_REPLY);
+  mp->context = app->api_context;
+  mp->retval = is_fail;
+  if (!is_fail)
+    {
+      vpp_queue = session_manager_get_vpp_event_queue (s->thread_index);
+      mp->server_rx_fifo = (u64) s->server_rx_fifo;
+      mp->server_tx_fifo = (u64) s->server_tx_fifo;
+      mp->session_thread_index = s->thread_index;
+      mp->session_index = s->session_index;
+      mp->session_type = s->session_type;
+      mp->vpp_event_queue_address = (u64) vpp_queue;
+      mp->client_event_queue_address = (u64) app->event_queue;
+
+      session_manager_get_segment_info (s->server_segment_index, &seg_name,
+                                        &mp->segment_size);
+      mp->segment_name_length = vec_len (seg_name);
+      if (mp->segment_name_length)
+        clib_memcpy (mp->segment_name, seg_name, mp->segment_name_length);
+    }
+
+  vl_msg_api_send_shmem (q, (u8 *) & mp);
+
+  /* Remove client if connect failed */
+  if (is_fail)
+    application_del (app);
+
+  return 0;
+}
+
+/**
+ * Redirect a connect_uri message to the indicated server.
+ * Only sent if the server has bound the related port with
+ * URI_OPTIONS_FLAGS_USE_FIFO
+ *
+ * @return VNET_CONNECT_REDIRECTED when the message was queued to the
+ *         server; otherwise the message is freed and an error is returned.
+ */
+static int
+redirect_connect_uri_callback (u32 server_api_client_index, void *mp_arg)
+{
+  vl_api_connect_uri_t *mp = mp_arg;
+  unix_shared_memory_queue_t *server_q, *client_q;
+  vlib_main_t *vm = vlib_get_main ();
+  f64 timeout = vlib_time_now (vm) + 0.5;
+  int rv = 0;
+
+  server_q = vl_api_client_index_to_input_queue (server_api_client_index);
+
+  if (!server_q)
+    {
+      rv = VNET_API_ERROR_INVALID_VALUE;
+      goto out;
+    }
+
+  client_q = vl_api_client_index_to_input_queue (mp->client_index);
+  if (!client_q)
+    {
+      rv = VNET_API_ERROR_INVALID_VALUE_2;
+      goto out;
+    }
+
+  /* Tell the server the client's API queue address, so it can reply */
+  mp->client_queue_address = (u64) client_q;
+
+  /*
+   * Bounce message handlers MUST NOT block the data-plane.
+   * Spin waiting for the queue lock, but give up once the deadline
+   * computed above has passed.
+   */
+
+  while (vlib_time_now (vm) < timeout)
+    {
+      rv =
+        unix_shared_memory_queue_add (server_q, (u8 *) & mp, 1 /*nowait */ );
+      switch (rv)
+        {
+          /* correctly enqueued */
+        case 0:
+          return VNET_CONNECT_REDIRECTED;
+
+          /* continue spinning, wait for pthread_mutex_trylock to work */
+        case -1:
+          continue;
+
+          /* queue stuffed, drop the msg */
+        case -2:
+          rv = VNET_API_ERROR_QUEUE_FULL;
+          goto out;
+        }
+    }
+out:
+  /* Dispose of the message */
+  vl_msg_api_free (mp);
+  return rv;
+}
+
+/** Build a 64-bit handle: session index in the upper 32 bits, thread index
+ * in the lower 32 bits */
+static u64
+make_session_handle (stream_session_t * s)
+{
+  return (u64) s->session_index << 32 | (u64) s->thread_index;
+}
+
+/**
+ * Send an ACCEPT_SOCK message for session @c s to the application that
+ * owns it (socket flavour of the API).
+ *
+ * @return 0 on success, -1 if the application has no input queue.
+ */
+static int
+send_session_accept_callback (stream_session_t * s)
+{
+  vl_api_accept_sock_t *mp;
+  unix_shared_memory_queue_t *q, *vpp_queue;
+  application_t *server = application_get (s->app_index);
+
+  q = vl_api_client_index_to_input_queue (server->api_client_index);
+  vpp_queue = session_manager_get_vpp_event_queue (s->thread_index);
+
+  if (!q)
+    return -1;
+
+  mp = vl_msg_api_alloc (sizeof (*mp));
+  /* Zero the message so fields not set below are not sent as garbage */
+  memset (mp, 0, sizeof (*mp));
+  mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_ACCEPT_SOCK);
+
+  /* Note: session_type is the first octet in all types of sessions */
+
+  mp->accept_cookie = server->accept_cookie;
+  mp->server_rx_fifo = (u64) s->server_rx_fifo;
+  mp->server_tx_fifo = (u64) s->server_tx_fifo;
+  mp->handle = make_session_handle (s);
+  mp->vpp_event_queue_address = (u64) vpp_queue;
+  vl_msg_api_send_shmem (q, (u8 *) & mp);
+
+  return 0;
+}
+
+/**
+ * Send a CONNECT_SOCK_REPLY to the client identified by @c api_client_index.
+ *
+ * On success (@c is_fail == 0) the reply carries the fifo, event queue and
+ * segment details of session @c s. On failure only context and retval are
+ * meaningful and the client application is deleted afterwards.
+ *
+ * @return 0 on success, -1 if the application or its queue is not found.
+ */
+static int
+send_session_connected_callback (u32 api_client_index, stream_session_t * s,
+                                 u8 is_fail)
+{
+  vl_api_connect_sock_reply_t *mp;
+  unix_shared_memory_queue_t *q;
+  application_t *app = application_lookup (api_client_index);
+  u8 *seg_name;
+  unix_shared_memory_queue_t *vpp_queue;
+
+  /* Guard against a failed lookup before dereferencing app */
+  if (!app)
+    return -1;
+
+  q = vl_api_client_index_to_input_queue (app->api_client_index);
+
+  if (!q)
+    return -1;
+
+  mp = vl_msg_api_alloc (sizeof (*mp));
+  /* Zero the message: on failure most fields are never written */
+  memset (mp, 0, sizeof (*mp));
+  mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_CONNECT_SOCK_REPLY);
+  mp->context = app->api_context;
+  mp->retval = is_fail;
+  if (!is_fail)
+    {
+      vpp_queue = session_manager_get_vpp_event_queue (s->thread_index);
+      mp->server_rx_fifo = (u64) s->server_rx_fifo;
+      mp->server_tx_fifo = (u64) s->server_tx_fifo;
+      mp->handle = make_session_handle (s);
+      mp->vpp_event_queue_address = (u64) vpp_queue;
+      mp->client_event_queue_address = (u64) app->event_queue;
+
+      session_manager_get_segment_info (s->server_segment_index, &seg_name,
+                                        &mp->segment_size);
+      mp->segment_name_length = vec_len (seg_name);
+      if (mp->segment_name_length)
+        clib_memcpy (mp->segment_name, seg_name, mp->segment_name_length);
+    }
+
+  vl_msg_api_send_shmem (q, (u8 *) & mp);
+
+  /* Remove client if connect failed */
+  if (is_fail)
+    application_del (app);
+
+  return 0;
+}
+
+/**
+ * Send a DISCONNECT_SOCK message for session @c s to the application that
+ * owns it. Does nothing if the application has no input queue.
+ */
+static void
+send_session_disconnect_callback (stream_session_t * s)
+{
+  vl_api_disconnect_sock_t *mp;
+  unix_shared_memory_queue_t *q;
+  application_t *app = application_get (s->app_index);
+
+  q = vl_api_client_index_to_input_queue (app->api_client_index);
+
+  if (!q)
+    return;
+
+  mp = vl_msg_api_alloc (sizeof (*mp));
+  memset (mp, 0, sizeof (*mp));
+  mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_DISCONNECT_SOCK);
+
+  mp->handle = make_session_handle (s);
+  vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+/**
+ * Redirect a connect_uri message to the indicated server.
+ * Only sent if the server has bound the related port with
+ * URI_OPTIONS_FLAGS_USE_FIFO
+ */
+static int
+redirect_connect_callback (u32 server_api_client_index, void *mp_arg)
+{
+  vl_api_connect_sock_t *mp = mp_arg;
+  unix_shared_memory_queue_t *server_q, *client_q;
+  vlib_main_t *vm = vlib_get_main ();
+  f64 timeout = vlib_time_now (vm) + 0.5;
+  int rv = 0;
+
+  server_q = vl_api_client_index_to_input_queue (server_api_client_index);
+
+  if (!server_q)
+    {
+      rv = VNET_API_ERROR_INVALID_VALUE;
+      goto out;
+    }
+
+  client_q = vl_api_client_index_to_input_queue (mp->client_index);
+  if (!client_q)
+    {
+      rv = VNET_API_ERROR_INVALID_VALUE_2;
+      goto out;
+    }
+
+  /* Tell the server the client's API queue address, so it can reply */
+  mp->client_queue_address = (u64) client_q;
+
+  /*
+   * Bounce message handlers MUST NOT block the data-plane.
+   * Spin waiting for the queue lock, but give up once the deadline
+   * computed above has passed.
+   */
+
+  while (vlib_time_now (vm) < timeout)
+    {
+      rv =
+        unix_shared_memory_queue_add (server_q, (u8 *) & mp, 1 /*nowait */ );
+      switch (rv)
+        {
+          /* correctly enqueued */
+        case 0:
+          return VNET_CONNECT_REDIRECTED;
+
+          /* continue spinning, wait for pthread_mutex_trylock to work */
+        case -1:
+          continue;
+
+          /* queue stuffed, drop the msg */
+        case -2:
+          rv = VNET_API_ERROR_QUEUE_FULL;
+          goto out;
+        }
+    }
+out:
+  /* Dispose of the message */
+  vl_msg_api_free (mp);
+  return rv;
+}
+
+/** Callbacks used by sessions created through the *_uri API messages */
+static session_cb_vft_t uri_session_cb_vft = {
+  .session_accept_callback = send_session_accept_uri_callback,
+  .session_disconnect_callback = send_session_disconnect_uri_callback,
+  .session_connected_callback = send_session_connected_uri_callback,
+  .add_segment_callback = send_add_segment_callback,
+  .redirect_connect_callback = redirect_connect_uri_callback
+};
+
+/** Callbacks used by sessions created through the *_sock API messages */
+static session_cb_vft_t session_cb_vft = {
+  .session_accept_callback = send_session_accept_callback,
+  .session_disconnect_callback = send_session_disconnect_callback,
+  .session_connected_callback = send_session_connected_callback,
+  .add_segment_callback = send_add_segment_callback,
+  .redirect_connect_callback = redirect_connect_callback
+};
+
+/**
+ * Check that (session_index, thread_index) names a live session.
+ *
+ * @return 0 if valid, VNET_API_ERROR_INVALID_VALUE if the thread index is
+ *         out of range, VNET_API_ERROR_INVALID_VALUE_2 if the pool slot is
+ *         free.
+ */
+static int
+api_session_not_valid (u32 session_index, u32 thread_index)
+{
+  session_manager_main_t *smm = vnet_get_session_manager_main ();
+  stream_session_t *pool;
+
+  if (thread_index >= vec_len (smm->sessions))
+    return VNET_API_ERROR_INVALID_VALUE;
+
+  pool = smm->sessions[thread_index];
+
+  if (pool_is_free_index (pool, session_index))
+    return VNET_API_ERROR_INVALID_VALUE_2;
+
+  return 0;
+}
+
+/** Handle BIND_URI: bind the uri in the message and reply with the segment
+ * name/size and the server event queue address */
+static void
+vl_api_bind_uri_t_handler (vl_api_bind_uri_t * mp)
+{
+  vl_api_bind_uri_reply_t *rmp;
+  vnet_bind_args_t _a, *a = &_a;
+  char segment_name[128];
+  u32 segment_name_length;
+  int rv;
+
+  /* Same assertion macro as vl_api_bind_sock_t_handler */
+  STATIC_ASSERT (sizeof (u64) * SESSION_OPTIONS_N_OPTIONS <=
+                 sizeof (mp->options),
+                 "Out of options, fix api message definition");
+
+  segment_name_length = ARRAY_LEN (segment_name);
+
+  memset (a, 0, sizeof (*a));
+  /* The whole buffer is copied into the reply below; zero it so that no
+   * uninitialized stack bytes are sent to the client */
+  memset (segment_name, 0, sizeof (segment_name));
+
+  a->uri = (char *) mp->uri;
+  a->api_client_index = mp->client_index;
+  a->options = mp->options;
+  a->segment_name = segment_name;
+  a->segment_name_length = segment_name_length;
+  a->session_cb_vft = &uri_session_cb_vft;
+
+  a->options[SESSION_OPTIONS_SEGMENT_SIZE] = mp->initial_segment_size;
+  a->options[SESSION_OPTIONS_ACCEPT_COOKIE] = mp->accept_cookie;
+  rv = vnet_bind_uri (a);
+
+  /* *INDENT-OFF* */
+  REPLY_MACRO2 (VL_API_BIND_URI_REPLY, ({
+    rmp->retval = rv;
+    if (!rv)
+      {
+        rmp->segment_name_length = 0;
+        /* $$$$ policy? */
+        rmp->segment_size = mp->initial_segment_size;
+        if (segment_name_length)
+          {
+            memcpy (rmp->segment_name, segment_name, segment_name_length);
+            rmp->segment_name_length = segment_name_length;
+          }
+        rmp->server_event_queue_address = a->server_event_queue_address;
+      }
+  }));
+  /* *INDENT-ON* */
+
+}
+
+/** Handle UNBIND_URI and reply with the result of vnet_unbind_uri */
+static void
+vl_api_unbind_uri_t_handler (vl_api_unbind_uri_t * mp)
+{
+  vl_api_unbind_uri_reply_t *rmp;
+  int rv;
+
+  rv = vnet_unbind_uri ((char *) mp->uri, mp->client_index);
+
+  REPLY_MACRO (VL_API_UNBIND_URI_REPLY);
+}
+
+/** Handle CONNECT_URI. No reply is sent from here; the message itself is
+ * handed to vnet_connect_uri through a->mp */
+static void
+vl_api_connect_uri_t_handler (vl_api_connect_uri_t * mp)
+{
+  vnet_connect_args_t _a, *a = &_a;
+
+  /* Do not pass uninitialized stack contents in the fields not set below */
+  memset (a, 0, sizeof (*a));
+
+  a->uri = (char *) mp->uri;
+  a->api_client_index = mp->client_index;
+  a->api_context = mp->context;
+  a->options = mp->options;
+  a->session_cb_vft = &uri_session_cb_vft;
+  a->mp = mp;
+  vnet_connect_uri (a);
+}
+
+/** Handle DISCONNECT_SESSION: validate the session, disconnect it and reply
+ * with the result */
+static void
+vl_api_disconnect_session_t_handler (vl_api_disconnect_session_t * mp)
+{
+  vl_api_disconnect_session_reply_t *rmp;
+  int rv;
+
+  rv = api_session_not_valid (mp->session_index, mp->session_thread_index);
+  if (!rv)
+    rv = vnet_disconnect_session (mp->client_index, mp->session_index,
+                                  mp->session_thread_index);
+
+  REPLY_MACRO (VL_API_DISCONNECT_SESSION_REPLY);
+}
+
+/** Handle the client's reply to a DISCONNECT_SESSION sent by vpp */
+static void
+vl_api_disconnect_session_reply_t_handler (vl_api_disconnect_session_reply_t *
+                                           mp)
+{
+  if (api_session_not_valid (mp->session_index, mp->session_thread_index))
+    {
+      clib_warning ("Invalid session!");
+      return;
+    }
+
+  /* Client objected to disconnecting the session, log and continue */
+  if (mp->retval)
+    {
+      clib_warning ("client retval %d", mp->retval);
+      return;
+    }
+
+  /* Disconnect has been confirmed. Confirm close to transport */
+  vnet_disconnect_session (mp->client_index, mp->session_index,
+                           mp->session_thread_index);
+}
+
+/** Handle the client's reply to a session reset: clean up the session */
+static void
+vl_api_reset_session_reply_t_handler (vl_api_reset_session_reply_t * mp)
+{
+  stream_session_t *s;
+
+  if (api_session_not_valid (mp->session_index, mp->session_thread_index))
+    {
+      clib_warning ("Invalid session!");
+      return;
+    }
+
+  /* Client objected to resetting the session, log and continue */
+  if (mp->retval)
+    {
+      clib_warning ("client retval %d", mp->retval);
+      return;
+    }
+
+  s = stream_session_get (mp->session_index, mp->session_thread_index);
+
+  /* This comes as a response to a reset, transport only waiting for
+   * confirmation to remove connection state, no need to disconnect */
+  stream_session_cleanup (s);
+}
+
+/** Handle the server's reply to ACCEPT_SESSION: mark the session READY, or
+ * disconnect it if the server returned an error */
+static void
+vl_api_accept_session_reply_t_handler (vl_api_accept_session_reply_t * mp)
+{
+  stream_session_t *s;
+  int rv;
+
+  if (api_session_not_valid (mp->session_index, mp->session_thread_index))
+    return;
+
+  s = stream_session_get (mp->session_index, mp->session_thread_index);
+  rv = mp->retval;
+
+  if (rv)
+    {
+      /* Server isn't interested, kill the session */
+      stream_session_disconnect (s);
+      return;
+    }
+
+  s->session_state = SESSION_STATE_READY;
+}
+
+static void
+vl_api_map_another_segment_reply_t_handler (vl_api_map_another_segment_reply_t
+                                            * mp)
+{
+  clib_warning ("not implemented");
+}
+
+/** Handle BIND_SOCK: bind the transport endpoint in the message and reply
+ * with the segment name/size and the server event queue address */
+static void
+vl_api_bind_sock_t_handler (vl_api_bind_sock_t * mp)
+{
+  vl_api_bind_sock_reply_t *rmp;
+  vnet_bind_args_t _a, *a = &_a;
+  char segment_name[128];
+  u32 segment_name_length;
+  int rv;
+
+  STATIC_ASSERT (sizeof (u64) * SESSION_OPTIONS_N_OPTIONS <=
+                 sizeof (mp->options),
+                 "Out of options, fix api message definition");
+
+  segment_name_length = ARRAY_LEN (segment_name);
+
+  memset (a, 0, sizeof (*a));
+  /* The whole buffer is copied into the reply below; zero it so that no
+   * uninitialized stack bytes are sent to the client */
+  memset (segment_name, 0, sizeof (segment_name));
+
+  clib_memcpy (&a->tep.ip, mp->ip,
+               (mp->is_ip4 ? sizeof (ip4_address_t) :
+                sizeof (ip6_address_t)));
+  a->tep.is_ip4 = mp->is_ip4;
+  a->tep.port = mp->port;
+  a->tep.vrf = mp->vrf;
+
+  a->api_client_index = mp->client_index;
+  a->options = mp->options;
+  a->segment_name = segment_name;
+  a->segment_name_length = segment_name_length;
+  a->session_cb_vft = &session_cb_vft;
+
+  /* Only a->tep is filled in and a->uri is 0 here, so use the endpoint
+   * based bind, like the other *_sock handlers use vnet_unbind,
+   * vnet_connect and vnet_disconnect */
+  rv = vnet_bind (a);
+
+  /* *INDENT-OFF* */
+  REPLY_MACRO2 (VL_API_BIND_SOCK_REPLY, ({
+    rmp->retval = rv;
+    if (!rv)
+      {
+        rmp->segment_name_length = 0;
+        rmp->segment_size = mp->options[SESSION_OPTIONS_SEGMENT_SIZE];
+        if (segment_name_length)
+          {
+            memcpy(rmp->segment_name, segment_name, segment_name_length);
+            rmp->segment_name_length = segment_name_length;
+          }
+        rmp->server_event_queue_address = a->server_event_queue_address;
+      }
+  }));
+  /* *INDENT-ON* */
+}
+
+/** Handle UNBIND_SOCK and reply with the result of vnet_unbind */
+static void
+vl_api_unbind_sock_t_handler (vl_api_unbind_sock_t * mp)
+{
+  vl_api_unbind_sock_reply_t *rmp;
+  vnet_unbind_args_t _a, *a = &_a;
+  int rv;
+
+  memset (a, 0, sizeof (*a));
+  a->api_client_index = mp->client_index;
+  a->handle = mp->handle;
+
+  rv = vnet_unbind (a);
+
+  REPLY_MACRO (VL_API_UNBIND_SOCK_REPLY);
+}
+
+/** Handle CONNECT_SOCK. No reply is sent from here; the message itself is
+ * handed to vnet_connect through a->mp */
+static void
+vl_api_connect_sock_t_handler (vl_api_connect_sock_t * mp)
+{
+  vnet_connect_args_t _a, *a = &_a;
+
+  /* Do not pass uninitialized stack contents in the fields not set below */
+  memset (a, 0, sizeof (*a));
+
+  clib_memcpy (&a->tep.ip, mp->ip,
+               (mp->is_ip4 ? sizeof (ip4_address_t) :
+                sizeof (ip6_address_t)));
+  a->tep.is_ip4 = mp->is_ip4;
+  a->tep.port = mp->port;
+  a->tep.vrf = mp->vrf;
+  /* Identify the requesting client, as vl_api_connect_uri_t_handler does */
+  a->api_client_index = mp->client_index;
+  a->options = mp->options;
+  a->session_cb_vft = &session_cb_vft;
+  a->api_context = mp->context;
+  a->mp = mp;
+
+  vnet_connect (a);
+}
+
+/** Handle DISCONNECT_SOCK and reply with the result of vnet_disconnect */
+static void
+vl_api_disconnect_sock_t_handler (vl_api_disconnect_sock_t * mp)
+{
+  vnet_disconnect_args_t _a, *a = &_a;
+  vl_api_disconnect_sock_reply_t *rmp;
+  int rv;
+
+  memset (a, 0, sizeof (*a));
+  a->api_client_index = mp->client_index;
+  a->handle = mp->handle;
+  rv = vnet_disconnect (a);
+
+  REPLY_MACRO (VL_API_DISCONNECT_SOCK_REPLY);
+}
+
+/** Handle the client's reply to a DISCONNECT_SOCK sent by vpp */
+static void
+vl_api_disconnect_sock_reply_t_handler (vl_api_disconnect_sock_reply_t * mp)
+{
+  vnet_disconnect_args_t _a, *a = &_a;
+
+  /* Client objected to disconnecting the session, log and continue */
+  if (mp->retval)
+    {
+      clib_warning ("client retval %d", mp->retval);
+      return;
+    }
+
+  memset (a, 0, sizeof (*a));
+  a->api_client_index = mp->client_index;
+  a->handle = mp->handle;
+
+  vnet_disconnect (a);
+}
+
+/** Handle the client's reply to a session reset: clean up the session */
+static void
+vl_api_reset_sock_reply_t_handler (vl_api_reset_sock_reply_t * mp)
+{
+  stream_session_t *s;
+  u32 session_index, thread_index;
+
+  /* Client objected to resetting the session, log and continue */
+  if (mp->retval)
+    {
+      clib_warning ("client retval %d", mp->retval);
+      return;
+    }
+
+  if (api_parse_session_handle (mp->handle, &session_index, &thread_index))
+    {
+      clib_warning ("Invalid handle");
+      return;
+    }
+
+  s = stream_session_get (session_index, thread_index);
+
+  /* This comes as a response to a reset, transport only waiting for
+   * confirmation to remove connection state, no need to disconnect */
+  stream_session_cleanup (s);
+}
+
+/** Handle the server's reply to ACCEPT_SOCK: mark the session READY, or
+ * disconnect it if the server returned an error */
+static void
+vl_api_accept_sock_reply_t_handler (vl_api_accept_sock_reply_t * mp)
+{
+  stream_session_t *s;
+  u32 session_index, thread_index;
+
+  if (api_parse_session_handle (mp->handle, &session_index, &thread_index))
+    {
+      clib_warning ("Invalid handle");
+      return;
+    }
+  s = stream_session_get (session_index, thread_index);
+
+  if (mp->retval)
+    {
+      /* Server isn't interested, kill the session */
+      stream_session_disconnect (s);
+      return;
+    }
+
+  s->session_state = SESSION_STATE_READY;
+}
+
+#define vl_msg_name_crc_list
+#include
+#undef vl_msg_name_crc_list
+
+/** Register the "<name>_<crc>" string of every session API message */
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+  foreach_vl_msg_name_crc_session;
+#undef _
+}
+
+/*
+ * session_api_hookup
+ * Add uri's API message handlers to the table.
+ * vlib has already mapped shared memory and
+ * added the client registration handlers.
+ * See .../open-repo/vlib/memclnt_vlib.c:memclnt_process()
+ */
+static clib_error_t *
+session_api_hookup (vlib_main_t * vm)
+{
+  api_main_t *am = &api_main;
+
+#define _(N,n)                                                  \
+    vl_msg_api_set_handlers(VL_API_##N, #n,                     \
+                           vl_api_##n##_t_handler,              \
+                           vl_noop_handler,                     \
+                           vl_api_##n##_t_endian,               \
+                           vl_api_##n##_t_print,                \
+                           sizeof(vl_api_##n##_t), 1);
+  foreach_session_api_msg;
+#undef _
+
+  /*
+   * Messages which bounce off the data-plane to
+   * an API client. Simply tells the message handling infra not
+   * to free the message.
+   *
+   * Bounced message handlers MUST NOT block the data plane
+   */
+  am->message_bounce[VL_API_CONNECT_URI] = 1;
+  am->message_bounce[VL_API_CONNECT_SOCK] = 1;
+
+  /*
+   * Set up the (msg_name, crc, message-id) table
+   */
+  setup_message_id_table (am);
+
+  return 0;
+}
+
+VLIB_API_INIT_FUNCTION (session_api_hookup);
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/session/session_cli.c b/src/vnet/session/session_cli.c
new file mode 100644
index 00000000..b2943a1c
--- /dev/null
+++ b/src/vnet/session/session_cli.c
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include
+#include
+
+/**
+ * Format stream session as per the following format
+ *
+ * verbose:
+ *   "Connection", "Rx fifo", "Tx fifo", "Session Index"
+ * non-verbose:
+ *   "Connection"
+ *
+ * Expects two variadic arguments: a stream_session_t pointer and an int
+ * verbose flag. Sessions in READY and CLOSED state are printed with the
+ * transport's format_connection, LISTENING sessions with format_listener
+ * and CONNECTING sessions with format_half_open.
+ */
+u8 *
+format_stream_session (u8 * s, va_list * args)
+{
+  stream_session_t *ss = va_arg (*args, stream_session_t *);
+  int verbose = va_arg (*args, int);
+  transport_proto_vft_t *tp_vft;
+  u8 *str = 0;
+
+  tp_vft = session_get_transport_vft (ss->session_type);
+
+  /* The index is a u32; widen it to match the %lld conversion */
+  if (verbose)
+    str = format (0, "%-20llp%-20llp%-15lld", ss->server_rx_fifo,
+                  ss->server_tx_fifo, (u64) stream_session_get_index (ss));
+
+  if (ss->session_state == SESSION_STATE_READY)
+    {
+      s = format (s, "%-40U%v", tp_vft->format_connection,
+                  ss->connection_index, ss->thread_index, str);
+    }
+  else if (ss->session_state == SESSION_STATE_LISTENING)
+    {
+      s = format (s, "%-40U%v", tp_vft->format_listener, ss->connection_index,
+                  str);
+    }
+  /* Half-open sessions are the CONNECTING ones. This branch used to repeat
+   * the READY test and could therefore never be taken. */
+  else if (ss->session_state == SESSION_STATE_CONNECTING)
+    {
+      s =
+        format (s, "%-40U%v", tp_vft->format_half_open, ss->connection_index,
+                str);
+    }
+  else if (ss->session_state == SESSION_STATE_CLOSED)
+    {
+      s = format (s, "[CL] %-40U%v", tp_vft->format_connection,
+                  ss->connection_index, ss->thread_index, str);
+    }
+  else
+    {
+      clib_warning ("Session in unknown state!");
+    }
+
+  vec_free (str);
+
+  return s;
+}
+
+/**
+ * CLI handler for "show session [verbose]".
+ *
+ * Prints the number of active sessions of every per-thread pool and, with
+ * "verbose", one formatted line per session below a column header.
+ */
+static clib_error_t *
+show_session_command_fn (vlib_main_t * vm, unformat_input_t * input,
+                         vlib_cli_command_t * cmd)
+{
+  session_manager_main_t *smm = &session_manager_main;
+  int verbose = 0, i;
+  stream_session_t *pool;
+  stream_session_t *s;
+  u8 *str = 0;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "verbose"))
+        verbose = 1;
+      else
+        break;
+    }
+
+  for (i = 0; i < vec_len (smm->sessions); i++)
+    {
+      u32 once_per_pool;
+      pool = smm->sessions[i];
+
+      once_per_pool = 1;
+
+      if (pool_elts (pool))
+        {
+
+          vlib_cli_output (vm, "Thread %d: %d active sessions",
+                           i, pool_elts (pool));
+          if (verbose)
+            {
+              if (once_per_pool)
+                {
+                  str = format (str, "%-40s%-20s%-20s%-15s",
+                                "Connection", "Rx fifo", "Tx fifo",
+                                "Session Index");
+                  vlib_cli_output (vm, "%v", str);
+                  vec_reset_length (str);
+                  once_per_pool = 0;
+                }
+
+              /* *INDENT-OFF* */
+              pool_foreach (s, pool,
+              ({
+                vlib_cli_output (vm, "%U", format_stream_session, s, verbose);
+              }));
+              /* *INDENT-ON* */
+            }
+        }
+      else
+        vlib_cli_output (vm, "Thread %d: no active sessions", i);
+    }
+  vec_free (str);
+
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_uri_command, static) =
+{
+  .path = "show session",
+  .short_help = "show session [verbose]",
+  .function = show_session_command_fn,
+};
+/* *INDENT-ON* */
+
+
+/**
+ * CLI handler for "clear session thread <thread> session <index>".
+ *
+ * The thread defaults to 0; the session index is mandatory. Invokes the
+ * owning application's session_disconnect_callback on the session.
+ *
+ * @return 0 on success, a clib error for bad input, an out-of-range thread
+ *         or an inactive session.
+ */
+static clib_error_t *
+clear_session_command_fn (vlib_main_t * vm, unformat_input_t * input,
+                          vlib_cli_command_t * cmd)
+{
+  session_manager_main_t *smm = &session_manager_main;
+  u32 thread_index = 0;
+  u32 session_index = ~0;
+  stream_session_t *pool, *session;
+  application_t *server;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "thread %d", &thread_index))
+        ;
+      else if (unformat (input, "session %d", &session_index))
+        ;
+      else
+        return clib_error_return (0, "unknown input `%U'",
+                                  format_unformat_error, input);
+    }
+
+  if (session_index == ~0)
+    return clib_error_return (0, "session required, but not set.");
+
+  /* Valid indices are 0 .. vec_len - 1; an index equal to vec_len would
+   * read one element past the end of the vector */
+  if (thread_index >= vec_len (smm->sessions))
+    return clib_error_return (0, "thread %d out of range [0-%d]",
+                              thread_index, vec_len (smm->sessions) - 1);
+
+  pool = smm->sessions[thread_index];
+
+  if (pool_is_free_index (pool, session_index))
+    return clib_error_return (0, "session %d not active", session_index);
+
+  session = pool_elt_at_index (pool, session_index);
+  server = application_get (session->app_index);
+
+  /* Disconnect both app and transport */
+  server->cb_fns.session_disconnect_callback (session);
+
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (clear_uri_session_command, static) =
+{
+  .path = "clear session",
+  .short_help = "clear session thread <thread> session <index>",
+  .function = clear_session_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/session/transport.c b/src/vnet/session/transport.c
new file mode 100644
index 00000000..abd94ba4
--- /dev/null
+++ b/src/vnet/session/transport.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include + +u32 +transport_endpoint_lookup (transport_endpoint_table_t *ht, ip46_address_t *ip, + u16 port) +{ + clib_bihash_kv_24_8_t kv; + int rv; + + kv.key[0] = ip->as_u64[0]; + kv.key[1] = ip->as_u64[1]; + kv.key[2] = port; + + rv = clib_bihash_search_inline_24_8 (ht, &kv); + if (rv == 0) + return kv.value; + + return TRANSPORT_ENDPOINT_INVALID_INDEX; +} + +void +transport_endpoint_table_add (transport_endpoint_table_t *ht, + transport_endpoint_t *te, u32 value) +{ + clib_bihash_kv_24_8_t kv; + + kv.key[0] = te->ip.as_u64[0]; + kv.key[1] = te->ip.as_u64[1]; + kv.key[2] = te->port; + kv.value = value; + + clib_bihash_add_del_24_8 (ht, &kv, 1); +} + +void +transport_endpoint_table_del (transport_endpoint_table_t *ht, + transport_endpoint_t *te) +{ + clib_bihash_kv_24_8_t kv; + + kv.key[0] = te->ip.as_u64[0]; + kv.key[1] = te->ip.as_u64[1]; + kv.key[2] = te->port; + + clib_bihash_add_del_24_8 (ht, &kv, 0); +} + + + diff --git a/src/vnet/session/transport.h b/src/vnet/session/transport.h new file mode 100644 index 00000000..2d4415ba --- /dev/null +++ b/src/vnet/session/transport.h @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef VNET_VNET_URI_TRANSPORT_H_ +#define VNET_VNET_URI_TRANSPORT_H_ + +#include +#include +#include +#include + +/* + * Protocol independent transport properties associated to a session + */ +typedef struct _transport_connection +{ + ip46_address_t rmt_ip; /**< Remote IP */ + ip46_address_t lcl_ip; /**< Local IP */ + u16 lcl_port; /**< Local port */ + u16 rmt_port; /**< Remote port */ + u8 proto; /**< Transport protocol id */ + + u32 s_index; /**< Parent session index */ + u32 c_index; /**< Connection index in transport pool */ + u8 is_ip4; /**< Flag if IP4 connection */ + u32 thread_index; /**< Worker-thread index */ + + /** Macros for 'derived classes' where base is named "connection" */ +#define c_lcl_ip connection.lcl_ip +#define c_rmt_ip connection.rmt_ip +#define c_lcl_ip4 connection.lcl_ip.ip4 +#define c_rmt_ip4 connection.rmt_ip.ip4 +#define c_lcl_ip6 connection.lcl_ip.ip6 +#define c_rmt_ip6 connection.rmt_ip.ip6 +#define c_lcl_port connection.lcl_port +#define c_rmt_port connection.rmt_port +#define c_proto connection.proto +#define c_state connection.state +#define c_s_index connection.s_index +#define c_c_index connection.c_index +#define c_is_ip4 connection.is_ip4 +#define c_thread_index connection.thread_index +} transport_connection_t; + +/* + * Transport protocol virtual function table + */ +typedef struct _transport_proto_vft +{ + /* + * Setup + */ + u32 (*bind) (vlib_main_t *, u32, ip46_address_t *, u16); + u32 (*unbind) (vlib_main_t *, u32); + int (*open) (ip46_address_t * addr, u16 port_host_byte_order); + void (*close) (u32 conn_index, u32 thread_index); + void (*cleanup) (u32 conn_index, u32 thread_index); + + /* + * Transmission + */ + u32 (*push_header) (transport_connection_t * tconn, vlib_buffer_t * b); + u16 (*send_mss) (transport_connection_t * tc); + u32 (*send_space) (transport_connection_t * tc); + u32 (*rx_fifo_offset) (transport_connection_t * tc); + + /* + * Connection retrieval + */ + transport_connection_t 
*(*get_connection) (u32 conn_idx, u32 thread_idx); + transport_connection_t *(*get_listener) (u32 conn_index); + transport_connection_t *(*get_half_open) (u32 conn_index); + + /* + * Format + */ + u8 *(*format_connection) (u8 * s, va_list * args); + u8 *(*format_listener) (u8 * s, va_list * args); + u8 *(*format_half_open) (u8 * s, va_list * args); + +} transport_proto_vft_t; + +/* 16 octets */ +typedef CLIB_PACKED (struct + { + union + { + struct + { + ip4_address_t src; ip4_address_t dst; + u16 src_port; + u16 dst_port; + /* align by making this 4 octets even though its a 1-bit field + * NOTE: avoid key overlap with other transports that use 5 tuples for + * session identification. + */ + u32 proto; + }; + u64 as_u64[2]; + }; + }) v4_connection_key_t; + +typedef CLIB_PACKED (struct + { + union + { + struct + { + /* 48 octets */ + ip6_address_t src; ip6_address_t dst; + u16 src_port; + u16 dst_port; u32 proto; u8 unused_for_now[8]; + }; u64 as_u64[6]; + }; + }) v6_connection_key_t; + +typedef clib_bihash_kv_16_8_t session_kv4_t; +typedef clib_bihash_kv_48_8_t session_kv6_t; + +always_inline void +make_v4_ss_kv (session_kv4_t * kv, ip4_address_t * lcl, ip4_address_t * rmt, + u16 lcl_port, u16 rmt_port, u8 proto) +{ + v4_connection_key_t key; + memset (&key, 0, sizeof (v4_connection_key_t)); + + key.src.as_u32 = lcl->as_u32; + key.dst.as_u32 = rmt->as_u32; + key.src_port = lcl_port; + key.dst_port = rmt_port; + key.proto = proto; + + kv->key[0] = key.as_u64[0]; + kv->key[1] = key.as_u64[1]; + kv->value = ~0ULL; +} + +always_inline void +make_v4_listener_kv (session_kv4_t * kv, ip4_address_t * lcl, u16 lcl_port, + u8 proto) +{ + v4_connection_key_t key; + memset (&key, 0, sizeof (v4_connection_key_t)); + + key.src.as_u32 = lcl->as_u32; + key.dst.as_u32 = 0; + key.src_port = lcl_port; + key.dst_port = 0; + key.proto = proto; + + kv->key[0] = key.as_u64[0]; + kv->key[1] = key.as_u64[1]; + kv->value = ~0ULL; +} + +always_inline void +make_v4_ss_kv_from_tc (session_kv4_t 
* kv, transport_connection_t * t) +{ + return make_v4_ss_kv (kv, &t->lcl_ip.ip4, &t->rmt_ip.ip4, t->lcl_port, + t->rmt_port, t->proto); +} + +always_inline void +make_v6_ss_kv (session_kv6_t * kv, ip6_address_t * lcl, ip6_address_t * rmt, + u16 lcl_port, u16 rmt_port, u8 proto) +{ + v6_connection_key_t key; + memset (&key, 0, sizeof (v6_connection_key_t)); + + key.src.as_u64[0] = lcl->as_u64[0]; + key.src.as_u64[1] = lcl->as_u64[1]; + key.dst.as_u64[0] = rmt->as_u64[0]; + key.dst.as_u64[1] = rmt->as_u64[1]; + key.src_port = lcl_port; + key.dst_port = rmt_port; + key.proto = proto; + + kv->key[0] = key.as_u64[0]; + kv->key[1] = key.as_u64[1]; + kv->value = ~0ULL; +} + +always_inline void +make_v6_listener_kv (session_kv6_t * kv, ip6_address_t * lcl, u16 lcl_port, + u8 proto) +{ + v6_connection_key_t key; + memset (&key, 0, sizeof (v6_connection_key_t)); + + key.src.as_u64[0] = lcl->as_u64[0]; + key.src.as_u64[1] = lcl->as_u64[1]; + key.dst.as_u64[0] = 0; + key.dst.as_u64[1] = 0; + key.src_port = lcl_port; + key.dst_port = 0; + key.proto = proto; + + kv->key[0] = key.as_u64[0]; + kv->key[1] = key.as_u64[1]; + kv->value = ~0ULL; +} + +always_inline void +make_v6_ss_kv_from_tc (session_kv6_t * kv, transport_connection_t * t) +{ + make_v6_ss_kv (kv, &t->lcl_ip.ip6, &t->rmt_ip.ip6, t->lcl_port, + t->rmt_port, t->proto); +} + +typedef struct _transport_endpoint +{ + ip46_address_t ip; + u16 port; + u8 is_ip4; + u32 vrf; +} transport_endpoint_t; + +typedef clib_bihash_24_8_t transport_endpoint_table_t; + +#define TRANSPORT_ENDPOINT_INVALID_INDEX ((u32)~0) + +u32 +transport_endpoint_lookup (transport_endpoint_table_t * ht, + ip46_address_t * ip, u16 port); +void transport_endpoint_table_add (transport_endpoint_table_t * ht, + transport_endpoint_t * te, u32 value); +void transport_endpoint_table_del (transport_endpoint_table_t * ht, + transport_endpoint_t * te); + +#endif /* VNET_VNET_URI_TRANSPORT_H_ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local 
Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c new file mode 100644 index 00000000..0f9b7097 --- /dev/null +++ b/src/vnet/tcp/tcp.c @@ -0,0 +1,708 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +tcp_main_t tcp_main; + +static u32 +tcp_connection_bind (vlib_main_t * vm, u32 session_index, ip46_address_t * ip, + u16 port_host_byte_order, u8 is_ip4) +{ + tcp_main_t *tm = &tcp_main; + tcp_connection_t *listener; + + pool_get (tm->listener_pool, listener); + memset (listener, 0, sizeof (*listener)); + + listener->c_c_index = listener - tm->listener_pool; + listener->c_lcl_port = clib_host_to_net_u16 (port_host_byte_order); + + if (is_ip4) + listener->c_lcl_ip4.as_u32 = ip->ip4.as_u32; + else + clib_memcpy (&listener->c_lcl_ip6, &ip->ip6, sizeof (ip6_address_t)); + + listener->c_s_index = session_index; + listener->c_proto = SESSION_TYPE_IP4_TCP; + listener->state = TCP_STATE_LISTEN; + listener->c_is_ip4 = 1; + + return listener->c_c_index; +} + +u32 +tcp_session_bind_ip4 (vlib_main_t * vm, u32 session_index, + ip46_address_t * ip, u16 port_host_byte_order) +{ + return tcp_connection_bind (vm, session_index, ip, port_host_byte_order, 1); +} + +u32 +tcp_session_bind_ip6 (vlib_main_t * vm, u32 session_index, + ip46_address_t * ip, u16 port_host_byte_order) +{ + return tcp_connection_bind (vm, 
session_index, ip, port_host_byte_order, 0); + +} + +static void +tcp_session_unbind (u32 listener_index) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + pool_put_index (tm->listener_pool, listener_index); +} + +u32 +tcp_session_unbind_ip4 (vlib_main_t * vm, u32 listener_index) +{ + tcp_session_unbind (listener_index); + return 0; +} + +u32 +tcp_session_unbind_ip6 (vlib_main_t * vm, u32 listener_index) +{ + tcp_session_unbind (listener_index); + return 0; +} + +transport_connection_t * +tcp_session_get_listener (u32 listener_index) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + tcp_connection_t *tc; + tc = pool_elt_at_index (tm->listener_pool, listener_index); + return &tc->connection; +} + +/** + * Cleans up connection state. + * + * No notifications. + */ +void +tcp_connection_cleanup (tcp_connection_t * tc) +{ + tcp_main_t *tm = &tcp_main; + u32 tepi; + transport_endpoint_t *tep; + + /* Cleanup local endpoint if this was an active connect */ + tepi = transport_endpoint_lookup (&tm->local_endpoints_table, &tc->c_lcl_ip, + tc->c_lcl_port); + + /*XXX lock */ + if (tepi != TRANSPORT_ENDPOINT_INVALID_INDEX) + { + tep = pool_elt_at_index (tm->local_endpoints, tepi); + transport_endpoint_table_del (&tm->local_endpoints_table, tep); + pool_put (tm->local_endpoints, tep); + } + + /* Make sure all timers are cleared */ + tcp_connection_timers_reset (tc); + + /* Check if half-open */ + if (tc->state == TCP_STATE_SYN_SENT) + pool_put (tm->half_open_connections, tc); + else + pool_put (tm->connections[tc->c_thread_index], tc); +} + +/** + * Connection removal. + * + * This should be called only once connection enters CLOSED state. Note + * that it notifies the session of the removal event, so if the goal is to + * just remove the connection, call tcp_connection_cleanup instead. + */ +void +tcp_connection_del (tcp_connection_t * tc) +{ + stream_session_delete_notify (&tc->connection); + tcp_connection_cleanup (tc); +} + +/** + * Begin connection closing procedure. 
+ * + * If at the end the connection is not in CLOSED state, it is not removed. + * Instead, we rely on on TCP to advance through state machine to either + * 1) LAST_ACK (passive close) whereby when the last ACK is received + * tcp_connection_del is called. This notifies session of the delete and + * calls cleanup. + * 2) TIME_WAIT (active close) whereby after 2MSL the 2MSL timer triggers + * and cleanup is called. + */ +void +tcp_connection_close (tcp_connection_t * tc) +{ + /* Send FIN if needed */ + if (tc->state == TCP_STATE_ESTABLISHED || tc->state == TCP_STATE_SYN_RCVD + || tc->state == TCP_STATE_CLOSE_WAIT) + tcp_send_fin (tc); + + /* Switch state */ + if (tc->state == TCP_STATE_ESTABLISHED || tc->state == TCP_STATE_SYN_RCVD) + tc->state = TCP_STATE_FIN_WAIT_1; + else if (tc->state == TCP_STATE_SYN_SENT) + tc->state = TCP_STATE_CLOSED; + else if (tc->state == TCP_STATE_CLOSE_WAIT) + tc->state = TCP_STATE_LAST_ACK; + + /* Half-close connections are not supported XXX */ + + if (tc->state == TCP_STATE_CLOSED) + tcp_connection_del (tc); +} + +void +tcp_session_close (u32 conn_index, u32 thread_index) +{ + tcp_connection_t *tc; + tc = tcp_connection_get (conn_index, thread_index); + tcp_connection_close (tc); +} + +void +tcp_session_cleanup (u32 conn_index, u32 thread_index) +{ + tcp_connection_t *tc; + tc = tcp_connection_get (conn_index, thread_index); + tcp_connection_cleanup (tc); +} + +void * +ip_interface_get_first_ip (u32 sw_if_index, u8 is_ip4) +{ + ip_lookup_main_t *lm4 = &ip4_main.lookup_main; + ip_lookup_main_t *lm6 = &ip6_main.lookup_main; + ip_interface_address_t *ia = 0; + + if (is_ip4) + { + /* *INDENT-OFF* */ + foreach_ip_interface_address (lm4, ia, sw_if_index, 1 /* unnumbered */ , + ({ + return ip_interface_address_get_address (lm4, ia); + })); + /* *INDENT-ON* */ + } + else + { + /* *INDENT-OFF* */ + foreach_ip_interface_address (lm6, ia, sw_if_index, 1 /* unnumbered */ , + ({ + return ip_interface_address_get_address (lm6, ia); + })); + /* 
*INDENT-ON* */ + } + + return 0; +} + +/** + * Allocate local port and add if successful add entry to local endpoint + * table to mark the pair as used. + */ +u16 +tcp_allocate_local_port (tcp_main_t * tm, ip46_address_t * ip) +{ + u8 unique = 0; + transport_endpoint_t *tep; + u32 time_now, tei; + u16 min = 1024, max = 65535, tries; /* XXX configurable ? */ + + tries = max - min; + time_now = tcp_time_now (); + + /* Start at random point or max */ + pool_get (tm->local_endpoints, tep); + clib_memcpy (&tep->ip, ip, sizeof (*ip)); + tep->port = random_u32 (&time_now) << 16; + tep->port = tep->port < min ? max : tep->port; + + /* Search for first free slot */ + while (tries) + { + tei = transport_endpoint_lookup (&tm->local_endpoints_table, &tep->ip, + tep->port); + if (tei == TRANSPORT_ENDPOINT_INVALID_INDEX) + { + unique = 1; + break; + } + + tep->port--; + + if (tep->port < min) + tep->port = max; + + tries--; + } + + if (unique) + { + transport_endpoint_table_add (&tm->local_endpoints_table, tep, + tep - tm->local_endpoints); + + return tep->port; + } + + /* Failed */ + pool_put (tm->local_endpoints, tep); + return -1; +} + +/** + * Initialize all connection timers as invalid + */ +void +tcp_connection_timers_init (tcp_connection_t * tc) +{ + int i; + + /* Set all to invalid */ + for (i = 0; i < TCP_N_TIMERS; i++) + { + tc->timers[i] = TCP_TIMER_HANDLE_INVALID; + } + + tc->rto = TCP_RTO_INIT; +} + +/** + * Stop all connection timers + */ +void +tcp_connection_timers_reset (tcp_connection_t * tc) +{ + int i; + for (i = 0; i < TCP_N_TIMERS; i++) + { + tcp_timer_reset (tc, i); + } +} + +/** Initialize tcp connection variables + * + * Should be called after having received a msg from the peer, i.e., a SYN or + * a SYNACK, such that connection options have already been exchanged. 
*/ +void +tcp_connection_init_vars (tcp_connection_t * tc) +{ + tcp_connection_timers_init (tc); + tcp_set_snd_mss (tc); + tc->sack_sb.head = TCP_INVALID_SACK_HOLE_INDEX; + tcp_cc_init (tc); +} + +int +tcp_connection_open (ip46_address_t * rmt_addr, u16 rmt_port, u8 is_ip4) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + tcp_connection_t *tc; + fib_prefix_t prefix; + u32 fei, sw_if_index; + ip46_address_t lcl_addr; + u16 lcl_port; + + /* + * Find the local address and allocate port + */ + memset (&lcl_addr, 0, sizeof (lcl_addr)); + + /* Find a FIB path to the destination */ + clib_memcpy (&prefix.fp_addr, rmt_addr, sizeof (*rmt_addr)); + prefix.fp_proto = is_ip4 ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6; + prefix.fp_len = is_ip4 ? 32 : 128; + + fei = fib_table_lookup (0, &prefix); + + /* Couldn't find route to destination. Bail out. */ + if (fei == FIB_NODE_INDEX_INVALID) + return -1; + + sw_if_index = fib_entry_get_resolving_interface (fei); + + if (sw_if_index == (u32) ~ 0) + return -1; + + if (is_ip4) + { + ip4_address_t *ip4; + ip4 = ip_interface_get_first_ip (sw_if_index, 1); + lcl_addr.ip4.as_u32 = ip4->as_u32; + } + else + { + ip6_address_t *ip6; + ip6 = ip_interface_get_first_ip (sw_if_index, 0); + clib_memcpy (&lcl_addr.ip6, ip6, sizeof (*ip6)); + } + + /* Allocate source port */ + lcl_port = tcp_allocate_local_port (tm, &lcl_addr); + if (lcl_port < 1) + return -1; + + /* + * Create connection and send SYN + */ + + pool_get (tm->half_open_connections, tc); + memset (tc, 0, sizeof (*tc)); + + clib_memcpy (&tc->c_rmt_ip, rmt_addr, sizeof (ip46_address_t)); + clib_memcpy (&tc->c_lcl_ip, &lcl_addr, sizeof (ip46_address_t)); + tc->c_rmt_port = clib_host_to_net_u16 (rmt_port); + tc->c_lcl_port = clib_host_to_net_u16 (lcl_port); + tc->c_c_index = tc - tm->half_open_connections; + tc->c_is_ip4 = is_ip4; + + /* The other connection vars will be initialized after SYN ACK */ + tcp_connection_timers_init (tc); + + tcp_send_syn (tc); + + tc->state = TCP_STATE_SYN_SENT; + + 
return tc->c_c_index; +} + +int +tcp_session_open_ip4 (ip46_address_t * addr, u16 port) +{ + return tcp_connection_open (addr, port, 1); +} + +int +tcp_session_open_ip6 (ip46_address_t * addr, u16 port) +{ + return tcp_connection_open (addr, port, 0); +} + +u8 * +format_tcp_session_ip4 (u8 * s, va_list * args) +{ + u32 tci = va_arg (*args, u32); + u32 thread_index = va_arg (*args, u32); + tcp_connection_t *tc; + + tc = tcp_connection_get (tci, thread_index); + + s = format (s, "[%s] %U:%d->%U:%d", "tcp", format_ip4_address, + &tc->c_lcl_ip4, clib_net_to_host_u16 (tc->c_lcl_port), + format_ip4_address, &tc->c_rmt_ip4, + clib_net_to_host_u16 (tc->c_rmt_port)); + + return s; +} + +u8 * +format_tcp_session_ip6 (u8 * s, va_list * args) +{ + u32 tci = va_arg (*args, u32); + u32 thread_index = va_arg (*args, u32); + tcp_connection_t *tc = tcp_connection_get (tci, thread_index); + s = format (s, "[%s] %U:%d->%U:%d", "tcp", format_ip6_address, + &tc->c_lcl_ip6, clib_net_to_host_u16 (tc->c_lcl_port), + format_ip6_address, &tc->c_rmt_ip6, + clib_net_to_host_u16 (tc->c_rmt_port)); + return s; +} + +u8 * +format_tcp_listener_session_ip4 (u8 * s, va_list * args) +{ + u32 tci = va_arg (*args, u32); + tcp_connection_t *tc = tcp_listener_get (tci); + s = format (s, "[%s] %U:%d->%U:%d", "tcp", format_ip4_address, + &tc->c_lcl_ip4, clib_net_to_host_u16 (tc->c_lcl_port), + format_ip4_address, &tc->c_rmt_ip4, + clib_net_to_host_u16 (tc->c_rmt_port)); + return s; +} + +u8 * +format_tcp_listener_session_ip6 (u8 * s, va_list * args) +{ + u32 tci = va_arg (*args, u32); + tcp_connection_t *tc = tcp_listener_get (tci); + s = format (s, "[%s] %U:%d->%U:%d", "tcp", format_ip6_address, + &tc->c_lcl_ip6, clib_net_to_host_u16 (tc->c_lcl_port), + format_ip6_address, &tc->c_rmt_ip6, + clib_net_to_host_u16 (tc->c_rmt_port)); + return s; +} + +u8 * +format_tcp_half_open_session_ip4 (u8 * s, va_list * args) +{ + u32 tci = va_arg (*args, u32); + tcp_connection_t *tc = tcp_half_open_connection_get 
(tci); + s = format (s, "[%s] %U:%d->%U:%d", "tcp", format_ip4_address, + &tc->c_lcl_ip4, clib_net_to_host_u16 (tc->c_lcl_port), + format_ip4_address, &tc->c_rmt_ip4, + clib_net_to_host_u16 (tc->c_rmt_port)); + return s; +} + +u8 * +format_tcp_half_open_session_ip6 (u8 * s, va_list * args) +{ + u32 tci = va_arg (*args, u32); + tcp_connection_t *tc = tcp_half_open_connection_get (tci); + s = format (s, "[%s] %U:%d->%U:%d", "tcp", format_ip6_address, + &tc->c_lcl_ip6, clib_net_to_host_u16 (tc->c_lcl_port), + format_ip6_address, &tc->c_rmt_ip6, + clib_net_to_host_u16 (tc->c_rmt_port)); + return s; +} + +transport_connection_t * +tcp_session_get_transport (u32 conn_index, u32 thread_index) +{ + tcp_connection_t *tc = tcp_connection_get (conn_index, thread_index); + return &tc->connection; +} + +transport_connection_t * +tcp_half_open_session_get_transport (u32 conn_index) +{ + tcp_connection_t *tc = tcp_half_open_connection_get (conn_index); + return &tc->connection; +} + +u16 +tcp_session_send_mss (transport_connection_t * trans_conn) +{ + tcp_connection_t *tc = (tcp_connection_t *) trans_conn; + return tc->snd_mss; +} + +u32 +tcp_session_send_space (transport_connection_t * trans_conn) +{ + tcp_connection_t *tc = (tcp_connection_t *) trans_conn; + return tcp_available_snd_space (tc); +} + +u32 +tcp_session_rx_fifo_offset (transport_connection_t * trans_conn) +{ + tcp_connection_t *tc = (tcp_connection_t *) trans_conn; + return (tc->snd_una_max - tc->snd_una); +} + +/* *INDENT-OFF* */ +const static transport_proto_vft_t tcp4_proto = { + .bind = tcp_session_bind_ip4, + .unbind = tcp_session_unbind_ip4, + .push_header = tcp_push_header, + .get_connection = tcp_session_get_transport, + .get_listener = tcp_session_get_listener, + .get_half_open = tcp_half_open_session_get_transport, + .open = tcp_session_open_ip4, + .close = tcp_session_close, + .cleanup = tcp_session_cleanup, + .send_mss = tcp_session_send_mss, + .send_space = tcp_session_send_space, + .rx_fifo_offset = 
tcp_session_rx_fifo_offset, + .format_connection = format_tcp_session_ip4, + .format_listener = format_tcp_listener_session_ip4, + .format_half_open = format_tcp_half_open_session_ip4 +}; + +const static transport_proto_vft_t tcp6_proto = { + .bind = tcp_session_bind_ip6, + .unbind = tcp_session_unbind_ip6, + .push_header = tcp_push_header, + .get_connection = tcp_session_get_transport, + .get_listener = tcp_session_get_listener, + .get_half_open = tcp_half_open_session_get_transport, + .open = tcp_session_open_ip6, + .close = tcp_session_close, + .cleanup = tcp_session_cleanup, + .send_mss = tcp_session_send_mss, + .send_space = tcp_session_send_space, + .rx_fifo_offset = tcp_session_rx_fifo_offset, + .format_connection = format_tcp_session_ip6, + .format_listener = format_tcp_listener_session_ip6, + .format_half_open = format_tcp_half_open_session_ip6 +}; +/* *INDENT-ON* */ + +void +tcp_timer_keep_handler (u32 conn_index) +{ + u32 cpu_index = os_get_cpu_number (); + tcp_connection_t *tc; + + tc = tcp_connection_get (conn_index, cpu_index); + tc->timers[TCP_TIMER_KEEP] = TCP_TIMER_HANDLE_INVALID; + + tcp_connection_close (tc); +} + +void +tcp_timer_establish_handler (u32 conn_index) +{ + tcp_connection_t *tc; + u8 sst; + + tc = tcp_half_open_connection_get (conn_index); + tc->timers[TCP_TIMER_ESTABLISH] = TCP_TIMER_HANDLE_INVALID; + + ASSERT (tc->state == TCP_STATE_SYN_SENT); + + sst = tc->c_is_ip4 ? 
SESSION_TYPE_IP4_TCP : SESSION_TYPE_IP6_TCP; + stream_session_connect_notify (&tc->connection, sst, 1 /* fail */ ); + + tcp_connection_cleanup (tc); +} + +void +tcp_timer_2msl_handler (u32 conn_index) +{ + u32 cpu_index = os_get_cpu_number (); + tcp_connection_t *tc; + + tc = tcp_connection_get (conn_index, cpu_index); + tc->timers[TCP_TIMER_2MSL] = TCP_TIMER_HANDLE_INVALID; + + tcp_connection_del (tc); +} + +/* *INDENT-OFF* */ +static timer_expiration_handler *timer_expiration_handlers[TCP_N_TIMERS] = +{ + tcp_timer_retransmit_handler, + tcp_timer_delack_handler, + 0, + tcp_timer_keep_handler, + tcp_timer_2msl_handler, + tcp_timer_retransmit_syn_handler, + tcp_timer_establish_handler +}; +/* *INDENT-ON* */ + +static void +tcp_expired_timers_dispatch (u32 * expired_timers) +{ + int i; + u32 connection_index, timer_id; + + for (i = 0; i < vec_len (expired_timers); i++) + { + /* Get session index and timer id */ + connection_index = expired_timers[i] & 0x0FFFFFFF; + timer_id = expired_timers[i] >> 28; + + /* Handle expiration */ + (*timer_expiration_handlers[timer_id]) (connection_index); + } +} + +void +tcp_initialize_timer_wheels (tcp_main_t * tm) +{ + tw_timer_wheel_16t_2w_512sl_t *tw; + vec_foreach (tw, tm->timer_wheels) + { + tw_timer_wheel_init_16t_2w_512sl (tw, tcp_expired_timers_dispatch, + 100e-3 /* timer period 100ms */ , ~0); + tw->last_run_time = vlib_time_now (tm->vlib_main); + } +} + +clib_error_t * +tcp_init (vlib_main_t * vm) +{ + ip_main_t *im = &ip_main; + ip_protocol_info_t *pi; + tcp_main_t *tm = vnet_get_tcp_main (); + vlib_thread_main_t *vtm = vlib_get_thread_main (); + clib_error_t *error = 0; + u32 num_threads; + + tm->vlib_main = vm; + tm->vnet_main = vnet_get_main (); + + if ((error = vlib_call_init_function (vm, ip_main_init))) + return error; + if ((error = vlib_call_init_function (vm, ip4_lookup_init))) + return error; + if ((error = vlib_call_init_function (vm, ip6_lookup_init))) + return error; + + /* + * Registrations + */ + + /* 
Register with IP */ + pi = ip_get_protocol_info (im, IP_PROTOCOL_TCP); + if (pi == 0) + return clib_error_return (0, "TCP protocol info AWOL"); + pi->format_header = format_tcp_header; + pi->unformat_pg_edit = unformat_pg_tcp_header; + + ip4_register_protocol (IP_PROTOCOL_TCP, tcp4_input_node.index); + + /* Register as transport with URI */ + session_register_transport (SESSION_TYPE_IP4_TCP, &tcp4_proto); + session_register_transport (SESSION_TYPE_IP6_TCP, &tcp6_proto); + + /* + * Initialize data structures + */ + + num_threads = 1 /* main thread */ + vtm->n_threads; + vec_validate (tm->connections, num_threads - 1); + + /* Initialize per worker thread tx buffers (used for control messages) */ + vec_validate (tm->tx_buffers, num_threads - 1); + + /* Initialize timer wheels */ + vec_validate (tm->timer_wheels, num_threads - 1); + tcp_initialize_timer_wheels (tm); + + vec_validate (tm->delack_connections, num_threads - 1); + + /* Initialize clocks per tick for TCP timestamp. Used to compute + * monotonically increasing timestamps. */ + tm->tstamp_ticks_per_clock = vm->clib_time.seconds_per_clock + / TCP_TSTAMP_RESOLUTION; + + clib_bihash_init_24_8 (&tm->local_endpoints_table, "local endpoint table", + 200000 /* $$$$ config parameter nbuckets */ , + (64 << 20) /*$$$ config parameter table size */ ); + + return error; +} + +VLIB_INIT_FUNCTION (tcp_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h new file mode 100644 index 00000000..22f00a63 --- /dev/null +++ b/src/vnet/tcp/tcp.h @@ -0,0 +1,624 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
/*
 * Every macro that expands to an expression is parenthesized so it keeps its
 * value when used inside a larger expression (e.g. "x / THZ" or
 * "2 * TCP_DEFAULT_RX_FIFO_SIZE").
 */
#define TCP_TICK 10e-3			/**< TCP tick period (s) */
#define THZ (1 / TCP_TICK)		/**< TCP tick frequency */
#define TCP_TSTAMP_RESOLUTION TCP_TICK	/**< Time stamp resolution */
#define TCP_PAWS_IDLE (24 * 24 * 60 * 60 * THZ)	/**< 24 days */
#define TCP_MAX_OPTION_SPACE 40

#define TCP_DUPACK_THRESHOLD 3
#define TCP_DEFAULT_RX_FIFO_SIZE (64 << 10)
/*
 * Timer delays as multiples of 100ms.
 *
 * Expansions containing an operator are parenthesized so that uses such as
 * "x / TCP_RTO_MAX" or "x / TCP_TO_TIMER_TICK" bind as intended.
 */
#define TCP_TO_TIMER_TICK (TCP_TICK * 10)	/* Factor for converting from TCP
						 * ticks to timer units */
#define TCP_DELACK_TIME 1	/* 0.1s */
#define TCP_ESTABLISH_TIME 750	/* 75s */
#define TCP_2MSL_TIME 300	/* 30s */

#define TCP_RTO_MAX (60 * THZ)	/* Min max RTO (60s) as per RFC6298 */
#define TCP_RTT_MAX (30 * THZ)	/* 30s (probably too much) */
#define TCP_RTO_SYN_RETRIES 3	/* SYN retries without doubling RTO */
#define TCP_RTO_INIT (1 * THZ)	/* Initial retransmit timer */
/**
 * One hole tracked by the SACK scoreboard.
 *
 * Holes are allocated from the scoreboard's holes pool and chained into a
 * doubly linked list through pool indices (not pointers).
 * TCP_INVALID_SACK_HOLE_INDEX in next/prev means "no neighbour on that
 * side".
 */
typedef struct _sack_scoreboard_hole
{
  u32 next;		/**< Index for next entry in linked list */
  u32 prev;		/**< Index for previous entry in linked list */
  u32 start;		/**< Start sequence number */
  u32 end;		/**< End sequence number */
} sack_scoreboard_hole_t;
*/ + + u8 state; /**< TCP state as per tcp_state_t */ + u16 flags; /**< Connection flags (see tcp_conn_flags_e) */ + u32 timers[TCP_N_TIMERS]; /**< Timer handles into timer wheel */ + + /* TODO RFC4898 */ + + /** Send sequence variables RFC793 */ + u32 snd_una; /**< oldest unacknowledged sequence number */ + u32 snd_una_max; /**< newest unacknowledged sequence number + 1*/ + u32 snd_wnd; /**< send window */ + u32 snd_wl1; /**< seq number used for last snd.wnd update */ + u32 snd_wl2; /**< ack number used for last snd.wnd update */ + u32 snd_nxt; /**< next seq number to be sent */ + + /** Receive sequence variables RFC793 */ + u32 rcv_nxt; /**< next sequence number expected */ + u32 rcv_wnd; /**< receive window we expect */ + + u32 rcv_las; /**< rcv_nxt at last ack sent/rcv_wnd update */ + u32 iss; /**< initial sent sequence */ + u32 irs; /**< initial remote sequence */ + + /* Options */ + tcp_options_t opt; /**< TCP connection options parsed */ + u8 rcv_wscale; /**< Window scale to advertise to peer */ + u8 snd_wscale; /**< Window scale to use when sending */ + u32 tsval_recent; /**< Last timestamp received */ + u32 tsval_recent_age; /**< When last updated tstamp_recent*/ + + sack_block_t *snd_sacks; /**< Vector of SACKs to send. XXX Fixed size? 
/*
 * Helpers for the recovery related connection flags and slow start test.
 *
 * The connection argument and the complete expansion are parenthesized so
 * the macros behave like ordinary expressions for any argument form,
 * e.g. tcp_in_slowstart (&conn) or !tcp_fastrecovery_on (tc).
 */
#define tcp_fastrecovery_on(tc) ((tc)->flags |= TCP_CONN_FAST_RECOVERY)
#define tcp_fastrecovery_off(tc) ((tc)->flags &= ~TCP_CONN_FAST_RECOVERY)
#define tcp_in_fastrecovery(tc) ((tc)->flags & TCP_CONN_FAST_RECOVERY)
#define tcp_in_recovery(tc) ((tc)->flags & (TCP_CONN_FAST_RECOVERY | TCP_CONN_RECOVERY))
#define tcp_recovery_off(tc) ((tc)->flags &= ~(TCP_CONN_FAST_RECOVERY | TCP_CONN_RECOVERY))
#define tcp_in_slowstart(tc) ((tc)->cwnd < (tc)->ssthresh)
+typedef struct _tcp_main +{ + /* Per-worker thread tcp connection pools */ + tcp_connection_t **connections; + + /* Pool of listeners. */ + tcp_connection_t *listener_pool; + + /** Dispatch table by state and flags */ + tcp_lookup_dispatch_t dispatch_table[TCP_N_STATES][64]; + + u8 log2_tstamp_clocks_per_tick; + f64 tstamp_ticks_per_clock; + + /** per-worker tx buffer free lists */ + u32 **tx_buffers; + + /* Per worker-thread timer wheel for connections timers */ + tw_timer_wheel_16t_2w_512sl_t *timer_wheels; + + /* Convenience per worker-thread vector of connections to DELACK */ + u32 **delack_connections; + + /* Pool of half-open connections on which we've sent a SYN */ + tcp_connection_t *half_open_connections; + + /* Pool of local TCP endpoints */ + transport_endpoint_t *local_endpoints; + + /* Local endpoints lookup table */ + transport_endpoint_table_t local_endpoints_table; + + /* Congestion control algorithms registered */ + tcp_cc_algorithm_t *cc_algos; + + /* convenience */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; + ip4_main_t *ip4_main; + ip6_main_t *ip6_main; +} tcp_main_t; + +extern tcp_main_t tcp_main; +extern vlib_node_registration_t tcp4_input_node; +extern vlib_node_registration_t tcp6_input_node; +extern vlib_node_registration_t tcp4_output_node; +extern vlib_node_registration_t tcp6_output_node; + +always_inline tcp_main_t * +vnet_get_tcp_main () +{ + return &tcp_main; +} + +always_inline tcp_connection_t * +tcp_connection_get (u32 conn_index, u32 thread_index) +{ + return pool_elt_at_index (tcp_main.connections[thread_index], conn_index); +} + +always_inline tcp_connection_t * +tcp_connection_get_if_valid (u32 conn_index, u32 thread_index) +{ + if (tcp_main.connections[thread_index] == 0) + return 0; + if (pool_is_free_index (tcp_main.connections[thread_index], conn_index)) + return 0; + return pool_elt_at_index (tcp_main.connections[thread_index], conn_index); +} + +void tcp_connection_close (tcp_connection_t * tc); +void 
tcp_connection_cleanup (tcp_connection_t * tc); +void tcp_connection_del (tcp_connection_t * tc); + +always_inline tcp_connection_t * +tcp_listener_get (u32 tli) +{ + return pool_elt_at_index (tcp_main.listener_pool, tli); +} + +always_inline tcp_connection_t * +tcp_half_open_connection_get (u32 conn_index) +{ + return pool_elt_at_index (tcp_main.half_open_connections, conn_index); +} + +void tcp_make_ack (tcp_connection_t * ts, vlib_buffer_t * b); +void tcp_make_finack (tcp_connection_t * tc, vlib_buffer_t * b); +void tcp_make_synack (tcp_connection_t * ts, vlib_buffer_t * b); +void tcp_send_reset (vlib_buffer_t * pkt, u8 is_ip4); +void tcp_send_syn (tcp_connection_t * tc); +void tcp_send_fin (tcp_connection_t * tc); +void tcp_set_snd_mss (tcp_connection_t * tc); + +always_inline u32 +tcp_end_seq (tcp_header_t * th, u32 len) +{ + return th->seq_number + tcp_is_syn (th) + tcp_is_fin (th) + len; +} + +/* Modulo arithmetic for TCP sequence numbers */ +#define seq_lt(_s1, _s2) ((i32)((_s1)-(_s2)) < 0) +#define seq_leq(_s1, _s2) ((i32)((_s1)-(_s2)) <= 0) +#define seq_gt(_s1, _s2) ((i32)((_s1)-(_s2)) > 0) +#define seq_geq(_s1, _s2) ((i32)((_s1)-(_s2)) >= 0) + +/* Modulo arithmetic for timestamps */ +#define timestamp_lt(_t1, _t2) ((i32)((_t1)-(_t2)) < 0) +#define timestamp_leq(_t1, _t2) ((i32)((_t1)-(_t2)) <= 0) + +always_inline u32 +tcp_flight_size (const tcp_connection_t * tc) +{ + return tc->snd_una_max - tc->snd_una - tc->sack_sb.sacked_bytes + + tc->rtx_bytes; +} + +/** + * Initial cwnd as per RFC5681 + */ +always_inline u32 +tcp_initial_cwnd (const tcp_connection_t * tc) +{ + if (tc->snd_mss > 2190) + return 2 * tc->snd_mss; + else if (tc->snd_mss > 1095) + return 3 * tc->snd_mss; + else + return 4 * tc->snd_mss; +} + +always_inline u32 +tcp_loss_wnd (const tcp_connection_t * tc) +{ + return tc->snd_mss; +} + +always_inline u32 +tcp_available_wnd (const tcp_connection_t * tc) +{ + return clib_min (tc->cwnd, tc->snd_wnd); +} + +always_inline u32 
+tcp_available_snd_space (const tcp_connection_t * tc) +{ + u32 available_wnd = tcp_available_wnd (tc); + u32 flight_size = tcp_flight_size (tc); + + if (available_wnd <= flight_size) + return 0; + + return available_wnd - flight_size; +} + +void tcp_retransmit_first_unacked (tcp_connection_t * tc); + +void tcp_fast_retransmit (tcp_connection_t * tc); + +always_inline u32 +tcp_time_now (void) +{ + return clib_cpu_time_now () * tcp_main.tstamp_ticks_per_clock; +} + +u32 tcp_push_header (transport_connection_t * tconn, vlib_buffer_t * b); + +u32 +tcp_prepare_retransmit_segment (tcp_connection_t * tc, vlib_buffer_t * b, + u32 max_bytes); + +void tcp_connection_timers_init (tcp_connection_t * tc); +void tcp_connection_timers_reset (tcp_connection_t * tc); + +void tcp_connection_init_vars (tcp_connection_t * tc); + +always_inline void +tcp_connection_force_ack (tcp_connection_t * tc, vlib_buffer_t * b) +{ + /* Reset flags, make sure ack is sent */ + tc->flags = TCP_CONN_SNDACK; + vnet_buffer (b)->tcp.flags &= ~TCP_BUF_FLAG_DUPACK; +} + +always_inline void +tcp_timer_set (tcp_connection_t * tc, u8 timer_id, u32 interval) +{ + tc->timers[timer_id] + = tw_timer_start_16t_2w_512sl (&tcp_main.timer_wheels[tc->c_thread_index], + tc->c_c_index, timer_id, interval); +} + +always_inline void +tcp_retransmit_timer_set (tcp_main_t * tm, tcp_connection_t * tc) +{ + /* XXX Switch to faster TW */ + tcp_timer_set (tc, TCP_TIMER_RETRANSMIT, + clib_max (tc->rto * TCP_TO_TIMER_TICK, 1)); +} + +always_inline void +tcp_timer_reset (tcp_connection_t * tc, u8 timer_id) +{ + if (tc->timers[timer_id] == TCP_TIMER_HANDLE_INVALID) + return; + + tw_timer_stop_16t_2w_512sl (&tcp_main.timer_wheels[tc->c_thread_index], + tc->timers[timer_id]); + tc->timers[timer_id] = TCP_TIMER_HANDLE_INVALID; +} + +always_inline void +tcp_timer_update (tcp_connection_t * tc, u8 timer_id, u32 interval) +{ + if (tc->timers[timer_id] != TCP_TIMER_HANDLE_INVALID) + tw_timer_stop_16t_2w_512sl 
(&tcp_main.timer_wheels[tc->c_thread_index], + tc->timers[timer_id]); + tc->timers[timer_id] = + tw_timer_start_16t_2w_512sl (&tcp_main.timer_wheels[tc->c_thread_index], + tc->c_c_index, timer_id, interval); +} + +always_inline u8 +tcp_timer_is_active (tcp_connection_t * tc, tcp_timers_e timer) +{ + return tc->timers[timer] != TCP_TIMER_HANDLE_INVALID; +} + +void +scoreboard_remove_hole (sack_scoreboard_t * sb, + sack_scoreboard_hole_t * hole); + +always_inline sack_scoreboard_hole_t * +scoreboard_next_hole (sack_scoreboard_t * sb, sack_scoreboard_hole_t * hole) +{ + if (hole->next != TCP_INVALID_SACK_HOLE_INDEX) + return pool_elt_at_index (sb->holes, hole->next); + return 0; +} + +always_inline sack_scoreboard_hole_t * +scoreboard_first_hole (sack_scoreboard_t * sb) +{ + if (sb->head != TCP_INVALID_SACK_HOLE_INDEX) + return pool_elt_at_index (sb->holes, sb->head); + return 0; +} + +always_inline void +scoreboard_clear (sack_scoreboard_t * sb) +{ + sack_scoreboard_hole_t *hole = scoreboard_first_hole (sb); + while ((hole = scoreboard_first_hole (sb))) + { + scoreboard_remove_hole (sb, hole); + } +} + +always_inline u32 +scoreboard_hole_bytes (sack_scoreboard_hole_t * hole) +{ + return hole->end - hole->start; +} + +always_inline void +tcp_cc_algo_register (tcp_cc_algorithm_type_e type, + const tcp_cc_algorithm_t * vft) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + vec_validate (tm->cc_algos, type); + + tm->cc_algos[type] = *vft; +} + +always_inline tcp_cc_algorithm_t * +tcp_cc_algo_get (tcp_cc_algorithm_type_e type) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + return &tm->cc_algos[type]; +} + +void tcp_cc_init (tcp_connection_t * tc); + +/** + * Push TCP header to buffer + * + * @param vm - vlib_main + * @param b - buffer to write the header to + * @param sp_net - source port net order + * @param dp_net - destination port net order + * @param seq - sequence number net order + * @param ack - ack number net order + * @param tcp_hdr_opts_len - header and options 
length in bytes + * @param flags - header flags + * @param wnd - window size + * + * @return - pointer to start of TCP header + */ +always_inline void * +vlib_buffer_push_tcp_net_order (vlib_buffer_t * b, u16 sp, u16 dp, u32 seq, + u32 ack, u8 tcp_hdr_opts_len, u8 flags, + u16 wnd) +{ + tcp_header_t *th; + + th = vlib_buffer_push_uninit (b, tcp_hdr_opts_len); + + th->src_port = sp; + th->dst_port = dp; + th->seq_number = seq; + th->ack_number = ack; + th->data_offset_and_reserved = (tcp_hdr_opts_len >> 2) << 4; + th->flags = flags; + th->window = wnd; + th->checksum = 0; + th->urgent_pointer = 0; + return th; +} + +/** + * Push TCP header to buffer + * + * @param vm - vlib_main + * @param b - buffer to write the header to + * @param sp_net - source port net order + * @param dp_net - destination port net order + * @param seq - sequence number host order + * @param ack - ack number host order + * @param tcp_hdr_opts_len - header and options length in bytes + * @param flags - header flags + * @param wnd - window size + * + * @return - pointer to start of TCP header + */ +always_inline void * +vlib_buffer_push_tcp (vlib_buffer_t * b, u16 sp_net, u16 dp_net, u32 seq, + u32 ack, u8 tcp_hdr_opts_len, u8 flags, u16 wnd) +{ + return vlib_buffer_push_tcp_net_order (b, sp_net, dp_net, + clib_host_to_net_u32 (seq), + clib_host_to_net_u32 (ack), + tcp_hdr_opts_len, flags, + clib_host_to_net_u16 (wnd)); +} + +#endif /* _vnet_tcp_h_ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/tcp/tcp_error.def b/src/vnet/tcp/tcp_error.def new file mode 100644 index 00000000..cff5ec13 --- /dev/null +++ b/src/vnet/tcp/tcp_error.def @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +tcp_error (NONE, "no error") +tcp_error (NO_LISTENER, "no listener for dst port") +tcp_error (LOOKUP_DROPS, "lookup drops") +tcp_error (DISPATCH, "Dispatch error") +tcp_error (ENQUEUED, "Packets pushed into rx fifo") +tcp_error (PURE_ACK, "Pure acks") +tcp_error (SYNS_RCVD, "SYNs received") +tcp_error (SYN_ACKS_RCVD, "SYN-ACKs received") +tcp_error (NOT_READY, "Session not ready for packets") +tcp_error (FIFO_FULL, "Packets dropped for lack of rx fifo space") +tcp_error (EVENT_FIFO_FULL, "Events not sent for lack of event fifo space") +tcp_error (API_QUEUE_FULL, "Sessions not created for lack of API queue space") +tcp_error (CREATE_SESSION_FAIL, "Sessions couldn't be allocated") +tcp_error (SEGMENT_INVALID, "Invalid segment") +tcp_error (ACK_INVALID, "Invalid ACK") +tcp_error (ACK_DUP, "Duplicate ACK") +tcp_error (ACK_OLD, "Old ACK") +tcp_error (PKTS_SENT, "Packets sent") +tcp_error (FILTERED_DUPACKS, "Filtered duplicate ACKs") +tcp_error (RST_SENT, "Resets sent") \ No newline at end of file diff --git a/src/vnet/tcp/tcp_format.c b/src/vnet/tcp/tcp_format.c new file mode 100644 index 00000000..7136741d --- /dev/null +++ b/src/vnet/tcp/tcp_format.c @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * tcp/tcp_format.c: tcp formatting + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include + +static u8 * +format_tcp_flags (u8 * s, va_list * args) +{ + int flags = va_arg (*args, int); + +#define _(f) if (flags & TCP_FLAG_##f) s = format (s, "%s, ", #f); + foreach_tcp_flag +#undef _ + return s; +} + +/* Format TCP header. 
/*
 * Format a TCP header.
 *
 * Varargs, in order:
 *   tcp_header_t * - header to format
 *   u32            - number of bytes available starting at the header
 *
 * Emits ports, sequence/ack numbers, flags, header length, window and
 * checksum, then hands any bytes following the TCP header to the
 * format_header callback registered for the destination port, if any.
 */
u8 *
format_tcp_header (u8 * s, va_list * args)
{
  tcp_header_t *tcp = va_arg (*args, tcp_header_t *);
  u32 max_header_bytes = va_arg (*args, u32);
  u32 header_bytes;
  uword indent;

  /* Fewer bytes available than a base TCP header: report and stop. */
  if (max_header_bytes < sizeof (tcp[0]))
    return format (s, "TCP header truncated");

  /* Continuation lines are indented two columns past the current indent */
  indent = format_get_indent (s);
  indent += 2;
  header_bytes = tcp_header_bytes (tcp);

  s = format (s, "TCP: %d -> %d", clib_net_to_host_u16 (tcp->src),
	      clib_net_to_host_u16 (tcp->dst));

  s = format (s, "\n%Useq. 0x%08x ack 0x%08x", format_white_space, indent,
	      clib_net_to_host_u32 (tcp->seq_number),
	      clib_net_to_host_u32 (tcp->ack_number));

  s = format (s, "\n%Uflags %U, tcp header: %d bytes", format_white_space,
	      indent, format_tcp_flags, tcp->flags, header_bytes);

  s = format (s, "\n%Uwindow %d, checksum 0x%04x", format_white_space, indent,
	      clib_net_to_host_u16 (tcp->window),
	      clib_net_to_host_u16 (tcp->checksum));


#if 0
  /* Format TCP options. */
  /* NOTE(review): compiled out. As written the loop never advances o for
   * TCP_OPTION_NOOP and prints nothing; finish it before enabling. */
  {
    u8 *o;
    u8 *option_start = (void *) (tcp + 1);
    u8 *option_end = (void *) tcp + header_bytes;

    for (o = option_start; o < option_end;)
      {
	u32 length = o[1];
	switch (o[0])
	  {
	  case TCP_OPTION_END:
	    length = 1;
	    o = option_end;
	    break;

	  case TCP_OPTION_NOOP:
	    length = 1;
	    break;

	  }
      }
  }
#endif

  /* Recurse into next protocol layer: only when bytes remain past the TCP
   * header and a formatter is registered for the destination port. */
  if (max_header_bytes != 0 && header_bytes < max_header_bytes)
    {
      ip_main_t *im = &ip_main;
      tcp_udp_port_info_t *pi;

      pi = ip_get_tcp_udp_port_info (im, tcp->dst);

      if (pi && pi->format_header)
	s = format (s, "\n%U%U", format_white_space, indent - 2,
		    pi->format_header,
		    /* next protocol header */ (void *) tcp + header_bytes,
		    max_header_bytes - header_bytes);
    }

  return s;
}
/*
 * Select the generic output next-node index for an address family.
 * The argument is parenthesized so any expression can be passed
 * (e.g. an assignment or a comma expression) without changing what the
 * conditional operator tests.
 */
#define tcp_next_output(is_ip4) ((is_ip4) ? TCP_NEXT_TCP4_OUTPUT \
                                          : TCP_NEXT_TCP6_OUTPUT)
+ * The one important difference compared to RFC793 is that we use rcv_las, + * or the rcv_nxt at last ack sent instead of rcv_nxt since that's the + * peer's reference when computing our receive window. + * + * This accepts only segments within the window. + */ +always_inline u8 +tcp_segment_in_rcv_wnd (tcp_connection_t * tc, u32 seq, u32 end_seq) +{ + return seq_leq (end_seq, tc->rcv_las + tc->rcv_wnd) + && seq_geq (seq, tc->rcv_nxt); +} + +void +tcp_options_parse (tcp_header_t * th, tcp_options_t * to) +{ + const u8 *data; + u8 opt_len, opts_len, kind; + int j; + sack_block_t b; + + opts_len = (tcp_doff (th) << 2) - sizeof (tcp_header_t); + data = (const u8 *) (th + 1); + + /* Zero out all flags but those set in SYN */ + to->flags &= (TCP_OPTS_FLAG_SACK_PERMITTED | TCP_OPTS_FLAG_WSCALE); + + for (; opts_len > 0; opts_len -= opt_len, data += opt_len) + { + kind = data[0]; + + /* Get options length */ + if (kind == TCP_OPTION_EOL) + break; + else if (kind == TCP_OPTION_NOOP) + opt_len = 1; + else + { + /* broken options */ + if (opts_len < 2) + break; + opt_len = data[1]; + + /* weird option length */ + if (opt_len < 2 || opt_len > opts_len) + break; + } + + /* Parse options */ + switch (kind) + { + case TCP_OPTION_MSS: + if ((opt_len == TCP_OPTION_LEN_MSS) && tcp_syn (th)) + { + to->flags |= TCP_OPTS_FLAG_MSS; + to->mss = clib_net_to_host_u16 (*(u16 *) (data + 2)); + } + break; + case TCP_OPTION_WINDOW_SCALE: + if ((opt_len == TCP_OPTION_LEN_WINDOW_SCALE) && tcp_syn (th)) + { + to->flags |= TCP_OPTS_FLAG_WSCALE; + to->wscale = data[2]; + if (to->wscale > TCP_MAX_WND_SCALE) + { + clib_warning ("Illegal window scaling value: %d", + to->wscale); + to->wscale = TCP_MAX_WND_SCALE; + } + } + break; + case TCP_OPTION_TIMESTAMP: + if (opt_len == TCP_OPTION_LEN_TIMESTAMP) + { + to->flags |= TCP_OPTS_FLAG_TSTAMP; + to->tsval = clib_net_to_host_u32 (*(u32 *) (data + 2)); + to->tsecr = clib_net_to_host_u32 (*(u32 *) (data + 6)); + } + break; + case TCP_OPTION_SACK_PERMITTED: 
+ if (opt_len == TCP_OPTION_LEN_SACK_PERMITTED && tcp_syn (th)) + to->flags |= TCP_OPTS_FLAG_SACK_PERMITTED; + break; + case TCP_OPTION_SACK_BLOCK: + /* If SACK permitted was not advertised or a SYN, break */ + if ((to->flags & TCP_OPTS_FLAG_SACK_PERMITTED) == 0 || tcp_syn (th)) + break; + + /* If too short or not correctly formatted, break */ + if (opt_len < 10 || ((opt_len - 2) % TCP_OPTION_LEN_SACK_BLOCK)) + break; + + to->flags |= TCP_OPTS_FLAG_SACK; + to->n_sack_blocks = (opt_len - 2) / TCP_OPTION_LEN_SACK_BLOCK; + vec_reset_length (to->sacks); + for (j = 0; j < to->n_sack_blocks; j++) + { + b.start = clib_net_to_host_u32 (*(u32 *) (data + 2 + 4 * j)); + b.end = clib_net_to_host_u32 (*(u32 *) (data + 6 + 4 * j)); + vec_add1 (to->sacks, b); + } + break; + default: + /* Nothing to see here */ + continue; + } + } +} + +always_inline int +tcp_segment_check_paws (tcp_connection_t * tc) +{ + /* XXX normally test for timestamp should be lt instead of leq, but for + * local testing this is not enough */ + return tcp_opts_tstamp (&tc->opt) && tc->tsval_recent + && timestamp_lt (tc->opt.tsval, tc->tsval_recent); +} + +/** + * Validate incoming segment as per RFC793 p. 69 and RFC1323 p. 19 + * + * It first verifies if segment has a wrapped sequence number (PAWS) and then + * does the processing associated to the first four steps (ignoring security + * and precedence): sequence number, rst bit and syn bit checks. + * + * @return 0 if segments passes validation. + */ +static int +tcp_segment_validate (vlib_main_t * vm, tcp_connection_t * tc0, + vlib_buffer_t * b0, tcp_header_t * th0, u32 * next0) +{ + u8 paws_failed; + + if (PREDICT_FALSE (!tcp_ack (th0) && !tcp_rst (th0) && !tcp_syn (th0))) + return -1; + + tcp_options_parse (th0, &tc0->opt); + + /* RFC1323: Check against wrapped sequence numbers (PAWS). 
If we have + * timestamp to echo and it's less than tsval_recent, drop segment + * but still send an ACK in order to retain TCP's mechanism for detecting + * and recovering from half-open connections */ + paws_failed = tcp_segment_check_paws (tc0); + if (paws_failed) + { + clib_warning ("paws failed"); + + /* If it just so happens that a segment updates tsval_recent for a + * segment over 24 days old, invalidate tsval_recent. */ + if (timestamp_lt (tc0->tsval_recent_age + TCP_PAWS_IDLE, + tcp_time_now ())) + { + /* Age isn't reset until we get a valid tsval (bsd inspired) */ + tc0->tsval_recent = 0; + } + else + { + /* Drop after ack if not rst */ + if (!tcp_rst (th0)) + { + tcp_make_ack (tc0, b0); + *next0 = tcp_next_output (tc0->c_is_ip4); + return -1; + } + } + } + + /* 1st: check sequence number */ + if (!tcp_segment_in_rcv_wnd (tc0, vnet_buffer (b0)->tcp.seq_number, + vnet_buffer (b0)->tcp.seq_end)) + { + if (!tcp_rst (th0)) + { + /* Send dup ack */ + tcp_make_ack (tc0, b0); + *next0 = tcp_next_output (tc0->c_is_ip4); + } + return -1; + } + + /* 2nd: check the RST bit */ + if (tcp_rst (th0)) + { + /* Notify session that connection has been reset. Switch + * state to closed and await for session to do the cleanup. 
*/ + stream_session_reset_notify (&tc0->connection); + tc0->state = TCP_STATE_CLOSED; + return -1; + } + + /* 3rd: check security and precedence (skip) */ + + /* 4th: check the SYN bit */ + if (tcp_syn (th0)) + { + tcp_send_reset (b0, tc0->c_is_ip4); + return -1; + } + + /* If PAWS passed and segment in window, save timestamp */ + if (!paws_failed) + { + tc0->tsval_recent = tc0->opt.tsval; + tc0->tsval_recent_age = tcp_time_now (); + } + + return 0; +} + +always_inline int +tcp_rcv_ack_is_acceptable (tcp_connection_t * tc0, vlib_buffer_t * tb0) +{ + /* SND.UNA =< SEG.ACK =< SND.NXT */ + return (seq_leq (tc0->snd_una, vnet_buffer (tb0)->tcp.ack_number) + && seq_leq (vnet_buffer (tb0)->tcp.ack_number, tc0->snd_nxt)); +} + +/** + * Compute smoothed RTT as per VJ's '88 SIGCOMM and RFC6298 + * + * Note that although the original article, srtt and rttvar are scaled + * to minimize round-off errors, here we don't. Instead, we rely on + * better precision time measurements. + * + * TODO support us rtt resolution + */ +static void +tcp_estimate_rtt (tcp_connection_t * tc, u32 mrtt) +{ + int err; + + if (tc->srtt != 0) + { + err = mrtt - tc->srtt; + tc->srtt += err >> 3; + + /* XXX Drop in RTT results in RTTVAR increase and bigger RTO. + * The increase should be bound */ + tc->rttvar += (clib_abs (err) - tc->rttvar) >> 2; + } + else + { + /* First measurement. */ + tc->srtt = mrtt; + tc->rttvar = mrtt << 1; + } +} + +/** Update RTT estimate and RTO timer + * + * Measure RTT: We have two sources of RTT measurements: TSOPT and ACK + * timing. Middle boxes are known to fiddle with TCP options so we + * should give higher priority to ACK timing. + * + * return 1 if valid rtt 0 otherwise + */ +static int +tcp_update_rtt (tcp_connection_t * tc, u32 ack) +{ + u32 mrtt = 0; + + /* Karn's rule, part 1. Don't use retransmitted segments to estimate + * RTT because they're ambiguous. 
*/ + if (tc->rtt_seq && seq_gt (ack, tc->rtt_seq) && !tc->rto_boff) + { + mrtt = tcp_time_now () - tc->rtt_ts; + tc->rtt_seq = 0; + } + + /* As per RFC7323 TSecr can be used for RTTM only if the segment advances + * snd_una, i.e., the left side of the send window: + * seq_lt (tc->snd_una, ack). Note: last condition could be dropped, we don't + * try to update rtt for dupacks */ + else if (tcp_opts_tstamp (&tc->opt) && tc->opt.tsecr && tc->bytes_acked) + { + mrtt = tcp_time_now () - tc->opt.tsecr; + } + + /* Ignore dubious measurements */ + if (mrtt == 0 || mrtt > TCP_RTT_MAX) + return 0; + + tcp_estimate_rtt (tc, mrtt); + + tc->rto = clib_min (tc->srtt + (tc->rttvar << 2), TCP_RTO_MAX); + + return 1; +} + +/** + * Dequeue bytes that have been acked and while at it update RTT estimates. + */ +static void +tcp_dequeue_acked (tcp_connection_t * tc, u32 ack) +{ + /* Dequeue the newly ACKed bytes */ + stream_session_dequeue_drop (&tc->connection, tc->bytes_acked); + + /* Update rtt and rto */ + if (tcp_update_rtt (tc, ack)) + { + /* Good ACK received and valid RTT, make sure retransmit backoff is 0 */ + tc->rto_boff = 0; + } +} + +/** Check if dupack as per RFC5681 Sec. 
2 */
+always_inline u8
+tcp_ack_is_dupack (tcp_connection_t * tc, vlib_buffer_t * b, u32 new_snd_wnd)
+{
+  /* All four conditions must hold: the ACK number equals snd_una,
+   * snd_una_max is ahead of snd_una, the segment's seq_end equals its
+   * seq_number, and the window passed in equals the current snd_wnd. */
+  return ((vnet_buffer (b)->tcp.ack_number == tc->snd_una)
+          && seq_gt (tc->snd_una_max, tc->snd_una)
+          && (vnet_buffer (b)->tcp.seq_end == vnet_buffer (b)->tcp.seq_number)
+          && (new_snd_wnd == tc->snd_wnd));
+}
+
+/**
+ * Unlink a hole from the scoreboard's doubly linked list and return it to
+ * the hole pool.
+ *
+ * If the hole has no predecessor, the scoreboard head is moved to the
+ * hole's successor.
+ *
+ * @param sb   scoreboard that owns the hole
+ * @param hole hole to remove; it is handed back to the pool, so the caller
+ *             must not use it afterwards
+ */
+void
+scoreboard_remove_hole (sack_scoreboard_t * sb, sack_scoreboard_hole_t * hole)
+{
+  sack_scoreboard_hole_t *next, *prev;
+
+  /* Successor, if any, now points back at our predecessor */
+  if (hole->next != TCP_INVALID_SACK_HOLE_INDEX)
+    {
+      next = pool_elt_at_index (sb->holes, hole->next);
+      next->prev = hole->prev;
+    }
+
+  /* Predecessor, if any, now points forward at our successor; without a
+   * predecessor the hole was the head of the list */
+  if (hole->prev != TCP_INVALID_SACK_HOLE_INDEX)
+    {
+      prev = pool_elt_at_index (sb->holes, hole->prev);
+      prev->next = hole->next;
+    }
+  else
+    {
+      sb->head = hole->next;
+    }
+
+  pool_put (sb->holes, hole);
+}
+
+/**
+ * Allocate a hole covering [start, end] and link it into the scoreboard.
+ *
+ * With a non-NULL prev the new hole is linked right after prev. With a NULL
+ * prev the new hole becomes the head and both of its links are set to
+ * TCP_INVALID_SACK_HOLE_INDEX, i.e., any previous head is not chained to it.
+ *
+ * pool_get may grow the hole pool, so hole pointers obtained before the
+ * call can be stale afterwards. prev is therefore tracked by index in here;
+ * callers must re-fetch any hole pointer they keep across this call.
+ *
+ * @param sb    scoreboard to insert into
+ * @param prev  hole after which to insert, or NULL to install a new head
+ * @param start start sequence number stored in the new hole
+ * @param end   end sequence number stored in the new hole
+ * @return pointer to the newly inserted hole
+ */
+sack_scoreboard_hole_t *
+scoreboard_insert_hole (sack_scoreboard_t * sb, sack_scoreboard_hole_t * prev,
+                        u32 start, u32 end)
+{
+  sack_scoreboard_hole_t *hole, *next;
+  u32 hole_index, prev_index = TCP_INVALID_SACK_HOLE_INDEX;
+
+  /* Remember prev by index, the pointer may not survive pool_get */
+  if (prev)
+    prev_index = prev - sb->holes;
+
+  pool_get (sb->holes, hole);
+  memset (hole, 0, sizeof (*hole));
+
+  hole->start = start;
+  hole->end = end;
+  hole_index = hole - sb->holes;
+
+  if (prev_index != TCP_INVALID_SACK_HOLE_INDEX)
+    {
+      prev = pool_elt_at_index (sb->holes, prev_index);
+
+      hole->prev = prev_index;
+      hole->next = prev->next;
+
+      if ((next = scoreboard_next_hole (sb, hole)))
+        next->prev = hole_index;
+
+      prev->next = hole_index;
+    }
+  else
+    {
+      sb->head = hole_index;
+      hole->prev = TCP_INVALID_SACK_HOLE_INDEX;
+      hole->next = TCP_INVALID_SACK_HOLE_INDEX;
+    }
+
+  return hole;
+}
+
+/**
+ * Update the SACK scoreboard with the SACK blocks of the current segment.
+ *
+ * Steps: drop blocks that fail the validity checks below, append a block
+ * for the newly cumulatively acked range [snd_una, ack], sort the blocks by
+ * start, then walk holes and blocks together, shrinking, splitting or
+ * removing holes and adjusting sb->sacked_bytes.
+ *
+ * @param tc  connection whose scoreboard (tc->sack_sb) and received SACK
+ *            blocks (tc->opt.sacks) are processed; the block vector is
+ *            modified in place
+ * @param ack ACK number of the segment being processed
+ */
+static void
+tcp_rcv_sacks (tcp_connection_t * tc, u32 ack)
+{
+  sack_scoreboard_t *sb = &tc->sack_sb;
+  sack_block_t *blk, tmp;
+  sack_scoreboard_hole_t *hole, *next_hole;
+  u32 blk_index = 0, hole_index;
+  int i, j;
+
+  /* Nothing to do if the segment has no SACK option and there are no holes.
+   * The check takes the parsed options, like the other tcp_opts_* checks */
+  if (!tcp_opts_sack (&tc->opt) && sb->head == TCP_INVALID_SACK_HOLE_INDEX)
+    return;
+
+  /* Remove invalid blocks. vec_del1 moves the last element into the freed
+   * slot, so the index only advances when the current block is kept */
+  i = 0;
+  while (i < vec_len (tc->opt.sacks))
+    {
+      blk = &tc->opt.sacks[i];
+
+      if (seq_lt (blk->start, blk->end)
+          && seq_gt (blk->start, tc->snd_una)
+          && seq_gt (blk->start, ack) && seq_lt (blk->end, tc->snd_nxt))
+        {
+          i++;
+          continue;
+        }
+
+      vec_del1 (tc->opt.sacks, i);
+    }
+
+  /* Add block for cumulative ack */
+  if (seq_gt (ack, tc->snd_una))
+    {
+      tmp.start = tc->snd_una;
+      tmp.end = ack;
+      vec_add1 (tc->opt.sacks, tmp);
+    }
+
+  if (vec_len (tc->opt.sacks) == 0)
+    return;
+
+  /* Make sure blocks are ordered by start sequence number */
+  for (i = 0; i < vec_len (tc->opt.sacks); i++)
+    for (j = i + 1; j < vec_len (tc->opt.sacks); j++)
+      if (seq_lt (tc->opt.sacks[j].start, tc->opt.sacks[i].start))
+        {
+          tmp = tc->opt.sacks[i];
+          tc->opt.sacks[i] = tc->opt.sacks[j];
+          tc->opt.sacks[j] = tmp;
+        }
+
+  /* If no holes, insert the first that covers all outstanding bytes */
+  if (sb->head == TCP_INVALID_SACK_HOLE_INDEX)
+    {
+      scoreboard_insert_hole (sb, 0, tc->snd_una, tc->snd_una_max);
+    }
+
+  /* Walk the holes with the SACK blocks */
+  hole = pool_elt_at_index (sb->holes, sb->head);
+  while (hole && blk_index < vec_len (tc->opt.sacks))
+    {
+      blk = &tc->opt.sacks[blk_index];
+
+      if (seq_leq (blk->start, hole->start))
+        {
+          /* Block covers hole. Remove hole */
+          if (seq_geq (blk->end, hole->end))
+            {
+              next_hole = scoreboard_next_hole (sb, hole);
+
+              /* Byte accounting */
+              if (seq_lt (hole->end, ack))
+                {
+                  /* Bytes lost because snd wnd left edge advances.
+                   * next_hole is NULL when this is the last hole */
+                  if (next_hole && seq_lt (next_hole->start, ack))
+                    sb->sacked_bytes -= next_hole->start - hole->end;
+                  else
+                    sb->sacked_bytes -= ack - hole->end;
+                }
+              else
+                {
+                  sb->sacked_bytes += scoreboard_hole_bytes (hole);
+                }
+
+              scoreboard_remove_hole (sb, hole);
+              hole = next_hole;
+            }
+          /* Partial overlap */
+          else
+            {
+              sb->sacked_bytes += blk->end - hole->start;
+              hole->start = blk->end;
+              blk_index++;
+            }
+        }
+      else
+        {
+          /* Hole must be split.
+           * NOTE(review): hole ends are set to blk->start - 1 here while
+           * the partial-overlap branch uses blk->end as the new hole
+           * start; confirm the intended range convention */
+          if (seq_leq (blk->end, hole->end))
+            {
+              sb->sacked_bytes += blk->end - blk->start;
+
+              /* Inserting may grow the pool, re-fetch the hole by index */
+              hole_index = hole - sb->holes;
+              scoreboard_insert_hole (sb, hole, blk->end, hole->end);
+              hole = pool_elt_at_index (sb->holes, hole_index);
+
+              hole->end = blk->start - 1;
+              blk_index++;
+            }
+          else
+            {
+              sb->sacked_bytes += hole->end - blk->start + 1;
+              hole->end = blk->start - 1;
+              hole = scoreboard_next_hole (sb, hole);
+            }
+        }
+    }
+}
+
+/** Update snd_wnd + * + * If (SND.WL1 < SEG.SEQ or (SND.WL1 = SEG.SEQ and SND.WL2 =< SEG.ACK)), set + * SND.WND <- SEG.WND, set SND.WL1 <- SEG.SEQ, and set SND.WL2 <- SEG.ACK */ +static void +tcp_update_snd_wnd (tcp_connection_t * tc, u32 seq, u32 ack, u32 snd_wnd) +{ + if (tc->snd_wl1 < seq || (tc->snd_wl1 == seq && tc->snd_wl2 <= ack)) + { + tc->snd_wnd = snd_wnd; + tc->snd_wl1 = seq; + tc->snd_wl2 = ack; + } +} + +static void +tcp_cc_congestion (tcp_connection_t * tc) +{ + tc->cc_algo->congestion (tc); +} + +static void +tcp_cc_recover (tcp_connection_t * tc) +{ + if (tcp_in_fastrecovery (tc)) + { + tc->cc_algo->recovered (tc); + tcp_recovery_off (tc); + } + else if (tcp_in_recovery (tc)) + { + tcp_recovery_off (tc); + tc->cwnd = tcp_loss_wnd (tc); + } +} + +static void +tcp_cc_rcv_ack (tcp_connection_t * tc) +{ + u8 partial_ack; + + if (tcp_in_recovery (tc)) + { + partial_ack = seq_lt (tc->snd_una, tc->snd_una_max); + if (!partial_ack) + { + /* Clear retransmitted bytes. */ + tc->rtx_bytes = 0; + tcp_cc_recover (tc); + } + else + { + /* Clear retransmitted bytes. XXX should we clear all? 
*/ + tc->rtx_bytes = 0; + tc->cc_algo->rcv_cong_ack (tc, TCP_CC_PARTIALACK); + + /* Retransmit first unacked segment */ + tcp_retransmit_first_unacked (tc); + } + } + else + { + tc->cc_algo->rcv_ack (tc); + } + + tc->rcv_dupacks = 0; + tc->tsecr_last_ack = tc->opt.tsecr; +} + +static void +tcp_cc_rcv_dupack (tcp_connection_t * tc, u32 ack) +{ + ASSERT (tc->snd_una == ack); + + tc->rcv_dupacks++; + if (tc->rcv_dupacks == TCP_DUPACK_THRESHOLD) + { + /* RFC6582 NewReno heuristic to avoid multiple fast retransmits */ + if (tc->opt.tsecr != tc->tsecr_last_ack) + { + tc->rcv_dupacks = 0; + return; + } + + tcp_fastrecovery_on (tc); + + /* Handle congestion and dupack */ + tcp_cc_congestion (tc); + tc->cc_algo->rcv_cong_ack (tc, TCP_CC_DUPACK); + + tcp_fast_retransmit (tc); + + /* Post retransmit update cwnd to ssthresh and account for the + * three segments that have left the network and should've been + * buffered at the receiver */ + tc->cwnd = tc->ssthresh + TCP_DUPACK_THRESHOLD * tc->snd_mss; + } + else if (tc->rcv_dupacks > TCP_DUPACK_THRESHOLD) + { + ASSERT (tcp_in_fastrecovery (tc)); + + tc->cc_algo->rcv_cong_ack (tc, TCP_CC_DUPACK); + } +} + +void +tcp_cc_init (tcp_connection_t * tc) +{ + tc->cc_algo = tcp_cc_algo_get (TCP_CC_NEWRENO); + tc->cc_algo->init (tc); +} + +static int +tcp_rcv_ack (tcp_connection_t * tc, vlib_buffer_t * b, + tcp_header_t * th, u32 * next, u32 * error) +{ + u32 new_snd_wnd; + + /* If the ACK acks something not yet sent (SEG.ACK > SND.NXT) then send an + * ACK, drop the segment, and return */ + if (seq_gt (vnet_buffer (b)->tcp.ack_number, tc->snd_nxt)) + { + tcp_make_ack (tc, b); + *next = tcp_next_output (tc->c_is_ip4); + *error = TCP_ERROR_ACK_INVALID; + return -1; + } + + /* If old ACK, discard */ + if (seq_lt (vnet_buffer (b)->tcp.ack_number, tc->snd_una)) + { + *error = TCP_ERROR_ACK_OLD; + return -1; + } + + if (tcp_opts_sack_permitted (&tc->opt)) + tcp_rcv_sacks (tc, vnet_buffer (b)->tcp.ack_number); + + new_snd_wnd = 
clib_net_to_host_u32 (th->window) << tc->snd_wscale; + + if (tcp_ack_is_dupack (tc, b, new_snd_wnd)) + { + tcp_cc_rcv_dupack (tc, vnet_buffer (b)->tcp.ack_number); + *error = TCP_ERROR_ACK_DUP; + return -1; + } + + /* Valid ACK */ + tc->bytes_acked = vnet_buffer (b)->tcp.ack_number - tc->snd_una; + tc->snd_una = vnet_buffer (b)->tcp.ack_number; + + /* Dequeue ACKed packet and update RTT */ + tcp_dequeue_acked (tc, vnet_buffer (b)->tcp.ack_number); + + tcp_update_snd_wnd (tc, vnet_buffer (b)->tcp.seq_number, + vnet_buffer (b)->tcp.ack_number, new_snd_wnd); + + /* Updates congestion control (slow start/congestion avoidance) */ + tcp_cc_rcv_ack (tc); + + /* If everything has been acked, stop retransmit timer + * otherwise update */ + if (tc->snd_una == tc->snd_una_max) + tcp_timer_reset (tc, TCP_TIMER_RETRANSMIT); + else + tcp_timer_update (tc, TCP_TIMER_RETRANSMIT, tc->rto); + + return 0; +} + +/** + * Build SACK list as per RFC2018. + * + * Makes sure the first block contains the segment that generated the current + * ACK and the following ones are the ones most recently reported in SACK + * blocks. + * + * @param tc TCP connection for which the SACK list is updated + * @param start Start sequence number of the newest SACK block + * @param end End sequence of the newest SACK block + */ +static void +tcp_update_sack_list (tcp_connection_t * tc, u32 start, u32 end) +{ + sack_block_t *new_list = 0, block; + u32 n_elts; + int i; + u8 new_head = 0; + + /* If the first segment is ooo add it to the list. Last write might've moved + * rcv_nxt over the first segment. */ + if (seq_lt (tc->rcv_nxt, start)) + { + block.start = start; + block.end = end; + vec_add1 (new_list, block); + new_head = 1; + } + + /* Find the blocks still worth keeping. 
*/ + for (i = 0; i < vec_len (tc->snd_sacks); i++) + { + /* Discard if: + * 1) rcv_nxt advanced beyond current block OR + * 2) Segment overlapped by the first segment, i.e., it has been merged + * into it.*/ + if (seq_leq (tc->snd_sacks[i].start, tc->rcv_nxt) + || seq_leq (tc->snd_sacks[i].start, end)) + continue; + + /* Save subsequent segments to new SACK list. */ + n_elts = clib_min (vec_len (tc->snd_sacks) - i, + TCP_MAX_SACK_BLOCKS - new_head); + vec_insert_elts (new_list, &tc->snd_sacks[i], n_elts, new_head); + break; + } + + /* Replace old vector with new one */ + vec_free (tc->snd_sacks); + tc->snd_sacks = new_list; +} + +/** Enqueue data for delivery to application */ +always_inline u32 +tcp_session_enqueue_data (tcp_connection_t * tc, vlib_buffer_t * b, + u16 data_len) +{ + int written; + + /* Pure ACK. Update rcv_nxt and be done. */ + if (PREDICT_FALSE (data_len == 0)) + { + tc->rcv_nxt = vnet_buffer (b)->tcp.seq_end; + return TCP_ERROR_PURE_ACK; + } + + written = stream_session_enqueue_data (&tc->connection, + vlib_buffer_get_current (b), + data_len, 1 /* queue event */ ); + + /* Update rcv_nxt */ + if (PREDICT_TRUE (written == data_len)) + { + tc->rcv_nxt = vnet_buffer (b)->tcp.seq_end; + } + /* If more data written than expected, account for out-of-order bytes. 
*/ + else if (written > data_len) + { + tc->rcv_nxt = vnet_buffer (b)->tcp.seq_end + written - data_len; + + /* Send ACK confirming the update */ + tc->flags |= TCP_CONN_SNDACK; + + /* Update SACK list if need be */ + if (tcp_opts_sack_permitted (&tc->opt)) + { + /* Remove SACK blocks that have been delivered */ + tcp_update_sack_list (tc, tc->rcv_nxt, tc->rcv_nxt); + } + } + else + { + ASSERT (0); + return TCP_ERROR_FIFO_FULL; + } + + return TCP_ERROR_ENQUEUED; +} + +/** Enqueue out-of-order data */ +always_inline u32 +tcp_session_enqueue_ooo (tcp_connection_t * tc, vlib_buffer_t * b, + u16 data_len) +{ + stream_session_t *s0; + u32 offset, seq; + + s0 = stream_session_get (tc->c_s_index, tc->c_thread_index); + seq = vnet_buffer (b)->tcp.seq_number; + offset = seq - tc->rcv_nxt; + + if (svm_fifo_enqueue_with_offset (s0->server_rx_fifo, s0->pid, offset, + data_len, vlib_buffer_get_current (b))) + return TCP_ERROR_FIFO_FULL; + + /* Update SACK list if in use */ + if (tcp_opts_sack_permitted (&tc->opt)) + { + ooo_segment_t *newest; + u32 start, end; + + /* Get the newest segment from the fifo */ + newest = svm_fifo_newest_ooo_segment (s0->server_rx_fifo); + start = tc->rcv_nxt + ooo_segment_offset (s0->server_rx_fifo, newest); + end = tc->rcv_nxt + ooo_segment_end_offset (s0->server_rx_fifo, newest); + + tcp_update_sack_list (tc, start, end); + } + + return TCP_ERROR_ENQUEUED; +} + +/** + * Check if ACK could be delayed. DELACK timer is set only after frame is + * processed so this can return true for a full bursts of packets. + */ +always_inline int +tcp_can_delack (tcp_connection_t * tc) +{ + /* If there's no DELACK timer set and the last window sent wasn't 0 we + * can safely delay. 
*/ + if (!tcp_timer_is_active (tc, TCP_TIMER_DELACK) + && (tc->flags & TCP_CONN_SENT_RCV_WND0) == 0 + && (tc->flags & TCP_CONN_SNDACK) == 0) + return 1; + + return 0; +} + +static int +tcp_segment_rcv (tcp_main_t * tm, tcp_connection_t * tc, vlib_buffer_t * b, + u16 n_data_bytes, u32 * next0) +{ + u32 error = 0; + + /* Handle out-of-order data */ + if (PREDICT_FALSE (vnet_buffer (b)->tcp.seq_number != tc->rcv_nxt)) + { + error = tcp_session_enqueue_ooo (tc, b, n_data_bytes); + + /* Don't send more than 3 dupacks per burst + * XXX decide if this is good */ + if (tc->snt_dupacks < 3) + { + /* RFC2581: Send DUPACK for fast retransmit */ + tcp_make_ack (tc, b); + *next0 = tcp_next_output (tc->c_is_ip4); + + /* Mark as DUPACK. We may filter these in output if + * the burst fills the holes. */ + vnet_buffer (b)->tcp.flags = TCP_BUF_FLAG_DUPACK; + + tc->snt_dupacks++; + } + + goto done; + } + + /* In order data, enqueue. Fifo figures out by itself if any out-of-order + * segments can be enqueued after fifo tail offset changes. */ + error = tcp_session_enqueue_data (tc, b, n_data_bytes); + + /* Check if ACK can be delayed */ + if (tcp_can_delack (tc)) + { + /* Nothing to do for pure ACKs */ + if (n_data_bytes == 0) + goto done; + + /* If connection has not been previously marked for delay ack + * add it to the list and flag it */ + if (!tc->flags & TCP_CONN_DELACK) + { + vec_add1 (tm->delack_connections[tc->c_thread_index], + tc->c_c_index); + tc->flags |= TCP_CONN_DELACK; + } + } + else + { + /* Check if a packet has already been enqueued to output for burst. 
+ * If yes, then drop this one, otherwise, let it pass through to + * output */ + if ((tc->flags & TCP_CONN_BURSTACK) == 0) + { + *next0 = tcp_next_output (tc->c_is_ip4); + tcp_make_ack (tc, b); + error = TCP_ERROR_ENQUEUED; + + /* TODO: maybe add counter to ensure N acks will be sent/burst */ + tc->flags |= TCP_CONN_BURSTACK; + } + } + +done: + return error; +} + +void +delack_timers_init (tcp_main_t * tm, u32 thread_index) +{ + tcp_connection_t *tc; + u32 i, *conns; + tw_timer_wheel_16t_2w_512sl_t *tw; + + tw = &tm->timer_wheels[thread_index]; + conns = tm->delack_connections[thread_index]; + for (i = 0; i < vec_len (conns); i++) + { + tc = pool_elt_at_index (tm->connections[thread_index], conns[i]); + ASSERT (0 != tc); + + tc->timers[TCP_TIMER_DELACK] + = tw_timer_start_16t_2w_512sl (tw, conns[i], + TCP_TIMER_DELACK, TCP_DELACK_TIME); + } + vec_reset_length (tm->delack_connections[thread_index]); +} + +always_inline uword +tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_ip4) +{ + u32 n_left_from, next_index, *from, *to_next; + u32 my_thread_index = vm->cpu_index, errors = 0; + tcp_main_t *tm = vnet_get_tcp_main (); + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + tcp_header_t *th0 = 0; + tcp_connection_t *tc0; + ip4_header_t *ip40; + ip6_header_t *ip60; + u32 n_advance_bytes0, n_data_bytes0; + u32 next0 = TCP_ESTABLISHED_NEXT_DROP, error0 = TCP_ERROR_ENQUEUED; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + tc0 = tcp_connection_get (vnet_buffer (b0)->tcp.connection_index, + my_thread_index); + + /* Checksum 
computed by ipx_local no need to compute again */ + + if (is_ip4) + { + ip40 = vlib_buffer_get_current (b0); + th0 = ip4_next_header (ip40); + n_advance_bytes0 = (ip4_header_bytes (ip40) + + tcp_header_bytes (th0)); + n_data_bytes0 = clib_net_to_host_u16 (ip40->length) + - n_advance_bytes0; + } + else + { + ip60 = vlib_buffer_get_current (b0); + th0 = ip6_next_header (ip60); + n_advance_bytes0 = tcp_header_bytes (th0); + n_data_bytes0 = clib_net_to_host_u16 (ip60->payload_length) + - n_advance_bytes0; + n_advance_bytes0 += sizeof (ip60[0]); + } + + /* SYNs, FINs and data consume sequence numbers */ + vnet_buffer (b0)->tcp.seq_end = vnet_buffer (b0)->tcp.seq_number + + tcp_is_syn (th0) + tcp_is_fin (th0) + n_data_bytes0; + + /* TODO header prediction fast path */ + + /* 1-4: check SEQ, RST, SYN */ + if (PREDICT_FALSE (tcp_segment_validate (vm, tc0, b0, th0, &next0))) + { + error0 = TCP_ERROR_SEGMENT_INVALID; + goto drop; + } + + /* 5: check the ACK field */ + if (tcp_rcv_ack (tc0, b0, th0, &next0, &error0)) + { + goto drop; + } + + /* 6: check the URG bit TODO */ + + /* 7: process the segment text */ + vlib_buffer_advance (b0, n_advance_bytes0); + error0 = tcp_segment_rcv (tm, tc0, b0, n_data_bytes0, &next0); + + /* 8: check the FIN bit */ + if (tcp_fin (th0)) + { + /* Send ACK and enter CLOSE-WAIT */ + tcp_make_ack (tc0, b0); + tcp_connection_force_ack (tc0, b0); + next0 = tcp_next_output (tc0->c_is_ip4); + tc0->state = TCP_STATE_CLOSE_WAIT; + stream_session_disconnect_notify (&tc0->connection); + } + + drop: + b0->error = node->errors[error0]; + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + errors = session_manager_flush_enqueue_events (my_thread_index); + if (errors) + { + if (is_ip4) + vlib_node_increment_counter (vm, tcp4_established_node.index, + 
TCP_ERROR_EVENT_FIFO_FULL, errors); + else + vlib_node_increment_counter (vm, tcp6_established_node.index, + TCP_ERROR_EVENT_FIFO_FULL, errors); + } + + delack_timers_init (tm, my_thread_index); + + return from_frame->n_vectors; +} + +static uword +tcp4_established (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_established_inline (vm, node, from_frame, 1 /* is_ip4 */ ); +} + +static uword +tcp6_established (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_established_inline (vm, node, from_frame, 0 /* is_ip4 */ ); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp4_established_node) = +{ + .function = tcp4_established, + .name = "tcp4-established", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR,.error_strings = tcp_error_strings, + .n_next_nodes = TCP_ESTABLISHED_N_NEXT, + .next_nodes = + { +#define _(s,n) [TCP_ESTABLISHED_NEXT_##s] = n, + foreach_tcp_state_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (tcp4_established_node, tcp4_established); + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp6_established_node) = +{ + .function = tcp6_established, + .name = "tcp6-established", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_ESTABLISHED_N_NEXT, + .next_nodes = + { +#define _(s,n) [TCP_ESTABLISHED_NEXT_##s] = n, + foreach_tcp_state_next +#undef _ + }, +}; +/* *INDENT-ON* */ + + +VLIB_NODE_FUNCTION_MULTIARCH (tcp6_established_node, tcp6_established); + +vlib_node_registration_t tcp4_syn_sent_node; +vlib_node_registration_t tcp6_syn_sent_node; + +always_inline uword +tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_ip4) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + u32 n_left_from, next_index, *from, *to_next; + u32 my_thread_index = vm->cpu_index, errors = 0; + u8 sst = is_ip4 ? SESSION_TYPE_IP4_TCP : SESSION_TYPE_IP6_TCP; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0, ack0, seq0; + vlib_buffer_t *b0; + tcp_header_t *tcp0 = 0; + tcp_connection_t *tc0; + ip4_header_t *ip40; + ip6_header_t *ip60; + u32 n_advance_bytes0, n_data_bytes0; + tcp_connection_t *new_tc0; + u32 next0 = TCP_SYN_SENT_NEXT_DROP, error0 = TCP_ERROR_ENQUEUED; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + tc0 = + tcp_half_open_connection_get (vnet_buffer (b0)-> + tcp.connection_index); + + ack0 = vnet_buffer (b0)->tcp.ack_number; + seq0 = vnet_buffer (b0)->tcp.seq_number; + + /* Checksum computed by ipx_local no need to compute again */ + + if (is_ip4) + { + ip40 = vlib_buffer_get_current (b0); + tcp0 = ip4_next_header (ip40); + n_advance_bytes0 = (ip4_header_bytes (ip40) + + tcp_header_bytes (tcp0)); + n_data_bytes0 = clib_net_to_host_u16 (ip40->length) + - n_advance_bytes0; + 
} + else + { + ip60 = vlib_buffer_get_current (b0); + tcp0 = ip6_next_header (ip60); + n_advance_bytes0 = tcp_header_bytes (tcp0); + n_data_bytes0 = clib_net_to_host_u16 (ip60->payload_length) + - n_advance_bytes0; + n_advance_bytes0 += sizeof (ip60[0]); + } + + if (PREDICT_FALSE + (!tcp_ack (tcp0) && !tcp_rst (tcp0) && !tcp_syn (tcp0))) + goto drop; + + /* SYNs, FINs and data consume sequence numbers */ + vnet_buffer (b0)->tcp.seq_end = seq0 + tcp_is_syn (tcp0) + + tcp_is_fin (tcp0) + n_data_bytes0; + + /* + * 1. check the ACK bit + */ + + /* + * If the ACK bit is set + * If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send a reset (unless + * the RST bit is set, if so drop the segment and return) + * + * and discard the segment. Return. + * If SND.UNA =< SEG.ACK =< SND.NXT then the ACK is acceptable. + */ + if (tcp_ack (tcp0)) + { + if (ack0 <= tc0->iss || ack0 > tc0->snd_nxt) + { + if (!tcp_rst (tcp0)) + tcp_send_reset (b0, is_ip4); + + goto drop; + } + + /* Make sure ACK is valid */ + if (tc0->snd_una > ack0) + goto drop; + } + + /* + * 2. check the RST bit + */ + + if (tcp_rst (tcp0)) + { + /* If ACK is acceptable, signal client that peer is not + * willing to accept connection and drop connection*/ + if (tcp_ack (tcp0)) + { + stream_session_connect_notify (&tc0->connection, sst, + 1 /* fail */ ); + tcp_connection_cleanup (tc0); + } + goto drop; + } + + /* + * 3. check the security and precedence (skipped) + */ + + /* + * 4. check the SYN bit + */ + + /* No SYN flag. Drop. */ + if (!tcp_syn (tcp0)) + goto drop; + + /* Stop connection establishment and retransmit timers */ + tcp_timer_reset (tc0, TCP_TIMER_ESTABLISH); + tcp_timer_reset (tc0, TCP_TIMER_RETRANSMIT_SYN); + + /* Valid SYN or SYN-ACK. Move connection from half-open pool to + * current thread pool. 
*/ + pool_get (tm->connections[my_thread_index], new_tc0); + clib_memcpy (new_tc0, tc0, sizeof (*new_tc0)); + + new_tc0->c_thread_index = my_thread_index; + + /* Cleanup half-open connection XXX lock */ + pool_put (tm->half_open_connections, tc0); + + new_tc0->rcv_nxt = vnet_buffer (b0)->tcp.seq_end; + new_tc0->irs = seq0; + + /* Parse options */ + tcp_options_parse (tcp0, &new_tc0->opt); + tcp_connection_init_vars (new_tc0); + + if (tcp_opts_tstamp (&new_tc0->opt)) + { + new_tc0->tsval_recent = new_tc0->opt.tsval; + new_tc0->tsval_recent_age = tcp_time_now (); + } + + if (tcp_opts_wscale (&new_tc0->opt)) + new_tc0->snd_wscale = new_tc0->opt.wscale; + + new_tc0->snd_wnd = clib_net_to_host_u32 (tcp0->window) + << new_tc0->snd_wscale; + new_tc0->snd_wl1 = seq0; + new_tc0->snd_wl2 = ack0; + + /* SYN-ACK: See if we can switch to ESTABLISHED state */ + if (tcp_ack (tcp0)) + { + /* Our SYN is ACKed: we have iss < ack = snd_una */ + + /* TODO Dequeue acknowledged segments if we support Fast Open */ + new_tc0->snd_una = ack0; + new_tc0->state = TCP_STATE_ESTABLISHED; + + /* Notify app that we have connection */ + stream_session_connect_notify (&new_tc0->connection, sst, 0); + + /* Make sure after data segment processing ACK is sent */ + new_tc0->flags |= TCP_CONN_SNDACK; + } + /* SYN: Simultaneous open. Change state to SYN-RCVD and send SYN-ACK */ + else + { + new_tc0->state = TCP_STATE_SYN_RCVD; + + /* Notify app that we have connection XXX */ + stream_session_connect_notify (&new_tc0->connection, sst, 0); + + tcp_make_synack (new_tc0, b0); + next0 = tcp_next_output (is_ip4); + + goto drop; + } + + /* Read data, if any */ + if (n_data_bytes0) + { + error0 = + tcp_segment_rcv (tm, new_tc0, b0, n_data_bytes0, &next0); + if (error0 == TCP_ERROR_PURE_ACK) + error0 = TCP_ERROR_SYN_ACKS_RCVD; + } + else + { + tcp_make_ack (new_tc0, b0); + next0 = tcp_next_output (new_tc0->c_is_ip4); + } + + drop: + + b0->error = error0 ? 
node->errors[error0] : 0; + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + errors = session_manager_flush_enqueue_events (my_thread_index); + if (errors) + { + if (is_ip4) + vlib_node_increment_counter (vm, tcp4_established_node.index, + TCP_ERROR_EVENT_FIFO_FULL, errors); + else + vlib_node_increment_counter (vm, tcp6_established_node.index, + TCP_ERROR_EVENT_FIFO_FULL, errors); + } + + return from_frame->n_vectors; +} + +static uword +tcp4_syn_sent (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_syn_sent_inline (vm, node, from_frame, 1 /* is_ip4 */ ); +} + +static uword +tcp6_syn_sent_rcv (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_syn_sent_inline (vm, node, from_frame, 0 /* is_ip4 */ ); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp4_syn_sent_node) = +{ + .function = tcp4_syn_sent, + .name = "tcp4-syn-sent", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_SYN_SENT_N_NEXT, + .next_nodes = + { +#define _(s,n) [TCP_SYN_SENT_NEXT_##s] = n, + foreach_tcp_state_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (tcp4_syn_sent_node, tcp4_syn_sent); + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp6_syn_sent_node) = +{ + .function = tcp6_syn_sent_rcv, + .name = "tcp6-syn-sent", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_SYN_SENT_N_NEXT, + .next_nodes = + { +#define _(s,n) [TCP_SYN_SENT_NEXT_##s] = n, + foreach_tcp_state_next +#undef _ + } +,}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (tcp6_syn_sent_node, tcp6_syn_sent_rcv); +/** + * Handles reception for all states except LISTEN, SYN-SEND and ESTABLISHED + * as per RFC793 p. 64 + */ +always_inline uword +tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_ip4) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + u32 n_left_from, next_index, *from, *to_next; + u32 my_thread_index = vm->cpu_index, errors = 0; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + tcp_header_t *tcp0 = 0; + tcp_connection_t *tc0; + ip4_header_t *ip40; + ip6_header_t *ip60; + u32 n_advance_bytes0, n_data_bytes0; + u32 next0 = TCP_RCV_PROCESS_NEXT_DROP, error0 = TCP_ERROR_ENQUEUED; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + tc0 = tcp_connection_get (vnet_buffer (b0)->tcp.connection_index, + my_thread_index); + + /* Checksum computed by ipx_local no need to compute again */ + + if (is_ip4) + { + ip40 = vlib_buffer_get_current (b0); + tcp0 = ip4_next_header (ip40); + n_advance_bytes0 = (ip4_header_bytes (ip40) + + tcp_header_bytes (tcp0)); + n_data_bytes0 = clib_net_to_host_u16 (ip40->length) + - n_advance_bytes0; + } + else + { + ip60 = vlib_buffer_get_current (b0); + tcp0 = ip6_next_header (ip60); + n_advance_bytes0 = tcp_header_bytes (tcp0); + n_data_bytes0 = clib_net_to_host_u16 
(ip60->payload_length) + - n_advance_bytes0; + n_advance_bytes0 += sizeof (ip60[0]); + } + + /* SYNs, FINs and data consume sequence numbers */ + vnet_buffer (b0)->tcp.seq_end = vnet_buffer (b0)->tcp.seq_number + + tcp_is_syn (tcp0) + tcp_is_fin (tcp0) + n_data_bytes0; + + /* + * Special treatment for CLOSED + */ + switch (tc0->state) + { + case TCP_STATE_CLOSED: + goto drop; + break; + } + + /* + * For all other states (except LISTEN) + */ + + /* 1-4: check SEQ, RST, SYN */ + if (PREDICT_FALSE + (tcp_segment_validate (vm, tc0, b0, tcp0, &next0))) + { + error0 = TCP_ERROR_SEGMENT_INVALID; + goto drop; + } + + /* 5: check the ACK field */ + switch (tc0->state) + { + case TCP_STATE_SYN_RCVD: + /* + * If the segment acknowledgment is not acceptable, form a + * reset segment, + * + * and send it. + */ + if (!tcp_rcv_ack_is_acceptable (tc0, b0)) + { + tcp_send_reset (b0, is_ip4); + goto drop; + } + /* Switch state to ESTABLISHED */ + tc0->state = TCP_STATE_ESTABLISHED; + + /* Initialize session variables */ + tc0->snd_una = vnet_buffer (b0)->tcp.ack_number; + tc0->snd_wnd = clib_net_to_host_u32 (tcp0->window) + << tc0->opt.wscale; + tc0->snd_wl1 = vnet_buffer (b0)->tcp.seq_number; + tc0->snd_wl2 = vnet_buffer (b0)->tcp.ack_number; + + /* Shoulder tap the server */ + stream_session_accept_notify (&tc0->connection); + + tcp_timer_reset (tc0, TCP_TIMER_RETRANSMIT_SYN); + break; + case TCP_STATE_ESTABLISHED: + /* We can get packets in established state here because they + * were enqueued before state change */ + if (tcp_rcv_ack (tc0, b0, tcp0, &next0, &error0)) + goto drop; + + break; + case TCP_STATE_FIN_WAIT_1: + /* In addition to the processing for the ESTABLISHED state, if + * our FIN is now acknowledged then enter FIN-WAIT-2 and + * continue processing in that state. 
*/ + if (tcp_rcv_ack (tc0, b0, tcp0, &next0, &error0)) + goto drop; + tc0->state = TCP_STATE_FIN_WAIT_2; + /* Stop all timers, 2MSL will be set lower */ + tcp_connection_timers_reset (tc0); + break; + case TCP_STATE_FIN_WAIT_2: + /* In addition to the processing for the ESTABLISHED state, if + * the retransmission queue is empty, the user's CLOSE can be + * acknowledged ("ok") but do not delete the TCB. */ + if (tcp_rcv_ack (tc0, b0, tcp0, &next0, &error0)) + goto drop; + /* check if rtx queue is empty and ack CLOSE TODO */ + break; + case TCP_STATE_CLOSE_WAIT: + /* Do the same processing as for the ESTABLISHED state. */ + if (tcp_rcv_ack (tc0, b0, tcp0, &next0, &error0)) + goto drop; + break; + case TCP_STATE_CLOSING: + /* In addition to the processing for the ESTABLISHED state, if + * the ACK acknowledges our FIN then enter the TIME-WAIT state, + * otherwise ignore the segment. */ + if (tcp_rcv_ack (tc0, b0, tcp0, &next0, &error0)) + goto drop; + + /* XXX test that send queue empty */ + tc0->state = TCP_STATE_TIME_WAIT; + goto drop; + + break; + case TCP_STATE_LAST_ACK: + /* The only thing that can arrive in this state is an + * acknowledgment of our FIN. If our FIN is now acknowledged, + * delete the TCB, enter the CLOSED state, and return. */ + + if (!tcp_rcv_ack_is_acceptable (tc0, b0)) + goto drop; + + tcp_connection_del (tc0); + goto drop; + + break; + case TCP_STATE_TIME_WAIT: + /* The only thing that can arrive in this state is a + * retransmission of the remote FIN. Acknowledge it, and restart + * the 2 MSL timeout. 
*/ + + /* TODO */ + goto drop; + break; + default: + ASSERT (0); + } + + /* 6: check the URG bit TODO */ + + /* 7: process the segment text */ + switch (tc0->state) + { + case TCP_STATE_ESTABLISHED: + case TCP_STATE_FIN_WAIT_1: + case TCP_STATE_FIN_WAIT_2: + error0 = tcp_segment_rcv (tm, tc0, b0, n_data_bytes0, &next0); + break; + case TCP_STATE_CLOSE_WAIT: + case TCP_STATE_CLOSING: + case TCP_STATE_LAST_ACK: + case TCP_STATE_TIME_WAIT: + /* This should not occur, since a FIN has been received from the + * remote side. Ignore the segment text. */ + break; + } + + /* 8: check the FIN bit */ + if (!tcp_fin (tcp0)) + goto drop; + + switch (tc0->state) + { + case TCP_STATE_ESTABLISHED: + case TCP_STATE_SYN_RCVD: + /* Send FIN-ACK notify app and enter CLOSE-WAIT */ + tcp_connection_timers_reset (tc0); + tcp_make_finack (tc0, b0); + next0 = tcp_next_output (tc0->c_is_ip4); + stream_session_disconnect_notify (&tc0->connection); + tc0->state = TCP_STATE_CLOSE_WAIT; + break; + case TCP_STATE_CLOSE_WAIT: + case TCP_STATE_CLOSING: + case TCP_STATE_LAST_ACK: + /* move along .. */ + break; + case TCP_STATE_FIN_WAIT_1: + tc0->state = TCP_STATE_TIME_WAIT; + tcp_connection_timers_reset (tc0); + tcp_timer_set (tc0, TCP_TIMER_2MSL, TCP_2MSL_TIME); + break; + case TCP_STATE_FIN_WAIT_2: + /* Got FIN, send ACK! */ + tc0->state = TCP_STATE_TIME_WAIT; + tcp_timer_set (tc0, TCP_TIMER_2MSL, TCP_2MSL_TIME); + tcp_make_ack (tc0, b0); + next0 = tcp_next_output (is_ip4); + break; + case TCP_STATE_TIME_WAIT: + /* Remain in the TIME-WAIT state. Restart the 2 MSL time-wait + * timeout. + */ + tcp_timer_update (tc0, TCP_TIMER_2MSL, TCP_2MSL_TIME); + break; + } + + b0->error = error0 ? 
node->errors[error0] : 0; + + drop: + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + errors = session_manager_flush_enqueue_events (my_thread_index); + if (errors) + { + if (is_ip4) + vlib_node_increment_counter (vm, tcp4_established_node.index, + TCP_ERROR_EVENT_FIFO_FULL, errors); + else + vlib_node_increment_counter (vm, tcp6_established_node.index, + TCP_ERROR_EVENT_FIFO_FULL, errors); + } + + return from_frame->n_vectors; +} + +static uword +tcp4_rcv_process (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_rcv_process_inline (vm, node, from_frame, 1 /* is_ip4 */ ); +} + +static uword +tcp6_rcv_process (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_rcv_process_inline (vm, node, from_frame, 0 /* is_ip4 */ ); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp4_rcv_process_node) = +{ + .function = tcp4_rcv_process, + .name = "tcp4-rcv-process", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_RCV_PROCESS_N_NEXT, + .next_nodes = + { +#define _(s,n) [TCP_RCV_PROCESS_NEXT_##s] = n, + foreach_tcp_state_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (tcp4_rcv_process_node, tcp4_rcv_process); + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp6_rcv_process_node) = +{ + .function = tcp6_rcv_process, + .name = "tcp6-rcv-process", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_RCV_PROCESS_N_NEXT, + .next_nodes = + { +#define _(s,n) [TCP_RCV_PROCESS_NEXT_##s] = n, + foreach_tcp_state_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (tcp6_rcv_process_node, tcp6_rcv_process); + +vlib_node_registration_t tcp4_listen_node; +vlib_node_registration_t tcp6_listen_node; + +/** + * LISTEN state processing as per RFC 793 p. 65 + */ +always_inline uword +tcp46_listen_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_ip4) +{ + u32 n_left_from, next_index, *from, *to_next; + u32 my_thread_index = vm->cpu_index; + tcp_main_t *tm = vnet_get_tcp_main (); + u8 sst = is_ip4 ? SESSION_TYPE_IP4_TCP : SESSION_TYPE_IP6_TCP; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + tcp_header_t *th0 = 0; + tcp_connection_t *lc0; + ip4_header_t *ip40; + ip6_header_t *ip60; + tcp_connection_t *child0; + u32 error0 = TCP_ERROR_SYNS_RCVD, next0 = TCP_LISTEN_NEXT_DROP; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + lc0 = tcp_listener_get (vnet_buffer (b0)->tcp.connection_index); + + if (is_ip4) + { + ip40 = vlib_buffer_get_current (b0); + th0 = ip4_next_header (ip40); + } + else + { + ip60 = vlib_buffer_get_current (b0); + th0 = ip6_next_header (ip60); + } + + /* Create child session. For syn-flood protection use filter */ + + /* 1. first check for an RST */ + if (tcp_rst (th0)) + goto drop; + + /* 2. 
second check for an ACK */ + if (tcp_ack (th0)) + { + tcp_send_reset (b0, is_ip4); + goto drop; + } + + /* 3. check for a SYN (did that already) */ + + /* Create child session and send SYN-ACK */ + pool_get (tm->connections[my_thread_index], child0); + memset (child0, 0, sizeof (*child0)); + + child0->c_c_index = child0 - tm->connections[my_thread_index]; + child0->c_lcl_port = lc0->c_lcl_port; + child0->c_rmt_port = th0->src_port; + child0->c_is_ip4 = is_ip4; + child0->c_thread_index = my_thread_index; + + if (is_ip4) + { + child0->c_lcl_ip4.as_u32 = ip40->dst_address.as_u32; + child0->c_rmt_ip4.as_u32 = ip40->src_address.as_u32; + } + else + { + clib_memcpy (&child0->c_lcl_ip6, &ip60->dst_address, + sizeof (ip6_address_t)); + clib_memcpy (&child0->c_rmt_ip6, &ip60->src_address, + sizeof (ip6_address_t)); + } + + if (stream_session_accept (&child0->connection, lc0->c_s_index, sst, + 0 /* notify */ )) + { + error0 = TCP_ERROR_CREATE_SESSION_FAIL; + goto drop; + } + + tcp_options_parse (th0, &child0->opt); + tcp_connection_init_vars (child0); + + child0->irs = vnet_buffer (b0)->tcp.seq_number; + child0->rcv_nxt = vnet_buffer (b0)->tcp.seq_number + 1; + child0->state = TCP_STATE_SYN_RCVD; + + /* RFC1323: TSval timestamps sent on {SYN} and {SYN,ACK} + * segments are used to initialize PAWS. */ + if (tcp_opts_tstamp (&child0->opt)) + { + child0->tsval_recent = child0->opt.tsval; + child0->tsval_recent_age = tcp_time_now (); + } + + /* Reuse buffer to make syn-ack and send */ + tcp_make_synack (child0, b0); + next0 = tcp_next_output (is_ip4); + + drop: + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + + } + + b0->error = error0 ? 
node->errors[error0] : 0; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return from_frame->n_vectors; +} + +static uword +tcp4_listen (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_listen_inline (vm, node, from_frame, 1 /* is_ip4 */ ); +} + +static uword +tcp6_listen (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_listen_inline (vm, node, from_frame, 0 /* is_ip4 */ ); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp4_listen_node) = +{ + .function = tcp4_listen, + .name = "tcp4-listen", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_LISTEN_N_NEXT, + .next_nodes = + { +#define _(s,n) [TCP_LISTEN_NEXT_##s] = n, + foreach_tcp_state_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (tcp4_listen_node, tcp4_listen); + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp6_listen_node) = +{ + .function = tcp6_listen, + .name = "tcp6-listen", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_LISTEN_N_NEXT, + .next_nodes = + { +#define _(s,n) [TCP_LISTEN_NEXT_##s] = n, + foreach_tcp_state_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (tcp6_listen_node, tcp6_listen); + +vlib_node_registration_t tcp4_input_node; +vlib_node_registration_t tcp6_input_node; + +typedef enum _tcp_input_next +{ + TCP_INPUT_NEXT_DROP, + TCP_INPUT_NEXT_LISTEN, + TCP_INPUT_NEXT_RCV_PROCESS, + TCP_INPUT_NEXT_SYN_SENT, + TCP_INPUT_NEXT_ESTABLISHED, + TCP_INPUT_NEXT_RESET, + TCP_INPUT_N_NEXT +} tcp_input_next_t; + +#define foreach_tcp4_input_next \ + _ (DROP, "error-drop") \ + _ (LISTEN, "tcp4-listen") \ + _ (RCV_PROCESS, "tcp4-rcv-process") \ + _ (SYN_SENT, "tcp4-syn-sent") \ + _ (ESTABLISHED, "tcp4-established") \ + _ (RESET, "tcp4-reset") + +#define foreach_tcp6_input_next \ + _ (DROP, "error-drop") \ + _ (LISTEN, "tcp6-listen") \ + _ (RCV_PROCESS, "tcp6-rcv-process") \ + _ (SYN_SENT, "tcp6-syn-sent") \ + _ (ESTABLISHED, "tcp6-established") \ + _ (RESET, "tcp6-reset") + +typedef struct +{ + u16 src_port; + u16 dst_port; + u8 state; +} tcp_rx_trace_t; + +const char *tcp_fsm_states[] = { +#define _(sym, str) str, + foreach_tcp_fsm_state +#undef _ +}; + +u8 * +format_tcp_state (u8 * s, va_list * args) +{ + tcp_state_t *state = va_arg (*args, tcp_state_t *); + + if (state[0] < TCP_N_STATES) + s = format (s, "%s", tcp_fsm_states[state[0]]); + else + s = format (s, "UNKNOWN"); + + return s; +} + +u8 * +format_tcp_rx_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + tcp_rx_trace_t *t = va_arg (*args, tcp_rx_trace_t *); + + s = format (s, "TCP: src-port %d dst-port %U%s\n", + clib_net_to_host_u16 (t->src_port), + clib_net_to_host_u16 (t->dst_port), format_tcp_state, t->state); + + return s; +} + +#define filter_flags 
(TCP_FLAG_SYN|TCP_FLAG_ACK|TCP_FLAG_RST|TCP_FLAG_FIN) + +always_inline uword +tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_ip4) +{ + u32 n_left_from, next_index, *from, *to_next; + u32 my_thread_index = vm->cpu_index; + tcp_main_t *tm = vnet_get_tcp_main (); + session_manager_main_t *ssm = vnet_get_session_manager_main (); + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + tcp_header_t *tcp0 = 0; + tcp_connection_t *tc0; + ip4_header_t *ip40; + ip6_header_t *ip60; + u32 error0 = TCP_ERROR_NO_LISTENER, next0 = TCP_INPUT_NEXT_DROP; + u8 flags0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + if (is_ip4) + { + ip40 = vlib_buffer_get_current (b0); + tcp0 = ip4_next_header (ip40); + + /* lookup session */ + tc0 = + (tcp_connection_t *) stream_session_lookup_transport4 (ssm, + &ip40->dst_address, + &ip40->src_address, + tcp0->dst_port, + tcp0->src_port, + SESSION_TYPE_IP4_TCP, + my_thread_index); + } + else + { + ip60 = vlib_buffer_get_current (b0); + tcp0 = ip6_next_header (ip60); + tc0 = + (tcp_connection_t *) stream_session_lookup_transport6 (ssm, + &ip60->src_address, + &ip60->dst_address, + tcp0->src_port, + tcp0->dst_port, + SESSION_TYPE_IP6_TCP, + my_thread_index); + } + + /* Session exists */ + if (PREDICT_TRUE (0 != tc0)) + { + /* Save connection index */ + vnet_buffer (b0)->tcp.connection_index = tc0->c_c_index; + vnet_buffer (b0)->tcp.seq_number = + clib_net_to_host_u32 (tcp0->seq_number); + vnet_buffer (b0)->tcp.ack_number = + clib_net_to_host_u32 (tcp0->ack_number); + + flags0 = tcp0->flags & filter_flags; + next0 
= tm->dispatch_table[tc0->state][flags0].next; + error0 = tm->dispatch_table[tc0->state][flags0].error; + + if (PREDICT_FALSE (error0 == TCP_ERROR_DISPATCH)) + { + /* Overload tcp flags to store state */ + vnet_buffer (b0)->tcp.flags = tc0->state; + } + } + else + { + /* Send reset */ + next0 = TCP_INPUT_NEXT_RESET; + error0 = TCP_ERROR_NO_LISTENER; + vnet_buffer (b0)->tcp.flags = 0; + } + + b0->error = error0 ? node->errors[error0] : 0; + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return from_frame->n_vectors; +} + +static uword +tcp4_input (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_input_inline (vm, node, from_frame, 1 /* is_ip4 */ ); +} + +static uword +tcp6_input (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_input_inline (vm, node, from_frame, 0 /* is_ip4 */ ); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp4_input_node) = +{ + .function = tcp4_input, + .name = "tcp4-input", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_INPUT_N_NEXT, + .next_nodes = + { +#define _(s,n) [TCP_INPUT_NEXT_##s] = n, + foreach_tcp4_input_next +#undef _ + }, + .format_buffer = format_tcp_header, + .format_trace = format_tcp_rx_trace, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (tcp4_input_node, tcp4_input); + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp6_input_node) = +{ + .function = tcp6_input, + .name = "tcp6-input", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_INPUT_N_NEXT, + .next_nodes = + { +#define _(s,n) [TCP_INPUT_NEXT_##s] = n, + foreach_tcp6_input_next +#undef _ + }, + .format_buffer = format_tcp_header, + .format_trace = format_tcp_rx_trace, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (tcp6_input_node, tcp6_input); +void +tcp_update_time (f64 now, u32 thread_index) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + tw_timer_expire_timers_16t_2w_512sl (&tm->timer_wheels[thread_index], now); +} + +static void +tcp_dispatch_table_init (tcp_main_t * tm) +{ + int i, j; + for (i = 0; i < ARRAY_LEN (tm->dispatch_table); i++) + for (j = 0; j < ARRAY_LEN (tm->dispatch_table[i]); j++) + { + tm->dispatch_table[i][j].next = TCP_INPUT_NEXT_DROP; + tm->dispatch_table[i][j].error = TCP_ERROR_DISPATCH; + } + +#define _(t,f,n,e) \ +do { \ + tm->dispatch_table[TCP_STATE_##t][f].next = (n); \ + tm->dispatch_table[TCP_STATE_##t][f].error = (e); \ +} while (0) + + /* SYNs for new connections -> tcp-listen. */ + _(LISTEN, TCP_FLAG_SYN, TCP_INPUT_NEXT_LISTEN, TCP_ERROR_NONE); + /* ACK for for a SYN-ACK -> tcp-rcv-process. */ + _(SYN_RCVD, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); + /* SYN-ACK for a SYN */ + _(SYN_SENT, TCP_FLAG_SYN | TCP_FLAG_ACK, TCP_INPUT_NEXT_SYN_SENT, + TCP_ERROR_NONE); + _(SYN_SENT, TCP_FLAG_ACK, TCP_INPUT_NEXT_SYN_SENT, TCP_ERROR_NONE); + _(SYN_SENT, TCP_FLAG_RST, TCP_INPUT_NEXT_SYN_SENT, TCP_ERROR_NONE); + _(SYN_SENT, TCP_FLAG_RST | TCP_FLAG_ACK, TCP_INPUT_NEXT_SYN_SENT, + TCP_ERROR_NONE); + /* ACK for for established connection -> tcp-established. */ + _(ESTABLISHED, TCP_FLAG_ACK, TCP_INPUT_NEXT_ESTABLISHED, TCP_ERROR_NONE); + /* FIN for for established connection -> tcp-established. 
*/ + _(ESTABLISHED, TCP_FLAG_FIN, TCP_INPUT_NEXT_ESTABLISHED, TCP_ERROR_NONE); + _(ESTABLISHED, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_ESTABLISHED, + TCP_ERROR_NONE); + /* ACK or FIN-ACK to our FIN */ + _(FIN_WAIT_1, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); + _(FIN_WAIT_1, TCP_FLAG_ACK | TCP_FLAG_FIN, TCP_INPUT_NEXT_RCV_PROCESS, + TCP_ERROR_NONE); + /* FIN in reply to our FIN from the other side */ + _(FIN_WAIT_1, TCP_FLAG_FIN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); + /* FIN confirming that the peer (app) has closed */ + _(FIN_WAIT_2, TCP_FLAG_FIN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); + _(FIN_WAIT_2, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, + TCP_ERROR_NONE); + _(LAST_ACK, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); +#undef _ +} + +clib_error_t * +tcp_input_init (vlib_main_t * vm) +{ + clib_error_t *error = 0; + tcp_main_t *tm = vnet_get_tcp_main (); + + if ((error = vlib_call_init_function (vm, tcp_init))) + return error; + + /* Initialize dispatch table. */ + tcp_dispatch_table_init (tm); + + return error; +} + +VLIB_INIT_FUNCTION (tcp_input_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/tcp/tcp_newreno.c b/src/vnet/tcp/tcp_newreno.c new file mode 100644 index 00000000..856dffe4 --- /dev/null +++ b/src/vnet/tcp/tcp_newreno.c @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +void +newreno_congestion (tcp_connection_t * tc) +{ + tc->prev_ssthresh = tc->ssthresh; + tc->ssthresh = clib_max (tcp_flight_size (tc) / 2, 2 * tc->snd_mss); +} + +void +newreno_recovered (tcp_connection_t * tc) +{ + tc->cwnd = tc->ssthresh; +} + +void +newreno_rcv_ack (tcp_connection_t * tc) +{ + if (tcp_in_slowstart (tc)) + { + tc->cwnd += clib_min (tc->snd_mss, tc->bytes_acked); + } + else + { + /* Round up to 1 if needed */ + tc->cwnd += clib_max (tc->snd_mss * tc->snd_mss / tc->cwnd, 1); + } +} + +void +newreno_rcv_cong_ack (tcp_connection_t * tc, tcp_cc_ack_t ack_type) +{ + if (ack_type == TCP_CC_DUPACK) + { + tc->cwnd += tc->snd_mss; + } + else if (ack_type == TCP_CC_PARTIALACK) + { + tc->cwnd -= tc->bytes_acked; + if (tc->bytes_acked > tc->snd_mss) + tc->bytes_acked += tc->snd_mss; + } +} + +void +newreno_conn_init (tcp_connection_t * tc) +{ + tc->ssthresh = tc->snd_wnd; + tc->cwnd = tcp_initial_cwnd (tc); +} + +const static tcp_cc_algorithm_t tcp_newreno = { + .congestion = newreno_congestion, + .recovered = newreno_recovered, + .rcv_ack = newreno_rcv_ack, + .rcv_cong_ack = newreno_rcv_cong_ack, + .init = newreno_conn_init +}; + +clib_error_t * +newreno_init (vlib_main_t * vm) +{ + clib_error_t *error = 0; + + tcp_cc_algo_register (TCP_CC_NEWRENO, &tcp_newreno); + + return error; +} + +VLIB_INIT_FUNCTION (newreno_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c new file mode 100644 index 00000000..dbcf1f74 --- /dev/null +++ b/src/vnet/tcp/tcp_output.c @@ -0,0 +1,1412 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +vlib_node_registration_t tcp4_output_node; +vlib_node_registration_t tcp6_output_node; + +typedef enum _tcp_output_nect +{ + TCP_OUTPUT_NEXT_DROP, + TCP_OUTPUT_NEXT_IP_LOOKUP, + TCP_OUTPUT_N_NEXT +} tcp_output_next_t; + +#define foreach_tcp4_output_next \ + _ (DROP, "error-drop") \ + _ (IP_LOOKUP, "ip4-lookup") + +#define foreach_tcp6_output_next \ + _ (DROP, "error-drop") \ + _ (IP_LOOKUP, "ip6-lookup") + +static char *tcp_error_strings[] = { +#define tcp_error(n,s) s, +#include +#undef tcp_error +}; + +typedef struct +{ + u16 src_port; + u16 dst_port; + u8 state; +} tcp_tx_trace_t; + +u16 dummy_mtu = 400; + +u8 * +format_tcp_tx_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + + s = format (s, "TBD\n"); + + return s; +} + +void +tcp_set_snd_mss (tcp_connection_t * tc) +{ + u16 snd_mss; + + /* TODO find our iface MTU */ + snd_mss = dummy_mtu; + + /* TODO cache mss and consider PMTU discovery */ + snd_mss = tc->opt.mss < snd_mss ? tc->opt.mss : snd_mss; + + tc->snd_mss = snd_mss; + + if (tc->snd_mss == 0) + { + clib_warning ("snd mss is 0"); + tc->snd_mss = dummy_mtu; + } +} + +static u8 +tcp_window_compute_scale (u32 available_space) +{ + u8 wnd_scale = 0; + while (wnd_scale < TCP_MAX_WND_SCALE + && (available_space >> wnd_scale) > TCP_WND_MAX) + wnd_scale++; + return wnd_scale; +} + +/** + * Compute initial window and scale factor. 
As per RFC1323, window field in + * SYN and SYN-ACK segments is never scaled. + */ +u32 +tcp_initial_window_to_advertise (tcp_connection_t * tc) +{ + u32 available_space; + + /* Initial wnd for SYN. Fifos are not allocated yet. + * Use some predefined value */ + if (tc->state != TCP_STATE_SYN_RCVD) + { + return TCP_DEFAULT_RX_FIFO_SIZE; + } + + available_space = stream_session_max_enqueue (&tc->connection); + tc->rcv_wscale = tcp_window_compute_scale (available_space); + tc->rcv_wnd = clib_min (available_space, TCP_WND_MAX << tc->rcv_wscale); + + return clib_min (tc->rcv_wnd, TCP_WND_MAX); +} + +/** + * Compute and return window to advertise, scaled as per RFC1323 + */ +u32 +tcp_window_to_advertise (tcp_connection_t * tc, tcp_state_t state) +{ + u32 available_space, wnd, scaled_space; + + if (state != TCP_STATE_ESTABLISHED) + return tcp_initial_window_to_advertise (tc); + + available_space = stream_session_max_enqueue (&tc->connection); + scaled_space = available_space >> tc->rcv_wscale; + + /* Need to update scale */ + if (PREDICT_FALSE ((scaled_space == 0 && available_space != 0)) + || (scaled_space >= TCP_WND_MAX)) + tc->rcv_wscale = tcp_window_compute_scale (available_space); + + wnd = clib_min (available_space, TCP_WND_MAX << tc->rcv_wscale); + tc->rcv_wnd = wnd; + + return wnd >> tc->rcv_wscale; +} + +/** + * Write TCP options to segment. 
+ */ +u32 +tcp_options_write (u8 * data, tcp_options_t * opts) +{ + u32 opts_len = 0; + u32 buf, seq_len = 4; + + if (tcp_opts_mss (opts)) + { + *data++ = TCP_OPTION_MSS; + *data++ = TCP_OPTION_LEN_MSS; + buf = clib_host_to_net_u16 (opts->mss); + clib_memcpy (data, &buf, sizeof (opts->mss)); + data += sizeof (opts->mss); + opts_len += TCP_OPTION_LEN_MSS; + } + + if (tcp_opts_wscale (opts)) + { + *data++ = TCP_OPTION_WINDOW_SCALE; + *data++ = TCP_OPTION_LEN_WINDOW_SCALE; + *data++ = opts->wscale; + opts_len += TCP_OPTION_LEN_WINDOW_SCALE; + } + + if (tcp_opts_sack_permitted (opts)) + { + *data++ = TCP_OPTION_SACK_PERMITTED; + *data++ = TCP_OPTION_LEN_SACK_PERMITTED; + opts_len += TCP_OPTION_LEN_SACK_PERMITTED; + } + + if (tcp_opts_tstamp (opts)) + { + *data++ = TCP_OPTION_TIMESTAMP; + *data++ = TCP_OPTION_LEN_TIMESTAMP; + buf = clib_host_to_net_u32 (opts->tsval); + clib_memcpy (data, &buf, sizeof (opts->tsval)); + data += sizeof (opts->tsval); + buf = clib_host_to_net_u32 (opts->tsecr); + clib_memcpy (data, &buf, sizeof (opts->tsecr)); + data += sizeof (opts->tsecr); + opts_len += TCP_OPTION_LEN_TIMESTAMP; + } + + if (tcp_opts_sack (opts)) + { + int i; + u32 n_sack_blocks = clib_min (vec_len (opts->sacks), + TCP_OPTS_MAX_SACK_BLOCKS); + + if (n_sack_blocks != 0) + { + *data++ = TCP_OPTION_SACK_BLOCK; + *data++ = 2 + n_sack_blocks * TCP_OPTION_LEN_SACK_BLOCK; + for (i = 0; i < n_sack_blocks; i++) + { + buf = clib_host_to_net_u32 (opts->sacks[i].start); + clib_memcpy (data, &buf, seq_len); + data += seq_len; + buf = clib_host_to_net_u32 (opts->sacks[i].end); + clib_memcpy (data, &buf, seq_len); + data += seq_len; + } + opts_len += 2 + n_sack_blocks * TCP_OPTION_LEN_SACK_BLOCK; + } + } + + /* Terminate TCP options */ + if (opts_len % 4) + { + *data++ = TCP_OPTION_EOL; + opts_len += TCP_OPTION_LEN_EOL; + } + + /* Pad with zeroes to a u32 boundary */ + while (opts_len % 4) + { + *data++ = TCP_OPTION_NOOP; + opts_len += TCP_OPTION_LEN_NOOP; + } + return opts_len; +} + 
+always_inline int +tcp_make_syn_options (tcp_options_t * opts, u32 initial_wnd) +{ + u8 len = 0; + + opts->flags |= TCP_OPTS_FLAG_MSS; + opts->mss = dummy_mtu; /*XXX discover that */ + len += TCP_OPTION_LEN_MSS; + + opts->flags |= TCP_OPTS_FLAG_WSCALE; + opts->wscale = tcp_window_compute_scale (initial_wnd); + len += TCP_OPTION_LEN_WINDOW_SCALE; + + opts->flags |= TCP_OPTS_FLAG_TSTAMP; + opts->tsval = tcp_time_now (); + opts->tsecr = 0; + len += TCP_OPTION_LEN_TIMESTAMP; + + opts->flags |= TCP_OPTS_FLAG_SACK_PERMITTED; + len += TCP_OPTION_LEN_SACK_PERMITTED; + + /* Align to needed boundary */ + len += (TCP_OPTS_ALIGN - len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN; + return len; +} + +always_inline int +tcp_make_synack_options (tcp_connection_t * tc, tcp_options_t * opts) +{ + u8 len = 0; + + opts->flags |= TCP_OPTS_FLAG_MSS; + opts->mss = dummy_mtu; /*XXX discover that */ + len += TCP_OPTION_LEN_MSS; + + if (tcp_opts_wscale (&tc->opt)) + { + opts->flags |= TCP_OPTS_FLAG_WSCALE; + opts->wscale = tc->rcv_wscale; + len += TCP_OPTION_LEN_WINDOW_SCALE; + } + + if (tcp_opts_tstamp (&tc->opt)) + { + opts->flags |= TCP_OPTS_FLAG_TSTAMP; + opts->tsval = tcp_time_now (); + opts->tsecr = tc->tsval_recent; + len += TCP_OPTION_LEN_TIMESTAMP; + } + + if (tcp_opts_sack_permitted (&tc->opt)) + { + opts->flags |= TCP_OPTS_FLAG_SACK_PERMITTED; + len += TCP_OPTION_LEN_SACK_PERMITTED; + } + + /* Align to needed boundary */ + len += (TCP_OPTS_ALIGN - len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN; + return len; +} + +always_inline int +tcp_make_established_options (tcp_connection_t * tc, tcp_options_t * opts) +{ + u8 len = 0; + + opts->flags = 0; + + if (tcp_opts_tstamp (&tc->opt)) + { + opts->flags |= TCP_OPTS_FLAG_TSTAMP; + opts->tsval = tcp_time_now (); + opts->tsecr = tc->tsval_recent; + len += TCP_OPTION_LEN_TIMESTAMP; + } + if (tcp_opts_sack_permitted (&tc->opt)) + { + if (vec_len (tc->snd_sacks)) + { + opts->flags |= TCP_OPTS_FLAG_SACK; + opts->sacks = tc->snd_sacks; + opts->n_sack_blocks = 
vec_len (tc->snd_sacks); + len += 2 + TCP_OPTION_LEN_SACK_BLOCK * opts->n_sack_blocks; + } + } + + /* Align to needed boundary */ + len += (TCP_OPTS_ALIGN - len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN; + return len; +} + +always_inline int +tcp_make_options (tcp_connection_t * tc, tcp_options_t * opts, + tcp_state_t state) +{ + switch (state) + { + case TCP_STATE_ESTABLISHED: + case TCP_STATE_FIN_WAIT_1: + return tcp_make_established_options (tc, opts); + case TCP_STATE_SYN_RCVD: + return tcp_make_synack_options (tc, opts); + case TCP_STATE_SYN_SENT: + return tcp_make_syn_options (opts, + tcp_initial_window_to_advertise (tc)); + default: + clib_warning ("Not handled!"); + return 0; + } +} + +#define tcp_get_free_buffer_index(tm, bidx) \ +do { \ + u32 *my_tx_buffers, n_free_buffers; \ + u32 cpu_index = tm->vlib_main->cpu_index; \ + my_tx_buffers = tm->tx_buffers[cpu_index]; \ + if (PREDICT_FALSE(vec_len (my_tx_buffers) == 0)) \ + { \ + n_free_buffers = 32; /* TODO config or macro */ \ + vec_validate (my_tx_buffers, n_free_buffers - 1); \ + _vec_len(my_tx_buffers) = vlib_buffer_alloc_from_free_list ( \ + tm->vlib_main, my_tx_buffers, n_free_buffers, \ + VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); \ + tm->tx_buffers[cpu_index] = my_tx_buffers; \ + } \ + /* buffer shortage */ \ + if (PREDICT_FALSE (vec_len (my_tx_buffers) == 0)) \ + return; \ + *bidx = my_tx_buffers[_vec_len (my_tx_buffers)-1]; \ + _vec_len (my_tx_buffers) -= 1; \ +} while (0) + +always_inline void +tcp_reuse_buffer (vlib_main_t * vm, vlib_buffer_t * b) +{ + vlib_buffer_t *it = b; + do + { + it->current_data = 0; + it->current_length = 0; + it->total_length_not_including_first_buffer = 0; + } + while ((it->flags & VLIB_BUFFER_NEXT_PRESENT) + && (it = vlib_get_buffer (vm, it->next_buffer))); + + /* Leave enough space for headers */ + vlib_buffer_make_headroom (b, MAX_HDRS_LEN); +} + +/** + * Prepare ACK + */ +void +tcp_make_ack_i (tcp_connection_t * tc, vlib_buffer_t * b, tcp_state_t state, + u8 flags) +{ + 
tcp_options_t _snd_opts, *snd_opts = &_snd_opts; + u8 tcp_opts_len, tcp_hdr_opts_len; + tcp_header_t *th; + u16 wnd; + + wnd = tcp_window_to_advertise (tc, state); + + /* Make and write options */ + tcp_opts_len = tcp_make_established_options (tc, snd_opts); + tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t); + + th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->snd_nxt, + tc->rcv_nxt, tcp_hdr_opts_len, flags, wnd); + + tcp_options_write ((u8 *) (th + 1), snd_opts); + + /* Mark as ACK */ + vnet_buffer (b)->tcp.connection_index = tc->c_c_index; +} + +/** + * Convert buffer to ACK + */ +void +tcp_make_ack (tcp_connection_t * tc, vlib_buffer_t * b) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + vlib_main_t *vm = tm->vlib_main; + + tcp_reuse_buffer (vm, b); + tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, TCP_FLAG_ACK); + vnet_buffer (b)->tcp.flags = TCP_BUF_FLAG_ACK; +} + +/** + * Convert buffer to FIN-ACK + */ +void +tcp_make_finack (tcp_connection_t * tc, vlib_buffer_t * b) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + vlib_main_t *vm = tm->vlib_main; + + tcp_reuse_buffer (vm, b); + tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, TCP_FLAG_ACK | TCP_FLAG_FIN); + + /* Reset flags, make sure ack is sent */ + tc->flags = TCP_CONN_SNDACK; + vnet_buffer (b)->tcp.flags &= ~TCP_BUF_FLAG_DUPACK; + + tc->snd_nxt += 1; +} + +/** + * Convert buffer to SYN-ACK + */ +void +tcp_make_synack (tcp_connection_t * tc, vlib_buffer_t * b) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + vlib_main_t *vm = tm->vlib_main; + tcp_options_t _snd_opts, *snd_opts = &_snd_opts; + u8 tcp_opts_len, tcp_hdr_opts_len; + tcp_header_t *th; + u16 initial_wnd; + u32 time_now; + + memset (snd_opts, 0, sizeof (*snd_opts)); + + tcp_reuse_buffer (vm, b); + + /* Set random initial sequence */ + time_now = tcp_time_now (); + + tc->iss = random_u32 (&time_now); + tc->snd_una = tc->iss; + tc->snd_nxt = tc->iss + 1; + tc->snd_una_max = tc->snd_nxt; + + initial_wnd = tcp_initial_window_to_advertise 
(tc); + + /* Make and write options */ + tcp_opts_len = tcp_make_synack_options (tc, snd_opts); + tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t); + + th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->iss, + tc->rcv_nxt, tcp_hdr_opts_len, + TCP_FLAG_SYN | TCP_FLAG_ACK, initial_wnd); + + tcp_options_write ((u8 *) (th + 1), snd_opts); + + vnet_buffer (b)->tcp.connection_index = tc->c_c_index; + vnet_buffer (b)->tcp.flags = TCP_BUF_FLAG_ACK; + + /* Init retransmit timer */ + tcp_retransmit_timer_set (tm, tc); +} + +always_inline void +tcp_enqueue_to_ip_lookup (vlib_main_t * vm, vlib_buffer_t * b, u32 bi, + u8 is_ip4) +{ + u32 *to_next, next_index; + vlib_frame_t *f; + + b->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; + b->error = 0; + + /* Default FIB for now */ + vnet_buffer (b)->sw_if_index[VLIB_TX] = 0; + + /* Send to IP lookup */ + next_index = is_ip4 ? ip4_lookup_node.index : ip6_lookup_node.index; + f = vlib_get_frame_to_node (vm, next_index); + + /* Enqueue the packet */ + to_next = vlib_frame_vector_args (f); + to_next[0] = bi; + f->n_vectors = 1; + vlib_put_frame_to_node (vm, next_index, f); +} + +int +tcp_make_reset_in_place (vlib_main_t * vm, vlib_buffer_t * b0, + tcp_state_t state, u32 my_thread_index, u8 is_ip4) +{ + u8 tcp_hdr_len = sizeof (tcp_header_t); + ip4_header_t *ih4; + ip6_header_t *ih6; + tcp_header_t *th0; + ip4_address_t src_ip40; + ip6_address_t src_ip60; + u16 src_port0; + u32 tmp; + + /* Find IP and TCP headers */ + if (is_ip4) + { + ih4 = vlib_buffer_get_current (b0); + th0 = ip4_next_header (ih4); + } + else + { + ih6 = vlib_buffer_get_current (b0); + th0 = ip6_next_header (ih6); + } + + /* Swap src and dst ip */ + if (is_ip4) + { + ASSERT ((ih4->ip_version_and_header_length & 0xF0) == 0x40); + src_ip40.as_u32 = ih4->src_address.as_u32; + ih4->src_address.as_u32 = ih4->dst_address.as_u32; + ih4->dst_address.as_u32 = src_ip40.as_u32; + + /* Chop the end of the pkt */ + b0->current_length += ip4_header_bytes (ih4) + 
tcp_hdr_len; + } + else + { + ASSERT ((ih6->ip_version_traffic_class_and_flow_label & 0xF0) == 0x60); + clib_memcpy (&src_ip60, &ih6->src_address, sizeof (ip6_address_t)); + clib_memcpy (&ih6->src_address, &ih6->dst_address, + sizeof (ip6_address_t)); + clib_memcpy (&ih6->dst_address, &src_ip60, sizeof (ip6_address_t)); + + /* Chop the end of the pkt */ + b0->current_length += sizeof (ip6_header_t) + tcp_hdr_len; + } + + /* Try to determine what/why we're actually resetting and swap + * src and dst ports */ + if (state == TCP_STATE_CLOSED) + { + if (!tcp_syn (th0)) + return -1; + + tmp = clib_net_to_host_u32 (th0->seq_number); + + /* Got a SYN for no listener. */ + th0->flags = TCP_FLAG_RST | TCP_FLAG_ACK; + th0->ack_number = clib_host_to_net_u32 (tmp + 1); + th0->seq_number = 0; + + } + else if (state >= TCP_STATE_SYN_SENT) + { + th0->flags = TCP_FLAG_RST | TCP_FLAG_ACK; + th0->seq_number = th0->ack_number; + th0->ack_number = 0; + } + + src_port0 = th0->src_port; + th0->src_port = th0->dst_port; + th0->dst_port = src_port0; + th0->window = 0; + th0->data_offset_and_reserved = (tcp_hdr_len >> 2) << 4; + th0->urgent_pointer = 0; + + /* Compute checksum */ + if (is_ip4) + { + th0->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ih4); + } + else + { + int bogus = ~0; + th0->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b0, ih6, &bogus); + ASSERT (!bogus); + } + + return 0; +} + +/** + * Send reset without reusing existing buffer + */ +void +tcp_send_reset (vlib_buffer_t * pkt, u8 is_ip4) +{ + vlib_buffer_t *b; + u32 bi; + tcp_main_t *tm = vnet_get_tcp_main (); + vlib_main_t *vm = tm->vlib_main; + u8 tcp_hdr_len, flags = 0; + tcp_header_t *th, *pkt_th; + u32 seq, ack; + ip4_header_t *ih4, *pkt_ih4; + ip6_header_t *ih6, *pkt_ih6; + + tcp_get_free_buffer_index (tm, &bi); + b = vlib_get_buffer (vm, bi); + + /* Leave enough space for headers */ + vlib_buffer_make_headroom (b, MAX_HDRS_LEN); + + /* Make and write options */ + tcp_hdr_len = sizeof (tcp_header_t); + + 
if (is_ip4) + { + pkt_ih4 = vlib_buffer_get_current (pkt); + pkt_th = ip4_next_header (pkt_ih4); + } + else + { + pkt_ih6 = vlib_buffer_get_current (pkt); + pkt_th = ip6_next_header (pkt_ih6); + } + + /* Pick RST seq/ack from the offending segment; both are kept in network order */ + if (tcp_ack (pkt_th)) + { + flags = TCP_FLAG_RST; + seq = pkt_th->ack_number; + ack = 0; + } + else + { + flags = TCP_FLAG_RST | TCP_FLAG_ACK; + seq = 0; + ack = clib_host_to_net_u32 (vnet_buffer (pkt)->tcp.seq_end); + } + + th = vlib_buffer_push_tcp_net_order (b, pkt_th->dst_port, pkt_th->src_port, + seq, ack, tcp_hdr_len, flags, 0); + + /* Swap src and dst ip */ + if (is_ip4) + { + ASSERT ((pkt_ih4->ip_version_and_header_length & 0xF0) == 0x40); + ih4 = vlib_buffer_push_ip4 (vm, b, &pkt_ih4->dst_address, + &pkt_ih4->src_address, IP_PROTOCOL_TCP); + th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih4); + } + else + { + int bogus = ~0; + /* pkt_ih6 was located above; (pkt_th - 1) steps back a TCP header, not an IPv6 header */ + ASSERT ((pkt_ih6->ip_version_traffic_class_and_flow_label & 0xF0) == + 0x60); + ih6 = + vlib_buffer_push_ip6 (vm, b, &pkt_ih6->dst_address, + &pkt_ih6->src_address, IP_PROTOCOL_TCP); + th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ih6, &bogus); + ASSERT (!bogus); + } + + tcp_enqueue_to_ip_lookup (vm, b, bi, is_ip4); +} + +void +tcp_push_ip_hdr (tcp_main_t * tm, tcp_connection_t * tc, vlib_buffer_t * b) +{ + tcp_header_t *th = vlib_buffer_get_current (b); + + if (tc->c_is_ip4) + { + ip4_header_t *ih; + ih = vlib_buffer_push_ip4 (tm->vlib_main, b, &tc->c_lcl_ip4, + &tc->c_rmt_ip4, IP_PROTOCOL_TCP); + th->checksum = ip4_tcp_udp_compute_checksum (tm->vlib_main, b, ih); + } + else + { + ip6_header_t *ih; + int bogus = ~0; + + ih = vlib_buffer_push_ip6 (tm->vlib_main, b, &tc->c_lcl_ip6, + &tc->c_rmt_ip6, IP_PROTOCOL_TCP); + th->checksum = ip6_tcp_udp_icmp_compute_checksum (tm->vlib_main, b, ih, + &bogus); + ASSERT (!bogus); + } +} + +/** + * Send SYN + * + * Builds a SYN packet for a half-open connection and sends it to ipx_lookup. 
+ * The packet is not forwarded through tcpx_output to avoid doing lookups + * in the half_open pool. + */ +void +tcp_send_syn (tcp_connection_t * tc) +{ + vlib_buffer_t *b; + u32 bi; + tcp_main_t *tm = vnet_get_tcp_main (); + vlib_main_t *vm = tm->vlib_main; + u8 tcp_hdr_opts_len, tcp_opts_len; + tcp_header_t *th; + u32 time_now; + u16 initial_wnd; + tcp_options_t snd_opts; + + tcp_get_free_buffer_index (tm, &bi); + b = vlib_get_buffer (vm, bi); + + /* Leave enough space for headers */ + vlib_buffer_make_headroom (b, MAX_HDRS_LEN); + + /* Set random initial sequence */ + time_now = tcp_time_now (); + + tc->iss = random_u32 (&time_now); + tc->snd_una = tc->iss; + tc->snd_una_max = tc->snd_nxt = tc->iss + 1; + + initial_wnd = tcp_initial_window_to_advertise (tc); + + /* Make and write options */ + memset (&snd_opts, 0, sizeof (snd_opts)); + tcp_opts_len = tcp_make_syn_options (&snd_opts, initial_wnd); + tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t); + + th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->iss, + tc->rcv_nxt, tcp_hdr_opts_len, TCP_FLAG_SYN, + initial_wnd); + + tcp_options_write ((u8 *) (th + 1), &snd_opts); + + /* Measure RTT with this */ + tc->rtt_ts = tcp_time_now (); + tc->rtt_seq = tc->snd_nxt; + + /* Start retransmit timer */ + tcp_timer_set (tc, TCP_TIMER_RETRANSMIT_SYN, tc->rto * TCP_TO_TIMER_TICK); + tc->rto_boff = 0; + + /* Set the connection establishment timer */ + tcp_timer_set (tc, TCP_TIMER_ESTABLISH, TCP_ESTABLISH_TIME); + + tcp_push_ip_hdr (tm, tc, b); + tcp_enqueue_to_ip_lookup (vm, b, bi, tc->c_is_ip4); +} + +always_inline void +tcp_enqueue_to_output (vlib_main_t * vm, vlib_buffer_t * b, u32 bi, u8 is_ip4) +{ + u32 *to_next, next_index; + vlib_frame_t *f; + + b->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; + b->error = 0; + + /* Decide where to send the packet */ + next_index = is_ip4 ? 
tcp4_output_node.index : tcp6_output_node.index; + f = vlib_get_frame_to_node (vm, next_index); + + /* Enqueue the packet */ + to_next = vlib_frame_vector_args (f); + to_next[0] = bi; + f->n_vectors = 1; + vlib_put_frame_to_node (vm, next_index, f); +} + +/** + * Send FIN + */ +void +tcp_send_fin (tcp_connection_t * tc) +{ + vlib_buffer_t *b; + u32 bi; + tcp_main_t *tm = vnet_get_tcp_main (); + vlib_main_t *vm = tm->vlib_main; + + tcp_get_free_buffer_index (tm, &bi); + b = vlib_get_buffer (vm, bi); + + /* Leave enough space for headers */ + vlib_buffer_make_headroom (b, MAX_HDRS_LEN); + + tcp_make_finack (tc, b); + + tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4); +} + +always_inline u8 +tcp_make_state_flags (tcp_state_t next_state) +{ + switch (next_state) + { + case TCP_STATE_ESTABLISHED: + return TCP_FLAG_ACK; + case TCP_STATE_SYN_RCVD: + return TCP_FLAG_SYN | TCP_FLAG_ACK; + case TCP_STATE_SYN_SENT: + return TCP_FLAG_SYN; + case TCP_STATE_LAST_ACK: + case TCP_STATE_FIN_WAIT_1: + return TCP_FLAG_FIN; + default: + clib_warning ("Shouldn't be here!"); + } + return 0; +} + +/** + * Push TCP header and update connection variables + */ +static void +tcp_push_hdr_i (tcp_connection_t * tc, vlib_buffer_t * b, + tcp_state_t next_state) +{ + u32 advertise_wnd, data_len; + u8 tcp_opts_len, tcp_hdr_opts_len, opts_write_len, flags; + tcp_options_t _snd_opts, *snd_opts = &_snd_opts; + tcp_header_t *th; + + data_len = b->current_length; + vnet_buffer (b)->tcp.flags = 0; + + /* Make and write options */ + memset (snd_opts, 0, sizeof (*snd_opts)); + tcp_opts_len = tcp_make_options (tc, snd_opts, next_state); + tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t); + + /* Get rcv window to advertise */ + advertise_wnd = tcp_window_to_advertise (tc, next_state); + flags = tcp_make_state_flags (next_state); + + /* Push header and options */ + th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->snd_nxt, + tc->rcv_nxt, tcp_hdr_opts_len, flags, + advertise_wnd); + + 
opts_write_len = tcp_options_write ((u8 *) (th + 1), snd_opts); + + ASSERT (opts_write_len == tcp_opts_len); + + /* Tag the buffer with the connection index */ + vnet_buffer (b)->tcp.connection_index = tc->c_c_index; + + tc->snd_nxt += data_len; +} + +/* Send delayed ACK when timer expires */ +void +tcp_timer_delack_handler (u32 index) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + vlib_main_t *vm = tm->vlib_main; + u32 thread_index = os_get_cpu_number (); + tcp_connection_t *tc; + vlib_buffer_t *b; + u32 bi; + + tc = tcp_connection_get (index, thread_index); + + /* Get buffer */ + tcp_get_free_buffer_index (tm, &bi); + b = vlib_get_buffer (vm, bi); + + /* Fill in the ACK */ + tcp_make_ack (tc, b); + + tc->timers[TCP_TIMER_DELACK] = TCP_TIMER_HANDLE_INVALID; + tc->flags &= ~TCP_CONN_DELACK; + + tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4); +} + +/** Build a retransmit segment + * + * @return the number of bytes in the segment or 0 if there's nothing to + * retransmit + * */ +u32 +tcp_prepare_retransmit_segment (tcp_connection_t * tc, vlib_buffer_t * b, + u32 max_bytes) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + vlib_main_t *vm = tm->vlib_main; + u32 n_bytes, offset = 0; + sack_scoreboard_hole_t *hole; + u32 hole_size; + + tcp_reuse_buffer (vm, b); + + ASSERT (tc->state == TCP_STATE_ESTABLISHED); + ASSERT (max_bytes != 0); + + if (tcp_opts_sack_permitted (&tc->opt)) + { + /* XXX get first hole not retransmitted yet */ + hole = scoreboard_first_hole (&tc->sack_sb); + if (!hole) + return 0; + + offset = hole->start - tc->snd_una; + hole_size = hole->end - hole->start; + + ASSERT (hole_size); + + if (hole_size < max_bytes) + max_bytes = hole_size; + } + else + { + if (seq_geq (tc->snd_nxt, tc->snd_una_max)) + return 0; + } + + n_bytes = stream_session_peek_bytes (&tc->connection, + vlib_buffer_get_current (b), offset, + max_bytes); + ASSERT (n_bytes != 0); + + tc->snd_nxt += n_bytes; + tcp_push_hdr_i (tc, b, tc->state); + + return n_bytes; +} + +static void 
+tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + vlib_main_t *vm = tm->vlib_main; + u32 thread_index = os_get_cpu_number (); + tcp_connection_t *tc; + vlib_buffer_t *b; + u32 bi, max_bytes, snd_space; + + /* Fetch the connection and make sure the handle of the timer that + * fired is set to invalid. SYN retransmits use their own timer slot. */ + if (is_syn) + { + tc = tcp_half_open_connection_get (index); + tc->timers[TCP_TIMER_RETRANSMIT_SYN] = TCP_TIMER_HANDLE_INVALID; + } + else + { + tc = tcp_connection_get (index, thread_index); + tc->timers[TCP_TIMER_RETRANSMIT] = TCP_TIMER_HANDLE_INVALID; + } + + /* Increment RTO backoff (also equal to number of retries) */ + tc->rto_boff += 1; + + /* Go back to first un-acked byte */ + tc->snd_nxt = tc->snd_una; + + /* Get buffer */ + tcp_get_free_buffer_index (tm, &bi); + b = vlib_get_buffer (vm, bi); + + if (tc->state == TCP_STATE_ESTABLISHED) + { + tcp_fastrecovery_off (tc); + + /* Exponential backoff */ + tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX); + + /* Figure out what and how many bytes we can send */ + snd_space = tcp_available_snd_space (tc); + max_bytes = clib_min (tc->snd_mss, snd_space); + tcp_prepare_retransmit_segment (tc, b, max_bytes); + + tc->rtx_bytes += max_bytes; + + /* No fancy recovery for now! */ + scoreboard_clear (&tc->sack_sb); + } + else + { + /* Retransmit for SYN/SYNACK */ + ASSERT (tc->state == TCP_STATE_SYN_RCVD + || tc->state == TCP_STATE_SYN_SENT); + + /* Try without increasing RTO a number of times. 
If this fails, + * start growing RTO exponentially */ + if (tc->rto_boff > TCP_RTO_SYN_RETRIES) + tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX); + + vlib_buffer_make_headroom (b, MAX_HDRS_LEN); + tcp_push_hdr_i (tc, b, tc->state); + } + + if (!is_syn) + { + tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4); + + /* Re-enable retransmit timer */ + tcp_retransmit_timer_set (tm, tc); + } + else + { + ASSERT (tc->state == TCP_STATE_SYN_SENT); + + /* This goes straight to ipx_lookup */ + tcp_push_ip_hdr (tm, tc, b); + tcp_enqueue_to_ip_lookup (vm, b, bi, tc->c_is_ip4); + + /* Re-enable retransmit timer */ + tcp_timer_set (tc, TCP_TIMER_RETRANSMIT_SYN, + tc->rto * TCP_TO_TIMER_TICK); + } +} + +void +tcp_timer_retransmit_handler (u32 index) +{ + tcp_timer_retransmit_handler_i (index, 0); +} + +void +tcp_timer_retransmit_syn_handler (u32 index) +{ + tcp_timer_retransmit_handler_i (index, 1); +} + +/** + * Retransmit first unacked segment */ +void +tcp_retransmit_first_unacked (tcp_connection_t * tc) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + u32 snd_nxt = tc->snd_nxt; + vlib_buffer_t *b; + u32 bi; + + tc->snd_nxt = tc->snd_una; + + /* Get buffer */ + tcp_get_free_buffer_index (tm, &bi); + b = vlib_get_buffer (tm->vlib_main, bi); + + tcp_prepare_retransmit_segment (tc, b, tc->snd_mss); + tcp_enqueue_to_output (tm->vlib_main, b, bi, tc->c_is_ip4); + + tc->snd_nxt = snd_nxt; + tc->rtx_bytes += tc->snd_mss; +} + +void +tcp_fast_retransmit (tcp_connection_t * tc) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + u32 snd_space, max_bytes, n_bytes, bi; + vlib_buffer_t *b; + + ASSERT (tcp_in_fastrecovery (tc)); + + clib_warning ("fast retransmit!"); + + /* Start resending from first un-acked segment */ + tc->snd_nxt = tc->snd_una; + + snd_space = tcp_available_snd_space (tc); + + while (snd_space) + { + tcp_get_free_buffer_index (tm, &bi); + b = vlib_get_buffer (tm->vlib_main, bi); + + max_bytes = clib_min (tc->snd_mss, snd_space); + n_bytes = tcp_prepare_retransmit_segment (tc, b, 
max_bytes); + + /* Nothing left to retransmit */ + if (n_bytes == 0) + return; + + tcp_enqueue_to_output (tm->vlib_main, b, bi, tc->c_is_ip4); + + snd_space -= n_bytes; + } + + /* If window allows, send new data */ + tc->snd_nxt = tc->snd_una_max; +} + +always_inline u32 +tcp_session_has_ooo_data (tcp_connection_t * tc) +{ + stream_session_t *s = + stream_session_get (tc->c_s_index, tc->c_thread_index); + return svm_fifo_has_ooo_data (s->server_rx_fifo); +} + +always_inline uword +tcp46_output_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_ip4) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + u32 n_left_from, next_index, *from, *to_next; + u32 my_thread_index = vm->cpu_index; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + tcp_connection_t *tc0; + tcp_header_t *th0; + u32 error0 = TCP_ERROR_PKTS_SENT, next0 = TCP_OUTPUT_NEXT_IP_LOOKUP; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + tc0 = tcp_connection_get (vnet_buffer (b0)->tcp.connection_index, + my_thread_index); + th0 = vlib_buffer_get_current (b0); + + if (is_ip4) + { + ip4_header_t *ih0; + ih0 = vlib_buffer_push_ip4 (vm, b0, &tc0->c_lcl_ip4, + &tc0->c_rmt_ip4, IP_PROTOCOL_TCP); + th0->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ih0); + } + else + { + ip6_header_t *ih0; + int bogus = ~0; + + ih0 = vlib_buffer_push_ip6 (vm, b0, &tc0->c_lcl_ip6, + &tc0->c_rmt_ip6, IP_PROTOCOL_TCP); + th0->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b0, ih0, + &bogus); + ASSERT (!bogus); + } + + /* Filter out DUPACKs if there are no OOO segments left */ + if (PREDICT_FALSE + (vnet_buffer 
(b0)->tcp.flags & TCP_BUF_FLAG_DUPACK)) + { + tc0->snt_dupacks--; + ASSERT (tc0->snt_dupacks >= 0); + if (!tcp_session_has_ooo_data (tc0)) + { + error0 = TCP_ERROR_FILTERED_DUPACKS; + next0 = TCP_OUTPUT_NEXT_DROP; + goto done; + } + } + + /* Retransmitted SYNs do reach this but it should be harmless */ + tc0->rcv_las = tc0->rcv_nxt; + + /* Stop DELACK timer and fix flags */ + tc0->flags &= + ~(TCP_CONN_SNDACK | TCP_CONN_DELACK | TCP_CONN_BURSTACK); + if (tcp_timer_is_active (tc0, TCP_TIMER_DELACK)) + { + tcp_timer_reset (tc0, TCP_TIMER_DELACK); + } + + /* If not retransmitting + * 1) update snd_una_max (SYN, SYNACK, new data, FIN) + * 2) If we're not tracking an ACK, start tracking */ + if (seq_lt (tc0->snd_una_max, tc0->snd_nxt)) + { + tc0->snd_una_max = tc0->snd_nxt; + if (tc0->rtt_ts == 0) + { + tc0->rtt_ts = tcp_time_now (); + tc0->rtt_seq = tc0->snd_nxt; + } + } + + /* Set the retransmit timer if not set already and not + * doing a pure ACK */ + if (!tcp_timer_is_active (tc0, TCP_TIMER_RETRANSMIT) + && tc0->snd_nxt != tc0->snd_una) + { + tcp_retransmit_timer_set (tm, tc0); + tc0->rto_boff = 0; + } + + /* set fib index to default and lookup node */ + /* XXX network virtualization (vrf/vni) */ + vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + + b0->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; + + done: + b0->error = error0 != 0 ? 
node->errors[error0] : 0; + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return from_frame->n_vectors; +} + +static uword +tcp4_output (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_output_inline (vm, node, from_frame, 1 /* is_ip4 */ ); +} + +static uword +tcp6_output (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_output_inline (vm, node, from_frame, 0 /* is_ip4 */ ); +} + +VLIB_REGISTER_NODE (tcp4_output_node) = +{ + .function = tcp4_output,.name = "tcp4-output", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32),.n_errors = TCP_N_ERROR,.error_strings = + tcp_error_strings,.n_next_nodes = TCP_OUTPUT_N_NEXT,.next_nodes = + { +#define _(s,n) [TCP_OUTPUT_NEXT_##s] = n, + foreach_tcp4_output_next +#undef _ + } +,.format_buffer = format_tcp_header,.format_trace = format_tcp_tx_trace,}; + +VLIB_NODE_FUNCTION_MULTIARCH (tcp4_output_node, tcp4_output) +VLIB_REGISTER_NODE (tcp6_output_node) = +{ + .function = tcp6_output,.name = "tcp6-output", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32),.n_errors = TCP_N_ERROR,.error_strings = + tcp_error_strings,.n_next_nodes = TCP_OUTPUT_N_NEXT,.next_nodes = + { +#define _(s,n) [TCP_OUTPUT_NEXT_##s] = n, + foreach_tcp6_output_next +#undef _ + } +,.format_buffer = format_tcp_header,.format_trace = format_tcp_tx_trace,}; + +VLIB_NODE_FUNCTION_MULTIARCH (tcp6_output_node, tcp6_output) u32 +tcp_push_header (transport_connection_t * tconn, vlib_buffer_t * b) +{ + tcp_connection_t *tc; + + tc = (tcp_connection_t *) tconn; + tcp_push_hdr_i (tc, b, TCP_STATE_ESTABLISHED); + return 0; +} + +typedef enum _tcp_reset_next +{ + TCP_RESET_NEXT_DROP, + TCP_RESET_NEXT_IP_LOOKUP, + TCP_RESET_N_NEXT +} tcp_reset_next_t; + +#define foreach_tcp4_reset_next \ + _(DROP, "error-drop") \ + _(IP_LOOKUP, "ip4-lookup") + +#define foreach_tcp6_reset_next \ + _(DROP, "error-drop") \ + _(IP_LOOKUP, "ip6-lookup") + +static uword +tcp46_send_reset_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame, u8 is_ip4) +{ + u32 n_left_from, next_index, *from, *to_next; + u32 my_thread_index = vm->cpu_index; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 error0 = TCP_ERROR_RST_SENT, next0 = TCP_RESET_NEXT_IP_LOOKUP; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + if (tcp_make_reset_in_place (vm, b0, vnet_buffer (b0)->tcp.flags, + my_thread_index, is_ip4)) + { + error0 = TCP_ERROR_LOOKUP_DROPS; + next0 = TCP_RESET_NEXT_DROP; + goto done; + } + + /* Prepare to send to IP lookup */ + vnet_buffer (b0)->sw_if_index[VLIB_TX] = 0; + next0 = TCP_RESET_NEXT_IP_LOOKUP; + + done: + b0->error = error0 != 0 
? node->errors[error0] : 0; + b0->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return from_frame->n_vectors; +} + +static uword +tcp4_send_reset (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_send_reset_inline (vm, node, from_frame, 1); +} + +static uword +tcp6_send_reset (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_send_reset_inline (vm, node, from_frame, 0); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp4_reset_node) = { + .function = tcp4_send_reset, + .name = "tcp4-reset", + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_RESET_N_NEXT, + .next_nodes = { +#define _(s,n) [TCP_RESET_NEXT_##s] = n, + foreach_tcp4_reset_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp6_reset_node) = { + .function = tcp6_send_reset, + .name = "tcp6-reset", + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_RESET_N_NEXT, + .next_nodes = { +#define _(s,n) [TCP_RESET_NEXT_##s] = n, + foreach_tcp6_reset_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/tcp/tcp_packet.h b/src/vnet/tcp/tcp_packet.h new file mode 100644 index 00000000..866c5fd6 --- /dev/null +++ b/src/vnet/tcp/tcp_packet.h @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_tcp_packet_h +#define included_tcp_packet_h + +#include + +/* TCP flags bit 0 first. */ +#define foreach_tcp_flag \ + _ (FIN) /**< No more data from sender. */ \ + _ (SYN) /**< Synchronize sequence numbers. */ \ + _ (RST) /**< Reset the connection. */ \ + _ (PSH) /**< Push function. */ \ + _ (ACK) /**< Ack field significant. */ \ + _ (URG) /**< Urgent pointer field significant. */ \ + _ (ECE) /**< ECN-echo. Receiver got CE packet */ \ + _ (CWR) /**< Sender reduced congestion window */ + +enum +{ +#define _(f) TCP_FLAG_BIT_##f, + foreach_tcp_flag +#undef _ + TCP_N_FLAG_BITS, +}; + +enum +{ +#define _(f) TCP_FLAG_##f = 1 << TCP_FLAG_BIT_##f, + foreach_tcp_flag +#undef _ +}; + +typedef struct _tcp_header +{ + union + { + struct + { + u16 src_port; /**< Source port. */ + u16 dst_port; /**< Destination port. */ + }; + struct + { + u16 src, dst; + }; + }; + + u32 seq_number; /**< Sequence number of the first data octet in this + * segment, except when SYN is present. If SYN + * is present the seq number is the ISN and the + * first data octet is ISN+1 */ + u32 ack_number; /**< Acknowledgement number if ACK is set. It contains + * the value of the next sequence number the sender + * of the segment is expecting to receive. */ + u8 data_offset_and_reserved; + u8 flags; /**< Flags: see the macro above */ + u16 window; /**< Number of bytes sender is willing to receive. */ + + u16 checksum; /**< Checksum of TCP pseudo header and data. */ + u16 urgent_pointer; /**< Seq number of the byte after the urgent data. 
*/ +} __attribute__ ((packed)) tcp_header_t; + +/* Flag tests that return 0 or !0 */ +#define tcp_doff(_th) ((_th)->data_offset_and_reserved >> 4) +#define tcp_fin(_th) ((_th)->flags & TCP_FLAG_FIN) +#define tcp_syn(_th) ((_th)->flags & TCP_FLAG_SYN) +#define tcp_rst(_th) ((_th)->flags & TCP_FLAG_RST) +#define tcp_psh(_th) ((_th)->flags & TCP_FLAG_PSH) +#define tcp_ack(_th) ((_th)->flags & TCP_FLAG_ACK) +#define tcp_urg(_th) ((_th)->flags & TCP_FLAG_URG) +#define tcp_ece(_th) ((_th)->flags & TCP_FLAG_ECE) +#define tcp_cwr(_th) ((_th)->flags & TCP_FLAG_CWR) + +/* Flag tests that return 0 or 1 */ +#define tcp_is_syn(_th) !!((_th)->flags & TCP_FLAG_SYN) +#define tcp_is_fin(_th) !!((_th)->flags & TCP_FLAG_FIN) + +always_inline int +tcp_header_bytes (tcp_header_t * t) +{ + return tcp_doff (t) * sizeof (u32); +} + +/* + * TCP options. + */ + +typedef enum tcp_option_type +{ + TCP_OPTION_EOL = 0, /**< End of options. */ + TCP_OPTION_NOOP = 1, /**< No operation. */ + TCP_OPTION_MSS = 2, /**< Limit MSS. */ + TCP_OPTION_WINDOW_SCALE = 3, /**< Window scale. */ + TCP_OPTION_SACK_PERMITTED = 4, /**< Selective Ack permitted. */ + TCP_OPTION_SACK_BLOCK = 5, /**< Selective Ack block. */ + TCP_OPTION_TIMESTAMP = 8, /**< Timestamps. */ + TCP_OPTION_UTO = 28, /**< User timeout. */ + TCP_OPTION_AO = 29, /**< Authentication Option. 
*/ +} tcp_option_type_t; + +#define foreach_tcp_options_flag \ + _ (MSS) /**< MSS advertised in SYN */ \ + _ (TSTAMP) /**< Timestamp capability advertised in SYN */ \ + _ (WSCALE) /**< Wnd scale capability advertised in SYN */ \ + _ (SACK_PERMITTED) /**< SACK capability advertised in SYN */ \ + _ (SACK) /**< SACK present */ + +enum +{ +#define _(f) TCP_OPTS_FLAG_BIT_##f, + foreach_tcp_options_flag +#undef _ + TCP_OPTIONS_N_FLAG_BITS, +}; + +enum +{ +#define _(f) TCP_OPTS_FLAG_##f = 1 << TCP_OPTS_FLAG_BIT_##f, + foreach_tcp_options_flag +#undef _ +}; + +typedef struct _sack_block +{ + u32 start; /**< Start sequence number */ + u32 end; /**< End sequence number */ +} sack_block_t; + +typedef struct +{ + u8 flags; /** Option flags, see above */ + + /* Received options */ + u16 mss; /**< Maximum segment size advertised by peer */ + u8 wscale; /**< Window scale advertised by peer */ + u32 tsval; /**< Peer's timestamp value */ + u32 tsecr; /**< Echoed/reflected time stamp */ + sack_block_t *sacks; /**< SACK blocks received */ + u8 n_sack_blocks; /**< Number of SACKs blocks */ +} tcp_options_t; + +/* Flag tests that return 0 or !0 */ +#define tcp_opts_mss(_to) ((_to)->flags & TCP_OPTS_FLAG_MSS) +#define tcp_opts_tstamp(_to) ((_to)->flags & TCP_OPTS_FLAG_TSTAMP) +#define tcp_opts_wscale(_to) ((_to)->flags & TCP_OPTS_FLAG_WSCALE) +#define tcp_opts_sack(_to) ((_to)->flags & TCP_OPTS_FLAG_SACK) +#define tcp_opts_sack_permitted(_to) ((_to)->flags & TCP_OPTS_FLAG_SACK_PERMITTED) + +/* TCP option lengths */ +#define TCP_OPTION_LEN_EOL 1 +#define TCP_OPTION_LEN_NOOP 1 +#define TCP_OPTION_LEN_MSS 4 +#define TCP_OPTION_LEN_WINDOW_SCALE 3 +#define TCP_OPTION_LEN_SACK_PERMITTED 2 +#define TCP_OPTION_LEN_TIMESTAMP 10 +#define TCP_OPTION_LEN_SACK_BLOCK 8 + +#define TCP_WND_MAX 65535U +#define TCP_MAX_WND_SCALE 14 /* See RFC 1323 */ +#define TCP_OPTS_ALIGN 4 +#define TCP_OPTS_MAX_SACK_BLOCKS 3 +#endif /* included_tcp_packet_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + 
* Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/tcp/tcp_pg.c b/src/vnet/tcp/tcp_pg.c new file mode 100644 index 00000000..dc324049 --- /dev/null +++ b/src/vnet/tcp/tcp_pg.c @@ -0,0 +1,236 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/tcp_pg: TCP packet-generator interface + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include + +/* TCP flags bit 0 first. */ +#define foreach_tcp_flag \ + _ (FIN) \ + _ (SYN) \ + _ (RST) \ + _ (PSH) \ + _ (ACK) \ + _ (URG) \ + _ (ECE) \ + _ (CWR) + +static void +tcp_pg_edit_function (pg_main_t * pg, + pg_stream_t * s, + pg_edit_group_t * g, + u32 * packets, + u32 n_packets) +{ + vlib_main_t * vm = vlib_get_main(); + u32 ip_offset, tcp_offset; + + tcp_offset = g->start_byte_offset; + ip_offset = (g-1)->start_byte_offset; + + while (n_packets >= 1) + { + vlib_buffer_t * p0; + ip4_header_t * ip0; + tcp_header_t * tcp0; + ip_csum_t sum0; + u32 tcp_len0; + + p0 = vlib_get_buffer (vm, packets[0]); + n_packets -= 1; + packets += 1; + + ASSERT (p0->current_data == 0); + ip0 = (void *) (p0->data + ip_offset); + tcp0 = (void *) (p0->data + tcp_offset); + tcp_len0 = clib_net_to_host_u16 (ip0->length) - sizeof (ip0[0]); + + /* Initialize checksum with header. */ + if (BITS (sum0) == 32) + { + sum0 = clib_mem_unaligned (&ip0->src_address, u32); + sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32)); + } + else + sum0 = clib_mem_unaligned (&ip0->src_address, u64); + + sum0 = ip_csum_with_carry + (sum0, clib_host_to_net_u32 (tcp_len0 + (ip0->protocol << 16))); + + /* Invalidate possibly old checksum. 
*/ + tcp0->checksum = 0; + + sum0 = ip_incremental_checksum_buffer (vm, p0, tcp_offset, tcp_len0, sum0); + + tcp0->checksum = ~ ip_csum_fold (sum0); + } +} + +typedef struct { + pg_edit_t src, dst; + pg_edit_t seq_number, ack_number; + pg_edit_t data_offset_and_reserved; +#define _(f) pg_edit_t f##_flag; + foreach_tcp_flag +#undef _ + pg_edit_t window; + pg_edit_t checksum; + pg_edit_t urgent_pointer; +} pg_tcp_header_t; + +static inline void +pg_tcp_header_init (pg_tcp_header_t * p) +{ + /* Initialize fields that are not bit fields in the IP header. */ +#define _(f) pg_edit_init (&p->f, tcp_header_t, f); + _ (src); + _ (dst); + _ (seq_number); + _ (ack_number); + _ (window); + _ (checksum); + _ (urgent_pointer); +#undef _ + + /* Initialize bit fields. */ +#define _(f) \ + pg_edit_init_bitfield (&p->f##_flag, tcp_header_t, \ + flags, \ + TCP_FLAG_BIT_##f, 1); + + foreach_tcp_flag +#undef _ + + pg_edit_init_bitfield (&p->data_offset_and_reserved, tcp_header_t, + data_offset_and_reserved, + 4, 4); +} + +uword +unformat_pg_tcp_header (unformat_input_t * input, va_list * args) +{ + pg_stream_t * s = va_arg (*args, pg_stream_t *); + pg_tcp_header_t * p; + u32 group_index; + + p = pg_create_edit_group (s, sizeof (p[0]), sizeof (tcp_header_t), + &group_index); + pg_tcp_header_init (p); + + /* Defaults. */ + pg_edit_set_fixed (&p->seq_number, 0); + pg_edit_set_fixed (&p->ack_number, 0); + + pg_edit_set_fixed (&p->data_offset_and_reserved, + sizeof (tcp_header_t) / sizeof (u32)); + + pg_edit_set_fixed (&p->window, 4096); + pg_edit_set_fixed (&p->urgent_pointer, 0); + +#define _(f) pg_edit_set_fixed (&p->f##_flag, 0); + foreach_tcp_flag +#undef _ + + p->checksum.type = PG_EDIT_UNSPECIFIED; + + if (! unformat (input, "TCP: %U -> %U", + unformat_pg_edit, + unformat_tcp_udp_port, &p->src, + unformat_pg_edit, + unformat_tcp_udp_port, &p->dst)) + goto error; + + /* Parse options. 
*/ + while (1) + { + if (unformat (input, "window %U", + unformat_pg_edit, + unformat_pg_number, &p->window)) + ; + + else if (unformat (input, "checksum %U", + unformat_pg_edit, + unformat_pg_number, &p->checksum)) + ; + + /* Flags. */ +#define _(f) else if (unformat (input, #f)) pg_edit_set_fixed (&p->f##_flag, 1); + foreach_tcp_flag +#undef _ + + /* Can't parse input: try next protocol level. */ + else + break; + } + + { + ip_main_t * im = &ip_main; + u16 dst_port; + tcp_udp_port_info_t * pi; + + pi = 0; + if (p->dst.type == PG_EDIT_FIXED) + { + dst_port = pg_edit_get_value (&p->dst, PG_EDIT_LO); + pi = ip_get_tcp_udp_port_info (im, dst_port); + } + + if (pi && pi->unformat_pg_edit + && unformat_user (input, pi->unformat_pg_edit, s)) + ; + + else if (! unformat_user (input, unformat_pg_payload, s)) + goto error; + + if (p->checksum.type == PG_EDIT_UNSPECIFIED) + { + pg_edit_group_t * g = pg_stream_get_group (s, group_index); + g->edit_function = tcp_pg_edit_function; + g->edit_function_opaque = 0; + } + + return 1; + } + + error: + /* Free up any edits we may have added. */ + pg_free_edit_group (s); + return 0; +} + diff --git a/src/vnet/tcp/tcp_syn_filter4.c b/src/vnet/tcp/tcp_syn_filter4.c new file mode 100644 index 00000000..c7605a30 --- /dev/null +++ b/src/vnet/tcp/tcp_syn_filter4.c @@ -0,0 +1,542 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +typedef struct +{ + f64 next_reset; + f64 reset_interval; + u8 *syn_counts; +} syn_filter4_runtime_t; + +typedef struct +{ + u32 next_index; + int not_a_syn; + u8 filter_value; +} syn_filter4_trace_t; + +/* packet trace format function */ +static u8 * +format_syn_filter4_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + syn_filter4_trace_t *t = va_arg (*args, syn_filter4_trace_t *); + + s = format (s, "SYN_FILTER4: next index %d, %s", + t->next_index, t->not_a_syn ? "not a syn" : "syn"); + if (t->not_a_syn == 0) + s = format (s, ", filter value %d\n", t->filter_value); + else + s = format (s, "\n"); + return s; +} + +static vlib_node_registration_t syn_filter4_node; + +#define foreach_syn_filter_error \ +_(THROTTLED, "TCP SYN packet throttle drops") \ +_(OK, "TCP SYN packets passed") + +typedef enum +{ +#define _(sym,str) SYN_FILTER_ERROR_##sym, + foreach_syn_filter_error +#undef _ + SYN_FILTER_N_ERROR, +} syn_filter_error_t; + +static char *syn_filter4_error_strings[] = { +#define _(sym,string) string, + foreach_syn_filter_error +#undef _ +}; + +typedef enum +{ + SYN_FILTER_NEXT_DROP, + SYN_FILTER_N_NEXT, +} syn_filter_next_t; + +extern vnet_feature_arc_registration_t vnet_feat_arc_ip4_local; + +static uword +syn_filter4_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, *from, *to_next; + syn_filter_next_t next_index; + u32 ok_syn_packets = 0; + vnet_feature_main_t *fm = &feature_main; + u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index; + vnet_feature_config_main_t *cm = &fm->feature_config_mains[arc_index]; + syn_filter4_runtime_t *rt = (syn_filter4_runtime_t *) node->runtime_data; + f64 now = vlib_time_now (vm); + /* Shut up spurious gcc warnings. 
*/ + u8 *c0 = 0, *c1 = 0, *c2 = 0, *c3 = 0; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + if (now > rt->next_reset) + { + memset (rt->syn_counts, 0, vec_len (rt->syn_counts)); + rt->next_reset = now + rt->reset_interval; + } + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 8 && n_left_to_next >= 4) + { + u32 bi0, bi1, bi2, bi3; + vlib_buffer_t *b0, *b1, *b2, *b3; + u32 next0, next1, next2, next3; + ip4_header_t *ip0, *ip1, *ip2, *ip3; + tcp_header_t *tcp0, *tcp1, *tcp2, *tcp3; + u32 not_a_syn0 = 1, not_a_syn1 = 1, not_a_syn2 = 1, not_a_syn3 = 1; + u64 hash0, hash1, hash2, hash3; + + /* Prefetch next iteration. */ + { + vlib_buffer_t *p4, *p5, *p6, *p7; + + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + p6 = vlib_get_buffer (vm, from[6]); + p7 = vlib_get_buffer (vm, from[7]); + + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + vlib_prefetch_buffer_header (p6, LOAD); + vlib_prefetch_buffer_header (p7, LOAD); + + CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + to_next[2] = bi2 = from[2]; + to_next[3] = bi3 = from[3]; + from += 4; + to_next += 4; + n_left_from -= 4; + n_left_to_next -= 4; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); + + vnet_get_config_data + (&cm->config_main, &b0->current_config_index, + &next0, 0 /* sizeof (c0[0]) */ ); + vnet_get_config_data + (&cm->config_main, &b1->current_config_index, + &next1, 0 /* 
sizeof (c0[0]) */ ); + vnet_get_config_data + (&cm->config_main, &b2->current_config_index, + &next2, 0 /* sizeof (c0[0]) */ ); + vnet_get_config_data + (&cm->config_main, &b3->current_config_index, + &next3, 0 /* sizeof (c0[0]) */ ); + + /* Not TCP? */ + ip0 = vlib_buffer_get_current (b0); + if (ip0->protocol != IP_PROTOCOL_TCP) + goto trace00; + + tcp0 = ip4_next_header (ip0); + /* + * Not a SYN? + * $$$$ hack: the TCP bitfield flags seem not to compile + * correct code. + */ + if (PREDICT_TRUE (!(tcp0->flags & 0x2))) + goto trace00; + + not_a_syn0 = 0; + hash0 = clib_xxhash ((u64) ip0->src_address.as_u32); + c0 = &rt->syn_counts[hash0 & (_vec_len (rt->syn_counts) - 1)]; + if (PREDICT_FALSE (*c0 >= 0x80)) + { + next0 = SYN_FILTER_NEXT_DROP; + b0->error = node->errors[SYN_FILTER_ERROR_THROTTLED]; + goto trace00; + } + *c0 += 1; + ok_syn_packets++; + + trace00: + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + syn_filter4_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->not_a_syn = not_a_syn0; + t->next_index = next0; + t->filter_value = not_a_syn0 ? 0 : *c0; + } + + /* Not TCP? */ + ip1 = vlib_buffer_get_current (b1); + if (ip1->protocol != IP_PROTOCOL_TCP) + goto trace01; + + tcp1 = ip4_next_header (ip1); + /* + * Not a SYN? + * $$$$ hack: the TCP bitfield flags seem not to compile + * correct code. 
+ */ + if (PREDICT_TRUE (!(tcp1->flags & 0x2))) + goto trace01; + + not_a_syn1 = 0; + hash1 = clib_xxhash ((u64) ip1->src_address.as_u32); + c1 = &rt->syn_counts[hash1 & (_vec_len (rt->syn_counts) - 1)]; + if (PREDICT_FALSE (*c1 >= 0x80)) + { + next1 = SYN_FILTER_NEXT_DROP; + b1->error = node->errors[SYN_FILTER_ERROR_THROTTLED]; + goto trace01; + } + *c1 += 1; + ok_syn_packets++; + + trace01: + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b1->flags & VLIB_BUFFER_IS_TRACED))) + { + syn_filter4_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->not_a_syn = not_a_syn1; + t->next_index = next1; + t->filter_value = not_a_syn1 ? 0 : *c1; + } + + /* Not TCP? */ + ip2 = vlib_buffer_get_current (b2); + if (ip2->protocol != IP_PROTOCOL_TCP) + goto trace02; + + tcp2 = ip4_next_header (ip2); + /* + * Not a SYN? + * $$$$ hack: the TCP bitfield flags seem not to compile + * correct code. + */ + if (PREDICT_TRUE (!(tcp2->flags & 0x2))) + goto trace02; + + not_a_syn2 = 0; + hash2 = clib_xxhash ((u64) ip2->src_address.as_u32); + c2 = &rt->syn_counts[hash2 & (_vec_len (rt->syn_counts) - 1)]; + if (PREDICT_FALSE (*c2 >= 0x80)) + { + next2 = SYN_FILTER_NEXT_DROP; + b2->error = node->errors[SYN_FILTER_ERROR_THROTTLED]; + goto trace02; + } + *c2 += 1; + ok_syn_packets++; + + trace02: + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b2->flags & VLIB_BUFFER_IS_TRACED))) + { + syn_filter4_trace_t *t = + vlib_add_trace (vm, node, b2, sizeof (*t)); + t->not_a_syn = not_a_syn2; + t->next_index = next2; + t->filter_value = not_a_syn2 ? 0 : *c2; + } + + /* Not TCP? */ + ip3 = vlib_buffer_get_current (b3); + if (ip3->protocol != IP_PROTOCOL_TCP) + goto trace03; + + tcp3 = ip4_next_header (ip3); + /* + * Not a SYN? + * $$$$ hack: the TCP bitfield flags seem not to compile + * correct code. 
+ */ + if (PREDICT_TRUE (!(tcp3->flags & 0x2))) + goto trace03; + + not_a_syn3 = 0; + hash3 = clib_xxhash ((u64) ip3->src_address.as_u32); + c3 = &rt->syn_counts[hash3 & (_vec_len (rt->syn_counts) - 1)]; + if (PREDICT_FALSE (*c3 >= 0x80)) + { + next3 = SYN_FILTER_NEXT_DROP; + b3->error = node->errors[SYN_FILTER_ERROR_THROTTLED]; + goto trace03; + } + *c3 += 1; + ok_syn_packets++; + + trace03: + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b3->flags & VLIB_BUFFER_IS_TRACED))) + { + syn_filter4_trace_t *t = + vlib_add_trace (vm, node, b3, sizeof (*t)); + t->not_a_syn = not_a_syn3; + t->next_index = next3; + t->filter_value = not_a_syn3 ? 0 : *c3; + } + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, bi2, bi3, + next0, next1, next2, next3); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0; + ip4_header_t *ip0; + tcp_header_t *tcp0; + u32 not_a_syn0 = 1; + u32 hash0; + u8 *c0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + vnet_get_config_data + (&cm->config_main, &b0->current_config_index, + &next0, 0 /* sizeof (c0[0]) */ ); + + /* Not TCP? */ + ip0 = vlib_buffer_get_current (b0); + if (ip0->protocol != IP_PROTOCOL_TCP) + goto trace0; + + tcp0 = ip4_next_header (ip0); + /* + * Not a SYN? + * $$$$ hack: the TCP bitfield flags seem not to compile + * correct code. 
+ */ + if (PREDICT_TRUE (!(tcp0->flags & 0x2))) + goto trace0; + + not_a_syn0 = 0; + hash0 = clib_xxhash ((u64) ip0->src_address.as_u32); + c0 = &rt->syn_counts[hash0 & (_vec_len (rt->syn_counts) - 1)]; + if (PREDICT_FALSE (*c0 >= 0x80)) + { + next0 = SYN_FILTER_NEXT_DROP; + b0->error = node->errors[SYN_FILTER_ERROR_THROTTLED]; + goto trace0; + } + *c0 += 1; + ok_syn_packets++; + + trace0: + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + syn_filter4_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->not_a_syn = not_a_syn0; + t->next_index = next0; + t->filter_value = not_a_syn0 ? 0 : *c0; + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, syn_filter4_node.index, + SYN_FILTER_ERROR_OK, ok_syn_packets); + return frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (syn_filter4_node, static) = +{ + .function = syn_filter4_node_fn, + .name = "syn-filter-4", + .vector_size = sizeof (u32), + .format_trace = format_syn_filter4_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .runtime_data_bytes = sizeof (syn_filter4_runtime_t), + .n_errors = ARRAY_LEN(syn_filter4_error_strings), + .error_strings = syn_filter4_error_strings, + + .n_next_nodes = SYN_FILTER_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [SYN_FILTER_NEXT_DROP] = "error-drop", + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (syn_filter4_node, syn_filter4_node_fn); + +/* *INDENT-OFF* */ +VNET_FEATURE_INIT (syn_filter_4, static) = +{ + .arc_name = "ip4-local", + .node_name = "syn-filter-4", + .runs_before = VNET_FEATURES("ip4-local-end-of-arc"), +}; +/* *INDENT-ON* */ + +int +syn_filter_enable_disable (u32 sw_if_index, int enable_disable) +{ + vnet_main_t *vnm = vnet_get_main 
(); + vnet_sw_interface_t *sw; + int rv = 0; + + /* Utterly wrong? */ + if (pool_is_free_index (vnm->interface_main.sw_interfaces, sw_if_index)) + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + + /* Not a physical port? */ + sw = vnet_get_sw_interface (vnm, sw_if_index); + if (sw->type != VNET_SW_INTERFACE_TYPE_HARDWARE) + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + + if (enable_disable) + { + vlib_main_t *vm = vlib_get_main (); + syn_filter4_runtime_t *rt; + + rt = vlib_node_get_runtime_data (vm, syn_filter4_node.index); + vec_validate (rt->syn_counts, 1023); + /* + * Given perfect disperson / optimal hashing results: + * Allow 128k (successful) syns/sec. 1024, buckets each of which + * absorb 128 syns before filtering. Reset table once a second. + * Reality bites, lets try resetting once every 100ms. + */ + rt->reset_interval = 0.1; /* reset interval in seconds */ + } + + rv = vnet_feature_enable_disable ("ip4-local", "syn-filter-4", + sw_if_index, enable_disable, 0, 0); + + return rv; +} + +static clib_error_t * +syn_filter_enable_disable_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t *vnm = vnet_get_main (); + u32 sw_if_index = ~0; + int enable_disable = 1; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "disable")) + enable_disable = 0; + else if (unformat (input, "%U", unformat_vnet_sw_interface, + vnm, &sw_if_index)) + ; + else + break; + } + + if (sw_if_index == ~0) + return clib_error_return (0, "Please specify an interface..."); + + rv = syn_filter_enable_disable (sw_if_index, enable_disable); + + switch (rv) + { + case 0: + break; + + case VNET_API_ERROR_INVALID_SW_IF_INDEX: + return clib_error_return + (0, "Invalid interface, only works on physical ports"); + break; + + case VNET_API_ERROR_UNIMPLEMENTED: + return clib_error_return (0, + "Device driver doesn't support redirection"); + break; + + case VNET_API_ERROR_INVALID_VALUE: + return 
clib_error_return (0, "feature arc not found"); + + case VNET_API_ERROR_INVALID_VALUE_2: + return clib_error_return (0, "feature node not found"); + + default: + return clib_error_return (0, "syn_filter_enable_disable returned %d", + rv); + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (sr_content_command, static) = +{ + .path = "ip syn filter", + .short_help = "ip syn filter [disable]", + .function = syn_filter_enable_disable_command_fn, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/tcp/tcp_timer.h b/src/vnet/tcp/tcp_timer.h new file mode 100644 index 00000000..fa25268c --- /dev/null +++ b/src/vnet/tcp/tcp_timer.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_tcp_timer_h__ +#define __included_tcp_timer_h__ + +#include +#include + +#endif /* __included_tcp_timer_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/udp/builtin_server.c b/src/vnet/udp/builtin_server.c new file mode 100644 index 00000000..afa66ba4 --- /dev/null +++ b/src/vnet/udp/builtin_server.c @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** @file + udp builtin server +*/ + +#include +#include +#include + +/** per-worker built-in server copy buffers */ +u8 **copy_buffers; + +static int +builtin_session_create_callback (stream_session_t * s) +{ + /* Simple version: declare session ready-to-go... */ + s->session_state = SESSION_STATE_READY; + return 0; +} + +static void +builtin_session_disconnect_callback (stream_session_t * s) +{ + stream_session_disconnect (s); +} + +static int +builtin_server_rx_callback (stream_session_t * s) +{ + svm_fifo_t *rx_fifo, *tx_fifo; + u32 this_transfer; + int actual_transfer; + u8 *my_copy_buffer; + session_fifo_event_t evt; + unix_shared_memory_queue_t *q; + + my_copy_buffer = copy_buffers[s->thread_index]; + rx_fifo = s->server_rx_fifo; + tx_fifo = s->server_tx_fifo; + + this_transfer = svm_fifo_max_enqueue (tx_fifo) + < svm_fifo_max_dequeue (rx_fifo) ? 
+ svm_fifo_max_enqueue (tx_fifo) : svm_fifo_max_dequeue (rx_fifo); + + vec_validate (my_copy_buffer, this_transfer - 1); + _vec_len (my_copy_buffer) = this_transfer; + + actual_transfer = svm_fifo_dequeue_nowait (rx_fifo, 0, this_transfer, + my_copy_buffer); + ASSERT (actual_transfer == this_transfer); + actual_transfer = svm_fifo_enqueue_nowait (tx_fifo, 0, this_transfer, + my_copy_buffer); + + copy_buffers[s->thread_index] = my_copy_buffer; + + /* Fabricate TX event, send to ourselves */ + evt.fifo = tx_fifo; + evt.event_type = FIFO_EVENT_SERVER_TX; + /* $$$$ for event logging */ + evt.enqueue_length = actual_transfer; + evt.event_id = 0; + q = session_manager_get_vpp_event_queue (s->thread_index); + unix_shared_memory_queue_add (q, (u8 *) & evt, 0 /* do wait for mutex */ ); + + return 0; +} + +/* *INDENT-OFF* */ +static session_cb_vft_t builtin_server = { + .session_accept_callback = builtin_session_create_callback, + .session_disconnect_callback = builtin_session_disconnect_callback, + .builtin_server_rx_callback = builtin_server_rx_callback +}; +/* *INDENT-ON* */ + +static int +bind_builtin_uri_server (u8 * uri) +{ + vnet_bind_args_t _a, *a = &_a; + char segment_name[128]; + u32 segment_name_length; + int rv; + u64 options[16]; + + segment_name_length = ARRAY_LEN (segment_name); + + memset (a, 0, sizeof (*a)); + memset (options, 0, sizeof (options)); + + a->uri = (char *) uri; + a->api_client_index = ~0; /* built-in server */ + a->segment_name = segment_name; + a->segment_name_length = segment_name_length; + a->session_cb_vft = &builtin_server; + + options[SESSION_OPTIONS_ACCEPT_COOKIE] = 0x12345678; + options[SESSION_OPTIONS_SEGMENT_SIZE] = (2 << 30); /*$$$$ config / arg */ + a->options = options; + + rv = vnet_bind_uri (a); + + return rv; +} + +static int +unbind_builtin_uri_server (u8 * uri) +{ + int rv; + + rv = vnet_unbind_uri ((char *) uri, ~0 /* client_index */ ); + + return rv; +} + +static clib_error_t * +builtin_server_init (vlib_main_t * vm) +{ + 
vlib_thread_main_t *vtm = vlib_get_thread_main (); + u32 num_threads; + + num_threads = 1 /* main thread */ + vtm->n_threads; + + vec_validate (copy_buffers, num_threads - 1); + return 0; +} + +VLIB_INIT_FUNCTION (builtin_server_init); + +static clib_error_t * +builtin_uri_bind_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u8 *uri = 0; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "uri %s", &uri)) + ; + else + break; + } + + if (uri == 0) + return clib_error_return (0, "uri to bind not specified..."); + + rv = bind_builtin_uri_server (uri); + + vec_free (uri); + + switch (rv) + { + case 0: + break; + + default: + return clib_error_return (0, "bind_uri_server returned %d", rv); + break; + } + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (builtin_uri_bind_command, static) = +{ + .path = "builtin uri bind", + .short_help = "builtin uri bind", + .function = builtin_uri_bind_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +builtin_uri_unbind_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u8 *uri = 0; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "uri %s", &uri)) + ; + else + break; + } + + if (uri == 0) + return clib_error_return (0, "uri to unbind not specified..."); + + rv = unbind_builtin_uri_server (uri); + + vec_free (uri); + + switch (rv) + { + case 0: + break; + + default: + return clib_error_return (0, "unbind_uri_server returned %d", rv); + break; + } + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (builtin_uri_unbind_command, static) = +{ + .path = "builtin uri unbind", + .short_help = "builtin uri unbind", + .function = builtin_uri_unbind_command_fn, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/udp/udp.c 
b/src/vnet/udp/udp.c new file mode 100644 index 00000000..9e740466 --- /dev/null +++ b/src/vnet/udp/udp.c @@ -0,0 +1,342 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** @file + udp state machine, etc. +*/ + +#include +#include +#include +#include + +udp_uri_main_t udp_uri_main; + +u32 +udp_session_bind_ip4 (vlib_main_t * vm, u32 session_index, + ip46_address_t * ip, u16 port_number_host_byte_order) +{ + udp_uri_main_t *um = vnet_get_udp_main (); + udp_connection_t *listener; + + pool_get (um->udp_listeners, listener); + memset (listener, 0, sizeof (udp_connection_t)); + listener->c_lcl_port = clib_host_to_net_u16 (port_number_host_byte_order); + listener->c_lcl_ip4.as_u32 = ip->ip4.as_u32; + listener->c_proto = SESSION_TYPE_IP4_UDP; + udp_register_dst_port (um->vlib_main, port_number_host_byte_order, + udp4_uri_input_node.index, 1 /* is_ipv4 */ ); + return 0; +} + +u32 +udp_session_bind_ip6 (vlib_main_t * vm, u32 session_index, + ip46_address_t * ip, u16 port_number_host_byte_order) +{ + udp_uri_main_t *um = vnet_get_udp_main (); + udp_connection_t *listener; + + pool_get (um->udp_listeners, listener); + listener->c_lcl_port = clib_host_to_net_u16 (port_number_host_byte_order); + clib_memcpy (&listener->c_lcl_ip6, &ip->ip6, sizeof (ip6_address_t)); + listener->c_proto = SESSION_TYPE_IP6_UDP; + udp_register_dst_port (um->vlib_main, port_number_host_byte_order, + udp4_uri_input_node.index, 0 
/* is_ipv4 */ ); + return 0; +} + +u32 +udp_session_unbind_ip4 (vlib_main_t * vm, u32 listener_index) +{ + udp_connection_t *listener; + listener = udp_listener_get (listener_index); + + /* deregister the udp_local mapping */ + udp_unregister_dst_port (vm, listener->c_lcl_port, 1 /* is_ipv4 */ ); + return 0; +} + +u32 +udp_session_unbind_ip6 (vlib_main_t * vm, u32 listener_index) +{ + udp_connection_t *listener; + + listener = udp_listener_get (listener_index); + + /* deregister the udp_local mapping */ + udp_unregister_dst_port (vm, listener->c_lcl_port, 0 /* is_ipv4 */ ); + return 0; +} + +transport_connection_t * +udp_session_get_listener (u32 listener_index) +{ + udp_connection_t *us; + + us = udp_listener_get (listener_index); + return &us->connection; +} + +u32 +udp_push_header (transport_connection_t * tconn, vlib_buffer_t * b) +{ + udp_connection_t *us; + u8 *data; + udp_header_t *udp; + + us = (udp_connection_t *) tconn; + + if (tconn->is_ip4) + { + ip4_header_t *ip; + + data = vlib_buffer_get_current (b); + udp = (udp_header_t *) (data - sizeof (udp_header_t)); + ip = (ip4_header_t *) ((u8 *) udp - sizeof (ip4_header_t)); + + /* Build packet header, swap rx key src + dst fields */ + ip->src_address.as_u32 = us->c_lcl_ip4.as_u32; + ip->dst_address.as_u32 = us->c_rmt_ip4.as_u32; + ip->ip_version_and_header_length = 0x45; + ip->ttl = 254; + ip->protocol = IP_PROTOCOL_UDP; + ip->length = clib_host_to_net_u16 (b->current_length + sizeof (*udp)); + ip->checksum = ip4_header_checksum (ip); + + udp->src_port = us->c_lcl_port; + udp->dst_port = us->c_rmt_port; + udp->length = clib_host_to_net_u16 (b->current_length); + udp->checksum = 0; + + b->current_length = sizeof (*ip) + sizeof (*udp); + return SESSION_QUEUE_NEXT_IP4_LOOKUP; + } + else + { + vlib_main_t *vm = vlib_get_main (); + ip6_header_t *ip; + u16 payload_length; + int bogus = ~0; + + data = vlib_buffer_get_current (b); + udp = (udp_header_t *) (data - sizeof (udp_header_t)); + ip = (ip6_header_t *) ((u8 
*) udp - sizeof (ip6_header_t)); + + /* Build packet header, swap rx key src + dst fields */ + clib_memcpy (&ip->src_address, &us->c_lcl_ip6, sizeof (ip6_address_t)); + clib_memcpy (&ip->dst_address, &us->c_rmt_ip6, sizeof (ip6_address_t)); + + ip->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 (0x6 << 28); + + ip->hop_limit = 0xff; + ip->protocol = IP_PROTOCOL_UDP; + + payload_length = vlib_buffer_length_in_chain (vm, b); + payload_length -= sizeof (*ip); + + ip->payload_length = clib_host_to_net_u16 (payload_length); + + udp->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ip, &bogus); + ASSERT (!bogus); + + udp->src_port = us->c_lcl_port; + udp->dst_port = us->c_rmt_port; + udp->length = clib_host_to_net_u16 (b->current_length); + udp->checksum = 0; + + b->current_length = sizeof (*ip) + sizeof (*udp); + + return SESSION_QUEUE_NEXT_IP6_LOOKUP; + } +} + +transport_connection_t * +udp_session_get (u32 connection_index, u32 my_thread_index) +{ + udp_uri_main_t *um = vnet_get_udp_main (); + + udp_connection_t *us; + us = + pool_elt_at_index (um->udp_sessions[my_thread_index], connection_index); + return &us->connection; +} + +void +udp_session_close (u32 connection_index, u32 my_thread_index) +{ + udp_uri_main_t *um = vnet_get_udp_main (); + pool_put_index (um->udp_sessions[my_thread_index], connection_index); +} + +u8 * +format_udp_session_ip4 (u8 * s, va_list * args) +{ + u32 uci = va_arg (*args, u32); + u32 thread_index = va_arg (*args, u32); + udp_connection_t *u4; + + u4 = udp_connection_get (uci, thread_index); + + s = format (s, "[%s] %U:%d->%U:%d", "udp", format_ip4_address, + &u4->c_lcl_ip4, clib_net_to_host_u16 (u4->c_lcl_port), + format_ip4_address, &u4->c_rmt_ip4, + clib_net_to_host_u16 (u4->c_rmt_port)); + return s; +} + +u8 * +format_udp_session_ip6 (u8 * s, va_list * args) +{ + u32 uci = va_arg (*args, u32); + u32 thread_index = va_arg (*args, u32); + udp_connection_t *tc = udp_connection_get (uci, thread_index); + s = format 
(s, "[%s] %U:%d->%U:%d", "udp", format_ip6_address, + &tc->c_lcl_ip6, clib_net_to_host_u16 (tc->c_lcl_port), + format_ip6_address, &tc->c_rmt_ip6, + clib_net_to_host_u16 (tc->c_rmt_port)); + return s; +} + +u8 * +format_udp_listener_session_ip4 (u8 * s, va_list * args) +{ + u32 tci = va_arg (*args, u32); + udp_connection_t *tc = udp_listener_get (tci); + s = format (s, "[%s] %U:%d->%U:%d", "udp", format_ip4_address, + &tc->c_lcl_ip4, clib_net_to_host_u16 (tc->c_lcl_port), + format_ip4_address, &tc->c_rmt_ip4, + clib_net_to_host_u16 (tc->c_rmt_port)); + return s; +} + +u8 * +format_udp_listener_session_ip6 (u8 * s, va_list * args) +{ + u32 tci = va_arg (*args, u32); + udp_connection_t *tc = udp_listener_get (tci); + s = format (s, "[%s] %U:%d->%U:%d", "udp", format_ip6_address, + &tc->c_lcl_ip6, clib_net_to_host_u16 (tc->c_lcl_port), + format_ip6_address, &tc->c_rmt_ip6, + clib_net_to_host_u16 (tc->c_rmt_port)); + return s; +} + +u16 +udp_send_mss_uri (transport_connection_t * t) +{ + /* TODO figure out MTU of output interface */ + return 400; +} + +u32 +udp_send_space_uri (transport_connection_t * t) +{ + /* No constraint on TX window */ + return ~0; +} + +int +udp_open_connection (ip46_address_t * addr, u16 port) +{ + clib_warning ("Not implemented"); + return 0; +} + +/* *INDENT-OFF* */ +const static transport_proto_vft_t udp4_proto = { + .bind = udp_session_bind_ip4, + .open = udp_open_connection, + .unbind = udp_session_unbind_ip4, + .push_header = udp_push_header, + .get_connection = udp_session_get, + .get_listener = udp_session_get_listener, + .close = udp_session_close, + .send_mss = udp_send_mss_uri, + .send_space = udp_send_space_uri, + .format_connection = format_udp_session_ip4, + .format_listener = format_udp_listener_session_ip4 +}; + +const static transport_proto_vft_t udp6_proto = { + .bind = udp_session_bind_ip6, + .open = udp_open_connection, + .unbind = udp_session_unbind_ip6, + .push_header = udp_push_header, + .get_connection = udp_session_get, 
+ .get_listener = udp_session_get_listener, + .close = udp_session_close, + .send_mss = udp_send_mss_uri, + .send_space = udp_send_space_uri, + .format_connection = format_udp_session_ip6, + .format_listener = format_udp_listener_session_ip6 +}; +/* *INDENT-ON* */ + +static clib_error_t * +udp_init (vlib_main_t * vm) +{ + udp_uri_main_t *um = vnet_get_udp_main (); + ip_main_t *im = &ip_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + u32 num_threads; + clib_error_t *error = 0; + ip_protocol_info_t *pi; + + um->vlib_main = vm; + um->vnet_main = vnet_get_main (); + + if ((error = vlib_call_init_function (vm, ip_main_init))) + return error; + if ((error = vlib_call_init_function (vm, ip4_lookup_init))) + return error; + if ((error = vlib_call_init_function (vm, ip6_lookup_init))) + return error; + + /* + * Registrations + */ + + /* IP registration */ + pi = ip_get_protocol_info (im, IP_PROTOCOL_UDP); + if (pi == 0) + return clib_error_return (0, "UDP protocol info AWOL"); + pi->format_header = format_udp_header; + pi->unformat_pg_edit = unformat_pg_udp_header; + + + /* Register as transport with URI */ + session_register_transport (SESSION_TYPE_IP4_UDP, &udp4_proto); + session_register_transport (SESSION_TYPE_IP6_UDP, &udp6_proto); + + /* + * Initialize data structures + */ + + num_threads = 1 /* main thread */ + tm->n_threads; + vec_validate (um->udp_sessions, num_threads - 1); + + return error; +} + +VLIB_INIT_FUNCTION (udp_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/udp/udp.h b/src/vnet/udp/udp.h new file mode 100644 index 00000000..7ab26ce9 --- /dev/null +++ b/src/vnet/udp/udp.h @@ -0,0 +1,362 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_udp_h__ +#define __included_udp_h__ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +typedef struct +{ + transport_connection_t connection; /** must be first */ + + /** ersatz MTU to limit fifo pushes to test data size */ + u32 mtu; +} udp_connection_t; + +typedef struct _udp_uri_main +{ + /* Per-worker thread udp connection pools */ + udp_connection_t **udp_sessions; + udp_connection_t *udp_listeners; + + /* convenience */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; + ip4_main_t *ip4_main; + ip6_main_t *ip6_main; +} udp_uri_main_t; + +extern udp_uri_main_t udp_uri_main; +extern vlib_node_registration_t udp4_uri_input_node; + +always_inline udp_uri_main_t * +vnet_get_udp_main () +{ + return &udp_uri_main; +} + +always_inline udp_connection_t * +udp_connection_get (u32 conn_index, u32 thread_index) +{ + return pool_elt_at_index (udp_uri_main.udp_sessions[thread_index], + conn_index); +} + +always_inline udp_connection_t * +udp_listener_get (u32 conn_index) +{ + return pool_elt_at_index (udp_uri_main.udp_listeners, conn_index); +} + +typedef enum +{ +#define udp_error(n,s) UDP_ERROR_##n, +#include +#undef udp_error + UDP_N_ERROR, +} udp_error_t; + +#define foreach_udp4_dst_port \ +_ (67, dhcp_to_server) \ +_ (68, dhcp_to_client) \ +_ (500, ikev2) \ +_ (3784, bfd4) \ +_ (3785, bfd_echo4) \ +_ (4341, lisp_gpe) \ +_ (4342, lisp_cp) \ +_ (4739, ipfix) \ +_ (4789, vxlan) \ +_ (4789, vxlan6) \ +_ (4790, vxlan_gpe) \ +_ (6633, vpath_3) + + +#define foreach_udp6_dst_port \ +_ 
(547, dhcpv6_to_server) \ +_ (546, dhcpv6_to_client) \ +_ (3784, bfd6) \ +_ (3785, bfd_echo6) \ +_ (4341, lisp_gpe6) \ +_ (4342, lisp_cp6) \ +_ (4790, vxlan6_gpe) \ +_ (6633, vpath6_3) + +typedef enum +{ +#define _(n,f) UDP_DST_PORT_##f = n, + foreach_udp4_dst_port foreach_udp6_dst_port +#undef _ +} udp_dst_port_t; + +typedef enum +{ +#define _(n,f) UDP6_DST_PORT_##f = n, + foreach_udp6_dst_port +#undef _ +} udp6_dst_port_t; + +typedef struct +{ + /* Name (a c string). */ + char *name; + + /* GRE protocol type in host byte order. */ + udp_dst_port_t dst_port; + + /* Node which handles this type. */ + u32 node_index; + + /* Next index for this type. */ + u32 next_index; +} udp_dst_port_info_t; + +typedef enum +{ + UDP_IP6 = 0, + UDP_IP4, /* the code is full of is_ip4... */ + N_UDP_AF, +} udp_af_t; + +typedef struct +{ + udp_dst_port_info_t *dst_port_infos[N_UDP_AF]; + + /* Hash tables mapping name/protocol to protocol info index. */ + uword *dst_port_info_by_name[N_UDP_AF]; + uword *dst_port_info_by_dst_port[N_UDP_AF]; + + /* convenience */ + vlib_main_t *vlib_main; +} udp_main_t; + +always_inline udp_dst_port_info_t * +udp_get_dst_port_info (udp_main_t * um, udp_dst_port_t dst_port, u8 is_ip4) +{ + uword *p = hash_get (um->dst_port_info_by_dst_port[is_ip4], dst_port); + return p ? 
vec_elt_at_index (um->dst_port_infos[is_ip4], p[0]) : 0; +} + +format_function_t format_udp_header; +format_function_t format_udp_rx_trace; + +unformat_function_t unformat_udp_header; + +void udp_register_dst_port (vlib_main_t * vm, + udp_dst_port_t dst_port, + u32 node_index, u8 is_ip4); + +void +udp_unregister_dst_port (vlib_main_t * vm, + udp_dst_port_t dst_port, u8 is_ip4); + +void udp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add); + +/* Patch the outer IP/UDP header found at the current position of b0: IPv4 gets its total length and header checksum updated, IPv6 gets its payload length set and the UDP checksum recomputed (a computed 0 is stored as 0xffff); the UDP length is set in both cases. */ +always_inline void +ip_udp_fixup_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 is_ip4) +{ + u16 new_l0; + udp_header_t *udp0; + + if (is_ip4) + { + ip4_header_t *ip0; + ip_csum_t sum0; + u16 old_l0 = 0; + + ip0 = vlib_buffer_get_current (b0); + + /* fix the outer-IP checksum */ + sum0 = ip0->checksum; + /* old_l0 always 0, see the rewrite setup */ + new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); + + sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, + length /* changed member */ ); + ip0->checksum = ip_csum_fold (sum0); + ip0->length = new_l0; + + /* Fix UDP length */ + udp0 = (udp_header_t *) (ip0 + 1); + new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) + - sizeof (*ip0)); + udp0->length = new_l0; + } + else + { + ip6_header_t *ip0; + int bogus0; + + ip0 = vlib_buffer_get_current (b0); + + new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) + - sizeof (*ip0)); + ip0->payload_length = new_l0; + + /* Fix UDP length */ + udp0 = (udp_header_t *) (ip0 + 1); + udp0->length = new_l0; + + udp0->checksum = + ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip0, &bogus0); + ASSERT (bogus0 == 0); + + if (udp0->checksum == 0) + udp0->checksum = 0xffff; + } +} + +always_inline void +ip_udp_encap_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 * ec0, word ec_len, + u8 is_ip4) +{ + vlib_buffer_advance (b0, -ec_len); + + if (is_ip4) + { + ip4_header_t *ip0; + + ip0 = vlib_buffer_get_current (b0); + + /* Apply the encap string. 
*/ + clib_memcpy (ip0, ec0, ec_len); + ip_udp_fixup_one (vm, b0, 1); + } + else + { + ip6_header_t *ip0; + + ip0 = vlib_buffer_get_current (b0); + + /* Apply the encap string. */ + clib_memcpy (ip0, ec0, ec_len); + ip_udp_fixup_one (vm, b0, 0); + } +} + +/* Two-buffer variant of ip_udp_encap_one: prepend the ec_len-byte encap strings ec0 / ec1 to b0 / b1, then fix up the outer IP and UDP lengths and checksums of both packets. */ +always_inline void +ip_udp_encap_two (vlib_main_t * vm, vlib_buffer_t * b0, vlib_buffer_t * b1, + u8 * ec0, u8 * ec1, word ec_len, u8 is_v4) +{ + u16 new_l0, new_l1; + udp_header_t *udp0, *udp1; + + ASSERT (_vec_len (ec0) == _vec_len (ec1)); + + vlib_buffer_advance (b0, -ec_len); + vlib_buffer_advance (b1, -ec_len); + + if (is_v4) + { + ip4_header_t *ip0, *ip1; + ip_csum_t sum0, sum1; + u16 old_l0 = 0, old_l1 = 0; + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + + /* Apply the encap string */ + clib_memcpy (ip0, ec0, ec_len); + clib_memcpy (ip1, ec1, ec_len); + + /* fix the outer-IP checksum */ + sum0 = ip0->checksum; + sum1 = ip1->checksum; + + /* old_l0 always 0, see the rewrite setup */ + new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); + new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1)); + + sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, + length /* changed member */ ); + sum1 = ip_csum_update (sum1, old_l1, new_l1, ip4_header_t, + length /* changed member */ ); + + ip0->checksum = ip_csum_fold (sum0); + ip1->checksum = ip_csum_fold (sum1); + + ip0->length = new_l0; + ip1->length = new_l1; + + /* Fix UDP length */ + udp0 = (udp_header_t *) (ip0 + 1); + udp1 = (udp_header_t *) (ip1 + 1); + + new_l0 = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - + sizeof (*ip0)); + new_l1 = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1) - + sizeof (*ip1)); + udp0->length = new_l0; + udp1->length = new_l1; + } + else + { + ip6_header_t *ip0, *ip1; + int bogus0, bogus1; + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + + /* Apply the encap string. 
*/ + clib_memcpy (ip0, ec0, ec_len); + clib_memcpy (ip1, ec1, ec_len); + + new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) + - sizeof (*ip0)); + new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1) + - sizeof (*ip1)); + ip0->payload_length = new_l0; + ip1->payload_length = new_l1; + + /* Fix UDP length */ + udp0 = (udp_header_t *) (ip0 + 1); + udp1 = (udp_header_t *) (ip1 + 1); + + udp0->length = new_l0; + udp1->length = new_l1; + + udp0->checksum = + ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip0, &bogus0); + udp1->checksum = + ip6_tcp_udp_icmp_compute_checksum (vm, b1, ip1, &bogus1); + ASSERT (bogus0 == 0); + ASSERT (bogus1 == 0); + + if (udp0->checksum == 0) + udp0->checksum = 0xffff; + if (udp1->checksum == 0) + udp1->checksum = 0xffff; + } +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ + +#endif /* __included_udp_h__ */ diff --git a/src/vnet/udp/udp_error.def b/src/vnet/udp/udp_error.def new file mode 100644 index 00000000..bfdae0ac --- /dev/null +++ b/src/vnet/udp/udp_error.def @@ -0,0 +1,21 @@ +/* + * udp_error.def: udp errors + * + * Copyright (c) 2013-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +udp_error (NONE, "no error") +udp_error (NO_LISTENER, "no listener for dst port") +udp_error (LENGTH_ERROR, "UDP packets with length errors") +udp_error (PUNT, "no listener punt") diff --git a/src/vnet/udp/udp_format.c b/src/vnet/udp/udp_format.c new file mode 100644 index 00000000..abdf561e --- /dev/null +++ b/src/vnet/udp/udp_format.c @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/udp_format.c: udp formatting + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include + +/* Format a UDP header. va_list arguments: udp_header_t * header, u32 max_header_bytes. Prints ports, length and checksum in host byte order, then hands the bytes after the header to the format_header callback registered for the destination port, if there is one. */ +u8 * +format_udp_header (u8 * s, va_list * args) +{ + udp_header_t *udp = va_arg (*args, udp_header_t *); + u32 max_header_bytes = va_arg (*args, u32); + uword indent; + u32 header_bytes = sizeof (udp[0]); + + /* Fewer bytes available than a complete UDP header: report and stop. */ + if (max_header_bytes < sizeof (udp[0])) + return format (s, "UDP header truncated"); + + indent = format_get_indent (s); + indent += 2; + + s = format (s, "UDP: %d -> %d", + clib_net_to_host_u16 (udp->src_port), + clib_net_to_host_u16 (udp->dst_port)); + + s = format (s, "\n%Ulength %d, checksum 0x%04x", + format_white_space, indent, + clib_net_to_host_u16 (udp->length), + clib_net_to_host_u16 (udp->checksum)); + + /* Recurse into next protocol layer. */ + if (max_header_bytes != 0 && header_bytes < max_header_bytes) + { + ip_main_t *im = &ip_main; + tcp_udp_port_info_t *pi; + + pi = ip_get_tcp_udp_port_info (im, udp->dst_port); + + if (pi && pi->format_header) + s = format (s, "\n%U%U", + format_white_space, indent - 2, pi->format_header, + /* next protocol header */ (udp + 1), + max_header_bytes - sizeof (udp[0])); + } + + return s; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/udp/udp_input.c b/src/vnet/udp/udp_input.c new file mode 100644 index 00000000..4d509335 --- /dev/null +++ b/src/vnet/udp/udp_input.c @@ -0,0 +1,314 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include "../session/application_interface.h" + +vlib_node_registration_t udp4_uri_input_node; + +typedef struct +{ + u32 session; + u32 disposition; + u32 thread_index; +} udp4_uri_input_trace_t; + +/* packet trace format function */ +static u8 * +format_udp4_uri_input_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + udp4_uri_input_trace_t *t = va_arg (*args, udp4_uri_input_trace_t *); + + s = format (s, "UDP4_URI_INPUT: session %d, disposition %d, thread %d", + t->session, t->disposition, t->thread_index); + return s; +} + +typedef enum +{ + UDP4_URI_INPUT_NEXT_DROP, + UDP4_URI_INPUT_N_NEXT, +} udp4_uri_input_next_t; + +static char *udp4_uri_input_error_strings[] = { +#define _(sym,string) string, + foreach_session_input_error +#undef _ +}; + +static uword +udp4_uri_input_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, *from, *to_next; + udp4_uri_input_next_t next_index; + udp_uri_main_t *um = vnet_get_udp_main (); + session_manager_main_t *smm = vnet_get_session_manager_main (); + u32 my_thread_index = vm->cpu_index; + u8 my_enqueue_epoch; + u32 *session_indices_to_enqueue; + static u32 serial_number; + int i; + + my_enqueue_epoch = ++smm->current_enqueue_epoch[my_thread_index]; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + 
next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0 = UDP4_URI_INPUT_NEXT_DROP; + u32 error0 = SESSION_ERROR_ENQUEUED; + udp_header_t *udp0; + ip4_header_t *ip0; + stream_session_t *s0; + svm_fifo_t *f0; + u16 udp_len0; + u8 *data0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + /* udp_local hands us a pointer to the udp data */ + + data0 = vlib_buffer_get_current (b0); + udp0 = (udp_header_t *) (data0 - sizeof (*udp0)); + + /* $$$$ fixme: udp_local doesn't do ip options correctly anyhow */ + ip0 = (ip4_header_t *) (((u8 *) udp0) - sizeof (*ip0)); + s0 = 0; + + /* lookup session */ + s0 = stream_session_lookup4 (&ip0->dst_address, &ip0->src_address, + udp0->dst_port, udp0->src_port, + SESSION_TYPE_IP4_UDP, my_thread_index); + + /* no listener */ + if (PREDICT_FALSE (s0 == 0)) + { + error0 = SESSION_ERROR_NO_LISTENER; + goto trace0; + } + + f0 = s0->server_rx_fifo; + + /* established hit */ + if (PREDICT_TRUE (s0->session_state == SESSION_STATE_READY)) + { + udp_len0 = clib_net_to_host_u16 (udp0->length); + + if (PREDICT_FALSE (udp_len0 > svm_fifo_max_enqueue (f0))) + { + error0 = SESSION_ERROR_FIFO_FULL; + goto trace0; + } + + svm_fifo_enqueue_nowait (f0, 0 /* pid */ , + udp_len0 - sizeof (*udp0), + (u8 *) (udp0 + 1)); + + b0->error = node->errors[SESSION_ERROR_ENQUEUED]; + + /* We need to send an RX event on this fifo */ + if (s0->enqueue_epoch != my_enqueue_epoch) + { + s0->enqueue_epoch = my_enqueue_epoch; + + vec_add1 (smm->session_indices_to_enqueue_by_thread + [my_thread_index], + s0 - smm->sessions[my_thread_index]); + } + } + /* listener hit */ + else if (s0->session_state == 
SESSION_STATE_LISTENING) + { + udp_connection_t *us; + int rv; + + error0 = SESSION_ERROR_NOT_READY; + + /* + * create udp transport session + */ + pool_get (um->udp_sessions[my_thread_index], us); + + us->mtu = 1024; /* $$$$ policy */ + + us->c_lcl_ip4.as_u32 = ip0->dst_address.as_u32; + us->c_rmt_ip4.as_u32 = ip0->src_address.as_u32; + us->c_lcl_port = udp0->dst_port; + us->c_rmt_port = udp0->src_port; + us->c_proto = SESSION_TYPE_IP4_UDP; + us->c_c_index = us - um->udp_sessions[my_thread_index]; + + /* + * create stream session and attach the udp session to it + */ + rv = stream_session_accept (&us->connection, s0->session_index, + SESSION_TYPE_IP4_UDP, + 1 /*notify */ ); + if (rv) + error0 = rv; + + } + else + { + + error0 = SESSION_ERROR_NOT_READY; + goto trace0; + } + + trace0: + b0->error = node->errors[error0]; + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + udp4_uri_input_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + + t->session = ~0; + if (s0) + t->session = s0 - smm->sessions[my_thread_index]; + t->disposition = error0; + t->thread_index = my_thread_index; + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + /* Send enqueue events */ + + session_indices_to_enqueue = + smm->session_indices_to_enqueue_by_thread[my_thread_index]; + + for (i = 0; i < vec_len (session_indices_to_enqueue); i++) + { + session_fifo_event_t evt; + unix_shared_memory_queue_t *q; + stream_session_t *s0; + application_t *server0; + + /* Get session */ + s0 = pool_elt_at_index (smm->sessions[my_thread_index], + session_indices_to_enqueue[i]); + + /* Get session's server */ + server0 = application_get (s0->app_index); + + /* Built-in server? Deliver the goods... 
*/ + if (server0->cb_fns.builtin_server_rx_callback) + { + server0->cb_fns.builtin_server_rx_callback (s0); + continue; + } + + /* Fabricate event */ + evt.fifo = s0->server_rx_fifo; + evt.event_type = FIFO_EVENT_SERVER_RX; + evt.event_id = serial_number++; + evt.enqueue_length = svm_fifo_max_dequeue (s0->server_rx_fifo); + + /* Add event to server's event queue */ + q = server0->event_queue; + + /* Don't block for lack of space */ + if (PREDICT_TRUE (q->cursize < q->maxsize)) + unix_shared_memory_queue_add (server0->event_queue, (u8 *) & evt, + 0 /* do wait for mutex */ ); + else + { + vlib_node_increment_counter (vm, udp4_uri_input_node.index, + SESSION_ERROR_FIFO_FULL, 1); + } + if (1) + { + ELOG_TYPE_DECLARE (e) = + { + .format = "evt-enqueue: id %d length %d",.format_args = "i4i4",}; + struct + { + u32 data[2]; + } *ed; + ed = ELOG_DATA (&vlib_global_main.elog_main, e); + ed->data[0] = evt.event_id; + ed->data[1] = evt.enqueue_length; + } + } + + vec_reset_length (session_indices_to_enqueue); + + smm->session_indices_to_enqueue_by_thread[my_thread_index] = + session_indices_to_enqueue; + + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (udp4_uri_input_node) = +{ + .function = udp4_uri_input_node_fn,.name = "udp4-uri-input",.vector_size = + sizeof (u32),.format_trace = format_udp4_uri_input_trace,.type = + VLIB_NODE_TYPE_INTERNAL,.n_errors = + ARRAY_LEN (udp4_uri_input_error_strings),.error_strings = + udp4_uri_input_error_strings,.n_next_nodes = UDP4_URI_INPUT_N_NEXT, + /* edit / add dispositions here */ + .next_nodes = + { + [UDP4_URI_INPUT_NEXT_DROP] = "error-drop",} +,}; + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/udp/udp_local.c b/src/vnet/udp/udp_local.c new file mode 100644 index 00000000..6b239f73 --- /dev/null +++ b/src/vnet/udp/udp_local.c @@ -0,0 +1,666 @@ +/* + * node.c: udp packet processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +udp_main_t udp_main; + +#define foreach_udp_input_next \ + _ (PUNT, "error-punt") \ + _ (DROP, "error-drop") \ + _ (ICMP4_ERROR, "ip4-icmp-error") \ + _ (ICMP6_ERROR, "ip6-icmp-error") + +typedef enum +{ +#define _(s,n) UDP_INPUT_NEXT_##s, + foreach_udp_input_next +#undef _ + UDP_INPUT_N_NEXT, +} udp_input_next_t; + +typedef struct +{ + u16 src_port; + u16 dst_port; + u8 bound; +} udp_rx_trace_t; + +u8 * +format_udp_rx_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + udp_rx_trace_t *t = va_arg (*args, udp_rx_trace_t *); + + s = format (s, "UDP: src-port %d dst-port %d%s", + clib_net_to_host_u16 (t->src_port), + clib_net_to_host_u16 (t->dst_port), + t->bound ? "" : " (no listener)"); + return s; +} + +typedef struct +{ + /* Sparse vector mapping udp dst_port in network byte order + to next index. */ + u16 *next_by_dst_port; + u8 punt_unknown; +} udp_input_runtime_t; + +vlib_node_registration_t udp4_input_node; +vlib_node_registration_t udp6_input_node; + +always_inline uword +udp46_input_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_ip4) +{ + udp_input_runtime_t *rt = is_ip4 ? 
+ (void *) vlib_node_get_runtime_data (vm, udp4_input_node.index) + : (void *) vlib_node_get_runtime_data (vm, udp6_input_node.index); + __attribute__ ((unused)) u32 n_left_from, next_index, *from, *to_next; + word n_no_listener = 0; + u8 punt_unknown = rt->punt_unknown; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t *b0, *b1; + udp_header_t *h0 = 0, *h1 = 0; + u32 i0, i1, dst_port0, dst_port1; + u32 advance0, advance1; + u32 error0, next0, error1, next1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t *p2, *p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, sizeof (h0[0]), LOAD); + CLIB_PREFETCH (p3->data, sizeof (h1[0]), LOAD); + } + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* ip4/6_local hands us the ip header, not the udp header */ + if (is_ip4) + { + advance0 = sizeof (ip4_header_t); + advance1 = sizeof (ip4_header_t); + } + else + { + advance0 = sizeof (ip6_header_t); + advance1 = sizeof (ip6_header_t); + } + + if (PREDICT_FALSE (b0->current_length < advance0 + sizeof (*h0))) + { + error0 = UDP_ERROR_LENGTH_ERROR; + next0 = UDP_INPUT_NEXT_DROP; + } + else + { + vlib_buffer_advance (b0, advance0); + h0 = vlib_buffer_get_current (b0); + error0 = next0 = 0; + if (PREDICT_FALSE (clib_net_to_host_u16 (h0->length) > + vlib_buffer_length_in_chain (vm, b0))) + { + error0 = UDP_ERROR_LENGTH_ERROR; + next0 = UDP_INPUT_NEXT_DROP; + } + } + + if 
(PREDICT_FALSE (b1->current_length < advance1 + sizeof (*h1))) + { + error1 = UDP_ERROR_LENGTH_ERROR; + next1 = UDP_INPUT_NEXT_DROP; + } + else + { + vlib_buffer_advance (b1, advance1); + h1 = vlib_buffer_get_current (b1); + error1 = next1 = 0; + if (PREDICT_FALSE (clib_net_to_host_u16 (h1->length) > + vlib_buffer_length_in_chain (vm, b1))) + { + error1 = UDP_ERROR_LENGTH_ERROR; + next1 = UDP_INPUT_NEXT_DROP; + } + } + + /* Index sparse array with network byte order. */ + dst_port0 = (error0 == 0) ? h0->dst_port : 0; + dst_port1 = (error1 == 0) ? h1->dst_port : 0; + sparse_vec_index2 (rt->next_by_dst_port, dst_port0, dst_port1, + &i0, &i1); + next0 = (error0 == 0) ? vec_elt (rt->next_by_dst_port, i0) : next0; + next1 = (error1 == 0) ? vec_elt (rt->next_by_dst_port, i1) : next1; + + if (PREDICT_FALSE (i0 == SPARSE_VEC_INVALID_INDEX)) + { + // move the pointer back so icmp-error can find the + // ip packet header + vlib_buffer_advance (b0, -(word) advance0); + + if (PREDICT_FALSE (punt_unknown)) + { + b0->error = node->errors[UDP_ERROR_PUNT]; + next0 = UDP_INPUT_NEXT_PUNT; + } + else if (is_ip4) + { + icmp4_error_set_vnet_buffer (b0, + ICMP4_destination_unreachable, + ICMP4_destination_unreachable_port_unreachable, + 0); + next0 = UDP_INPUT_NEXT_ICMP4_ERROR; + n_no_listener++; + } + else + { + icmp6_error_set_vnet_buffer (b0, + ICMP6_destination_unreachable, + ICMP6_destination_unreachable_port_unreachable, + 0); + next0 = UDP_INPUT_NEXT_ICMP6_ERROR; + n_no_listener++; + } + } + else + { + b0->error = node->errors[UDP_ERROR_NONE]; + // advance to the payload + vlib_buffer_advance (b0, sizeof (*h0)); + } + + if (PREDICT_FALSE (i1 == SPARSE_VEC_INVALID_INDEX)) + { + // move the pointer back so icmp-error can find the + // ip packet header + vlib_buffer_advance (b1, -(word) advance1); + + if (PREDICT_FALSE (punt_unknown)) + { + b1->error = node->errors[UDP_ERROR_PUNT]; + next1 = UDP_INPUT_NEXT_PUNT; + } + else if (is_ip4) + { + icmp4_error_set_vnet_buffer (b1, + 
ICMP4_destination_unreachable, + ICMP4_destination_unreachable_port_unreachable, + 0); + next1 = UDP_INPUT_NEXT_ICMP4_ERROR; + n_no_listener++; + } + else + { + icmp6_error_set_vnet_buffer (b1, + ICMP6_destination_unreachable, + ICMP6_destination_unreachable_port_unreachable, + 0); + next1 = UDP_INPUT_NEXT_ICMP6_ERROR; + n_no_listener++; + } + } + else + { + b1->error = node->errors[UDP_ERROR_NONE]; + // advance to the payload + vlib_buffer_advance (b1, sizeof (*h1)); + } + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + udp_rx_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + if (b0->error != node->errors[UDP_ERROR_LENGTH_ERROR]) + { + tr->src_port = h0 ? h0->src_port : 0; + tr->dst_port = h0 ? h0->dst_port : 0; + tr->bound = (next0 != UDP_INPUT_NEXT_ICMP4_ERROR && + next0 != UDP_INPUT_NEXT_ICMP6_ERROR); + } + } + if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) + { + udp_rx_trace_t *tr = vlib_add_trace (vm, node, + b1, sizeof (*tr)); + if (b1->error != node->errors[UDP_ERROR_LENGTH_ERROR]) + { + tr->src_port = h1 ? h1->src_port : 0; + tr->dst_port = h1 ? 
h1->dst_port : 0; + tr->bound = (next1 != UDP_INPUT_NEXT_ICMP4_ERROR && + next1 != UDP_INPUT_NEXT_ICMP6_ERROR); + } + } + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + udp_header_t *h0 = 0; + u32 i0, next0; + u32 advance0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + /* ip4/6_local hands us the ip header, not the udp header */ + if (is_ip4) + advance0 = sizeof (ip4_header_t); + else + advance0 = sizeof (ip6_header_t); + + if (PREDICT_FALSE (b0->current_length < advance0 + sizeof (*h0))) + { + b0->error = node->errors[UDP_ERROR_LENGTH_ERROR]; + next0 = UDP_INPUT_NEXT_DROP; + goto trace_x1; + } + + vlib_buffer_advance (b0, advance0); + + h0 = vlib_buffer_get_current (b0); + + if (PREDICT_TRUE (clib_net_to_host_u16 (h0->length) <= + vlib_buffer_length_in_chain (vm, b0))) + { + i0 = sparse_vec_index (rt->next_by_dst_port, h0->dst_port); + next0 = vec_elt (rt->next_by_dst_port, i0); + + if (PREDICT_FALSE (i0 == SPARSE_VEC_INVALID_INDEX)) + { + // move the pointer back so icmp-error can find the + // ip packet header + vlib_buffer_advance (b0, -(word) advance0); + + if (PREDICT_FALSE (punt_unknown)) + { + b0->error = node->errors[UDP_ERROR_PUNT]; + next0 = UDP_INPUT_NEXT_PUNT; + } + else if (is_ip4) + { + icmp4_error_set_vnet_buffer (b0, + ICMP4_destination_unreachable, + ICMP4_destination_unreachable_port_unreachable, + 0); + next0 = UDP_INPUT_NEXT_ICMP4_ERROR; + n_no_listener++; + } + else + { + icmp6_error_set_vnet_buffer (b0, + ICMP6_destination_unreachable, + ICMP6_destination_unreachable_port_unreachable, + 0); + next0 = UDP_INPUT_NEXT_ICMP6_ERROR; + n_no_listener++; + } + } + else + { + b0->error = node->errors[UDP_ERROR_NONE]; + // advance to the payload + vlib_buffer_advance (b0, sizeof (*h0)); + } + } + 
else + { + b0->error = node->errors[UDP_ERROR_LENGTH_ERROR]; + next0 = UDP_INPUT_NEXT_DROP; + } + + trace_x1: + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + udp_rx_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + if (b0->error != node->errors[UDP_ERROR_LENGTH_ERROR]) + { + tr->src_port = h0->src_port; + tr->dst_port = h0->dst_port; + tr->bound = (next0 != UDP_INPUT_NEXT_ICMP4_ERROR && + next0 != UDP_INPUT_NEXT_ICMP6_ERROR); + } + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_error_count (vm, node->node_index, UDP_ERROR_NO_LISTENER, + n_no_listener); + return from_frame->n_vectors; +} + +static char *udp_error_strings[] = { +#define udp_error(n,s) s, +#include "udp_error.def" +#undef udp_error +}; + +static uword +udp4_input (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * from_frame) +{ + return udp46_input_inline (vm, node, from_frame, 1 /* is_ip4 */ ); +} + +static uword +udp6_input (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * from_frame) +{ + return udp46_input_inline (vm, node, from_frame, 0 /* is_ip4 */ ); +} + + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (udp4_input_node) = { + .function = udp4_input, + .name = "ip4-udp-lookup", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + + .runtime_data_bytes = sizeof (udp_input_runtime_t), + + .n_errors = UDP_N_ERROR, + .error_strings = udp_error_strings, + + .n_next_nodes = UDP_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [UDP_INPUT_NEXT_##s] = n, + foreach_udp_input_next +#undef _ + }, + + .format_buffer = format_udp_header, + .format_trace = format_udp_rx_trace, + .unformat_buffer = unformat_udp_header, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (udp4_input_node, udp4_input); + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (udp6_input_node) = { + .function = udp6_input, + .name = "ip6-udp-lookup", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + + .runtime_data_bytes = sizeof (udp_input_runtime_t), + + .n_errors = UDP_N_ERROR, + .error_strings = udp_error_strings, + + .n_next_nodes = UDP_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [UDP_INPUT_NEXT_##s] = n, + foreach_udp_input_next +#undef _ + }, + + .format_buffer = format_udp_header, + .format_trace = format_udp_rx_trace, + .unformat_buffer = unformat_udp_header, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (udp6_input_node, udp6_input); + +static void +add_dst_port (udp_main_t * um, + udp_dst_port_t dst_port, char *dst_port_name, u8 is_ip4) +{ + udp_dst_port_info_t *pi; + u32 i; + + vec_add2 (um->dst_port_infos[is_ip4], pi, 1); + i = pi - um->dst_port_infos[is_ip4]; + + pi->name = dst_port_name; + pi->dst_port = dst_port; + pi->next_index = pi->node_index = ~0; + + hash_set (um->dst_port_info_by_dst_port[is_ip4], dst_port, i); + + if (pi->name) + hash_set_mem (um->dst_port_info_by_name[is_ip4], pi->name, i); +} + +void +udp_register_dst_port (vlib_main_t * vm, + udp_dst_port_t dst_port, u32 node_index, u8 is_ip4) +{ + udp_main_t *um = &udp_main; + udp_dst_port_info_t *pi; + udp_input_runtime_t *rt; + u16 *n; + + { + clib_error_t *error = vlib_call_init_function (vm, udp_local_init); + if (error) + clib_error_report (error); + } + + pi = udp_get_dst_port_info (um, 
dst_port, is_ip4); + if (!pi) + { + add_dst_port (um, dst_port, 0, is_ip4); + pi = udp_get_dst_port_info (um, dst_port, is_ip4); + ASSERT (pi); + } + + pi->node_index = node_index; + pi->next_index = vlib_node_add_next (vm, + is_ip4 ? udp4_input_node.index + : udp6_input_node.index, node_index); + + /* Setup udp protocol -> next index sparse vector mapping. */ + rt = vlib_node_get_runtime_data + (vm, is_ip4 ? udp4_input_node.index : udp6_input_node.index); + n = sparse_vec_validate (rt->next_by_dst_port, + clib_host_to_net_u16 (dst_port)); + n[0] = pi->next_index; +} + +void +udp_unregister_dst_port (vlib_main_t * vm, udp_dst_port_t dst_port, u8 is_ip4) +{ + udp_main_t *um = &udp_main; + udp_dst_port_info_t *pi; + udp_input_runtime_t *rt; + u16 *n; + + pi = udp_get_dst_port_info (um, dst_port, is_ip4); + /* Not registered? Fagedaboudit */ + if (!pi) + return; + + /* Kill the mapping. Don't bother killing the pi, it may be back. */ + rt = vlib_node_get_runtime_data + (vm, is_ip4 ? udp4_input_node.index : udp6_input_node.index); + n = sparse_vec_validate (rt->next_by_dst_port, + clib_host_to_net_u16 (dst_port)); + n[0] = SPARSE_VEC_INVALID_INDEX; +} + +void +udp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add) +{ + udp_input_runtime_t *rt; + + { + clib_error_t *error = vlib_call_init_function (vm, udp_local_init); + if (error) + clib_error_report (error); + } + + rt = vlib_node_get_runtime_data + (vm, is_ip4 ? udp4_input_node.index : udp6_input_node.index); + + rt->punt_unknown = is_add; +} + +/* Parse a UDP header. */ +uword +unformat_udp_header (unformat_input_t * input, va_list * args) +{ + u8 **result = va_arg (*args, u8 **); + udp_header_t *udp; + __attribute__ ((unused)) int old_length; + u16 src_port, dst_port; + + /* Allocate space for IP header. 
*/ + { + void *p; + + old_length = vec_len (*result); + vec_add2 (*result, p, sizeof (ip4_header_t)); + udp = p; + } + + memset (udp, 0, sizeof (udp[0])); + if (unformat (input, "src-port %d dst-port %d", &src_port, &dst_port)) + { + udp->src_port = clib_host_to_net_u16 (src_port); + udp->dst_port = clib_host_to_net_u16 (dst_port); + return 1; + } + return 0; +} + +static void +udp_setup_node (vlib_main_t * vm, u32 node_index) +{ + vlib_node_t *n = vlib_get_node (vm, node_index); + pg_node_t *pn = pg_get_node (node_index); + + n->format_buffer = format_udp_header; + n->unformat_buffer = unformat_udp_header; + pn->unformat_edit = unformat_pg_udp_header; +} + +clib_error_t * +udp_local_init (vlib_main_t * vm) +{ + udp_input_runtime_t *rt; + udp_main_t *um = &udp_main; + int i; + + { + clib_error_t *error; + error = vlib_call_init_function (vm, udp_init); + if (error) + clib_error_report (error); + } + + + for (i = 0; i < 2; i++) + { + um->dst_port_info_by_name[i] = hash_create_string (0, sizeof (uword)); + um->dst_port_info_by_dst_port[i] = hash_create (0, sizeof (uword)); + } + + udp_setup_node (vm, udp4_input_node.index); + udp_setup_node (vm, udp6_input_node.index); + + rt = vlib_node_get_runtime_data (vm, udp4_input_node.index); + + rt->next_by_dst_port = sparse_vec_new + ( /* elt bytes */ sizeof (rt->next_by_dst_port[0]), + /* bits in index */ BITS (((udp_header_t *) 0)->dst_port)); + + rt->punt_unknown = 0; + +#define _(n,s) add_dst_port (um, UDP_DST_PORT_##s, #s, 1 /* is_ip4 */); + foreach_udp4_dst_port +#undef _ + rt = vlib_node_get_runtime_data (vm, udp6_input_node.index); + + rt->next_by_dst_port = sparse_vec_new + ( /* elt bytes */ sizeof (rt->next_by_dst_port[0]), + /* bits in index */ BITS (((udp_header_t *) 0)->dst_port)); + + rt->punt_unknown = 0; + +#define _(n,s) add_dst_port (um, UDP_DST_PORT_##s, #s, 0 /* is_ip4 */); + foreach_udp6_dst_port +#undef _ + ip4_register_protocol (IP_PROTOCOL_UDP, udp4_input_node.index); + /* Note: ip6 differs from ip4, 
UDP is hotwired to ip6-udp-lookup */ + return 0; +} + +VLIB_INIT_FUNCTION (udp_local_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/udp/udp_packet.h b/src/vnet/udp/udp_packet.h new file mode 100644 index 00000000..beea3059 --- /dev/null +++ b/src/vnet/udp/udp_packet.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip4/udp_packet.h: UDP packet format + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_udp_packet_h +#define included_udp_packet_h + +typedef struct +{ + /* Source and destination port. */ + u16 src_port, dst_port; + + /* Length of UDP header plus payload. */ + u16 length; + + /* Checksum of UDP pseudo-header and data or + zero if checksum is disabled. */ + u16 checksum; +} udp_header_t; + +#endif /* included_udp_packet_h */ + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/udp/udp_pg.c b/src/vnet/udp/udp_pg.c new file mode 100644 index 00000000..c9d8d38c --- /dev/null +++ b/src/vnet/udp/udp_pg.c @@ -0,0 +1,237 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * ip/udp_pg: UDP packet-generator interface + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include /* for unformat_udp_udp_port */ + +#define UDP_PG_EDIT_LENGTH (1 << 0) +#define UDP_PG_EDIT_CHECKSUM (1 << 1) + +always_inline void +udp_pg_edit_function_inline (pg_main_t * pg, + pg_stream_t * s, + pg_edit_group_t * g, + u32 * packets, u32 n_packets, u32 flags) +{ + vlib_main_t *vm = vlib_get_main (); + u32 ip_offset, udp_offset; + + udp_offset = g->start_byte_offset; + ip_offset = (g - 1)->start_byte_offset; + + while (n_packets >= 1) + { + vlib_buffer_t *p0; + ip4_header_t *ip0; + udp_header_t *udp0; + u32 udp_len0; + + p0 = vlib_get_buffer (vm, packets[0]); + n_packets -= 1; + packets += 1; + + ip0 = (void *) (p0->data + ip_offset); + udp0 = (void *) (p0->data + udp_offset); + udp_len0 = clib_net_to_host_u16 (ip0->length) - sizeof (ip0[0]); + + if (flags & UDP_PG_EDIT_LENGTH) + udp0->length = + clib_net_to_host_u16 (vlib_buffer_length_in_chain (vm, p0) + - ip_offset); + + /* Initialize checksum with header. */ + if (flags & UDP_PG_EDIT_CHECKSUM) + { + ip_csum_t sum0; + + sum0 = clib_mem_unaligned (&ip0->src_address, u64); + + sum0 = ip_csum_with_carry + (sum0, clib_host_to_net_u32 (udp_len0 + (ip0->protocol << 16))); + + /* Invalidate possibly old checksum. */ + udp0->checksum = 0; + + sum0 = + ip_incremental_checksum_buffer (vm, p0, udp_offset, udp_len0, + sum0); + + sum0 = ~ip_csum_fold (sum0); + + /* Zero checksum means checksumming disabled. */ + sum0 = sum0 != 0 ? 
sum0 : 0xffff; + + udp0->checksum = sum0; + } + } +} + +static void +udp_pg_edit_function (pg_main_t * pg, + pg_stream_t * s, + pg_edit_group_t * g, u32 * packets, u32 n_packets) +{ + switch (g->edit_function_opaque) + { + case UDP_PG_EDIT_LENGTH: + udp_pg_edit_function_inline (pg, s, g, packets, n_packets, + UDP_PG_EDIT_LENGTH); + break; + + case UDP_PG_EDIT_CHECKSUM: + udp_pg_edit_function_inline (pg, s, g, packets, n_packets, + UDP_PG_EDIT_CHECKSUM); + break; + + case UDP_PG_EDIT_CHECKSUM | UDP_PG_EDIT_LENGTH: + udp_pg_edit_function_inline (pg, s, g, packets, n_packets, + UDP_PG_EDIT_CHECKSUM | UDP_PG_EDIT_LENGTH); + break; + + default: + ASSERT (0); + break; + } +} + +typedef struct +{ + pg_edit_t src_port, dst_port; + pg_edit_t length; + pg_edit_t checksum; +} pg_udp_header_t; + +static inline void +pg_udp_header_init (pg_udp_header_t * p) +{ + /* Initialize fields that are not bit fields in the IP header. */ +#define _(f) pg_edit_init (&p->f, udp_header_t, f); + _(src_port); + _(dst_port); + _(length); + _(checksum); +#undef _ +} + +uword +unformat_pg_udp_header (unformat_input_t * input, va_list * args) +{ + pg_stream_t *s = va_arg (*args, pg_stream_t *); + pg_udp_header_t *p; + u32 group_index; + + p = pg_create_edit_group (s, sizeof (p[0]), sizeof (udp_header_t), + &group_index); + pg_udp_header_init (p); + + /* Defaults. */ + p->checksum.type = PG_EDIT_UNSPECIFIED; + p->length.type = PG_EDIT_UNSPECIFIED; + + if (!unformat (input, "UDP: %U -> %U", + unformat_pg_edit, + unformat_tcp_udp_port, &p->src_port, + unformat_pg_edit, unformat_tcp_udp_port, &p->dst_port)) + goto error; + + /* Parse options. */ + while (1) + { + if (unformat (input, "length %U", + unformat_pg_edit, unformat_pg_number, &p->length)) + ; + + else if (unformat (input, "checksum %U", + unformat_pg_edit, unformat_pg_number, &p->checksum)) + ; + + /* Can't parse input: try next protocol level. 
*/ + else + break; + } + + { + ip_main_t *im = &ip_main; + u16 dst_port; + tcp_udp_port_info_t *pi; + + pi = 0; + if (p->dst_port.type == PG_EDIT_FIXED) + { + dst_port = pg_edit_get_value (&p->dst_port, PG_EDIT_LO); + pi = ip_get_tcp_udp_port_info (im, dst_port); + } + + if (pi && pi->unformat_pg_edit + && unformat_user (input, pi->unformat_pg_edit, s)) + ; + + else if (!unformat_user (input, unformat_pg_payload, s)) + goto error; + + p = pg_get_edit_group (s, group_index); + if (p->checksum.type == PG_EDIT_UNSPECIFIED + || p->length.type == PG_EDIT_UNSPECIFIED) + { + pg_edit_group_t *g = pg_stream_get_group (s, group_index); + g->edit_function = udp_pg_edit_function; + g->edit_function_opaque = 0; + if (p->checksum.type == PG_EDIT_UNSPECIFIED) + g->edit_function_opaque |= UDP_PG_EDIT_CHECKSUM; + if (p->length.type == PG_EDIT_UNSPECIFIED) + g->edit_function_opaque |= UDP_PG_EDIT_LENGTH; + } + + return 1; + } + +error: + /* Free up any edits we may have added. */ + pg_free_edit_group (s); + return 0; +} + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/vnet_all_api_h.h b/src/vnet/vnet_all_api_h.h index 142acedc..c4075db6 100644 --- a/src/vnet/vnet_all_api_h.h +++ b/src/vnet/vnet_all_api_h.h @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include diff --git a/src/vnet/vxlan-gpe/vxlan_gpe.h b/src/vnet/vxlan-gpe/vxlan_gpe.h index 1b4bc44e..e768d230 100644 --- a/src/vnet/vxlan-gpe/vxlan_gpe.h +++ b/src/vnet/vxlan-gpe/vxlan_gpe.h @@ -29,7 +29,7 @@ #include #include #include -#include +#include /** * @brief VXLAN GPE header struct diff --git a/src/vnet/vxlan/vxlan.h b/src/vnet/vxlan/vxlan.h index adfa3a8e..dca1cd12 100644 --- a/src/vnet/vxlan/vxlan.h +++ b/src/vnet/vxlan/vxlan.h @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/vpp/api/vpe.api b/src/vpp/api/vpe.api index 24f48293..2d6e4f37 100644 --- 
a/src/vpp/api/vpe.api +++ b/src/vpp/api/vpe.api @@ -38,6 +38,7 @@ * IPSEC-GRE APIs: see .../src/vnet/ipsec-gre/{ipsec_gre.api, ipsec_gre_api.c} * LISP APIs: see .../src/vnet/lisp/{lisp.api, lisp_api.c} * LISP-GPE APIs: see .../src/vnet/lisp-gpe/{lisp_gpe.api, lisp_gpe_api.c} + * SESSION APIs: .../vnet/session/{session.api session_api.c} * MPLS APIs: see .../src/vnet/mpls/{mpls.api, mpls_api.c} * SR APIs: see .../src/vnet/sr/{sr.api, sr_api.c} * DPDK APIs: see ... /src/vnet/devices/dpdk/{dpdk.api, dpdk_api.c} diff --git a/src/vppinfra.am b/src/vppinfra.am index 8d375958..4b9f0c29 100644 --- a/src/vppinfra.am +++ b/src/vppinfra.am @@ -157,7 +157,9 @@ nobase_include_HEADERS = \ vppinfra/asm_mips.h \ vppinfra/asm_x86.h \ vppinfra/bihash_8_8.h \ + vppinfra/bihash_16_8.h \ vppinfra/bihash_24_8.h \ + vppinfra/bihash_48_8.h \ vppinfra/bihash_template.h \ vppinfra/bihash_template.c \ vppinfra/bitmap.h \ @@ -206,6 +208,7 @@ nobase_include_HEADERS = \ vppinfra/timer.h \ vppinfra/tw_timer_2t_1w_2048sl.h \ vppinfra/tw_timer_16t_2w_512sl.h \ + vppinfra/tw_timer_16t_1w_2048sl.h \ vppinfra/tw_timer_template.h \ vppinfra/tw_timer_template.c \ vppinfra/types.h \ @@ -261,6 +264,8 @@ CLIB_CORE = \ vppinfra/tw_timer_2t_1w_2048sl.c \ vppinfra/tw_timer_16t_2w_512sl.h \ vppinfra/tw_timer_16t_2w_512sl.c \ + vppinfra/tw_timer_16t_1w_2048sl.h \ + vppinfra/tw_timer_16t_1w_2048sl.c \ vppinfra/unformat.c \ vppinfra/vec.c \ vppinfra/vector.c \ diff --git a/src/vppinfra/bihash_16_8.h b/src/vppinfra/bihash_16_8.h new file mode 100644 index 00000000..ce80f70e --- /dev/null +++ b/src/vppinfra/bihash_16_8.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#undef BIHASH_TYPE + +#define BIHASH_TYPE _16_8 +#define BIHASH_KVP_PER_PAGE 4 + +#ifndef __included_bihash_16_8_h__ +#define __included_bihash_16_8_h__ + +#include +#include +#include +#include + +typedef struct +{ + u64 key[2]; + u64 value; +} clib_bihash_kv_16_8_t; + +static inline int +clib_bihash_is_free_16_8 (clib_bihash_kv_16_8_t * v) +{ + /* Free values are memset to 0xff, check a bit... */ + if (v->key[0] == ~0ULL && v->value == ~0ULL) + return 1; + return 0; +} + +#if __SSE4_2__ +#ifndef __defined_crc_u32__ +#define __defined_crc_u32__ +static inline u32 +crc_u32 (u32 data, u32 value) +{ + __asm__ volatile ("crc32l %[data], %[value];":[value] "+r" (value):[data] + "rm" (data)); + return value; +} +#endif /* __defined_crc_u32__ */ + +static inline u64 +clib_bihash_hash_16_8 (clib_bihash_kv_16_8_t * v) +{ + u32 *dp = (u32 *) & v->key[0]; + u32 value = 0; + + value = crc_u32 (dp[0], value); + value = crc_u32 (dp[1], value); + value = crc_u32 (dp[2], value); + value = crc_u32 (dp[3], value); + + return value; +} +#else +static inline u64 +clib_bihash_hash_16_8 (clib_bihash_kv_16_8_t * v) +{ + u64 tmp = v->key[0] ^ v->key[1]; + return clib_xxhash (tmp); +} +#endif + +static inline u8 * +format_bihash_kvp_16_8 (u8 * s, va_list * args) +{ + clib_bihash_kv_16_8_t *v = va_arg (*args, clib_bihash_kv_16_8_t *); + + s = format (s, "key %llu %llu value %llu", v->key[0], v->key[1], v->value); + return s; +} + +static inline int +clib_bihash_key_compare_16_8 (u64 * a, u64 * b) +{ + return ((a[0] ^ b[0]) | (a[1] ^ b[1])) == 0; +} + +#undef 
__included_bihash_template_h__ +#include + +#endif /* __included_bihash_16_8_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vppinfra/bihash_48_8.h b/src/vppinfra/bihash_48_8.h new file mode 100644 index 00000000..1a6e7691 --- /dev/null +++ b/src/vppinfra/bihash_48_8.h @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#undef BIHASH_TYPE + +#define BIHASH_TYPE _48_8 +#define BIHASH_KVP_PER_PAGE 4 + +#ifndef __included_bihash_48_8_h__ +#define __included_bihash_48_8_h__ + +#include +#include +#include +#include + +typedef struct +{ + u64 key[6]; + u64 value; +} clib_bihash_kv_48_8_t; + +static inline int +clib_bihash_is_free_48_8 (const clib_bihash_kv_48_8_t * v) +{ + /* Free values are memset to 0xff, check a bit... 
*/ + if (v->key[0] == ~0ULL && v->value == ~0ULL) + return 1; + return 0; +} + +#if __SSE4_2__ +#ifndef __defined_crc_u32__ +#define __defined_crc_u32__ +static inline u32 +crc_u32 (u32 data, u32 value) +{ + __asm__ volatile ("crc32l %[data], %[value];":[value] "+r" (value):[data] + "rm" (data)); + return value; +} +#endif /* __defined_crc_u32__ */ + +static inline u64 +clib_bihash_hash_48_8 (const clib_bihash_kv_48_8_t * v) +{ + const u32 *dp = (const u32 *) &v->key[0]; + u32 value = 0; + + value = crc_u32 (dp[0], value); + value = crc_u32 (dp[1], value); + value = crc_u32 (dp[2], value); + value = crc_u32 (dp[3], value); + value = crc_u32 (dp[4], value); + value = crc_u32 (dp[5], value); + value = crc_u32 (dp[6], value); + value = crc_u32 (dp[7], value); + value = crc_u32 (dp[8], value); + value = crc_u32 (dp[9], value); + value = crc_u32 (dp[10], value); + value = crc_u32 (dp[11], value); + + return value; +} +#else +static inline u64 +clib_bihash_hash_48_8 (const clib_bihash_kv_48_8_t * v) +{ + u64 tmp = v->key[0] ^ v->key[1] ^ v->key[2] ^ v->key[3] ^ v->key[4] + ^ v->key[5]; + return clib_xxhash (tmp); +} +#endif + +static inline u8 * +format_bihash_kvp_48_8 (u8 * s, va_list * args) +{ + clib_bihash_kv_48_8_t *v = va_arg (*args, clib_bihash_kv_48_8_t *); + + s = format (s, "key %llu %llu %llu %llu %llu %llu value %llu", v->key[0], + v->key[1], v->key[2], v->key[3], v->key[4], v->key[5], + v->value); + return s; +} + +static inline int +clib_bihash_key_compare_48_8 (const u64 * a, const u64 * b) +{ + return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) + | (a[4] ^ b[4]) | (a[5] ^ b[5])) == 0; +} + +#undef __included_bihash_template_h__ +#include + +#endif /* __included_bihash_48_8_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vppinfra/tw_timer_16t_1w_2048sl.c b/src/vppinfra/tw_timer_16t_1w_2048sl.c new file mode 100644 index 00000000..3f342045 --- 
/dev/null +++ b/src/vppinfra/tw_timer_16t_1w_2048sl.c @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "tw_timer_16t_1w_2048sl.h" +#include "tw_timer_template.c" + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vppinfra/tw_timer_16t_1w_2048sl.h b/src/vppinfra/tw_timer_16t_1w_2048sl.h new file mode 100644 index 00000000..685ac31e --- /dev/null +++ b/src/vppinfra/tw_timer_16t_1w_2048sl.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __included_tw_timer_16t_2w_512sl_h__ +#define __included_tw_timer_16t_2w_512sl_h__ + +/* ... 
So that a client app can create multiple wheel geometries */ +#undef TW_TIMER_WHEELS +#undef TW_SLOTS_PER_RING +#undef TW_RING_SHIFT +#undef TW_RING_MASK +#undef TW_TIMERS_PER_OBJECT +#undef LOG2_TW_TIMERS_PER_OBJECT +#undef TW_SUFFIX + +#define TW_TIMER_WHEELS 1 +#define TW_SLOTS_PER_RING 2048 +#define TW_RING_SHIFT 11 +#define TW_RING_MASK (TW_SLOTS_PER_RING -1) +#define TW_TIMERS_PER_OBJECT 16 +#define LOG2_TW_TIMERS_PER_OBJECT 4 +#define TW_SUFFIX _16t_1w_2048sl + +#include + +#endif /* __included_tw_timer_16t_2w_512sl_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ -- cgit 1.2.3-korg From 738844871220f853629504f61c248f0c9402dc77 Mon Sep 17 00:00:00 2001 From: Klement Sekera Date: Thu, 23 Feb 2017 09:26:30 +0100 Subject: BFD: command line interface Implement command line interface to the BFD binary APIs. Add corresponding unit tests. Change-Id: Ia0542d0bc4c8d78e6f7b777a08fd94ebfe4d524f Signed-off-by: Klement Sekera --- src/vnet.am | 1 + src/vnet/api_errno.h | 1 - src/vnet/bfd/bfd.api | 28 +- src/vnet/bfd/bfd_api.c | 22 +- src/vnet/bfd/bfd_api.h | 1 - src/vnet/bfd/bfd_cli.c | 900 ++++++++++++++++++++++++++++++++++++++++++++ src/vnet/bfd/bfd_main.c | 154 ++++++-- src/vnet/bfd/bfd_main.h | 11 +- src/vnet/bfd/bfd_protocol.c | 6 + src/vnet/bfd/bfd_udp.c | 120 ++---- src/vnet/bfd/bfd_udp.h | 5 + test/bfd.py | 9 +- test/test_bfd.py | 579 +++++++++++++++++++++++++++- test/vpp_object.py | 1 + test/vpp_papi_provider.py | 12 +- 15 files changed, 1659 insertions(+), 191 deletions(-) create mode 100644 src/vnet/bfd/bfd_cli.c (limited to 'src/vnet/bfd') diff --git a/src/vnet.am b/src/vnet.am index 84930f05..d89d516e 100644 --- a/src/vnet.am +++ b/src/vnet.am @@ -369,6 +369,7 @@ libvnet_la_SOURCES += \ vnet/bfd/bfd_udp.c \ vnet/bfd/bfd_main.c \ vnet/bfd/bfd_protocol.c \ + vnet/bfd/bfd_cli.c \ vnet/bfd/bfd_api.c API_FILES += vnet/bfd/bfd.api diff --git a/src/vnet/api_errno.h b/src/vnet/api_errno.h 
index 861a5767..a5bcb377 100644 --- a/src/vnet/api_errno.h +++ b/src/vnet/api_errno.h @@ -51,7 +51,6 @@ _(ADDRESS_LENGTH_MISMATCH, -59, "Address length mismatch") \ _(ADDRESS_NOT_FOUND_FOR_INTERFACE, -60, "Address not found for interface") \ _(ADDRESS_NOT_LINK_LOCAL, -61, "Address not link-local") \ _(IP6_NOT_ENABLED, -62, "ip6 not enabled") \ -_(ADDRESS_MATCHES_INTERFACE_ADDRESS, -63, "Address matches interface address") \ _(IN_PROGRESS, 10, "Operation in progress") \ _(NO_SUCH_NODE, -63, "No such graph node") \ _(NO_SUCH_NODE2, -64, "No such graph node #2") \ diff --git a/src/vnet/bfd/bfd.api b/src/vnet/bfd/bfd.api index 93bf0fb9..2cdcfad3 100644 --- a/src/vnet/bfd/bfd.api +++ b/src/vnet/bfd/bfd.api @@ -45,7 +45,7 @@ define bfd_udp_del_echo_source u32 context; }; -/** \brief Delete BFD feature response +/** \brief Delete BFD echo source response @param context - sender context, to match reply w/ request @param retval - return code for the request */ @@ -55,32 +55,6 @@ define bfd_udp_del_echo_source_reply i32 retval; }; -/** \brief Get BFD configuration -*/ -define bfd_get_config -{ - u32 client_index; - u32 context; -}; - -/** \brief Get BFD configuration response - @param context - sender context, to match reply w/ request - @param retval - return code for the request - @param slow_timer - slow timer (seconds) - @param min_tx - desired min tx interval - @param min_rx - desired min rx interval - @param detect_mult - desired detection multiplier -*/ -define bfd_get_config_reply -{ - u32 client_index; - u32 context; - u32 slow_timer; - u32 min_tx; - u32 min_rx; - u8 detect_mult; -}; - /** \brief Add UDP BFD session on interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/bfd/bfd_api.c b/src/vnet/bfd/bfd_api.c index 6632eae4..e64df48e 100644 --- a/src/vnet/bfd/bfd_api.c +++ b/src/vnet/bfd/bfd_api.c @@ -154,6 +154,21 @@ send_bfd_udp_session_details 
(unix_shared_memory_queue_t * q, u32 context, bfd_udp_key_t *key = &bus->key; mp->sw_if_index = clib_host_to_net_u32 (key->sw_if_index); mp->is_ipv6 = !(ip46_address_is_ip4 (&key->local_addr)); + if ((!bs->auth.is_delayed && bs->auth.curr_key) || + (bs->auth.is_delayed && bs->auth.next_key)) + { + mp->is_authenticated = 1; + } + if (bs->auth.is_delayed && bs->auth.next_key) + { + mp->bfd_key_id = bs->auth.next_bfd_key_id; + mp->conf_key_id = clib_host_to_net_u32 (bs->auth.next_key->conf_key_id); + } + else if (!bs->auth.is_delayed && bs->auth.curr_key) + { + mp->bfd_key_id = bs->auth.curr_bfd_key_id; + mp->conf_key_id = clib_host_to_net_u32 (bs->auth.curr_key->conf_key_id); + } if (mp->is_ipv6) { clib_memcpy (mp->local_addr, &key->local_addr, @@ -289,10 +304,9 @@ vl_api_bfd_udp_auth_activate_t_handler (vl_api_bfd_udp_auth_activate_t * mp) BFD_UDP_API_PARAM_COMMON_CODE; - rv = - bfd_udp_auth_activate (BFD_UDP_API_PARAM_FROM_MP (mp), - clib_net_to_host_u32 (mp->conf_key_id), - mp->bfd_key_id, mp->is_delayed); + rv = bfd_udp_auth_activate (BFD_UDP_API_PARAM_FROM_MP (mp), + clib_net_to_host_u32 (mp->conf_key_id), + mp->bfd_key_id, mp->is_delayed); BAD_SW_IF_INDEX_LABEL; REPLY_MACRO (VL_API_BFD_UDP_AUTH_ACTIVATE_REPLY); diff --git a/src/vnet/bfd/bfd_api.h b/src/vnet/bfd/bfd_api.h index 63d4a62e..35ad3cda 100644 --- a/src/vnet/bfd/bfd_api.h +++ b/src/vnet/bfd/bfd_api.h @@ -22,7 +22,6 @@ #include #include #include -#include #define foreach_bfd_transport(F) \ F (UDP4, "ip4-rewrite") \ diff --git a/src/vnet/bfd/bfd_cli.c b/src/vnet/bfd/bfd_cli.c new file mode 100644 index 00000000..a3736d98 --- /dev/null +++ b/src/vnet/bfd/bfd_cli.c @@ -0,0 +1,900 @@ +/* + * Copyright (c) 2011-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @file + * @brief BFD CLI implementation + */ + +#include +#include +#include +#include +#include +#include +#include + +static u8 * +format_bfd_session_cli (u8 * s, va_list * args) +{ + bfd_main_t *bm = va_arg (*args, bfd_main_t *); + bfd_session_t *bs = va_arg (*args, bfd_session_t *); + switch (bs->transport) + { + case BFD_TRANSPORT_UDP4: + s = format (s, "%=10u %-32s %20U %20U\n", bs->bs_idx, "IPv4 address", + format_ip4_address, bs->udp.key.local_addr.ip4.as_u8, + format_ip4_address, bs->udp.key.peer_addr.ip4.as_u8); + break; + case BFD_TRANSPORT_UDP6: + s = format (s, "%=10u %-32s %20U %20U\n", bs->bs_idx, "IPv6 address", + format_ip6_address, &bs->udp.key.local_addr.ip6, + format_ip6_address, &bs->udp.key.peer_addr.ip6); + break; + } + s = format (s, "%10s %-32s %20s %20s\n", "", "Session state", + bfd_state_string (bs->local_state), + bfd_state_string (bs->remote_state)); + s = format (s, "%10s %-32s %20s %20s\n", "", "Diagnostic code", + bfd_diag_code_string (bs->local_diag), + bfd_diag_code_string (bs->remote_diag)); + s = format (s, "%10s %-32s %20u %20u\n", "", "Detect multiplier", + bs->local_detect_mult, bs->remote_detect_mult); + s = format (s, "%10s %-32s %20u %20u\n", "", + "Required Min Rx Interval (usec)", + bs->config_required_min_rx_usec, bs->remote_min_rx_usec); + s = format (s, "%10s %-32s %20u %20u\n", "", + "Desired Min Tx Interval (usec)", + bs->config_desired_min_tx_usec, bfd_clocks_to_usec (bm, + bs->remote_desired_min_tx_clocks)); + s = + format (s, "%10s %-32s %20u\n", "", "Transmit interval", + 
bfd_clocks_to_usec (bm, bs->transmit_interval_clocks)); + s = + format (s, "%10s %-32s %20s %20s\n", "", "Demand mode", "no", + bs->remote_demand ? "yes" : "no"); + s = + format (s, "%10s %-32s %20s\n", "", "Poll state", + bfd_poll_state_string (bs->poll_state)); + if (bs->auth.curr_key) + { + s = format (s, "%10s %-32s %20u\n", "", "Authentication config key ID", + bs->auth.curr_key->conf_key_id); + s = format (s, "%10s %-32s %20u\n", "", "Authentication BFD key ID", + bs->auth.curr_bfd_key_id); + } + return s; +} + +static clib_error_t * +show_bfd (vlib_main_t * vm, unformat_input_t * input, + CLIB_UNUSED (vlib_cli_command_t * lmd)) +{ + bfd_main_t *bm = &bfd_main; + bfd_session_t *bs = NULL; + + if (unformat (input, "keys")) + { + bfd_auth_key_t *key = NULL; + u8 *s = format (NULL, "%=10s %=25s %=10s\n", "Configuration Key ID", + "Type", "Use Count"); + /* *INDENT-OFF* */ + pool_foreach (key, bm->auth_keys, { + s = format (s, "%10u %-25s %10u\n", key->conf_key_id, + bfd_auth_type_str (key->auth_type), key->use_count); + }); + /* *INDENT-ON* */ + vlib_cli_output (vm, "%v\n", s); + vlib_cli_output (vm, "Number of configured BFD keys: %lu\n", + (u64) pool_elts (bm->auth_keys)); + } + else if (unformat (input, "sessions")) + { + u8 *s = format (NULL, "%=10s %=32s %=20s %=20s\n", "Index", "Property", + "Local value", "Remote value"); + /* *INDENT-OFF* */ + pool_foreach (bs, bm->sessions, + { s = format (s, "%U", format_bfd_session_cli, bm, bs); }); + /* *INDENT-ON* */ + vlib_cli_output (vm, "%v", s); + vec_free (s); + vlib_cli_output (vm, "Number of configured BFD sessions: %lu\n", + (u64) pool_elts (bm->sessions)); + } + else if (unformat (input, "echo-source")) + { + int is_set; + u32 sw_if_index; + int have_usable_ip4; + ip4_address_t ip4; + int have_usable_ip6; + ip6_address_t ip6; + bfd_udp_get_echo_source (&is_set, &sw_if_index, &have_usable_ip4, &ip4, + &have_usable_ip6, &ip6); + if (is_set) + { + vnet_sw_interface_t *sw_if = + vnet_get_sw_interface_safe 
(&vnet_main, sw_if_index); + vnet_hw_interface_t *hw_if = + vnet_get_hw_interface (&vnet_main, sw_if->hw_if_index); + u8 *s = format (NULL, "UDP echo source is: %v\n", hw_if->name); + s = format (s, "IPv4 address usable as echo source: "); + if (have_usable_ip4) + { + s = format (s, "%U\n", format_ip4_address, &ip4); + } + else + { + s = format (s, "none\n"); + } + s = format (s, "IPv6 address usable as echo source: "); + if (have_usable_ip6) + { + s = format (s, "%U\n", format_ip6_address, &ip6); + } + else + { + s = format (s, "none\n"); + } + vlib_cli_output (vm, "%v", s); + vec_free (s); + } + else + { + vlib_cli_output (vm, "UDP echo source is not set.\n"); + } + } + else + { + vlib_cli_output (vm, "Number of configured BFD sessions: %lu\n", + (u64) pool_elts (bm->sessions)); + vlib_cli_output (vm, "Number of configured BFD keys: %lu\n", + (u64) pool_elts (bm->auth_keys)); + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_bfd_command, static) = { + .path = "show bfd", + .short_help = "show bfd [keys|sessions|echo-source]", + .function = show_bfd, +}; +/* *INDENT-ON* */ + +static u8 * +format_vnet_api_errno (u8 * s, va_list * args) +{ + vnet_api_error_t api_error = va_arg (*args, vnet_api_error_t); +#define _(a, b, c) \ + case b: \ + s = format (s, "%s", c); \ + break; + switch (api_error) + { + foreach_vnet_api_error default:s = format (s, "UNKNOWN"); + break; + } + return s; +} + +static clib_error_t * +bfd_cli_key_add (vlib_main_t * vm, unformat_input_t * input, + CLIB_UNUSED (vlib_cli_command_t * lmd)) +{ + clib_error_t *ret = NULL; + int have_key_id = 0; + u32 key_id = 0; + u8 *vec_auth_type = NULL; + bfd_auth_type_e auth_type = BFD_AUTH_TYPE_reserved; + u8 *secret = NULL; + static const u8 keyed_sha1[] = "keyed-sha1"; + static const u8 meticulous_keyed_sha1[] = "meticulous-keyed-sha1"; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "conf-key-id %u", &key_id)) + { + have_key_id = 1; + } + else if 
(unformat (input, "type %U", unformat_token, "a-zA-Z0-9-", + &vec_auth_type)) + { + if (vec_len (vec_auth_type) == sizeof (keyed_sha1) - 1 && + 0 == memcmp (vec_auth_type, keyed_sha1, + sizeof (keyed_sha1) - 1)) + { + auth_type = BFD_AUTH_TYPE_keyed_sha1; + } + else if (vec_len (vec_auth_type) == + sizeof (meticulous_keyed_sha1) - 1 && + 0 == memcmp (vec_auth_type, meticulous_keyed_sha1, + sizeof (meticulous_keyed_sha1) - 1)) + { + auth_type = BFD_AUTH_TYPE_meticulous_keyed_sha1; + } + else + { + ret = clib_error_return (0, "invalid type `%v'", vec_auth_type); + goto out; + } + } + else if (unformat (input, "secret %U", unformat_hex_string, &secret)) + { + /* nothing to do here */ + } + else + { + ret = clib_error_return (0, "Unknown input `%U'", + format_unformat_error, input); + goto out; + } + } + + if (!have_key_id) + { + ret = + clib_error_return (0, "required parameter missing: `conf-key-id'"); + goto out; + } + if (!vec_auth_type) + { + ret = clib_error_return (0, "required parameter missing: `type'"); + goto out; + } + if (!secret) + { + ret = clib_error_return (0, "required parameter missing: `secret'"); + goto out; + } + + vnet_api_error_t rv = + bfd_auth_set_key (key_id, auth_type, vec_len (secret), secret); + if (rv) + { + ret = + clib_error_return (0, "`bfd_auth_set_key' API call failed, rv=%d:%U", + (int) rv, format_vnet_api_errno, rv); + } + +out: + vec_free (vec_auth_type); + return ret; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (bfd_cli_key_add_command, static) = { + .path = "bfd key set", + .short_help = "bfd key set" + " conf-key-id " + " type " + " secret ", + .function = bfd_cli_key_add, +}; +/* *INDENT-ON* */ + +static clib_error_t * +bfd_cli_key_del (vlib_main_t * vm, unformat_input_t * input, + CLIB_UNUSED (vlib_cli_command_t * lmd)) +{ + clib_error_t *ret = NULL; + u32 key_id = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (!unformat (input, "conf-key-id %u", &key_id)) + { + ret = clib_error_return (0, 
"Unknown input `%U'", + format_unformat_error, input); + goto out; + } + } + + vnet_api_error_t rv = bfd_auth_del_key (key_id); + if (rv) + { + ret = + clib_error_return (0, "`bfd_auth_del_key' API call failed, rv=%d:%U", + (int) rv, format_vnet_api_errno, rv); + } + +out: + return ret; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (bfd_cli_key_del_command, static) = { + .path = "bfd key del", + .short_help = "bfd key del conf-key-id ", + .function = bfd_cli_key_del, +}; +/* *INDENT-ON* */ + +#define INTERFACE_STR "interface" +#define LOCAL_ADDR_STR "local-addr" +#define PEER_ADDR_STR "peer-addr" +#define CONF_KEY_ID_STR "conf-key-id" +#define BFD_KEY_ID_STR "bfd-key-id" +#define DESIRED_MIN_TX_STR "desired-min-tx" +#define REQUIRED_MIN_RX_STR "required-min-rx" +#define DETECT_MULT_STR "detect-mult" +#define ADMIN_STR "admin" +#define DELAYED_STR "delayed" + +static const unsigned mandatory = 1; +static const unsigned optional = 0; + +#define DECLARE(t, n, s, r, ...) \ + int have_##n = 0; \ + t n; + +#define UNFORMAT(t, n, s, r, ...) \ + if (unformat (input, s " " __VA_ARGS__, &n)) \ + { \ + something_parsed = 1; \ + have_##n = 1; \ + } + +#define CHECK_MANDATORY(t, n, s, r, ...) 
\ + if (mandatory == r && !have_##n) \ + { \ + ret = clib_error_return (0, "Required parameter `%s' missing.", n); \ + goto out; \ + } + +static clib_error_t * +bfd_cli_udp_session_add (vlib_main_t * vm, unformat_input_t * input, + CLIB_UNUSED (vlib_cli_command_t * lmd)) +{ + clib_error_t *ret = NULL; +#define foreach_bfd_cli_udp_session_add_cli_param(F) \ + F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \ + unformat_vnet_sw_interface, &vnet_main) \ + F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \ + unformat_ip46_address) \ + F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \ + unformat_ip46_address) \ + F (u32, desired_min_tx, DESIRED_MIN_TX_STR, mandatory, "%u") \ + F (u32, required_min_rx, REQUIRED_MIN_RX_STR, mandatory, "%u") \ + F (u32, detect_mult, DETECT_MULT_STR, mandatory, "%u") \ + F (u32, conf_key_id, CONF_KEY_ID_STR, optional, "%u") \ + F (u32, bfd_key_id, BFD_KEY_ID_STR, optional, "%u") + + foreach_bfd_cli_udp_session_add_cli_param (DECLARE); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + int something_parsed = 0; + foreach_bfd_cli_udp_session_add_cli_param (UNFORMAT); + + if (!something_parsed) + { + ret = clib_error_return (0, "Unknown input `%U'", + format_unformat_error, input); + goto out; + } + } + + foreach_bfd_cli_udp_session_add_cli_param (CHECK_MANDATORY); + + if (1 == have_conf_key_id + have_bfd_key_id) + { + ret = clib_error_return (0, "Incompatible parameter combination, `%s' " + "and `%s' must be either both specified or none", + CONF_KEY_ID_STR, BFD_KEY_ID_STR); + goto out; + } + + if (detect_mult > 255) + { + ret = clib_error_return (0, "%s value `%u' out of range <1,255>", + DETECT_MULT_STR, detect_mult); + goto out; + } + + if (have_bfd_key_id && bfd_key_id > 255) + { + ret = clib_error_return (0, "%s value `%u' out of range <1,255>", + BFD_KEY_ID_STR, bfd_key_id); + goto out; + } + + vnet_api_error_t rv = + bfd_udp_add_session (sw_if_index, &local_addr, &peer_addr, 
desired_min_tx, + required_min_rx, + detect_mult, have_conf_key_id, conf_key_id, + bfd_key_id); + if (rv) + { + ret = + clib_error_return (0, + "`bfd_add_add_session' API call failed, rv=%d:%U", + (int) rv, format_vnet_api_errno, rv); + goto out; + } + +out: + return ret; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (bfd_cli_udp_session_add_command, static) = { + .path = "bfd udp session add", + .short_help = "bfd udp session add" + " interface " + " local-addr " + " peer-addr " + " desired-min-tx " + " required-min-rx " + " detect-mult " + "[" + " conf-key-id " + " bfd-key-id " + "]", + .function = bfd_cli_udp_session_add, +}; +/* *INDENT-ON* */ + +static clib_error_t * +bfd_cli_udp_session_mod (vlib_main_t * vm, unformat_input_t * input, + CLIB_UNUSED (vlib_cli_command_t * lmd)) +{ + clib_error_t *ret = NULL; +#define foreach_bfd_cli_udp_session_mod_cli_param(F) \ + F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \ + unformat_vnet_sw_interface, &vnet_main) \ + F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \ + unformat_ip46_address) \ + F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \ + unformat_ip46_address) \ + F (u32, desired_min_tx, DESIRED_MIN_TX_STR, mandatory, "%u") \ + F (u32, required_min_rx, REQUIRED_MIN_RX_STR, mandatory, "%u") \ + F (u32, detect_mult, DETECT_MULT_STR, mandatory, "%u") + + foreach_bfd_cli_udp_session_mod_cli_param (DECLARE); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + int something_parsed = 0; + foreach_bfd_cli_udp_session_mod_cli_param (UNFORMAT); + + if (!something_parsed) + { + ret = clib_error_return (0, "Unknown input `%U'", + format_unformat_error, input); + goto out; + } + } + + foreach_bfd_cli_udp_session_mod_cli_param (CHECK_MANDATORY); + + if (detect_mult > 255) + { + ret = clib_error_return (0, "%s value `%u' out of range <1,255>", + DETECT_MULT_STR, detect_mult); + goto out; + } + + vnet_api_error_t rv = + bfd_udp_mod_session (sw_if_index, &local_addr, 
&peer_addr, + desired_min_tx, required_min_rx, detect_mult); + if (rv) + { + ret = + clib_error_return (0, + "`bfd_udp_mod_session' API call failed, rv=%d:%U", + (int) rv, format_vnet_api_errno, rv); + goto out; + } + +out: + return ret; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (bfd_cli_udp_session_mod_command, static) = { + .path = "bfd udp session mod", + .short_help = "bfd udp session mod interface" + " local-addr" + " peer-addr" + " desired-min-tx" + " required-min-rx" + " detect-mult" + " ", + .function = bfd_cli_udp_session_mod, +}; +/* *INDENT-ON* */ + +static clib_error_t * +bfd_cli_udp_session_del (vlib_main_t * vm, unformat_input_t * input, + CLIB_UNUSED (vlib_cli_command_t * lmd)) +{ + clib_error_t *ret = NULL; +#define foreach_bfd_cli_udp_session_del_cli_param(F) \ + F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \ + unformat_vnet_sw_interface, &vnet_main) \ + F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \ + unformat_ip46_address) \ + F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \ + unformat_ip46_address) + + foreach_bfd_cli_udp_session_del_cli_param (DECLARE); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + int something_parsed = 0; + foreach_bfd_cli_udp_session_del_cli_param (UNFORMAT); + + if (!something_parsed) + { + ret = clib_error_return (0, "Unknown input `%U'", + format_unformat_error, input); + goto out; + } + } + + foreach_bfd_cli_udp_session_del_cli_param (CHECK_MANDATORY); + + vnet_api_error_t rv = + bfd_udp_del_session (sw_if_index, &local_addr, &peer_addr); + if (rv) + { + ret = + clib_error_return (0, + "`bfd_udp_del_session' API call failed, rv=%d:%U", + (int) rv, format_vnet_api_errno, rv); + goto out; + } + +out: + return ret; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (bfd_cli_udp_session_del_command, static) = { + .path = "bfd udp session del", + .short_help = "bfd udp session del interface" + " local-addr" + " peer-addr" + " ", + .function = 
bfd_cli_udp_session_del, +}; +/* *INDENT-ON* */ + +static clib_error_t * +bfd_cli_udp_session_set_flags (vlib_main_t * vm, unformat_input_t * input, + CLIB_UNUSED (vlib_cli_command_t * lmd)) +{ + clib_error_t *ret = NULL; +#define foreach_bfd_cli_udp_session_set_flags_cli_param(F) \ + F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \ + unformat_vnet_sw_interface, &vnet_main) \ + F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \ + unformat_ip46_address) \ + F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \ + unformat_ip46_address) \ + F (u8 *, admin_up_down_token, ADMIN_STR, mandatory, "%v", \ + &admin_up_down_token) + + foreach_bfd_cli_udp_session_set_flags_cli_param (DECLARE); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + int something_parsed = 0; + foreach_bfd_cli_udp_session_set_flags_cli_param (UNFORMAT); + + if (!something_parsed) + { + ret = clib_error_return (0, "Unknown input `%U'", + format_unformat_error, input); + goto out; + } + } + + foreach_bfd_cli_udp_session_set_flags_cli_param (CHECK_MANDATORY); + + u8 admin_up_down; + static const char up[] = "up"; + static const char down[] = "down"; + if (!memcmp (admin_up_down_token, up, sizeof (up) - 1)) + { + admin_up_down = 1; + } + else if (!memcmp (admin_up_down_token, down, sizeof (down) - 1)) + { + admin_up_down = 0; + } + else + { + ret = + clib_error_return (0, "Unrecognized value for `%s' parameter: `%v'", + ADMIN_STR, admin_up_down_token); + goto out; + } + vnet_api_error_t rv = bfd_udp_session_set_flags (sw_if_index, &local_addr, + &peer_addr, admin_up_down); + if (rv) + { + ret = + clib_error_return (0, + "`bfd_udp_session_set_flags' API call failed, rv=%d:%U", + (int) rv, format_vnet_api_errno, rv); + goto out; + } + +out: + return ret; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (bfd_cli_udp_session_set_flags_command, static) = { + .path = "bfd udp session set-flags", + .short_help = "bfd udp session set-flags" + " interface " + " 
local-addr " + " peer-addr " + " admin ", + .function = bfd_cli_udp_session_set_flags, +}; +/* *INDENT-ON* */ + +static clib_error_t * +bfd_cli_udp_session_auth_activate (vlib_main_t * vm, unformat_input_t * input, + CLIB_UNUSED (vlib_cli_command_t * lmd)) +{ + clib_error_t *ret = NULL; +#define foreach_bfd_cli_udp_session_auth_activate_cli_param(F) \ + F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \ + unformat_vnet_sw_interface, &vnet_main) \ + F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \ + unformat_ip46_address) \ + F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \ + unformat_ip46_address) \ + F (u8 *, delayed_token, DELAYED_STR, optional, "%v") \ + F (u32, conf_key_id, CONF_KEY_ID_STR, mandatory, "%u") \ + F (u32, bfd_key_id, BFD_KEY_ID_STR, mandatory, "%u") + + foreach_bfd_cli_udp_session_auth_activate_cli_param (DECLARE); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + int something_parsed = 0; + foreach_bfd_cli_udp_session_auth_activate_cli_param (UNFORMAT); + + if (!something_parsed) + { + ret = clib_error_return (0, "Unknown input `%U'", + format_unformat_error, input); + goto out; + } + } + + foreach_bfd_cli_udp_session_auth_activate_cli_param (CHECK_MANDATORY); + + u8 is_delayed = 0; + if (have_delayed_token) + { + static const char yes[] = "yes"; + static const char no[] = "no"; + if (!memcmp (delayed_token, yes, sizeof (yes) - 1)) + { + is_delayed = 1; + } + else if (!memcmp (delayed_token, no, sizeof (no) - 1)) + { + is_delayed = 0; + } + else + { + ret = + clib_error_return (0, + "Unrecognized value for `%s' parameter: `%v'", + DELAYED_STR, delayed_token); + goto out; + } + } + + if (have_bfd_key_id && bfd_key_id > 255) + { + ret = clib_error_return (0, "%s value `%u' out of range <1,255>", + BFD_KEY_ID_STR, bfd_key_id); + goto out; + } + + vnet_api_error_t rv = + bfd_udp_auth_activate (sw_if_index, &local_addr, &peer_addr, conf_key_id, + bfd_key_id, is_delayed); + if (rv) + { + ret = + 
clib_error_return (0, + "`bfd_udp_auth_activate' API call failed, rv=%d:%U", + (int) rv, format_vnet_api_errno, rv); + goto out; + } + +out: + return ret; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (bfd_cli_udp_session_auth_activate_command, static) = { + .path = "bfd udp session auth activate", + .short_help = "bfd udp session auth activate" + " interface " + " local-addr " + " peer-addr " + " conf-key-id " + " bfd-key-id " + " [ delayed ]", + .function = bfd_cli_udp_session_auth_activate, +}; + +static clib_error_t * +bfd_cli_udp_session_auth_deactivate (vlib_main_t *vm, unformat_input_t *input, + CLIB_UNUSED (vlib_cli_command_t *lmd)) +{ + clib_error_t *ret = NULL; +#define foreach_bfd_cli_udp_session_auth_deactivate_cli_param(F) \ + F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \ + unformat_vnet_sw_interface, &vnet_main) \ + F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \ + unformat_ip46_address) \ + F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \ + unformat_ip46_address) \ + F (u8 *, delayed_token, DELAYED_STR, optional, "%v") + + foreach_bfd_cli_udp_session_auth_deactivate_cli_param (DECLARE); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + int something_parsed = 0; + foreach_bfd_cli_udp_session_auth_deactivate_cli_param (UNFORMAT); + + if (!something_parsed) + { + ret = clib_error_return (0, "Unknown input `%U'", + format_unformat_error, input); + goto out; + } + } + + foreach_bfd_cli_udp_session_auth_deactivate_cli_param (CHECK_MANDATORY); + + u8 is_delayed = 0; + if (have_delayed_token) + { + static const char yes[] = "yes"; + static const char no[] = "no"; + if (!memcmp (delayed_token, yes, sizeof (yes) - 1)) + { + is_delayed = 1; + } + else if (!memcmp (delayed_token, no, sizeof (no) - 1)) + { + is_delayed = 0; + } + else + { + ret = clib_error_return ( + 0, "Unrecognized value for `%s' parameter: `%v'", DELAYED_STR, + delayed_token); + goto out; + } + } + + vnet_api_error_t rv = 
bfd_udp_auth_deactivate (sw_if_index, &local_addr, + &peer_addr, is_delayed); + if (rv) + { + ret = clib_error_return ( + 0, "`bfd_udp_auth_deactivate' API call failed, rv=%d:%U", (int)rv, + format_vnet_api_errno, rv); + goto out; + } + +out: + return ret; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (bfd_cli_udp_session_auth_deactivate_command, static) = { + .path = "bfd udp session auth deactivate", + .short_help = "bfd udp session auth deactivate" + " interface " + " local-addr " + " peer-addr " + "[ delayed ]", + .function = bfd_cli_udp_session_auth_deactivate, +}; +/* *INDENT-ON* */ + +static clib_error_t * +bfd_cli_udp_set_echo_source (vlib_main_t * vm, unformat_input_t * input, + CLIB_UNUSED (vlib_cli_command_t * lmd)) +{ + clib_error_t *ret = NULL; +#define foreach_bfd_cli_udp_set_echo_source_cli_param(F) \ + F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \ + unformat_vnet_sw_interface, &vnet_main) + + foreach_bfd_cli_udp_set_echo_source_cli_param (DECLARE); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + int something_parsed = 0; + foreach_bfd_cli_udp_set_echo_source_cli_param (UNFORMAT); + + if (!something_parsed) + { + ret = clib_error_return (0, "Unknown input `%U'", + format_unformat_error, input); + goto out; + } + } + + foreach_bfd_cli_udp_set_echo_source_cli_param (CHECK_MANDATORY); + + vnet_api_error_t rv = bfd_udp_set_echo_source (sw_if_index); + if (rv) + { + ret = + clib_error_return (0, + "`bfd_udp_set_echo_source' API call failed, rv=%d:%U", + (int) rv, format_vnet_api_errno, rv); + goto out; + } + +out: + return ret; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (bfd_cli_udp_set_echo_source_cmd, static) = { + .path = "bfd udp echo-source set", + .short_help = "bfd udp echo-source set interface ", + .function = bfd_cli_udp_set_echo_source, +}; +/* *INDENT-ON* */ + +static clib_error_t * +bfd_cli_udp_del_echo_source (vlib_main_t * vm, unformat_input_t * input, + CLIB_UNUSED (vlib_cli_command_t * lmd)) +{ + 
vnet_api_error_t rv = bfd_udp_del_echo_source (); + if (rv) + { + return clib_error_return (0, + "`bfd_udp_del_echo_source' API call failed, rv=%d:%U", + (int) rv, format_vnet_api_errno, rv); + } + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (bfd_cli_udp_del_echo_source_cmd, static) = { + .path = "bfd udp echo-source del", + .short_help = "bfd udp echo-source del", + .function = bfd_cli_udp_del_echo_source, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/bfd/bfd_main.c b/src/vnet/bfd/bfd_main.c index 29c40458..01de6375 100644 --- a/src/vnet/bfd/bfd_main.c +++ b/src/vnet/bfd/bfd_main.c @@ -55,7 +55,7 @@ bfd_usec_to_clocks (const bfd_main_t * bm, u64 us) return bm->cpu_cps * ((f64) us / USEC_PER_SECOND); } -static u32 +u32 bfd_clocks_to_usec (const bfd_main_t * bm, u64 clocks) { return (clocks / bm->cpu_cps) * USEC_PER_SECOND; @@ -70,7 +70,7 @@ static u32 bfd_node_index_by_transport[] = { #undef F }; -static u8 * +u8 * format_bfd_auth_key (u8 * s, va_list * args) { const bfd_auth_key_t *key = va_arg (*args, bfd_auth_key_t *); @@ -147,7 +147,7 @@ bfd_set_state (bfd_main_t * bm, bfd_session_t * bs, } } -static const char * +const char * bfd_poll_state_string (bfd_poll_state_e state) { switch (state) @@ -265,12 +265,16 @@ bfd_calc_next_echo_tx (bfd_main_t * bm, bfd_session_t * bs, u64 now) static void bfd_recalc_detection_time (bfd_main_t * bm, bfd_session_t * bs) { - bs->detection_time_clocks = - bs->remote_detect_mult * clib_max (bs->effective_required_min_rx_clocks, - bs->remote_desired_min_tx_clocks); - BFD_DBG ("Recalculated detection time %lu clocks/%.2fs", - bs->detection_time_clocks, - bs->detection_time_clocks / bm->cpu_cps); + if (bs->local_state == BFD_STATE_init || bs->local_state == BFD_STATE_up) + { + bs->detection_time_clocks = + bs->remote_detect_mult * + clib_max (bs->effective_required_min_rx_clocks, + 
bs->remote_desired_min_tx_clocks); + BFD_DBG ("Recalculated detection time %lu clocks/%.2fs", + bs->detection_time_clocks, + bs->detection_time_clocks / bm->cpu_cps); + } } static void @@ -284,7 +288,8 @@ bfd_set_timer (bfd_main_t * bm, bfd_session_t * bs, u64 now, { rx_timeout = bs->last_rx_clocks + bs->detection_time_clocks; } - if (BFD_STATE_up != bs->local_state || !bs->remote_demand || + if (BFD_STATE_up != bs->local_state || + (!bs->remote_demand && bs->remote_min_rx_usec) || BFD_POLL_NOT_NEEDED != bs->poll_state) { tx_timeout = bs->tx_timeout_clocks; @@ -348,7 +353,7 @@ bfd_set_effective_desired_min_tx (bfd_main_t * bm, static void bfd_set_effective_required_min_rx (bfd_main_t * bm, - bfd_session_t * bs, u64 now, + bfd_session_t * bs, u64 required_min_rx_clocks) { bs->effective_required_min_rx_clocks = required_min_rx_clocks; @@ -393,44 +398,33 @@ void bfd_session_start (bfd_main_t * bm, bfd_session_t * bs) { BFD_DBG ("\nStarting session: %U", format_bfd_session, bs); + bfd_set_effective_required_min_rx (bm, bs, + bs->config_required_min_rx_clocks); bfd_recalc_tx_interval (bm, bs); vlib_process_signal_event (bm->vlib_main, bm->bfd_process_node_index, BFD_EVENT_NEW_SESSION, bs->bs_idx); } -vnet_api_error_t -bfd_del_session (uword bs_idx) -{ - const bfd_main_t *bm = &bfd_main; - if (!pool_is_free_index (bm->sessions, bs_idx)) - { - bfd_session_t *bs = pool_elt_at_index (bm->sessions, bs_idx); - pool_put (bm->sessions, bs); - return 0; - } - else - { - BFD_ERR ("no such session"); - return VNET_API_ERROR_BFD_ENOENT; - } - return 0; -} - void bfd_session_set_flags (bfd_session_t * bs, u8 admin_up_down) { bfd_main_t *bm = &bfd_main; + u64 now = clib_cpu_time_now (); if (admin_up_down) { BFD_DBG ("Session set admin-up, bs-idx=%u", bs->bs_idx); bfd_set_state (bm, bs, BFD_STATE_down, 0); bfd_set_diag (bs, BFD_DIAG_CODE_no_diag); + bfd_calc_next_tx (bm, bs, now); + bfd_set_timer (bm, bs, now, 0); } else { BFD_DBG ("Session set admin-down, bs-idx=%u", bs->bs_idx); 
bfd_set_diag (bs, BFD_DIAG_CODE_admin_down); bfd_set_state (bm, bs, BFD_STATE_admin_down, 0); + bfd_calc_next_tx (bm, bs, now); + bfd_set_timer (bm, bs, now, 0); } } @@ -515,7 +509,7 @@ bfd_on_state_change (bfd_main_t * bm, bfd_session_t * bs, u64 now, clib_max (bs->config_desired_min_tx_clocks, bm->default_desired_min_tx_clocks)); - bfd_set_effective_required_min_rx (bm, bs, now, + bfd_set_effective_required_min_rx (bm, bs, bs->config_required_min_rx_clocks); bfd_set_timer (bm, bs, now, handling_wakeup); break; @@ -525,7 +519,7 @@ bfd_on_state_change (bfd_main_t * bm, bfd_session_t * bs, u64 now, clib_max (bs->config_desired_min_tx_clocks, bm->default_desired_min_tx_clocks)); - bfd_set_effective_required_min_rx (bm, bs, now, + bfd_set_effective_required_min_rx (bm, bs, bs->config_required_min_rx_clocks); bfd_set_timer (bm, bs, now, handling_wakeup); break; @@ -540,7 +534,7 @@ bfd_on_state_change (bfd_main_t * bm, bfd_session_t * bs, u64 now, bs->config_desired_min_tx_clocks); if (BFD_POLL_NOT_NEEDED == bs->poll_state) { - bfd_set_effective_required_min_rx (bm, bs, now, + bfd_set_effective_required_min_rx (bm, bs, bs->config_required_min_rx_clocks); } bfd_set_timer (bm, bs, now, handling_wakeup); @@ -932,7 +926,7 @@ bfd_on_timeout (vlib_main_t * vm, vlib_node_runtime_t * rt, bfd_main_t * bm, bs->echo = 1; bs->echo_last_rx_clocks = now; bs->echo_tx_timeout_clocks = now; - bfd_set_effective_required_min_rx (bm, bs, now, + bfd_set_effective_required_min_rx (bm, bs, clib_max (bm->min_required_min_rx_while_echo_clocks, bs->config_required_min_rx_clocks)); @@ -1569,6 +1563,7 @@ bfd_consume_pkt (bfd_main_t * bm, const bfd_pkt_t * pkt, u32 bs_idx) bs->remote_discr = pkt->my_disc; bs->remote_state = bfd_pkt_get_state (pkt); bs->remote_demand = bfd_pkt_get_demand (pkt); + bs->remote_diag = bfd_pkt_get_diag_code (pkt); u64 now = clib_cpu_time_now (); bs->last_rx_clocks = now; if (bfd_pkt_get_auth_present (pkt)) @@ -1621,7 +1616,7 @@ bfd_consume_pkt (bfd_main_t * bm, const 
bfd_pkt_t * pkt, u32 bs_idx) bfd_set_poll_state (bs, BFD_POLL_NOT_NEEDED); if (BFD_STATE_up == bs->local_state) { - bfd_set_effective_required_min_rx (bm, bs, now, + bfd_set_effective_required_min_rx (bm, bs, clib_max (bs->echo * bm->min_required_min_rx_while_echo_clocks, bs->config_required_min_rx_clocks)); @@ -1914,6 +1909,97 @@ bfd_session_set_params (bfd_main_t * bm, bfd_session_t * bs, return 0; } +vnet_api_error_t +bfd_auth_set_key (u32 conf_key_id, u8 auth_type, u8 key_len, + const u8 * key_data) +{ +#if WITH_LIBSSL > 0 + bfd_auth_key_t *auth_key = NULL; + if (!key_len || key_len > bfd_max_len_for_auth_type (auth_type)) + { + clib_warning ("Invalid authentication key length for auth_type=%d:%s " + "(key_len=%u, must be " + "non-zero, expected max=%u)", + auth_type, bfd_auth_type_str (auth_type), key_len, + (u32) bfd_max_len_for_auth_type (auth_type)); + return VNET_API_ERROR_INVALID_VALUE; + } + if (!bfd_auth_type_supported (auth_type)) + { + clib_warning ("Unsupported auth type=%d:%s", auth_type, + bfd_auth_type_str (auth_type)); + return VNET_API_ERROR_BFD_NOTSUPP; + } + bfd_main_t *bm = &bfd_main; + uword *key_idx_p = hash_get (bm->auth_key_by_conf_key_id, conf_key_id); + if (key_idx_p) + { + /* modifying existing key - must not be used */ + const uword key_idx = *key_idx_p; + auth_key = pool_elt_at_index (bm->auth_keys, key_idx); + if (auth_key->use_count > 0) + { + clib_warning ("Authentication key with conf ID %u in use by %u BFD " + "session(s) - cannot modify", + conf_key_id, auth_key->use_count); + return VNET_API_ERROR_BFD_EINUSE; + } + } + else + { + /* adding new key */ + pool_get (bm->auth_keys, auth_key); + auth_key->conf_key_id = conf_key_id; + hash_set (bm->auth_key_by_conf_key_id, conf_key_id, + auth_key - bm->auth_keys); + } + auth_key->auth_type = auth_type; + memset (auth_key->key, 0, sizeof (auth_key->key)); + clib_memcpy (auth_key->key, key_data, key_len); + return 0; +#else + clib_warning ("SSL missing, cannot manipulate authentication 
keys"); + return VNET_API_ERROR_BFD_NOTSUPP; +#endif +} + +vnet_api_error_t +bfd_auth_del_key (u32 conf_key_id) +{ +#if WITH_LIBSSL > 0 + bfd_auth_key_t *auth_key = NULL; + bfd_main_t *bm = &bfd_main; + uword *key_idx_p = hash_get (bm->auth_key_by_conf_key_id, conf_key_id); + if (key_idx_p) + { + /* deleting existing key - must not be used */ + const uword key_idx = *key_idx_p; + auth_key = pool_elt_at_index (bm->auth_keys, key_idx); + if (auth_key->use_count > 0) + { + clib_warning ("Authentication key with conf ID %u in use by %u BFD " + "session(s) - cannot delete", + conf_key_id, auth_key->use_count); + return VNET_API_ERROR_BFD_EINUSE; + } + hash_unset (bm->auth_key_by_conf_key_id, conf_key_id); + memset (auth_key, 0, sizeof (*auth_key)); + pool_put (bm->auth_keys, auth_key); + } + else + { + /* no such key */ + clib_warning ("Authentication key with conf ID %u does not exist", + conf_key_id); + return VNET_API_ERROR_BFD_ENOENT; + } + return 0; +#else + clib_warning ("SSL missing, cannot manipulate authentication keys"); + return VNET_API_ERROR_BFD_NOTSUPP; +#endif +} + bfd_main_t bfd_main; /* diff --git a/src/vnet/bfd/bfd_main.h b/src/vnet/bfd/bfd_main.h index d8063f9d..3be3694c 100644 --- a/src/vnet/bfd/bfd_main.h +++ b/src/vnet/bfd/bfd_main.h @@ -74,11 +74,14 @@ typedef struct bfd_session_s /* session state */ bfd_state_e local_state; + /* remote session state */ + bfd_state_e remote_state; + /* local diagnostics */ bfd_diag_code_e local_diag; - /* remote session state */ - bfd_state_e remote_state; + /* remote diagnostics */ + bfd_diag_code_e remote_diag; /* local discriminator */ u32 local_discr; @@ -315,6 +318,7 @@ void bfd_event (bfd_main_t * bm, bfd_session_t * bs); void bfd_init_final_control_frame (vlib_main_t * vm, vlib_buffer_t * b, bfd_main_t * bm, bfd_session_t * bs); u8 *format_bfd_session (u8 * s, va_list * args); +u8 *format_bfd_auth_key (u8 * s, va_list * args); void bfd_session_set_flags (bfd_session_t * bs, u8 admin_up_down); unsigned 
bfd_auth_type_supported (bfd_auth_type_e auth_type); vnet_api_error_t bfd_auth_activate (bfd_session_t * bs, u32 conf_key_id, @@ -325,6 +329,9 @@ vnet_api_error_t bfd_session_set_params (bfd_main_t * bm, bfd_session_t * bs, u32 required_min_rx_usec, u8 detect_mult); +u32 bfd_clocks_to_usec (const bfd_main_t * bm, u64 clocks); +const char *bfd_poll_state_string (bfd_poll_state_e state); + #define USEC_PER_MS 1000LL #define USEC_PER_SECOND (1000 * USEC_PER_MS) diff --git a/src/vnet/bfd/bfd_protocol.c b/src/vnet/bfd/bfd_protocol.c index 92b226bd..5deb9702 100644 --- a/src/vnet/bfd/bfd_protocol.c +++ b/src/vnet/bfd/bfd_protocol.c @@ -82,11 +82,13 @@ bfd_pkt_get_control_plane_independent (const bfd_pkt_t * pkt) return (pkt->head.sta_flags >> 3) & 1; } +#if 0 void bfd_pkt_set_control_plane_independent (bfd_pkt_t * pkt) { pkt->head.sta_flags |= 1 << 3; } +#endif u8 bfd_pkt_get_auth_present (const bfd_pkt_t * pkt) @@ -106,11 +108,13 @@ bfd_pkt_get_demand (const bfd_pkt_t * pkt) return (pkt->head.sta_flags >> 1) & 1; } +#if 0 void bfd_pkt_set_demand (bfd_pkt_t * pkt) { pkt->head.sta_flags |= 1 << 1; } +#endif u8 bfd_pkt_get_multipoint (const bfd_pkt_t * pkt) @@ -118,11 +122,13 @@ bfd_pkt_get_multipoint (const bfd_pkt_t * pkt) return (pkt->head.sta_flags >> 0) & 1; } +#if 0 void bfd_pkt_set_multipoint (bfd_pkt_t * pkt) { pkt->head.sta_flags |= 1 << 0; } +#endif u32 bfd_max_len_for_auth_type (bfd_auth_type_e auth_type) diff --git a/src/vnet/bfd/bfd_udp.c b/src/vnet/bfd/bfd_udp.c index cf05089b..207f3b8c 100644 --- a/src/vnet/bfd/bfd_udp.c +++ b/src/vnet/bfd/bfd_udp.c @@ -60,8 +60,7 @@ vnet_api_error_t bfd_udp_set_echo_source (u32 sw_if_index) { vnet_sw_interface_t *sw_if = - vnet_get_sw_interface_safe (bfd_udp_main.vnet_main, - bfd_udp_main.echo_source_sw_if_index); + vnet_get_sw_interface_safe (bfd_udp_main.vnet_main, sw_if_index); if (sw_if) { bfd_udp_main.echo_source_sw_if_index = sw_if_index; @@ -84,6 +83,7 @@ bfd_udp_is_echo_available (bfd_transport_e transport) { if 
(!bfd_udp_main.echo_source_is_set) { + BFD_DBG ("UDP echo source not set - echo not available"); return 0; } /* @@ -127,6 +127,7 @@ bfd_udp_is_echo_available (bfd_transport_e transport) /* *INDENT-ON* */ } } + BFD_DBG ("No usable IP address for UDP echo - echo not available"); return 0; } @@ -144,11 +145,6 @@ bfd_udp_bs_idx_to_sport (u32 bs_idx) return 49152 + bs_idx % (65535 - 49152 + 1); } -static void -lol () -{ -} - int bfd_udp_get_echo_src_ip4 (ip4_address_t * addr) { @@ -204,7 +200,6 @@ bfd_udp_get_echo_src_ip6 (ip6_address_t * addr) { *addr = *x; addr->as_u8[15] ^= 1; /* flip the last bit of the address */ - lol (); return 1; } })); @@ -213,6 +208,24 @@ bfd_udp_get_echo_src_ip6 (ip6_address_t * addr) return 0; } +void +bfd_udp_get_echo_source (int *is_set, u32 * sw_if_index, int *have_usable_ip4, + ip4_address_t * ip4, int *have_usable_ip6, + ip6_address_t * ip6) +{ + if (bfd_udp_main.echo_source_is_set) + { + *is_set = 1; + *sw_if_index = bfd_udp_main.echo_source_sw_if_index; + *have_usable_ip4 = bfd_udp_get_echo_src_ip4 (ip4); + *have_usable_ip6 = bfd_udp_get_echo_src_ip6 (ip6); + } + else + { + *is_set = 0; + } +} + int bfd_add_udp4_transport (vlib_main_t * vm, vlib_buffer_t * b, const bfd_session_t * bs, int is_echo) @@ -660,97 +673,6 @@ bfd_udp_session_set_flags (u32 sw_if_index, return 0; } -vnet_api_error_t -bfd_auth_set_key (u32 conf_key_id, u8 auth_type, u8 key_len, - const u8 * key_data) -{ -#if WITH_LIBSSL > 0 - bfd_auth_key_t *auth_key = NULL; - if (!key_len || key_len > bfd_max_len_for_auth_type (auth_type)) - { - clib_warning ("Invalid authentication key length for auth_type=%d:%s " - "(key_len=%u, must be " - "non-zero, expected max=%u)", - auth_type, bfd_auth_type_str (auth_type), key_len, - (u32) bfd_max_len_for_auth_type (auth_type)); - return VNET_API_ERROR_INVALID_VALUE; - } - if (!bfd_auth_type_supported (auth_type)) - { - clib_warning ("Unsupported auth type=%d:%s", auth_type, - bfd_auth_type_str (auth_type)); - return 
VNET_API_ERROR_BFD_NOTSUPP; - } - bfd_main_t *bm = bfd_udp_main.bfd_main; - uword *key_idx_p = hash_get (bm->auth_key_by_conf_key_id, conf_key_id); - if (key_idx_p) - { - /* modifying existing key - must not be used */ - const uword key_idx = *key_idx_p; - auth_key = pool_elt_at_index (bm->auth_keys, key_idx); - if (auth_key->use_count > 0) - { - clib_warning ("Authentication key with conf ID %u in use by %u BFD " - "sessions - cannot modify", - conf_key_id, auth_key->use_count); - return VNET_API_ERROR_BFD_EINUSE; - } - } - else - { - /* adding new key */ - pool_get (bm->auth_keys, auth_key); - auth_key->conf_key_id = conf_key_id; - hash_set (bm->auth_key_by_conf_key_id, conf_key_id, - auth_key - bm->auth_keys); - } - auth_key->auth_type = auth_type; - memset (auth_key->key, 0, sizeof (auth_key->key)); - clib_memcpy (auth_key->key, key_data, key_len); - return 0; -#else - clib_warning ("SSL missing, cannot manipulate authentication keys"); - return VNET_API_ERROR_BFD_NOTSUPP; -#endif -} - -vnet_api_error_t -bfd_auth_del_key (u32 conf_key_id) -{ -#if WITH_LIBSSL > 0 - bfd_auth_key_t *auth_key = NULL; - bfd_main_t *bm = bfd_udp_main.bfd_main; - uword *key_idx_p = hash_get (bm->auth_key_by_conf_key_id, conf_key_id); - if (key_idx_p) - { - /* deleting existing key - must not be used */ - const uword key_idx = *key_idx_p; - auth_key = pool_elt_at_index (bm->auth_keys, key_idx); - if (auth_key->use_count > 0) - { - clib_warning ("Authentication key with conf ID %u in use by %u BFD " - "sessions - cannot delete", - conf_key_id, auth_key->use_count); - return VNET_API_ERROR_BFD_EINUSE; - } - hash_unset (bm->auth_key_by_conf_key_id, conf_key_id); - memset (auth_key, 0, sizeof (*auth_key)); - pool_put (bm->auth_keys, auth_key); - } - else - { - /* no such key */ - clib_warning ("Authentication key with conf ID %u does not exist", - conf_key_id); - return VNET_API_ERROR_BFD_ENOENT; - } - return 0; -#else - clib_warning ("SSL missing, cannot manipulate authentication keys"); 
- return VNET_API_ERROR_BFD_NOTSUPP; -#endif -} - vnet_api_error_t bfd_udp_auth_activate (u32 sw_if_index, const ip46_address_t * local_addr, diff --git a/src/vnet/bfd/bfd_udp.h b/src/vnet/bfd/bfd_udp.h index ce2ee3cb..e33b7407 100644 --- a/src/vnet/bfd/bfd_udp.h +++ b/src/vnet/bfd/bfd_udp.h @@ -62,6 +62,11 @@ int bfd_add_udp6_transport (vlib_main_t * vm, vlib_buffer_t * b, */ int bfd_udp_is_echo_available (bfd_transport_e transport); +void +bfd_udp_get_echo_source (int *is_set, u32 * sw_if_index, int *have_usable_ip4, + ip4_address_t * ip4, int *have_usable_ip6, + ip6_address_t * ip6); + #endif /* __included_bfd_udp_h__ */ /* diff --git a/test/bfd.py b/test/bfd.py index b467cc79..d1d948b3 100644 --- a/test/bfd.py +++ b/test/bfd.py @@ -198,6 +198,10 @@ class VppBFDAuthKey(VppObject): """ key data """ return self._key + @key.setter + def key(self, value): + self._key = value + @property def conf_key_id(self): """ configuration key ID """ @@ -249,7 +253,10 @@ class VppBFDUDPSession(VppObject): self._required_min_rx = required_min_rx self._detect_mult = detect_mult self._sha1_key = sha1_key - self._bfd_key_id = bfd_key_id if bfd_key_id else randint(0, 255) + if bfd_key_id is not None: + self._bfd_key_id = bfd_key_id + else: + self._bfd_key_id = randint(0, 255) @property def test(self): diff --git a/test/test_bfd.py b/test/test_bfd.py index ce0cca55..e7ebb214 100644 --- a/test/test_bfd.py +++ b/test/test_bfd.py @@ -6,8 +6,9 @@ import unittest import hashlib import binascii import time +from struct import pack, unpack from random import randint, shuffle, getrandbits -from socket import AF_INET, AF_INET6 +from socket import AF_INET, AF_INET6, inet_ntop from scapy.packet import Raw from scapy.layers.l2 import Ether from scapy.layers.inet import UDP, IP @@ -17,6 +18,7 @@ from bfd import VppBFDAuthKey, BFD, BFDAuthType, VppBFDUDPSession, \ from framework import VppTestCase, VppTestRunner from vpp_pg_interface import CaptureTimeoutError from util import ppp +from 
vpp_papi_provider import UnexpectedApiReturnValueError USEC_IN_SEC = 1000000 @@ -461,19 +463,25 @@ def bfd_session_up(test): test.vpp_clock_offset) if old_offset: test.assertAlmostEqual( - old_offset, test.vpp_clock_offset, delta=0.1, + old_offset, test.vpp_clock_offset, delta=0.5, msg="vpp clock offset not stable (new: %s, old: %s)" % (test.vpp_clock_offset, old_offset)) test.logger.info("BFD: Sending Init") test.test_session.update(my_discriminator=randint(0, 40000000), your_discriminator=p[BFD].my_discriminator, state=BFDState.init) + if test.test_session.sha1_key and test.test_session.sha1_key.auth_type == \ + BFDAuthType.meticulous_keyed_sha1: + test.test_session.inc_seq_num() test.test_session.send_packet() test.logger.info("BFD: Waiting for event") e = test.vapi.wait_for_event(1, "bfd_udp_session_details") verify_event(test, e, expected_state=BFDState.up) test.logger.info("BFD: Session is Up") test.test_session.update(state=BFDState.up) + if test.test_session.sha1_key and test.test_session.sha1_key.auth_type == \ + BFDAuthType.meticulous_keyed_sha1: + test.test_session.inc_seq_num() test.test_session.send_packet() test.assert_equal(test.vpp_session.state, BFDState.up, BFDState) @@ -482,6 +490,9 @@ def bfd_session_down(test): """ Bring BFD session down """ test.assert_equal(test.vpp_session.state, BFDState.up, BFDState) test.test_session.update(state=BFDState.down) + if test.test_session.sha1_key and test.test_session.sha1_key.auth_type == \ + BFDAuthType.meticulous_keyed_sha1: + test.test_session.inc_seq_num() test.test_session.send_packet() test.logger.info("BFD: Waiting for event") e = test.vapi.wait_for_event(1, "bfd_udp_session_details") @@ -490,6 +501,30 @@ def bfd_session_down(test): test.assert_equal(test.vpp_session.state, BFDState.down, BFDState) +def verify_bfd_session_config(test, session, state=None): + dump = session.get_bfd_udp_session_dump_entry() + test.assertIsNotNone(dump) + # since dump is not none, we have verified that sw_if_index and 
addresses + # are valid (in get_bfd_udp_session_dump_entry) + if state: + test.assert_equal(dump.state, state, "session state") + test.assert_equal(dump.required_min_rx, session.required_min_rx, + "required min rx interval") + test.assert_equal(dump.desired_min_tx, session.desired_min_tx, + "desired min tx interval") + test.assert_equal(dump.detect_mult, session.detect_mult, + "detect multiplier") + if session.sha1_key is None: + test.assert_equal(dump.is_authenticated, 0, "is_authenticated flag") + else: + test.assert_equal(dump.is_authenticated, 1, "is_authenticated flag") + test.assert_equal(dump.bfd_key_id, session.bfd_key_id, + "bfd key id") + test.assert_equal(dump.conf_key_id, + session.sha1_key.conf_key_id, + "config key id") + + def verify_ip(test, packet): """ Verify correctness of IP layer. """ if test.vpp_session.af == AF_INET6: @@ -626,6 +661,32 @@ class BFD4TestCase(VppTestCase): """ bring BFD session up """ bfd_session_up(self) + def test_session_up_by_ip(self): + """ bring BFD session up - first frame looked up by address pair """ + self.logger.info("BFD: Sending Slow control frame") + self.test_session.update(my_discriminator=randint(0, 40000000)) + self.test_session.send_packet() + self.pg0.enable_capture() + p = self.pg0.wait_for_packet(1) + self.assert_equal(p[BFD].your_discriminator, + self.test_session.my_discriminator, + "BFD - your discriminator") + self.assert_equal(p[BFD].state, BFDState.init, BFDState) + self.test_session.update(your_discriminator=p[BFD].my_discriminator, + state=BFDState.up) + self.logger.info("BFD: Waiting for event") + e = self.vapi.wait_for_event(1, "bfd_udp_session_details") + verify_event(self, e, expected_state=BFDState.init) + self.logger.info("BFD: Sending Up") + self.test_session.send_packet() + self.logger.info("BFD: Waiting for event") + e = self.vapi.wait_for_event(1, "bfd_udp_session_details") + verify_event(self, e, expected_state=BFDState.up) + self.logger.info("BFD: Session is Up") + 
self.test_session.update(state=BFDState.up) + self.test_session.send_packet() + self.assert_equal(self.vpp_session.state, BFDState.up, BFDState) + def test_session_down(self): """ bring BFD session down """ bfd_session_up(self) @@ -905,6 +966,16 @@ class BFD4TestCase(VppTestCase): self.assertNotIn("P", p.sprintf("%BFD.flags%"), "Poll bit set in BFD packet") + def test_poll_response(self): + """ test correct response to control frame with poll bit set """ + bfd_session_up(self) + poll = self.test_session.create_packet() + poll[BFD].flags = "P" + self.test_session.send_packet(poll) + final = wait_for_bfd_packet( + self, pcap_time_min=time.time() - self.vpp_clock_offset) + self.assertIn("F", final.sprintf("%BFD.flags%")) + def test_no_periodic_if_remote_demand(self): """ no periodic frames outside poll sequence if remote demand set """ bfd_session_up(self) @@ -1091,6 +1162,36 @@ class BFD4TestCase(VppTestCase): events = self.vapi.collect_events() self.assert_equal(len(events), 0, "number of bfd events") + def test_echo_source_removed(self): + """ echo function stops if echo source is removed """ + bfd_session_up(self) + self.test_session.update(required_min_echo_rx=50000) + self.test_session.send_packet() + self.vapi.bfd_udp_set_echo_source(self.loopback0.sw_if_index) + # wait for first echo packet + while True: + p = self.pg0.wait_for_packet(1) + self.logger.debug(ppp("Got packet:", p)) + if p[UDP].dport == BFD.udp_dport_echo: + self.logger.debug(ppp("Looping back packet:", p)) + self.pg0.add_stream(p) + self.pg_start() + break + elif p.haslayer(BFD): + # ignore BFD + pass + else: + raise Exception(ppp("Received unknown packet:", p)) + self.vapi.bfd_udp_del_echo_source() + self.test_session.send_packet() + # echo packets shouldn't arrive anymore + for dummy in range(5): + wait_for_bfd_packet( + self, pcap_time_min=time.time() - self.vpp_clock_offset) + self.test_session.send_packet() + events = self.vapi.collect_events() + self.assert_equal(len(events), 0, "number of 
bfd events") + def test_stale_echo(self): """ stale echo packets don't keep a session up """ bfd_session_up(self) @@ -1199,28 +1300,31 @@ class BFD4TestCase(VppTestCase): verify_event(self, e, expected_state=BFDState.admin_down) for dummy in range(2): p = wait_for_bfd_packet(self) - self.assert_equal(BFDState.admin_down, p[BFD].state, BFDState) + self.assert_equal(p[BFD].state, BFDState.admin_down, BFDState) # try to bring session up - shouldn't be possible self.test_session.update(state=BFDState.init) self.test_session.send_packet() for dummy in range(2): p = wait_for_bfd_packet(self) - self.assert_equal(BFDState.admin_down, p[BFD].state, BFDState) + self.assert_equal(p[BFD].state, BFDState.admin_down, BFDState) self.vpp_session.admin_up() self.test_session.update(state=BFDState.down) e = self.vapi.wait_for_event(1, "bfd_udp_session_details") verify_event(self, e, expected_state=BFDState.down) - p = wait_for_bfd_packet(self) - self.assert_equal(BFDState.down, p[BFD].state, BFDState) + p = wait_for_bfd_packet( + self, pcap_time_min=time.time() - self.vpp_clock_offset) + self.assert_equal(p[BFD].state, BFDState.down, BFDState) self.test_session.send_packet() - p = wait_for_bfd_packet(self) - self.assert_equal(BFDState.init, p[BFD].state, BFDState) + p = wait_for_bfd_packet( + self, pcap_time_min=time.time() - self.vpp_clock_offset) + self.assert_equal(p[BFD].state, BFDState.init, BFDState) e = self.vapi.wait_for_event(1, "bfd_udp_session_details") verify_event(self, e, expected_state=BFDState.init) self.test_session.update(state=BFDState.up) self.test_session.send_packet() - p = wait_for_bfd_packet(self) - self.assert_equal(BFDState.up, p[BFD].state, BFDState) + p = wait_for_bfd_packet( + self, pcap_time_min=time.time() - self.vpp_clock_offset) + self.assert_equal(p[BFD].state, BFDState.up, BFDState) e = self.vapi.wait_for_event(1, "bfd_udp_session_details") verify_event(self, e, expected_state=BFDState.up) @@ -1232,7 +1336,8 @@ class BFD4TestCase(VppTestCase): 
self.test_session.send_packet(demand) self.vpp_session.modify_parameters( required_min_rx=2 * self.vpp_session.required_min_rx) - p = wait_for_bfd_packet(self) + p = wait_for_bfd_packet( + self, pcap_time_min=time.time() - self.vpp_clock_offset) # poll bit must be set self.assertIn("P", p.sprintf("%BFD.flags%"), "Poll bit not set") # terminate poll sequence @@ -1314,6 +1419,32 @@ class BFD6TestCase(VppTestCase): """ bring BFD session up """ bfd_session_up(self) + def test_session_up_by_ip(self): + """ bring BFD session up - first frame looked up by address pair """ + self.logger.info("BFD: Sending Slow control frame") + self.test_session.update(my_discriminator=randint(0, 40000000)) + self.test_session.send_packet() + self.pg0.enable_capture() + p = self.pg0.wait_for_packet(1) + self.assert_equal(p[BFD].your_discriminator, + self.test_session.my_discriminator, + "BFD - your discriminator") + self.assert_equal(p[BFD].state, BFDState.init, BFDState) + self.test_session.update(your_discriminator=p[BFD].my_discriminator, + state=BFDState.up) + self.logger.info("BFD: Waiting for event") + e = self.vapi.wait_for_event(1, "bfd_udp_session_details") + verify_event(self, e, expected_state=BFDState.init) + self.logger.info("BFD: Sending Up") + self.test_session.send_packet() + self.logger.info("BFD: Waiting for event") + e = self.vapi.wait_for_event(1, "bfd_udp_session_details") + verify_event(self, e, expected_state=BFDState.up) + self.logger.info("BFD: Session is Up") + self.test_session.update(state=BFDState.up) + self.test_session.send_packet() + self.assert_equal(self.vpp_session.state, BFDState.up, BFDState) + def test_hold_up(self): """ hold BFD session up """ bfd_session_up(self) @@ -1512,7 +1643,6 @@ class BFDSHA1TestCase(VppTestCase): self.vpp_session = VppBFDUDPSession(self, self.pg0, self.pg0.remote_ip4, sha1_key=key) self.vpp_session.add_vpp_config() - self.vpp_session.admin_up() self.test_session = BFDTestSession( self, self.pg0, AF_INET, sha1_key=key, 
bfd_key_id=self.vpp_session.bfd_key_id) @@ -1547,7 +1677,6 @@ class BFDSHA1TestCase(VppTestCase): self.vpp_session = vpp_bfd_udp_session self.vpp_session.add_vpp_config() - self.vpp_session.admin_up() self.test_session = legitimate_test_session # bring vpp session up bfd_session_up(self) @@ -1625,7 +1754,6 @@ class BFDSHA1TestCase(VppTestCase): self.vpp_session = VppBFDUDPSession(self, self.pg0, self.pg0.remote_ip4, sha1_key=key) self.vpp_session.add_vpp_config() - self.vpp_session.admin_up() self.test_session = BFDTestSession( self, self.pg0, AF_INET, sha1_key=key, bfd_key_id=self.vpp_session.bfd_key_id, our_seq_number=0) @@ -1633,7 +1761,7 @@ class BFDSHA1TestCase(VppTestCase): # don't send any packets for 2*detection_time detection_time = self.test_session.detect_mult *\ self.vpp_session.required_min_rx / USEC_IN_SEC - self.sleep(detection_time, "simulating peer restart") + self.sleep(2*detection_time, "simulating peer restart") events = self.vapi.collect_events() self.assert_equal(len(events), 1, "number of bfd events") verify_event(self, events[0], expected_state=BFDState.down) @@ -1685,7 +1813,6 @@ class BFDAuthOnOffTestCase(VppTestCase): self.vpp_session = VppBFDUDPSession(self, self.pg0, self.pg0.remote_ip4) self.vpp_session.add_vpp_config() - self.vpp_session.admin_up() self.test_session = BFDTestSession(self, self.pg0, AF_INET) bfd_session_up(self) for dummy in range(self.test_session.detect_mult * 2): @@ -1710,7 +1837,6 @@ class BFDAuthOnOffTestCase(VppTestCase): self.vpp_session = VppBFDUDPSession(self, self.pg0, self.pg0.remote_ip4, sha1_key=key) self.vpp_session.add_vpp_config() - self.vpp_session.admin_up() self.test_session = BFDTestSession( self, self.pg0, AF_INET, sha1_key=key, bfd_key_id=self.vpp_session.bfd_key_id) @@ -1742,7 +1868,6 @@ class BFDAuthOnOffTestCase(VppTestCase): self.vpp_session = VppBFDUDPSession(self, self.pg0, self.pg0.remote_ip4, sha1_key=key1) self.vpp_session.add_vpp_config() - self.vpp_session.admin_up() self.test_session = 
BFDTestSession( self, self.pg0, AF_INET, sha1_key=key1, bfd_key_id=self.vpp_session.bfd_key_id) @@ -1769,7 +1894,6 @@ class BFDAuthOnOffTestCase(VppTestCase): self.vpp_session = VppBFDUDPSession(self, self.pg0, self.pg0.remote_ip4) self.vpp_session.add_vpp_config() - self.vpp_session.admin_up() self.test_session = BFDTestSession(self, self.pg0, AF_INET) bfd_session_up(self) for dummy in range(self.test_session.detect_mult * 2): @@ -1798,7 +1922,6 @@ class BFDAuthOnOffTestCase(VppTestCase): self.vpp_session = VppBFDUDPSession(self, self.pg0, self.pg0.remote_ip4, sha1_key=key) self.vpp_session.add_vpp_config() - self.vpp_session.admin_up() self.test_session = BFDTestSession( self, self.pg0, AF_INET, sha1_key=key, bfd_key_id=self.vpp_session.bfd_key_id) @@ -1857,5 +1980,421 @@ class BFDAuthOnOffTestCase(VppTestCase): self.assert_equal(len(self.vapi.collect_events()), 0, "number of bfd events") + +class BFDCLITestCase(VppTestCase): + """Bidirectional Forwarding Detection (BFD) (CLI) """ + pg0 = None + + @classmethod + def setUpClass(cls): + super(BFDCLITestCase, cls).setUpClass() + + try: + cls.create_pg_interfaces((0,)) + cls.pg0.config_ip4() + cls.pg0.config_ip6() + cls.pg0.resolve_arp() + cls.pg0.resolve_ndp() + + except Exception: + super(BFDCLITestCase, cls).tearDownClass() + raise + + def setUp(self): + super(BFDCLITestCase, self).setUp() + self.factory = AuthKeyFactory() + self.pg0.enable_capture() + + def tearDown(self): + try: + self.vapi.want_bfd_events(enable_disable=0) + except UnexpectedApiReturnValueError: + # some tests aren't subscribed, so this is not an issue + pass + self.vapi.collect_events() # clear the event queue + super(BFDCLITestCase, self).tearDown() + + def cli_verify_no_response(self, cli): + """ execute a CLI, asserting that the response is empty """ + self.assert_equal(self.vapi.cli(cli), + "", + "CLI command response") + + def cli_verify_response(self, cli, expected): + """ execute a CLI, asserting that the response matches expectation 
""" + self.assert_equal(self.vapi.cli(cli).strip(), + expected, + "CLI command response") + + def test_show(self): + """ show commands """ + k1 = self.factory.create_random_key(self) + k1.add_vpp_config() + k2 = self.factory.create_random_key( + self, auth_type=BFDAuthType.meticulous_keyed_sha1) + k2.add_vpp_config() + s1 = VppBFDUDPSession(self, self.pg0, self.pg0.remote_ip4) + s1.add_vpp_config() + s2 = VppBFDUDPSession(self, self.pg0, self.pg0.remote_ip6, af=AF_INET6, + sha1_key=k2) + s2.add_vpp_config() + self.logger.info(self.vapi.ppcli("show bfd keys")) + self.logger.info(self.vapi.ppcli("show bfd sessions")) + self.logger.info(self.vapi.ppcli("show bfd")) + + def test_set_del_sha1_key(self): + """ set/delete SHA1 auth key """ + k = self.factory.create_random_key(self) + self.registry.register(k, self.logger) + self.cli_verify_no_response( + "bfd key set conf-key-id %s type keyed-sha1 secret %s" % + (k.conf_key_id, + "".join("{:02x}".format(ord(c)) for c in k.key))) + self.assertTrue(k.query_vpp_config()) + self.vpp_session = VppBFDUDPSession( + self, self.pg0, self.pg0.remote_ip4, sha1_key=k) + self.vpp_session.add_vpp_config() + self.test_session = \ + BFDTestSession(self, self.pg0, AF_INET, sha1_key=k, + bfd_key_id=self.vpp_session.bfd_key_id) + self.vapi.want_bfd_events() + bfd_session_up(self) + bfd_session_down(self) + # try to replace the secret for the key - should fail because the key + # is in-use + k2 = self.factory.create_random_key(self) + self.cli_verify_response( + "bfd key set conf-key-id %s type keyed-sha1 secret %s" % + (k.conf_key_id, + "".join("{:02x}".format(ord(c)) for c in k2.key)), + "bfd key set: `bfd_auth_set_key' API call failed, " + "rv=-103:BFD object in use") + # manipulating the session using old secret should still work + bfd_session_up(self) + bfd_session_down(self) + self.vpp_session.remove_vpp_config() + self.cli_verify_no_response( + "bfd key del conf-key-id %s" % k.conf_key_id) + self.assertFalse(k.query_vpp_config()) + + 
def test_set_del_meticulous_sha1_key(self): + """ set/delete meticulous SHA1 auth key """ + k = self.factory.create_random_key( + self, auth_type=BFDAuthType.meticulous_keyed_sha1) + self.registry.register(k, self.logger) + self.cli_verify_no_response( + "bfd key set conf-key-id %s type meticulous-keyed-sha1 secret %s" % + (k.conf_key_id, + "".join("{:02x}".format(ord(c)) for c in k.key))) + self.assertTrue(k.query_vpp_config()) + self.vpp_session = VppBFDUDPSession(self, self.pg0, + self.pg0.remote_ip6, af=AF_INET6, + sha1_key=k) + self.vpp_session.add_vpp_config() + self.vpp_session.admin_up() + self.test_session = \ + BFDTestSession(self, self.pg0, AF_INET6, sha1_key=k, + bfd_key_id=self.vpp_session.bfd_key_id) + self.vapi.want_bfd_events() + bfd_session_up(self) + bfd_session_down(self) + # try to replace the secret for the key - should fail because the key + # is in-use + k2 = self.factory.create_random_key(self) + self.cli_verify_response( + "bfd key set conf-key-id %s type keyed-sha1 secret %s" % + (k.conf_key_id, + "".join("{:02x}".format(ord(c)) for c in k2.key)), + "bfd key set: `bfd_auth_set_key' API call failed, " + "rv=-103:BFD object in use") + # manipulating the session using old secret should still work + bfd_session_up(self) + bfd_session_down(self) + self.vpp_session.remove_vpp_config() + self.cli_verify_no_response( + "bfd key del conf-key-id %s" % k.conf_key_id) + self.assertFalse(k.query_vpp_config()) + + def test_add_mod_del_bfd_udp(self): + """ create/modify/delete IPv4 BFD UDP session """ + vpp_session = VppBFDUDPSession( + self, self.pg0, self.pg0.remote_ip4) + self.registry.register(vpp_session, self.logger) + cli_add_cmd = "bfd udp session add interface %s local-addr %s " \ + "peer-addr %s desired-min-tx %s required-min-rx %s "\ + "detect-mult %s" % (self.pg0.name, self.pg0.local_ip4, + self.pg0.remote_ip4, + vpp_session.desired_min_tx, + vpp_session.required_min_rx, + vpp_session.detect_mult) + self.cli_verify_no_response(cli_add_cmd) + 
# 2nd add should fail + self.cli_verify_response( + cli_add_cmd, + "bfd udp session add: `bfd_add_add_session' API call" + " failed, rv=-101:Duplicate BFD object") + verify_bfd_session_config(self, vpp_session) + mod_session = VppBFDUDPSession( + self, self.pg0, self.pg0.remote_ip4, + required_min_rx=2 * vpp_session.required_min_rx, + desired_min_tx=3 * vpp_session.desired_min_tx, + detect_mult=4 * vpp_session.detect_mult) + self.cli_verify_no_response( + "bfd udp session mod interface %s local-addr %s peer-addr %s " + "desired-min-tx %s required-min-rx %s detect-mult %s" % + (self.pg0.name, self.pg0.local_ip4, self.pg0.remote_ip4, + mod_session.desired_min_tx, mod_session.required_min_rx, + mod_session.detect_mult)) + verify_bfd_session_config(self, mod_session) + cli_del_cmd = "bfd udp session del interface %s local-addr %s "\ + "peer-addr %s" % (self.pg0.name, + self.pg0.local_ip4, self.pg0.remote_ip4) + self.cli_verify_no_response(cli_del_cmd) + # 2nd del is expected to fail + self.cli_verify_response( + cli_del_cmd, "bfd udp session del: `bfd_udp_del_session' API call" + " failed, rv=-102:No such BFD object") + self.assertFalse(vpp_session.query_vpp_config()) + + def test_add_mod_del_bfd_udp6(self): + """ create/modify/delete IPv6 BFD UDP session """ + vpp_session = VppBFDUDPSession( + self, self.pg0, self.pg0.remote_ip6, af=AF_INET6) + self.registry.register(vpp_session, self.logger) + cli_add_cmd = "bfd udp session add interface %s local-addr %s " \ + "peer-addr %s desired-min-tx %s required-min-rx %s "\ + "detect-mult %s" % (self.pg0.name, self.pg0.local_ip6, + self.pg0.remote_ip6, + vpp_session.desired_min_tx, + vpp_session.required_min_rx, + vpp_session.detect_mult) + self.cli_verify_no_response(cli_add_cmd) + # 2nd add should fail + self.cli_verify_response( + cli_add_cmd, + "bfd udp session add: `bfd_add_add_session' API call" + " failed, rv=-101:Duplicate BFD object") + verify_bfd_session_config(self, vpp_session) + mod_session = VppBFDUDPSession( + 
self, self.pg0, self.pg0.remote_ip6, af=AF_INET6, + required_min_rx=2 * vpp_session.required_min_rx, + desired_min_tx=3 * vpp_session.desired_min_tx, + detect_mult=4 * vpp_session.detect_mult) + self.cli_verify_no_response( + "bfd udp session mod interface %s local-addr %s peer-addr %s " + "desired-min-tx %s required-min-rx %s detect-mult %s" % + (self.pg0.name, self.pg0.local_ip6, self.pg0.remote_ip6, + mod_session.desired_min_tx, + mod_session.required_min_rx, mod_session.detect_mult)) + verify_bfd_session_config(self, mod_session) + cli_del_cmd = "bfd udp session del interface %s local-addr %s "\ + "peer-addr %s" % (self.pg0.name, + self.pg0.local_ip6, self.pg0.remote_ip6) + self.cli_verify_no_response(cli_del_cmd) + # 2nd del is expected to fail + self.cli_verify_response( + cli_del_cmd, + "bfd udp session del: `bfd_udp_del_session' API call" + " failed, rv=-102:No such BFD object") + self.assertFalse(vpp_session.query_vpp_config()) + + def test_add_mod_del_bfd_udp_auth(self): + """ create/modify/delete IPv4 BFD UDP session (authenticated) """ + key = self.factory.create_random_key(self) + key.add_vpp_config() + vpp_session = VppBFDUDPSession( + self, self.pg0, self.pg0.remote_ip4, sha1_key=key) + self.registry.register(vpp_session, self.logger) + cli_add_cmd = "bfd udp session add interface %s local-addr %s " \ + "peer-addr %s desired-min-tx %s required-min-rx %s "\ + "detect-mult %s conf-key-id %s bfd-key-id %s"\ + % (self.pg0.name, self.pg0.local_ip4, self.pg0.remote_ip4, + vpp_session.desired_min_tx, vpp_session.required_min_rx, + vpp_session.detect_mult, key.conf_key_id, + vpp_session.bfd_key_id) + self.cli_verify_no_response(cli_add_cmd) + # 2nd add should fail + self.cli_verify_response( + cli_add_cmd, + "bfd udp session add: `bfd_add_add_session' API call" + " failed, rv=-101:Duplicate BFD object") + verify_bfd_session_config(self, vpp_session) + mod_session = VppBFDUDPSession( + self, self.pg0, self.pg0.remote_ip4, sha1_key=key, + 
bfd_key_id=vpp_session.bfd_key_id, + required_min_rx=2 * vpp_session.required_min_rx, + desired_min_tx=3 * vpp_session.desired_min_tx, + detect_mult=4 * vpp_session.detect_mult) + self.cli_verify_no_response( + "bfd udp session mod interface %s local-addr %s peer-addr %s " + "desired-min-tx %s required-min-rx %s detect-mult %s" % + (self.pg0.name, self.pg0.local_ip4, self.pg0.remote_ip4, + mod_session.desired_min_tx, + mod_session.required_min_rx, mod_session.detect_mult)) + verify_bfd_session_config(self, mod_session) + cli_del_cmd = "bfd udp session del interface %s local-addr %s "\ + "peer-addr %s" % (self.pg0.name, + self.pg0.local_ip4, self.pg0.remote_ip4) + self.cli_verify_no_response(cli_del_cmd) + # 2nd del is expected to fail + self.cli_verify_response( + cli_del_cmd, + "bfd udp session del: `bfd_udp_del_session' API call" + " failed, rv=-102:No such BFD object") + self.assertFalse(vpp_session.query_vpp_config()) + + def test_add_mod_del_bfd_udp6_auth(self): + """ create/modify/delete IPv6 BFD UDP session (authenticated) """ + key = self.factory.create_random_key( + self, auth_type=BFDAuthType.meticulous_keyed_sha1) + key.add_vpp_config() + vpp_session = VppBFDUDPSession( + self, self.pg0, self.pg0.remote_ip6, af=AF_INET6, sha1_key=key) + self.registry.register(vpp_session, self.logger) + cli_add_cmd = "bfd udp session add interface %s local-addr %s " \ + "peer-addr %s desired-min-tx %s required-min-rx %s "\ + "detect-mult %s conf-key-id %s bfd-key-id %s" \ + % (self.pg0.name, self.pg0.local_ip6, self.pg0.remote_ip6, + vpp_session.desired_min_tx, vpp_session.required_min_rx, + vpp_session.detect_mult, key.conf_key_id, + vpp_session.bfd_key_id) + self.cli_verify_no_response(cli_add_cmd) + # 2nd add should fail + self.cli_verify_response( + cli_add_cmd, + "bfd udp session add: `bfd_add_add_session' API call" + " failed, rv=-101:Duplicate BFD object") + verify_bfd_session_config(self, vpp_session) + mod_session = VppBFDUDPSession( + self, self.pg0, 
self.pg0.remote_ip6, af=AF_INET6, sha1_key=key, + bfd_key_id=vpp_session.bfd_key_id, + required_min_rx=2 * vpp_session.required_min_rx, + desired_min_tx=3 * vpp_session.desired_min_tx, + detect_mult=4 * vpp_session.detect_mult) + self.cli_verify_no_response( + "bfd udp session mod interface %s local-addr %s peer-addr %s " + "desired-min-tx %s required-min-rx %s detect-mult %s" % + (self.pg0.name, self.pg0.local_ip6, self.pg0.remote_ip6, + mod_session.desired_min_tx, + mod_session.required_min_rx, mod_session.detect_mult)) + verify_bfd_session_config(self, mod_session) + cli_del_cmd = "bfd udp session del interface %s local-addr %s "\ + "peer-addr %s" % (self.pg0.name, + self.pg0.local_ip6, self.pg0.remote_ip6) + self.cli_verify_no_response(cli_del_cmd) + # 2nd del is expected to fail + self.cli_verify_response( + cli_del_cmd, + "bfd udp session del: `bfd_udp_del_session' API call" + " failed, rv=-102:No such BFD object") + self.assertFalse(vpp_session.query_vpp_config()) + + def test_auth_on_off(self): + """ turn authentication on and off """ + key = self.factory.create_random_key( + self, auth_type=BFDAuthType.meticulous_keyed_sha1) + key.add_vpp_config() + session = VppBFDUDPSession(self, self.pg0, self.pg0.remote_ip4) + auth_session = VppBFDUDPSession(self, self.pg0, self.pg0.remote_ip4, + sha1_key=key) + session.add_vpp_config() + cli_activate = \ + "bfd udp session auth activate interface %s local-addr %s "\ + "peer-addr %s conf-key-id %s bfd-key-id %s"\ + % (self.pg0.name, self.pg0.local_ip4, self.pg0.remote_ip4, + key.conf_key_id, auth_session.bfd_key_id) + self.cli_verify_no_response(cli_activate) + verify_bfd_session_config(self, auth_session) + self.cli_verify_no_response(cli_activate) + verify_bfd_session_config(self, auth_session) + cli_deactivate = \ + "bfd udp session auth deactivate interface %s local-addr %s "\ + "peer-addr %s "\ + % (self.pg0.name, self.pg0.local_ip4, self.pg0.remote_ip4) + self.cli_verify_no_response(cli_deactivate) + 
verify_bfd_session_config(self, session) + self.cli_verify_no_response(cli_deactivate) + verify_bfd_session_config(self, session) + + def test_auth_on_off_delayed(self): + """ turn authentication on and off (delayed) """ + key = self.factory.create_random_key( + self, auth_type=BFDAuthType.meticulous_keyed_sha1) + key.add_vpp_config() + session = VppBFDUDPSession(self, self.pg0, self.pg0.remote_ip4) + auth_session = VppBFDUDPSession(self, self.pg0, self.pg0.remote_ip4, + sha1_key=key) + session.add_vpp_config() + cli_activate = \ + "bfd udp session auth activate interface %s local-addr %s "\ + "peer-addr %s conf-key-id %s bfd-key-id %s delayed yes"\ + % (self.pg0.name, self.pg0.local_ip4, self.pg0.remote_ip4, + key.conf_key_id, auth_session.bfd_key_id) + self.cli_verify_no_response(cli_activate) + verify_bfd_session_config(self, auth_session) + self.cli_verify_no_response(cli_activate) + verify_bfd_session_config(self, auth_session) + cli_deactivate = \ + "bfd udp session auth deactivate interface %s local-addr %s "\ + "peer-addr %s delayed yes"\ + % (self.pg0.name, self.pg0.local_ip4, self.pg0.remote_ip4) + self.cli_verify_no_response(cli_deactivate) + verify_bfd_session_config(self, session) + self.cli_verify_no_response(cli_deactivate) + verify_bfd_session_config(self, session) + + def test_admin_up_down(self): + """ put session admin-up and admin-down """ + session = VppBFDUDPSession(self, self.pg0, self.pg0.remote_ip4) + session.add_vpp_config() + cli_down = \ + "bfd udp session set-flags admin down interface %s local-addr %s "\ + "peer-addr %s "\ + % (self.pg0.name, self.pg0.local_ip4, self.pg0.remote_ip4) + cli_up = \ + "bfd udp session set-flags admin up interface %s local-addr %s "\ + "peer-addr %s "\ + % (self.pg0.name, self.pg0.local_ip4, self.pg0.remote_ip4) + self.cli_verify_no_response(cli_down) + verify_bfd_session_config(self, session, state=BFDState.admin_down) + self.cli_verify_no_response(cli_up) + verify_bfd_session_config(self, session, 
state=BFDState.down) + + def test_set_del_udp_echo_source(self): + """ set/del udp echo source """ + self.create_loopback_interfaces([0]) + self.loopback0 = self.lo_interfaces[0] + self.loopback0.admin_up() + self.cli_verify_response("show bfd echo-source", + "UDP echo source is not set.") + cli_set = "bfd udp echo-source set interface %s" % self.loopback0.name + self.cli_verify_no_response(cli_set) + self.cli_verify_response("show bfd echo-source", + "UDP echo source is: %s\n" + "IPv4 address usable as echo source: none\n" + "IPv6 address usable as echo source: none" % + self.loopback0.name) + self.loopback0.config_ip4() + unpacked = unpack("!L", self.loopback0.local_ip4n) + echo_ip4 = inet_ntop(AF_INET, pack("!L", unpacked[0] ^ 1)) + self.cli_verify_response("show bfd echo-source", + "UDP echo source is: %s\n" + "IPv4 address usable as echo source: %s\n" + "IPv6 address usable as echo source: none" % + (self.loopback0.name, echo_ip4)) + unpacked = unpack("!LLLL", self.loopback0.local_ip6n) + echo_ip6 = inet_ntop(AF_INET6, pack("!LLLL", unpacked[0], unpacked[1], + unpacked[2], unpacked[3] ^ 1)) + self.loopback0.config_ip6() + self.cli_verify_response("show bfd echo-source", + "UDP echo source is: %s\n" + "IPv4 address usable as echo source: %s\n" + "IPv6 address usable as echo source: %s" % + (self.loopback0.name, echo_ip4, echo_ip6)) + cli_del = "bfd udp echo-source del" + self.cli_verify_no_response(cli_del) + self.cli_verify_response("show bfd echo-source", + "UDP echo source is not set.") + if __name__ == '__main__': unittest.main(testRunner=VppTestRunner) diff --git a/test/vpp_object.py b/test/vpp_object.py index 0d74baa5..61a96ec2 100644 --- a/test/vpp_object.py +++ b/test/vpp_object.py @@ -46,6 +46,7 @@ class VppObjectRegistry(object): if obj.object_id() not in self._object_dict: self._object_registry.append(obj) self._object_dict[obj.object_id()] = obj + logger.debug("REG: registering %s" % obj) else: logger.debug("REG: duplicate add, ignoring (%s)" % obj) 
diff --git a/test/vpp_papi_provider.py b/test/vpp_papi_provider.py index 67b3e141..bebbe76d 100644 --- a/test/vpp_papi_provider.py +++ b/test/vpp_papi_provider.py @@ -29,6 +29,11 @@ class L2_VTR_OP: L2_POP_1 = 3 +class UnexpectedApiReturnValueError(Exception): + """ exception raised when the API return value is unexpected """ + pass + + class VppPapiProvider(object): """VPP-api provider using vpp-papi @@ -144,13 +149,13 @@ class VppPapiProvider(object): "return value instead of %d in %s" % \ (reply.retval, repr(reply)) self.test_class.logger.info(msg) - raise Exception(msg) + raise UnexpectedApiReturnValueError(msg) elif self._expect_api_retval == self._zero: if hasattr(reply, 'retval') and reply.retval != expected_retval: msg = "API call failed, expected zero return value instead "\ "of %d in %s" % (expected_retval, repr(reply)) self.test_class.logger.info(msg) - raise Exception(msg) + raise UnexpectedApiReturnValueError(msg) else: raise Exception("Internal error, unexpected value for " "self._expect_api_retval %s" % @@ -1188,6 +1193,9 @@ class VppPapiProvider(object): return self.api(self.papi.bfd_udp_set_echo_source, {'sw_if_index': sw_if_index}) + def bfd_udp_del_echo_source(self): + return self.api(self.papi.bfd_udp_del_echo_source, {}) + def classify_add_del_table( self, is_add, -- cgit 1.2.3-korg From 0491ac9aed90e26a54d7b45b469812e6c9fe2f4c Mon Sep 17 00:00:00 2001 From: Klement Sekera Date: Mon, 6 Mar 2017 03:27:05 +0100 Subject: BFD: remove unneeded code Change-Id: I0371e8e3c94bb793e3c64d5f51aaebf19dddc4b4 Signed-off-by: Klement Sekera --- src/vnet/bfd/bfd_udp.c | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'src/vnet/bfd') diff --git a/src/vnet/bfd/bfd_udp.c b/src/vnet/bfd/bfd_udp.c index 207f3b8c..e6dbaabb 100644 --- a/src/vnet/bfd/bfd_udp.c +++ b/src/vnet/bfd/bfd_udp.c @@ -411,21 +411,6 @@ bfd_udp_add_session_internal (bfd_udp_main_t * bum, u32 sw_if_index, BFD_DBG ("adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, VNET_LINK_IP4, %U, %d) " 
"returns %d", format_ip46_address, &key->peer_addr, IP46_TYPE_ANY, key->sw_if_index, bus->adj_index); - - fib_prefix_t fib_prefix; - memset (&fib_prefix, 0, sizeof (fib_prefix)); - fib_prefix.fp_len = 0; - fib_prefix.fp_proto = FIB_PROTOCOL_IP4; - fib_prefix.fp_addr = key->local_addr; - u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, 0); /* FIXME table id 0? */ - dpo_id_t dpo = DPO_INVALID; - dpo_proto_t dproto; - dproto = fib_proto_to_dpo (fib_prefix.fp_proto); - receive_dpo_add_or_lock (dproto, ~0, NULL, &dpo); - fib_table_entry_special_dpo_update (fib_index, &fib_prefix, - FIB_SOURCE_API, - FIB_ENTRY_FLAG_LOCAL, &dpo); - dpo_reset (&dpo); } else { -- cgit 1.2.3-korg From 0e2e0dfe8b6783641e779b01a4b8e9ebf0e6fc77 Mon Sep 17 00:00:00 2001 From: Klement Sekera Date: Fri, 3 Mar 2017 08:51:08 +0100 Subject: BFD: drop rpc call if packet doesn't match session In a very unlikely scenario, during which a session is removed and replaced with a different session sharing the same session index, while a packet is in-flight during RPC call, drop that packet. 
Change-Id: If1c4a77fc2ab460bae2435db066a133185b98747 Signed-off-by: Klement Sekera --- src/vnet/bfd/bfd_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/vnet/bfd') diff --git a/src/vnet/bfd/bfd_main.c b/src/vnet/bfd/bfd_main.c index 01de6375..d38623c1 100644 --- a/src/vnet/bfd/bfd_main.c +++ b/src/vnet/bfd/bfd_main.c @@ -1555,7 +1555,7 @@ void bfd_consume_pkt (bfd_main_t * bm, const bfd_pkt_t * pkt, u32 bs_idx) { bfd_session_t *bs = bfd_find_session_by_idx (bm, bs_idx); - if (!bs) + if (!bs || (pkt->your_disc && pkt->your_disc != bs->local_discr)) { return; } -- cgit 1.2.3-korg From b16bfe3f94739821c7382bd0849630b21e03a8b7 Mon Sep 17 00:00:00 2001 From: Klement Sekera Date: Tue, 28 Feb 2017 11:56:48 +0100 Subject: BFD: documentation Change-Id: I06a23d24340c5527f3848177d2178bf3e55f7614 Signed-off-by: Klement Sekera --- src/vnet/bfd/bfd_api.c | 4 + src/vnet/bfd/bfd_api.h | 32 +++- src/vnet/bfd/bfd_cli.c | 4 +- src/vnet/bfd/bfd_doc.md | 375 +++++++++++++++++++++++++++++++++++++++++++- src/vnet/bfd/bfd_main.c | 6 +- src/vnet/bfd/bfd_main.h | 140 ++++++++--------- src/vnet/bfd/bfd_protocol.c | 6 +- src/vnet/bfd/bfd_protocol.h | 5 +- src/vnet/bfd/bfd_udp.c | 4 + src/vnet/bfd/bfd_udp.h | 39 +++-- src/vnet/bfd/dir.dox | 2 +- 11 files changed, 526 insertions(+), 91 deletions(-) (limited to 'src/vnet/bfd') diff --git a/src/vnet/bfd/bfd_api.c b/src/vnet/bfd/bfd_api.c index e64df48e..185c03cf 100644 --- a/src/vnet/bfd/bfd_api.c +++ b/src/vnet/bfd/bfd_api.c @@ -16,6 +16,10 @@ * limitations under the License. 
*------------------------------------------------------------------ */ +/** + * @file + * @brief BFD binary API implementation + */ #include #include diff --git a/src/vnet/bfd/bfd_api.h b/src/vnet/bfd/bfd_api.h index 35ad3cda..9f0509d5 100644 --- a/src/vnet/bfd/bfd_api.h +++ b/src/vnet/bfd/bfd_api.h @@ -14,7 +14,7 @@ */ /** * @file - * @brief BFD global declarations + * @brief BFD API declarations */ #ifndef __included_bfd_api_h__ #define __included_bfd_api_h__ @@ -34,6 +34,9 @@ typedef enum #undef F } bfd_transport_e; +/** + * @brief create a new bfd session + */ vnet_api_error_t bfd_udp_add_session (u32 sw_if_index, const ip46_address_t * local_addr, const ip46_address_t * peer_addr, @@ -41,39 +44,66 @@ bfd_udp_add_session (u32 sw_if_index, const ip46_address_t * local_addr, u8 detect_mult, u8 is_authenticated, u32 conf_key_id, u8 bfd_key_id); +/** + * @brief modify existing session + */ vnet_api_error_t bfd_udp_mod_session (u32 sw_if_index, const ip46_address_t * local_addr, const ip46_address_t * peer_addr, u32 desired_min_tx_usec, u32 required_min_rx_usec, u8 detect_mult); +/** + * @brief delete existing session + */ vnet_api_error_t bfd_udp_del_session (u32 sw_if_index, const ip46_address_t * local_addr, const ip46_address_t * peer_addr); +/** + * @brief set session admin down/up + */ vnet_api_error_t bfd_udp_session_set_flags (u32 sw_if_index, const ip46_address_t * local_addr, const ip46_address_t * peer_addr, u8 admin_up_down); +/** + * @brief create or modify bfd authentication key + */ vnet_api_error_t bfd_auth_set_key (u32 conf_key_id, u8 auth_type, u8 key_len, const u8 * key); +/** + * @brief delete existing authentication key + */ vnet_api_error_t bfd_auth_del_key (u32 conf_key_id); +/** + * @brief activate authentication for existing session + */ vnet_api_error_t bfd_udp_auth_activate (u32 sw_if_index, const ip46_address_t * local_addr, const ip46_address_t * peer_addr, u32 conf_key_id, u8 bfd_key_id, u8 is_delayed); +/** + * @brief deactivate 
authentication for existing session + */ vnet_api_error_t bfd_udp_auth_deactivate (u32 sw_if_index, const ip46_address_t * local_addr, const ip46_address_t * peer_addr, u8 is_delayed); +/** + * @brief set echo-source interface + */ vnet_api_error_t bfd_udp_set_echo_source (u32 loopback_sw_if_index); +/** + * @brief unset echo-source interface + */ vnet_api_error_t bfd_udp_del_echo_source (); #endif /* __included_bfd_api_h__ */ diff --git a/src/vnet/bfd/bfd_cli.c b/src/vnet/bfd/bfd_cli.c index a3736d98..f15acb4b 100644 --- a/src/vnet/bfd/bfd_cli.c +++ b/src/vnet/bfd/bfd_cli.c @@ -737,7 +737,7 @@ VLIB_CLI_COMMAND (bfd_cli_udp_session_auth_activate_command, static) = { " peer-addr " " conf-key-id " " bfd-key-id " - " [ delayed ]", + " [ delayed ]", .function = bfd_cli_udp_session_auth_activate, }; @@ -815,7 +815,7 @@ VLIB_CLI_COMMAND (bfd_cli_udp_session_auth_deactivate_command, static) = { " interface " " local-addr " " peer-addr " - "[ delayed ]", + "[ delayed ]", .function = bfd_cli_udp_session_auth_deactivate, }; /* *INDENT-ON* */ diff --git a/src/vnet/bfd/bfd_doc.md b/src/vnet/bfd/bfd_doc.md index 3e86b178..7d7606e4 100644 --- a/src/vnet/bfd/bfd_doc.md +++ b/src/vnet/bfd/bfd_doc.md @@ -1,3 +1,374 @@ -# BFD Notes {#bfd_doc} +# BFD module {#bfd_doc} -@todo Someone needs to produce this or remove the stub file. +## Overview + +Bidirectional Forwarding Detection in VPP currently supports single-hop UDP +transport based on RFC 5880 and RFC 5881. + +## Usage + +### General usage + +BFD sessions are created using APIs only. The following CLIs are implemented, +which call the APIs to manipulate the BFD: + +#### Show commands: + +> show bfd [keys|sessions|echo-source] + +Show the existing keys, sessions or echo-source. 
+ +#### Key manipulation + +##### Create a new key or modify an existing key + +> bfd key set conf-key-id type secret + +Parameters: + +* conf-key-id - local configuration key ID, used to uniquely identify this key +* type - type of the key +* secret - shared secret (hex data) + +Example: + +> bfd key set conf-key-id 2368880803 type meticulous-keyed-sha1 secret 69d685b0d990cdba46872706dc + +Notes: + +* in-use key cannot be modified + +##### Delete an existing key + +> bfd key del conf-key-id + +Parameters: + +* conf-key-id - local configuration key ID, used to uniquely identify this key + +Example: + +> bfd key del conf-key-id 2368880803 + +Notes: + +* in-use key cannot be deleted + +##### Create a new (plain or authenticated) BFD session + +> bfd udp session add interface local-addr
peer-addr
desired-min-tx required-min-rx detect-mult [ conf-key-id bfd-key-id ] + +Parameters: + +* interface - interface to which this session is tied to +* local-addr - local address (ipv4 or ipv6) +* peer-addr - peer address (ipv4 or ipv6, must match local-addr family) +* desired-min-tx - desired minimum tx interval (microseconds) +* required-min-rx - required minimum rx interval (microseconds) +* detect-mult - detect multiplier (must be non-zero) +* conf-key-id - local configuration key ID +* bfd-key-id - BFD key ID, as carried in BFD control frames + +Example: + +> bfd udp session add interface pg0 local-addr fd01:1::1 peer-addr fd01:1::2 desired-min-tx 100000 required-min-rx 100000 detect-mult 3 conf-key-id 1029559112 bfd-key-id 13 + +Notes: + +* if conf-key-id and bfd-key-id are not specified, session is non-authenticated +* desired-min-tx controls desired transmission rate of both control frames and echo packets + +##### Modify BFD session + +> bfd udp session mod interface local-addr
peer-addr
desired-min-tx required-min-rx detect-mult + +Parameters: + +* interface - interface to which this session is tied to +* local-addr - local address (ipv4 or ipv6) +* peer-addr - peer address (ipv4 or ipv6, must match local-addr family) +* desired-min-tx - desired minimum tx interval (microseconds) +* required-min-rx - required minimum rx interval (microseconds) +* detect-mult - detect multiplier (must be non-zero) + +Example: + +> bfd udp session mod interface pg0 local-addr 172.16.1.1 peer-addr 172.16.1.2 desired-min-tx 300000 required-min-rx 200000 detect-mult 12 + +Notes: + +* desired-min-tx controls desired transmission rate of both control frames and echo packets + +##### Delete an existing BFD session + +> bfd udp session del interface local-addr
peer-addr
+ +Parameters: + +* interface - interface to which this session is tied to +* local-addr - local address (ipv4 or ipv6) +* peer-addr - peer address (ipv4 or ipv6, must match local-addr family) + +Example: + +> bfd udp session del interface pg0 local-addr 172.16.1.1 peer-addr 172.16.1.2 + +##### Set session admin-up or admin-down + +> bfd udp session set-flags interface local-addr
peer-addr
admin + +Parameters: + +* interface - interface to which this session is tied to +* local-addr - local address (ipv4 or ipv6) +* peer-addr - peer address (ipv4 or ipv6, must match local-addr family) +* admin - up/down based on desired action + +Example: + +> bfd udp session set-flags admin down interface pg0 local-addr 172.16.1.1 peer-addr 172.16.1.2 + +##### Activate/change authentication for existing session + +> bfd udp session auth activate interface local-addr
peer-addr
conf-key-id bfd-key-id [ delayed ] + +Parameters: + +* interface - interface to which this session is tied to +* local-addr - local address (ipv4 or ipv6) +* peer-addr - peer address (ipv4 or ipv6, must match local-addr family) +* conf-key-id - local configuration key ID +* bfd-key-id - BFD key ID, as carried in BFD control frames +* delayed - if yes, then this action is delayed until the peer performs the same action + +Example: + +> bfd udp session auth activate interface pg0 local-addr 172.16.1.1 peer-addr 172.16.1.2 conf-key-id 540928695 bfd-key-id 239 delayed yes + +Notes: + +* see [Delayed option] for more information + +##### Deactivate authentication for existing session + +> bfd udp session auth deactivate interface local-addr
peer-addr
[ delayed ] + +Parameters: + +* interface - interface to which this session is tied to +* local-addr - local address (ipv4 or ipv6) +* peer-addr - peer address (ipv4 or ipv6, must match local-addr family) +* delayed - if yes, then this action is delayed until the peer performs the same action + +Example: + +> bfd udp session auth deactivate interface pg0 local-addr 172.16.1.1 peer-addr 172.16.1.2 + +Notes: + +* see [Delayed option] for more information + +##### Set echo-source interface + +> bfd udp echo-source set interface + +Parameters: + +* interface - interface used for getting source address for echo packets + +Example: + +> bfd udp echo-source set interface loop0 + +##### Delete echo-source interface + +> bfd udp echo-source del + +Example: + +> bfd udp echo-source del + +### Authentication + +BFD sessions should be authenticated for security purposes. SHA1 and meticulous +SHA1 authentication is supported by VPP. First, authentication keys are +configured in VPP and afterwards they can be used by sessions. + +There are two key IDs in the scope of BFD session: + +* configuration key ID is the internal unique key ID inside VPP and is never + communicated to any peer, it serves only the purpose of identifying the key +* BFD key ID is the key ID carried in BFD control frames and is used for + verifying authentication + +#### Turning auth on/off + +Authentication can be turned on or off at any time. Care must be taken, however, +to synchronize the authentication manipulation with peer's actions +to avoid the session going down. + +##### Delayed option + +Delayed option is useful for synchronizing authentication changes with a peer. +If it's specified, then authentication change is not performed immediately. +In this case, VPP continues to transmit packets using the old authentication +method (unauthenticated or using old sha1 key).
If a packet is received, which +does not pass the current authentication, then VPP tries to authenticate it +using the new method (which might be none, if deactivating authentication) +and if it passes, then the new authentication method is put in use. + +The recommended procedure for enabling/changing/disabling session +authentication is: + +1. perform authentication change on vpp's side with delayed option set to yes +2. perform authentication change on peer's side (without delayed option) + +Notes: + +* if both peers use delayed option at the same time, the change will never + be carried out, since neither of the peers will see any packet with the new + authentication which could trigger the change +* remote peer does not need to support or even be aware of this mechanism + for it to work properly + + +### Echo function + +Echo function is used by VPP whenever a peer declares the willingness +to support it, echo-source is set and it contains a usable subnet (see below). +When echo function is switched on, the required min rx interval advertised +to peer is set to 1 second (or the configured value, if it's higher). + +#### Echo source address + +Because echo packets are only looped back (and not processed in any way) +by a peer, it's necessary to set the source address in a way which avoids +packet drop due to spoofing protection by VPP. Per RFC, the source address +should not be in the subnet set on the interface over which the echo packets +are sent. Also, it must not be any VPP-local address, otherwise the packet +gets dropped on receipt by VPP. The solution is to create a loopback interface +with a (private) IPv4/IPv6 subnet assigned as echo-source. The BFD then picks +an unused address from the subnet by flipping the last bit and uses that as +source address in the echo packets, thus meeting RFC recommendation while +avoiding spoofing protection.
+ +Example: if 10.10.10.3/31 is the subnet, then 10.10.10.2 will be used as + source address in (IPv4) echo packets + +### Demand mode + +Demand mode is respected by VPP, but not used locally. The only scenario when +demand mode could make sense currently is when echo is active. Because echo +packets are inherently insecure against an adversary looping them back, a poll +sequence would be required for slow periodic connectivity verification anyway. +It's more efficient to just ask the remote peer to send slow periodic control +frames without VPP initiating periodic poll sequences. + +### Admin-down + +Session may be put admin-down at any time. This immediately causes the state +to be changed to AdminDown and remain so unless the session is put admin-up. + +## BFD implementation notes + +Because BFD can work over different transport layers, the BFD code is separated +into core BFD functionality - main module implemented in bfd_main.c +and transport-specific code implemented in bfd_udp.c. + +### Main module + +Main module is responsible for handling all the BFD functionality defined +in RFC 5880. + +#### Internal API + +Internal APIs defined in bfd_main.h are called from transport-specific code +to create/modify/delete sessions. + +#### Packet receipt + +When a packet is received by the transport layer, it is forwarded to main +module (to main thread) via an RPC call. At this point, the authentication has +been verified, so the packet is consumed, session parameters are updated +accordingly and the state is changed (if applicable). Based on these, the timeouts +are adjusted if required and an event is sent to the process node to wake up +and recalculate sleep time. + +#### Packet transmit + +Main module allocates a vlib_buffer_t, creates the required BFD frame (control +or echo) in it, then calls the transport layer to add the transport layer. +Then a frame containing the buffer to the appropriate node is created +and enqueued.
+ +#### Process node + +Main module implements one process node which is a simple loop. The process +node gets next timeout from the timer wheel, sleeps until the timeout expires +and then calls a timeout routine which drives the state machine for each +session which timed out. The sleep is interrupted externally via vlib event, +when a session is added or modified in a way which might require timer wheel +manipulation. In this case the caller inserts the necessary timeout to timer +wheel and then signals the process node to wake up early, handle possible +timeouts and recalculate the sleep time again. + +#### State machine + +Default state of BFD session when created is Down, per RFC 5880. State changes +to Init, Up or Down based on events like received state from peer and timeouts. +The session state can be set AdminDown using a binary API, which prevents it +from going to any other state, until this limitation is removed. This state +is advertised to peers in slow periodic control frames. + +For each session, the following timeouts are maintained: + +1. tx timeout - used for sending out control frames +2. rx timeout - used for detecting session timeout +3. echo tx timeout - used for sending out echo frames +4. echo rx timeout - used for detecting session timeout based on echo + +These timeouts are maintained in cpu clocks and recalculated when appropriate +(e.g. rx timeout is bumped when a packet is received, keeping the session +alive). Only the earliest timeout is inserted into the timer wheel at a time +and timer wheel events are never deleted, rather spurious events are ignored. +This allows efficient operation, like not inserting events into timing wheel +for each packet received or ignoring left-over events in case a bfd session +gets removed and a new one is recreated with the same session index. + +#### Authentication keys management + +Authentication keys are managed internally in a pool, with each key tracking +its use count.
The removal/modification is only allowed if the key is not in +use. + +### UDP module + +UDP module is responsible for: + +1. public APIs/CLIs to configure BFD over UDP. +2. support code called by main module to encapsulate/decapsulate BFD packets + +This module implements two graph nodes - for consuming ipv4 and ipv6 packets +targeted at BFD ports 3784 and 3785. + +#### Packet receipt + +BFD packet receipt starts in the bfd udp graph nodes. Since the code +needs to verify IP/UDP header data, it relies on ip4-local (and ip6-local) +nodes to store pointers to the appropriate headers. First, your discriminator +is extracted from BFD packet and used to lookup the existing session. In case +it's zero, the pair of IP addresses and sw_if_index is used to lookup session. +Then, main module is called to verify the authentication, if present. +Afterwards a check is made if the IP/UDP headers are correct. If yes, then +an RPC call is made to the main thread to consume the packet and take action +upon it. + +#### Packet transmission + +When process node decides that there is a need to transmit the packet, it +creates a buffer, fills the BFD frame data in and calls the UDP module to +add the transport layer. This is a simple operation for the control frames +consisting of just adding UDP/IP headers based on session data. For echo +frames, an additional step, looking at the echo-source interface and picking +an address is performed and if this fails, then the packet cannot be +transmitted and an error is returned to main thread.
diff --git a/src/vnet/bfd/bfd_main.c b/src/vnet/bfd/bfd_main.c index d38623c1..ea6db1f9 100644 --- a/src/vnet/bfd/bfd_main.c +++ b/src/vnet/bfd/bfd_main.c @@ -1606,8 +1606,6 @@ bfd_consume_pkt (bfd_main_t * bm, const bfd_pkt_t * pkt, u32 bs_idx) bfd_set_remote_required_min_echo_rx (bm, bs, now, clib_net_to_host_u32 (pkt->req_min_echo_rx)); - /* FIXME 6.8.2 */ - /* FIXME 6.8.4 */ if (bfd_pkt_get_final (pkt)) { if (BFD_POLL_IN_PROGRESS == bs->poll_state) @@ -1915,13 +1913,13 @@ bfd_auth_set_key (u32 conf_key_id, u8 auth_type, u8 key_len, { #if WITH_LIBSSL > 0 bfd_auth_key_t *auth_key = NULL; - if (!key_len || key_len > bfd_max_len_for_auth_type (auth_type)) + if (!key_len || key_len > bfd_max_key_len_for_auth_type (auth_type)) { clib_warning ("Invalid authentication key length for auth_type=%d:%s " "(key_len=%u, must be " "non-zero, expected max=%u)", auth_type, bfd_auth_type_str (auth_type), key_len, - (u32) bfd_max_len_for_auth_type (auth_type)); + (u32) bfd_max_key_len_for_auth_type (auth_type)); return VNET_API_ERROR_INVALID_VALUE; } if (!bfd_auth_type_supported (auth_type)) diff --git a/src/vnet/bfd/bfd_main.h b/src/vnet/bfd/bfd_main.h index 3be3694c..4d460f41 100644 --- a/src/vnet/bfd/bfd_main.h +++ b/src/vnet/bfd/bfd_main.h @@ -37,19 +37,19 @@ typedef enum typedef struct { - /* global configuration key ID */ + /** global configuration key ID */ u32 conf_key_id; - /* keeps track of how many sessions reference this key */ + /** keeps track of how many sessions reference this key */ u32 use_count; - /* + /** * key data directly usable for bfd purposes - already padded with zeroes * (so we don't need the actual length) */ u8 key[20]; - /* authentication type for this key */ + /** authentication type for this key */ bfd_auth_type_e auth_type; } bfd_auth_key_t; @@ -68,152 +68,152 @@ typedef enum typedef struct bfd_session_s { - /* index in bfd_main.sessions pool */ + /** index in bfd_main.sessions pool */ u32 bs_idx; - /* session state */ + /** session state */ 
bfd_state_e local_state; - /* remote session state */ + /** remote session state */ bfd_state_e remote_state; - /* local diagnostics */ + /** local diagnostics */ bfd_diag_code_e local_diag; - /* remote diagnostics */ + /** remote diagnostics */ bfd_diag_code_e remote_diag; - /* local discriminator */ + /** local discriminator */ u32 local_discr; - /* remote discriminator */ + /** remote discriminator */ u32 remote_discr; - /* configured desired min tx interval (microseconds) */ + /** configured desired min tx interval (microseconds) */ u32 config_desired_min_tx_usec; - /* configured desired min tx interval (clocks) */ + /** configured desired min tx interval (clocks) */ u64 config_desired_min_tx_clocks; - /* effective desired min tx interval (clocks) */ + /** effective desired min tx interval (clocks) */ u64 effective_desired_min_tx_clocks; - /* configured required min rx interval (microseconds) */ + /** configured required min rx interval (microseconds) */ u32 config_required_min_rx_usec; - /* configured required min rx interval (clocks) */ + /** configured required min rx interval (clocks) */ u64 config_required_min_rx_clocks; - /* effective required min rx interval (clocks) */ + /** effective required min rx interval (clocks) */ u64 effective_required_min_rx_clocks; - /* remote min rx interval (microseconds) */ + /** remote min rx interval (microseconds) */ u64 remote_min_rx_usec; - /* remote min rx interval (clocks) */ + /** remote min rx interval (clocks) */ u64 remote_min_rx_clocks; - /* remote min echo rx interval (microseconds) */ + /** remote min echo rx interval (microseconds) */ u64 remote_min_echo_rx_usec; - /* remote min echo rx interval (clocks) */ + /** remote min echo rx interval (clocks) */ u64 remote_min_echo_rx_clocks; - /* remote desired min tx interval (clocks) */ + /** remote desired min tx interval (clocks) */ u64 remote_desired_min_tx_clocks; - /* configured detect multiplier */ + /** configured detect multiplier */ u8 local_detect_mult; - 
/* 1 if remote system sets demand mode, 0 otherwise */ + /** 1 if remote system sets demand mode, 0 otherwise */ u8 remote_demand; - /* remote detect multiplier */ + /** remote detect multiplier */ u8 remote_detect_mult; - /* 1 is echo function is active, 0 otherwise */ + /** 1 is echo function is active, 0 otherwise */ u8 echo; - /* set to value of timer in timing wheel, 0 if never set */ + /** set to value of timer in timing wheel, 0 if never set */ u64 wheel_time_clocks; - /* transmit interval */ + /** transmit interval */ u64 transmit_interval_clocks; - /* next time at which to transmit a packet */ + /** next time at which to transmit a packet */ u64 tx_timeout_clocks; - /* timestamp of last packet transmitted */ + /** timestamp of last packet transmitted */ u64 last_tx_clocks; - /* timestamp of last packet received */ + /** timestamp of last packet received */ u64 last_rx_clocks; - /* transmit interval for echo packets */ + /** transmit interval for echo packets */ u64 echo_transmit_interval_clocks; - /* next time at which to transmit echo packet */ + /** next time at which to transmit echo packet */ u64 echo_tx_timeout_clocks; - /* timestamp of last echo packet transmitted */ + /** timestamp of last echo packet transmitted */ u64 echo_last_tx_clocks; - /* timestamp of last echo packet received */ + /** timestamp of last echo packet received */ u64 echo_last_rx_clocks; - /* secret used for calculating/checking checksum of echo packets */ + /** secret used for calculating/checking checksum of echo packets */ u32 echo_secret; - /* detection time */ + /** detection time */ u64 detection_time_clocks; - /* state info regarding poll sequence */ + /** state info regarding poll sequence */ bfd_poll_state_e poll_state; - /* + /** * helper for delayed poll sequence - marks either start of running poll * sequence or timeout, after which we can start the next poll sequnce */ u64 poll_state_start_or_timeout_clocks; - /* authentication information */ + /** authentication 
information */ struct { - /* current key in use */ + /** current key in use */ bfd_auth_key_t *curr_key; - /* + /** * set to next key to use if delayed switch is enabled - in that case * the key is switched when first incoming packet is signed with next_key */ bfd_auth_key_t *next_key; - /* sequence number incremented occasionally or always (if meticulous) */ + /** sequence number incremented occasionally or always (if meticulous) */ u32 local_seq_number; - /* remote sequence number */ + /** remote sequence number */ u32 remote_seq_number; - /* set to 1 if remote sequence number is known */ + /** set to 1 if remote sequence number is known */ u8 remote_seq_number_known; - /* current key ID sent out in bfd packet */ + /** current key ID sent out in bfd packet */ u8 curr_bfd_key_id; - /* key ID to use when switched to next_key */ + /** key ID to use when switched to next_key */ u8 next_bfd_key_id; - /* + /** * set to 1 if delayed action is pending, which might be activation * of authentication, change of key or deactivation */ u8 is_delayed; } auth; - /* transport type for this session */ + /** transport type for this session */ bfd_transport_e transport; - /* union of transport-specific data */ + /** union of transport-specific data */ union { bfd_udp_session_t udp; @@ -222,48 +222,48 @@ typedef struct bfd_session_s typedef struct { - /* pool of bfd sessions context data */ + /** pool of bfd sessions context data */ bfd_session_t *sessions; - /* timing wheel for scheduling timeouts */ + /** timing wheel for scheduling timeouts */ timing_wheel_t wheel; - /* timing wheel inaccuracy, in clocks */ + /** timing wheel inaccuracy, in clocks */ u64 wheel_inaccuracy; - /* hashmap - bfd session by discriminator */ + /** hashmap - bfd session by discriminator */ u32 *session_by_disc; - /* background process node index */ + /** background process node index */ u32 bfd_process_node_index; - /* convenience variables */ + /** convenience variables */ vlib_main_t *vlib_main; 
vnet_main_t *vnet_main; - /* cpu clocks per second */ + /** cpu clocks per second */ f64 cpu_cps; - /* default desired min tx in clocks */ + /** default desired min tx in clocks */ u64 default_desired_min_tx_clocks; - /* minimum required min rx while echo function is active - clocks */ + /** minimum required min rx while echo function is active - clocks */ u64 min_required_min_rx_while_echo_clocks; - /* for generating random numbers */ + /** for generating random numbers */ u32 random_seed; - /* pool of authentication keys */ + /** pool of authentication keys */ bfd_auth_key_t *auth_keys; - /* hashmap - index in pool auth_keys by conf_key_id */ + /** hashmap - index in pool auth_keys by conf_key_id */ u32 *auth_key_by_conf_key_id; } bfd_main_t; extern bfd_main_t bfd_main; -/* Packet counters */ +/** Packet counters */ #define foreach_bfd_error(F) \ F (NONE, "good bfd packets (processed)") \ F (BAD, "invalid bfd packets") \ @@ -277,7 +277,7 @@ typedef enum BFD_N_ERROR, } bfd_error_t; -/* bfd packet trace capture */ +/** bfd packet trace capture */ typedef struct { u32 len; @@ -291,14 +291,14 @@ enum BFD_EVENT_CONFIG_CHANGED, } bfd_process_event_e; -/* echo packet structure */ /* *INDENT-OFF* */ +/** echo packet structure */ typedef CLIB_PACKED (struct { - /* local discriminator */ + /** local discriminator */ u32 discriminator; - /* expire time of this packet - clocks */ + /** expire time of this packet - clocks */ u64 expire_time_clocks; - /* checksum - based on discriminator, local secret and expire time */ + /** checksum - based on discriminator, local secret and expire time */ u64 checksum; }) bfd_echo_pkt_t; /* *INDENT-ON* */ @@ -335,10 +335,10 @@ const char *bfd_poll_state_string (bfd_poll_state_e state); #define USEC_PER_MS 1000LL #define USEC_PER_SECOND (1000 * USEC_PER_MS) -/* default, slow transmission interval for BFD packets, per spec at least 1s */ +/** default, slow transmission interval for BFD packets, per spec at least 1s */ #define 
BFD_DEFAULT_DESIRED_MIN_TX_USEC USEC_PER_SECOND -/* +/** * minimum required min rx set locally when echo function is used, per spec * should be set to at least 1s */ diff --git a/src/vnet/bfd/bfd_protocol.c b/src/vnet/bfd/bfd_protocol.c index 5deb9702..cd51e91a 100644 --- a/src/vnet/bfd/bfd_protocol.c +++ b/src/vnet/bfd/bfd_protocol.c @@ -12,6 +12,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +/** + * @file + * @brief BFD protocol implementation + */ #include u8 @@ -131,7 +135,7 @@ bfd_pkt_set_multipoint (bfd_pkt_t * pkt) #endif u32 -bfd_max_len_for_auth_type (bfd_auth_type_e auth_type) +bfd_max_key_len_for_auth_type (bfd_auth_type_e auth_type) { #define F(t, l, n, s) \ if (auth_type == t) \ diff --git a/src/vnet/bfd/bfd_protocol.h b/src/vnet/bfd/bfd_protocol.h index cdbb8fa7..210c561b 100644 --- a/src/vnet/bfd/bfd_protocol.h +++ b/src/vnet/bfd/bfd_protocol.h @@ -40,7 +40,10 @@ typedef enum #undef F } bfd_auth_type_e; -u32 bfd_max_len_for_auth_type (bfd_auth_type_e auth_type); +/** + * @brief get the maximum length of key data for given auth type + */ +u32 bfd_max_key_len_for_auth_type (bfd_auth_type_e auth_type); const char *bfd_auth_type_str (bfd_auth_type_e auth_type); /* *INDENT-OFF* */ diff --git a/src/vnet/bfd/bfd_udp.c b/src/vnet/bfd/bfd_udp.c index e6dbaabb..b3eabc9c 100644 --- a/src/vnet/bfd/bfd_udp.c +++ b/src/vnet/bfd/bfd_udp.c @@ -12,6 +12,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +/** + * @file + * @brief BFD UDP transport layer implementation + */ #include #include #include diff --git a/src/vnet/bfd/bfd_udp.h b/src/vnet/bfd/bfd_udp.h index e33b7407..5080ec98 100644 --- a/src/vnet/bfd/bfd_udp.h +++ b/src/vnet/bfd/bfd_udp.h @@ -13,7 +13,7 @@ */ /** * @file - * @brief BFD global declarations + * @brief BFD UDP transport layer declarations */ #ifndef __included_bfd_udp_h__ @@ -25,23 +25,27 @@ #include /* *INDENT-OFF* */ +/** identifier of BFD session based on UDP transport only */ typedef CLIB_PACKED (struct { - + /** interface to which the session is tied */ u32 sw_if_index; + /** local address */ ip46_address_t local_addr; + /** peer address */ ip46_address_t peer_addr; - }) bfd_udp_key_t; /* *INDENT-ON* */ +/** UDP transport specific data embedded in bfd_session's union */ typedef struct { + /** key identifying this session */ bfd_udp_key_t key; - + /** adjacency index returned from adj lock call */ adj_index_t adj_index; } bfd_udp_session_t; -/* bfd udp echo packet trace capture */ +/** bfd udp echo packet trace capture */ typedef struct { u32 len; @@ -50,8 +54,23 @@ typedef struct struct bfd_session_s; +/** + * @brief add the necessary transport layer by prepending it to existing data + * + * @param is_echo 1 if this is echo packet, 0 if control frame + * + * @return 1 on success, 0 on failure + */ int bfd_add_udp4_transport (vlib_main_t * vm, vlib_buffer_t * b, const struct bfd_session_s *bs, int is_echo); + +/** + * @brief add the necessary transport layer by prepending it to existing data + * + * @param is_echo 1 if this is echo packet, 0 if control frame + * + * @return 1 on success, 0 on failure + */ int bfd_add_udp6_transport (vlib_main_t * vm, vlib_buffer_t * b, const struct bfd_session_s *bs, int is_echo); @@ -62,10 +81,12 @@ int bfd_add_udp6_transport (vlib_main_t * vm, vlib_buffer_t * b, */ int bfd_udp_is_echo_available (bfd_transport_e transport); -void -bfd_udp_get_echo_source (int *is_set, u32 * sw_if_index, int 
*have_usable_ip4, - ip4_address_t * ip4, int *have_usable_ip6, - ip6_address_t * ip6); +/** + * @brief get echo source information - used by CLI + */ +void bfd_udp_get_echo_source (int *is_set, u32 * sw_if_index, + int *have_usable_ip4, ip4_address_t * ip4, + int *have_usable_ip6, ip6_address_t * ip6); #endif /* __included_bfd_udp_h__ */ diff --git a/src/vnet/bfd/dir.dox b/src/vnet/bfd/dir.dox index ed656b52..b9a5978f 100644 --- a/src/vnet/bfd/dir.dox +++ b/src/vnet/bfd/dir.dox @@ -13,6 +13,6 @@ * limitations under the License. */ /** - @dir vnet/vnet/bfd + @dir @brief Bidirectional Forwarding Detection (BFD) implementation */ -- cgit 1.2.3-korg From e50e8568c160f51cc2a268b59e209d13cb7344be Mon Sep 17 00:00:00 2001 From: Klement Sekera Date: Tue, 4 Apr 2017 16:19:48 +0200 Subject: BFD: add ARP-awareness, fix bugs Make BFD ARP-aware when sending out packets. Fix a few one-liner bugs discovered while integrating with cisco nexus. Enhance CLI view to better observe session state. Change-Id: I266c29492f351207b84328ab665d9d697969da9c Signed-off-by: Klement Sekera --- src/vnet/bfd/bfd_cli.c | 57 ++++++++++++++++---- src/vnet/bfd/bfd_main.c | 136 ++++++++++++++++++++++++++++------------------- src/vnet/bfd/bfd_main.h | 3 +- src/vnet/bfd/bfd_udp.c | 138 ++++++++++++++++++++++++++++++++++++++++++++---- src/vnet/bfd/bfd_udp.h | 25 ++++++++- 5 files changed, 282 insertions(+), 77 deletions(-) (limited to 'src/vnet/bfd') diff --git a/src/vnet/bfd/bfd_cli.c b/src/vnet/bfd/bfd_cli.c index f15acb4b..44e671c5 100644 --- a/src/vnet/bfd/bfd_cli.c +++ b/src/vnet/bfd/bfd_cli.c @@ -28,6 +28,7 @@ static u8 * format_bfd_session_cli (u8 * s, va_list * args) { + vlib_main_t *vm = va_arg (*args, vlib_main_t *); bfd_main_t *bm = va_arg (*args, bfd_main_t *); bfd_session_t *bs = va_arg (*args, bfd_session_t *); switch (bs->transport) @@ -51,7 +52,7 @@ format_bfd_session_cli (u8 * s, va_list * args) bfd_diag_code_string (bs->remote_diag)); s = format (s, "%10s %-32s %20u %20u\n", "", "Detect 
multiplier", bs->local_detect_mult, bs->remote_detect_mult); - s = format (s, "%10s %-32s %20u %20u\n", "", + s = format (s, "%10s %-32s %20u %20llu\n", "", "Required Min Rx Interval (usec)", bs->config_required_min_rx_usec, bs->remote_min_rx_usec); s = format (s, "%10s %-32s %20u %20u\n", "", @@ -61,18 +62,54 @@ format_bfd_session_cli (u8 * s, va_list * args) s = format (s, "%10s %-32s %20u\n", "", "Transmit interval", bfd_clocks_to_usec (bm, bs->transmit_interval_clocks)); + u64 now = clib_cpu_time_now (); + u8 *tmp = NULL; + if (bs->last_tx_clocks > 0) + { + tmp = format (tmp, "%.2fs ago", (now - bs->last_tx_clocks) * + vm->clib_time.seconds_per_clock); + s = format (s, "%10s %-32s %20v\n", "", "Last control frame tx", tmp); + vec_reset_length (tmp); + } + if (bs->last_rx_clocks) + { + tmp = format (tmp, "%.2fs ago", (now - bs->last_rx_clocks) * + vm->clib_time.seconds_per_clock); + s = format (s, "%10s %-32s %20v\n", "", "Last control frame rx", tmp); + vec_reset_length (tmp); + } s = - format (s, "%10s %-32s %20s %20s\n", "", "Demand mode", "no", - bs->remote_demand ? 
"yes" : "no"); - s = - format (s, "%10s %-32s %20s\n", "", "Poll state", - bfd_poll_state_string (bs->poll_state)); + format (s, "%10s %-32s %20u %20llu\n", "", "Min Echo Rx Interval (usec)", + 1, bs->remote_min_echo_rx_usec); + if (bs->echo) + { + s = format (s, "%10s %-32s %20u\n", "", "Echo transmit interval", + bfd_clocks_to_usec (bm, bs->echo_transmit_interval_clocks)); + tmp = format (tmp, "%.2fs ago", (now - bs->echo_last_tx_clocks) * + vm->clib_time.seconds_per_clock); + s = format (s, "%10s %-32s %20v\n", "", "Last echo frame tx", tmp); + vec_reset_length (tmp); + tmp = format (tmp, "%.6fs", + (bs->echo_last_rx_clocks - bs->echo_last_tx_clocks) * + vm->clib_time.seconds_per_clock); + s = + format (s, "%10s %-32s %20v\n", "", "Last echo frame roundtrip time", + tmp); + } + vec_free (tmp); + tmp = NULL; + s = format (s, "%10s %-32s %20s %20s\n", "", "Demand mode", "no", + bs->remote_demand ? "yes" : "no"); + s = format (s, "%10s %-32s %20s\n", "", "Poll state", + bfd_poll_state_string (bs->poll_state)); if (bs->auth.curr_key) { s = format (s, "%10s %-32s %20u\n", "", "Authentication config key ID", bs->auth.curr_key->conf_key_id); s = format (s, "%10s %-32s %20u\n", "", "Authentication BFD key ID", bs->auth.curr_bfd_key_id); + s = format (s, "%10s %-32s %20u %20u\n", "", "Sequence number", + bs->auth.local_seq_number, bs->auth.remote_seq_number); } return s; } @@ -96,6 +133,7 @@ show_bfd (vlib_main_t * vm, unformat_input_t * input, }); /* *INDENT-ON* */ vlib_cli_output (vm, "%v\n", s); + vec_free (s); vlib_cli_output (vm, "Number of configured BFD keys: %lu\n", (u64) pool_elts (bm->auth_keys)); } @@ -104,8 +142,9 @@ show_bfd (vlib_main_t * vm, unformat_input_t * input, u8 *s = format (NULL, "%=10s %=32s %=20s %=20s\n", "Index", "Property", "Local value", "Remote value"); /* *INDENT-OFF* */ - pool_foreach (bs, bm->sessions, - { s = format (s, "%U", format_bfd_session_cli, bm, bs); }); + pool_foreach (bs, bm->sessions, { + s = format (s, "%U", 
format_bfd_session_cli, vm, bm, bs); + }); /* *INDENT-ON* */ vlib_cli_output (vm, "%v", s); vec_free (s); @@ -349,7 +388,7 @@ static const unsigned optional = 0; #define CHECK_MANDATORY(t, n, s, r, ...) \ if (mandatory == r && !have_##n) \ { \ - ret = clib_error_return (0, "Required parameter `%s' missing.", n); \ + ret = clib_error_return (0, "Required parameter `%s' missing.", s); \ goto out; \ } diff --git a/src/vnet/bfd/bfd_main.c b/src/vnet/bfd/bfd_main.c index ea6db1f9..2b70a20c 100644 --- a/src/vnet/bfd/bfd_main.c +++ b/src/vnet/bfd/bfd_main.c @@ -63,13 +63,6 @@ bfd_clocks_to_usec (const bfd_main_t * bm, u64 clocks) static vlib_node_registration_t bfd_process_node; -/* set to 0 here, real values filled at startup */ -static u32 bfd_node_index_by_transport[] = { -#define F(t, n) [BFD_TRANSPORT_##t] = 0, - foreach_bfd_transport (F) -#undef F -}; - u8 * format_bfd_auth_key (u8 * s, va_list * args) { @@ -560,51 +553,70 @@ bfd_on_config_change (vlib_main_t * vm, vlib_node_runtime_t * rt, } static void -bfd_add_transport_layer (vlib_main_t * vm, vlib_buffer_t * b, - bfd_session_t * bs) +bfd_add_transport_layer (vlib_main_t * vm, u32 bi, bfd_session_t * bs) +{ + switch (bs->transport) + { + case BFD_TRANSPORT_UDP4: + BFD_DBG ("Transport bfd via udp4, bs_idx=%u", bs->bs_idx); + bfd_add_udp4_transport (vm, bi, bs, 0 /* is_echo */ ); + break; + case BFD_TRANSPORT_UDP6: + BFD_DBG ("Transport bfd via udp6, bs_idx=%u", bs->bs_idx); + bfd_add_udp6_transport (vm, bi, bs, 0 /* is_echo */ ); + break; + } +} + +static int +bfd_transport_control_frame (vlib_main_t * vm, u32 bi, bfd_session_t * bs) { switch (bs->transport) { case BFD_TRANSPORT_UDP4: BFD_DBG ("Transport bfd via udp4, bs_idx=%u", bs->bs_idx); - bfd_add_udp4_transport (vm, b, bs, 0 /* is_echo */ ); + return bfd_transport_udp4 (vm, bi, bs); break; case BFD_TRANSPORT_UDP6: BFD_DBG ("Transport bfd via udp6, bs_idx=%u", bs->bs_idx); - bfd_add_udp6_transport (vm, b, bs, 0 /* is_echo */ ); + return bfd_transport_udp6 
(vm, bi, bs); break; } + return 0; } static int -bfd_echo_add_transport_layer (vlib_main_t * vm, vlib_buffer_t * b, - bfd_session_t * bs) +bfd_echo_add_transport_layer (vlib_main_t * vm, u32 bi, bfd_session_t * bs) { switch (bs->transport) { case BFD_TRANSPORT_UDP4: BFD_DBG ("Transport bfd echo via udp4, bs_idx=%u", bs->bs_idx); - return bfd_add_udp4_transport (vm, b, bs, 1 /* is_echo */ ); + return bfd_add_udp4_transport (vm, bi, bs, 1 /* is_echo */ ); break; case BFD_TRANSPORT_UDP6: BFD_DBG ("Transport bfd echo via udp6, bs_idx=%u", bs->bs_idx); - return bfd_add_udp6_transport (vm, b, bs, 1 /* is_echo */ ); + return bfd_add_udp6_transport (vm, bi, bs, 1 /* is_echo */ ); break; } return 0; } -static void -bfd_create_frame_to_next_node (vlib_main_t * vm, bfd_session_t * bs, u32 bi) +static int +bfd_transport_echo (vlib_main_t * vm, u32 bi, bfd_session_t * bs) { - - vlib_frame_t *f = - vlib_get_frame_to_node (vm, bfd_node_index_by_transport[bs->transport]); - - u32 *to_next = vlib_frame_vector_args (f); - to_next[0] = bi; - f->n_vectors = 1; - vlib_put_frame_to_node (vm, bfd_node_index_by_transport[bs->transport], f); + switch (bs->transport) + { + case BFD_TRANSPORT_UDP4: + BFD_DBG ("Transport bfd echo via udp4, bs_idx=%u", bs->bs_idx); + return bfd_transport_udp4 (vm, bi, bs); + break; + case BFD_TRANSPORT_UDP6: + BFD_DBG ("Transport bfd echo via udp6, bs_idx=%u", bs->bs_idx); + return bfd_transport_udp6 (vm, bi, bs); + break; + } + return 0; } #if WITH_LIBSSL > 0 @@ -704,7 +716,7 @@ bfd_init_control_frame (bfd_main_t * bm, bfd_session_t * bs, bfd_pkt_set_diag_code (pkt, bs->local_diag); bfd_pkt_set_state (pkt, bs->local_state); pkt->head.detect_mult = bs->local_detect_mult; - pkt->head.length = clib_host_to_net_u32 (bfd_length); + pkt->head.length = bfd_length; pkt->my_disc = bs->local_discr; pkt->your_disc = bs->remote_discr; pkt->des_min_tx = clib_host_to_net_u32 (bs->config_desired_min_tx_usec); @@ -725,8 +737,7 @@ bfd_init_control_frame (bfd_main_t * bm, 
bfd_session_t * bs, static void bfd_send_echo (vlib_main_t * vm, vlib_node_runtime_t * rt, - bfd_main_t * bm, bfd_session_t * bs, u64 now, - int handling_wakeup) + bfd_main_t * bm, bfd_session_t * bs, u64 now) { if (!bfd_is_echo_possible (bs)) { @@ -734,7 +745,8 @@ bfd_send_echo (vlib_main_t * vm, vlib_node_runtime_t * rt, bs->echo = 0; return; } - /* sometimes the wheel expires an event a bit sooner than requested, account + /* sometimes the wheel expires an event a bit sooner than requested, + account for that here */ if (now + bm->wheel_inaccuracy >= bs->echo_tx_timeout_clocks) { @@ -747,6 +759,8 @@ bfd_send_echo (vlib_main_t * vm, vlib_node_runtime_t * rt, } vlib_buffer_t *b = vlib_get_buffer (vm, bi); ASSERT (b->current_data == 0); + memset (vnet_buffer (b), 0, sizeof (*vnet_buffer (b))); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b); bfd_echo_pkt_t *pkt = vlib_buffer_get_current (b); memset (pkt, 0, sizeof (*pkt)); pkt->discriminator = bs->local_discr; @@ -756,7 +770,14 @@ bfd_send_echo (vlib_main_t * vm, vlib_node_runtime_t * rt, bfd_calc_echo_checksum (bs->local_discr, pkt->expire_time_clocks, bs->echo_secret); b->current_length = sizeof (*pkt); - if (!bfd_echo_add_transport_layer (vm, b, bs)) + if (!bfd_echo_add_transport_layer (vm, bi, bs)) + { + BFD_ERR ("cannot send echo packet out, turning echo off"); + bs->echo = 0; + vlib_buffer_free_one (vm, bi); + return; + } + if (!bfd_transport_echo (vm, bi, bs)) { BFD_ERR ("cannot send echo packet out, turning echo off"); bs->echo = 0; @@ -765,7 +786,6 @@ bfd_send_echo (vlib_main_t * vm, vlib_node_runtime_t * rt, } bs->echo_last_tx_clocks = now; bfd_calc_next_echo_tx (bm, bs, now); - bfd_create_frame_to_next_node (vm, bs, bi); } else { @@ -777,8 +797,7 @@ bfd_send_echo (vlib_main_t * vm, vlib_node_runtime_t * rt, static void bfd_send_periodic (vlib_main_t * vm, vlib_node_runtime_t * rt, - bfd_main_t * bm, bfd_session_t * bs, u64 now, - int handling_wakeup) + bfd_main_t * bm, bfd_session_t * bs, u64 now) { if 
(!bs->remote_min_rx_usec && BFD_POLL_NOT_NEEDED == bs->poll_state) { @@ -798,8 +817,10 @@ bfd_send_periodic (vlib_main_t * vm, vlib_node_runtime_t * rt, BFD_DBG ("Remote demand is set, not sending periodic control frame"); return; } - /* sometimes the wheel expires an event a bit sooner than requested, account - for that here */ + /* + * sometimes the wheel expires an event a bit sooner than requested, account + * for that here + */ if (now + bm->wheel_inaccuracy >= bs->tx_timeout_clocks) { BFD_DBG ("\nSending periodic control frame: %U", format_bfd_session, @@ -812,6 +833,8 @@ bfd_send_periodic (vlib_main_t * vm, vlib_node_runtime_t * rt, } vlib_buffer_t *b = vlib_get_buffer (vm, bi); ASSERT (b->current_data == 0); + memset (vnet_buffer (b), 0, sizeof (*vnet_buffer (b))); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b); bfd_init_control_frame (bm, bs, b); switch (bs->poll_state) { @@ -837,10 +860,13 @@ bfd_send_periodic (vlib_main_t * vm, vlib_node_runtime_t * rt, break; } bfd_add_auth_section (b, bs); - bfd_add_transport_layer (vm, b, bs); + bfd_add_transport_layer (vm, bi, bs); + if (!bfd_transport_control_frame (vm, bi, bs)) + { + vlib_buffer_free_one (vm, bi); + } bs->last_tx_clocks = now; bfd_calc_next_tx (bm, bs, now); - bfd_create_frame_to_next_node (vm, bs, bi); } else { @@ -852,13 +878,15 @@ bfd_send_periodic (vlib_main_t * vm, vlib_node_runtime_t * rt, void bfd_init_final_control_frame (vlib_main_t * vm, vlib_buffer_t * b, - bfd_main_t * bm, bfd_session_t * bs) + bfd_main_t * bm, bfd_session_t * bs, + int is_local) { BFD_DBG ("Send final control frame for bs_idx=%lu", bs->bs_idx); bfd_init_control_frame (bm, bs, b); bfd_pkt_set_final (vlib_buffer_get_current (b)); bfd_add_auth_section (b, bs); - bfd_add_transport_layer (vm, b, bs); + u32 bi = vlib_get_buffer_index (vm, b); + bfd_add_transport_layer (vm, bi, bs); bs->last_tx_clocks = clib_cpu_time_now (); /* * RFC allows to include changes in final frame, so if there were any @@ -871,8 +899,10 @@ static void 
bfd_check_rx_timeout (bfd_main_t * bm, bfd_session_t * bs, u64 now, int handling_wakeup) { - /* sometimes the wheel expires an event a bit sooner than requested, account - for that here */ + /* + * sometimes the wheel expires an event a bit sooner than requested, account + * for that here + */ if (bs->last_rx_clocks + bs->detection_time_clocks <= now + bm->wheel_inaccuracy) { @@ -907,14 +937,14 @@ bfd_on_timeout (vlib_main_t * vm, vlib_node_runtime_t * rt, bfd_main_t * bm, switch (bs->local_state) { case BFD_STATE_admin_down: - bfd_send_periodic (vm, rt, bm, bs, now, 1); + bfd_send_periodic (vm, rt, bm, bs, now); break; case BFD_STATE_down: - bfd_send_periodic (vm, rt, bm, bs, now, 1); + bfd_send_periodic (vm, rt, bm, bs, now); break; case BFD_STATE_init: bfd_check_rx_timeout (bm, bs, now, 1); - bfd_send_periodic (vm, rt, bm, bs, now, 1); + bfd_send_periodic (vm, rt, bm, bs, now); break; case BFD_STATE_up: bfd_check_rx_timeout (bm, bs, now, 1); @@ -932,10 +962,10 @@ bfd_on_timeout (vlib_main_t * vm, vlib_node_runtime_t * rt, bfd_main_t * bm, bs->config_required_min_rx_clocks)); bfd_set_poll_state (bs, BFD_POLL_NEEDED); } - bfd_send_periodic (vm, rt, bm, bs, now, 1); + bfd_send_periodic (vm, rt, bm, bs, now); if (bs->echo) { - bfd_send_echo (vm, rt, bm, bs, now, 1); + bfd_send_echo (vm, rt, bm, bs, now); } break; } @@ -996,7 +1026,8 @@ bfd_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) { bfd_session_t *bs = pool_elt_at_index (bm->sessions, *event_data); - bfd_send_periodic (vm, rt, bm, bs, now, 1); + bfd_send_periodic (vm, rt, bm, bs, now); + bfd_set_timer (bm, bs, now, 1); } else { @@ -1113,14 +1144,6 @@ bfd_main_init (vlib_main_t * vm) const u64 now = clib_cpu_time_now (); timing_wheel_init (&bm->wheel, now, bm->cpu_cps); bm->wheel_inaccuracy = 2 << bm->wheel.log2_clocks_per_bin; - - vlib_node_t *node = NULL; -#define F(t, n) \ - node = vlib_get_node_by_name (vm, (u8 *)n); \ - bfd_node_index_by_transport[BFD_TRANSPORT_##t] = node->index; \ 
- BFD_DBG ("node '%s' has index %u", n, node->index); - foreach_bfd_transport (F); -#undef F return 0; } @@ -1654,10 +1677,12 @@ bfd_consume_pkt (bfd_main_t * bm, const bfd_pkt_t * pkt, u32 bs_idx) { if (BFD_STATE_down == bs->remote_state) { + bfd_set_diag (bs, BFD_DIAG_CODE_no_diag); bfd_set_state (bm, bs, BFD_STATE_init, 0); } else if (BFD_STATE_init == bs->remote_state) { + bfd_set_diag (bs, BFD_DIAG_CODE_no_diag); bfd_set_state (bm, bs, BFD_STATE_up, 0); } } @@ -1666,6 +1691,7 @@ bfd_consume_pkt (bfd_main_t * bm, const bfd_pkt_t * pkt, u32 bs_idx) if (BFD_STATE_up == bs->remote_state || BFD_STATE_init == bs->remote_state) { + bfd_set_diag (bs, BFD_DIAG_CODE_no_diag); bfd_set_state (bm, bs, BFD_STATE_up, 0); } } diff --git a/src/vnet/bfd/bfd_main.h b/src/vnet/bfd/bfd_main.h index 4d460f41..d722a552 100644 --- a/src/vnet/bfd/bfd_main.h +++ b/src/vnet/bfd/bfd_main.h @@ -316,7 +316,8 @@ int bfd_verify_pkt_auth (const bfd_pkt_t * pkt, u16 pkt_size, bfd_session_t * bs); void bfd_event (bfd_main_t * bm, bfd_session_t * bs); void bfd_init_final_control_frame (vlib_main_t * vm, vlib_buffer_t * b, - bfd_main_t * bm, bfd_session_t * bs); + bfd_main_t * bm, bfd_session_t * bs, + int is_local); u8 *format_bfd_session (u8 * s, va_list * args); u8 *format_bfd_auth_key (u8 * s, va_list * args); void bfd_session_set_flags (bfd_session_t * bs, u8 admin_up_down); diff --git a/src/vnet/bfd/bfd_udp.c b/src/vnet/bfd/bfd_udp.c index b3eabc9c..ebee590b 100644 --- a/src/vnet/bfd/bfd_udp.c +++ b/src/vnet/bfd/bfd_udp.c @@ -51,6 +51,14 @@ typedef struct int echo_source_is_set; /* loopback interface used to get echo source ip */ u32 echo_source_sw_if_index; + /* node index of "ip4-arp" node */ + u32 ip4_arp_idx; + /* node index of "ip6-discover-neighbor" node */ + u32 ip6_ndp_idx; + /* node index of "ip4-rewrite" node */ + u32 ip4_rewrite_idx; + /* node index of "ip6-rewrite" node */ + u32 ip6_rewrite_idx; } bfd_udp_main_t; static vlib_node_registration_t bfd_udp4_input_node; @@ -231,15 
+239,18 @@ bfd_udp_get_echo_source (int *is_set, u32 * sw_if_index, int *have_usable_ip4, } int -bfd_add_udp4_transport (vlib_main_t * vm, vlib_buffer_t * b, +bfd_add_udp4_transport (vlib_main_t * vm, u32 bi, const bfd_session_t * bs, int is_echo) { const bfd_udp_session_t *bus = &bs->udp; const bfd_udp_key_t *key = &bus->key; + vlib_buffer_t *b = vlib_get_buffer (vm, bi); b->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; vnet_buffer (b)->ip.adj_index[VLIB_RX] = bus->adj_index; vnet_buffer (b)->ip.adj_index[VLIB_TX] = bus->adj_index; + vnet_buffer (b)->sw_if_index[VLIB_RX] = 0; + vnet_buffer (b)->sw_if_index[VLIB_TX] = ~0; typedef struct { ip4_header_t ip4; @@ -283,15 +294,18 @@ bfd_add_udp4_transport (vlib_main_t * vm, vlib_buffer_t * b, } int -bfd_add_udp6_transport (vlib_main_t * vm, vlib_buffer_t * b, +bfd_add_udp6_transport (vlib_main_t * vm, u32 bi, const bfd_session_t * bs, int is_echo) { const bfd_udp_session_t *bus = &bs->udp; const bfd_udp_key_t *key = &bus->key; + vlib_buffer_t *b = vlib_get_buffer (vm, bi); b->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; vnet_buffer (b)->ip.adj_index[VLIB_RX] = bus->adj_index; vnet_buffer (b)->ip.adj_index[VLIB_TX] = bus->adj_index; + vnet_buffer (b)->sw_if_index[VLIB_RX] = 0; + vnet_buffer (b)->sw_if_index[VLIB_TX] = 0; typedef struct { ip6_header_t ip6; @@ -346,6 +360,76 @@ bfd_add_udp6_transport (vlib_main_t * vm, vlib_buffer_t * b, return 1; } +static void +bfd_create_frame_to_next_node (vlib_main_t * vm, u32 bi, u32 next_node) +{ + vlib_frame_t *f = vlib_get_frame_to_node (vm, next_node); + u32 *to_next = vlib_frame_vector_args (f); + to_next[0] = bi; + f->n_vectors = 1; + vlib_put_frame_to_node (vm, next_node, f); +} + +int +bfd_udp_calc_next_node (const struct bfd_session_s *bs, u32 * next_node) +{ + const bfd_udp_session_t *bus = &bs->udp; + ip_adjacency_t *adj = adj_get (bus->adj_index); + switch (adj->lookup_next_index) + { + case IP_LOOKUP_NEXT_ARP: + switch (bs->transport) + { + case BFD_TRANSPORT_UDP4: + *next_node = 
bfd_udp_main.ip4_arp_idx; + return 1; + case BFD_TRANSPORT_UDP6: + *next_node = bfd_udp_main.ip6_ndp_idx; + return 1; + } + break; + case IP_LOOKUP_NEXT_REWRITE: + switch (bs->transport) + { + case BFD_TRANSPORT_UDP4: + *next_node = bfd_udp_main.ip4_rewrite_idx; + return 1; + case BFD_TRANSPORT_UDP6: + *next_node = bfd_udp_main.ip6_rewrite_idx; + return 1; + } + break; + default: + /* drop */ + break; + } + return 0; +} + +int +bfd_transport_udp4 (vlib_main_t * vm, u32 bi, const struct bfd_session_s *bs) +{ + u32 next_node; + int rv = bfd_udp_calc_next_node (bs, &next_node); + if (rv) + { + bfd_create_frame_to_next_node (vm, bi, next_node); + } + return rv; +} + +int +bfd_transport_udp6 (vlib_main_t * vm, u32 bi, const struct bfd_session_s *bs) +{ + u32 next_node; + int rv = bfd_udp_calc_next_node (bs, &next_node); + if (rv) + { + bfd_create_frame_to_next_node (vm, bi, next_node); + } + return 1; +} + static bfd_session_t * bfd_lookup_session (bfd_udp_main_t * bum, const bfd_udp_key_t * key) { @@ -703,7 +787,8 @@ bfd_udp_auth_deactivate (u32 sw_if_index, typedef enum { BFD_UDP_INPUT_NEXT_NORMAL, - BFD_UDP_INPUT_NEXT_REPLY, + BFD_UDP_INPUT_NEXT_REPLY_ARP, + BFD_UDP_INPUT_NEXT_REPLY_REWRITE, BFD_UDP_INPUT_N_NEXT, } bfd_udp_input_next_t; @@ -1112,8 +1197,11 @@ bfd_udp_input (vlib_main_t * vm, vlib_node_runtime_t * rt, const bfd_pkt_t *pkt = vlib_buffer_get_current (b0); if (bfd_pkt_get_poll (pkt)) { + b0->current_data = 0; + b0->current_length = 0; + memset (vnet_buffer (b0), 0, sizeof (*vnet_buffer (b0))); bfd_init_final_control_frame (vm, b0, bfd_udp_main.bfd_main, - bs); + bs, 0); if (is_ipv6) { vlib_node_increment_counter (vm, bfd_udp6_input_node.index, @@ -1124,7 +1212,20 @@ bfd_udp_input (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_node_increment_counter (vm, bfd_udp4_input_node.index, b0->error, 1); } - next0 = BFD_UDP_INPUT_NEXT_REPLY; + const bfd_udp_session_t *bus = &bs->udp; + ip_adjacency_t *adj = adj_get (bus->adj_index); + switch 
(adj->lookup_next_index) + { + case IP_LOOKUP_NEXT_ARP: + next0 = BFD_UDP_INPUT_NEXT_REPLY_ARP; + break; + case IP_LOOKUP_NEXT_REWRITE: + next0 = BFD_UDP_INPUT_NEXT_REPLY_REWRITE; + break; + default: + /* drop */ + break; + } } } vlib_set_next_frame_buffer (vm, rt, next0, bi0); @@ -1161,7 +1262,8 @@ VLIB_REGISTER_NODE (bfd_udp4_input_node, static) = { .next_nodes = { [BFD_UDP_INPUT_NEXT_NORMAL] = "error-drop", - [BFD_UDP_INPUT_NEXT_REPLY] = "ip4-lookup", + [BFD_UDP_INPUT_NEXT_REPLY_ARP] = "ip4-arp", + [BFD_UDP_INPUT_NEXT_REPLY_REWRITE] = "ip4-lookup", }, }; /* *INDENT-ON* */ @@ -1188,7 +1290,8 @@ VLIB_REGISTER_NODE (bfd_udp6_input_node, static) = { .next_nodes = { [BFD_UDP_INPUT_NEXT_NORMAL] = "error-drop", - [BFD_UDP_INPUT_NEXT_REPLY] = "ip6-lookup", + [BFD_UDP_INPUT_NEXT_REPLY_ARP] = "ip6-discover-neighbor", + [BFD_UDP_INPUT_NEXT_REPLY_REWRITE] = "ip6-lookup", }, }; /* *INDENT-ON* */ @@ -1246,7 +1349,7 @@ bfd_udp_echo_input (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_node_increment_counter (vm, bfd_udp_echo4_input_node.index, b0->error, 1); } - next0 = BFD_UDP_INPUT_NEXT_REPLY; + next0 = BFD_UDP_INPUT_NEXT_REPLY_REWRITE; } vlib_set_next_frame_buffer (vm, rt, next0, bi0); @@ -1300,7 +1403,8 @@ VLIB_REGISTER_NODE (bfd_udp_echo4_input_node, static) = { .next_nodes = { [BFD_UDP_INPUT_NEXT_NORMAL] = "error-drop", - [BFD_UDP_INPUT_NEXT_REPLY] = "ip4-lookup", + [BFD_UDP_INPUT_NEXT_REPLY_ARP] = "ip4-arp", + [BFD_UDP_INPUT_NEXT_REPLY_REWRITE] = "ip4-lookup", }, }; /* *INDENT-ON* */ @@ -1328,7 +1432,8 @@ VLIB_REGISTER_NODE (bfd_udp_echo6_input_node, static) = { .next_nodes = { [BFD_UDP_INPUT_NEXT_NORMAL] = "error-drop", - [BFD_UDP_INPUT_NEXT_REPLY] = "ip6-lookup", + [BFD_UDP_INPUT_NEXT_REPLY_ARP] = "ip6-discover-neighbor", + [BFD_UDP_INPUT_NEXT_REPLY_REWRITE] = "ip6-lookup", }, }; @@ -1375,6 +1480,19 @@ bfd_udp_init (vlib_main_t * vm) bfd_udp_echo4_input_node.index, 1); udp_register_dst_port (vm, UDP_DST_PORT_bfd_echo6, bfd_udp_echo6_input_node.index, 0); + 
vlib_node_t *node = vlib_get_node_by_name (vm, (u8 *) "ip4-arp"); + ASSERT (node); + bfd_udp_main.ip4_arp_idx = node->index; + node = vlib_get_node_by_name (vm, (u8 *) "ip6-discover-neighbor"); + ASSERT (node); + bfd_udp_main.ip6_ndp_idx = node->index; + node = vlib_get_node_by_name (vm, (u8 *) "ip4-rewrite"); + ASSERT (node); + bfd_udp_main.ip4_rewrite_idx = node->index; + node = vlib_get_node_by_name (vm, (u8 *) "ip6-rewrite"); + ASSERT (node); + bfd_udp_main.ip6_rewrite_idx = node->index; + return 0; } diff --git a/src/vnet/bfd/bfd_udp.h b/src/vnet/bfd/bfd_udp.h index 5080ec98..a4adbadf 100644 --- a/src/vnet/bfd/bfd_udp.h +++ b/src/vnet/bfd/bfd_udp.h @@ -57,11 +57,12 @@ struct bfd_session_s; /** * @brief add the necessary transport layer by prepending it to existing data * + * * @param is_echo 1 if this is echo packet, 0 if control frame * * @return 1 on success, 0 on failure */ -int bfd_add_udp4_transport (vlib_main_t * vm, vlib_buffer_t * b, +int bfd_add_udp4_transport (vlib_main_t * vm, u32 bi, const struct bfd_session_s *bs, int is_echo); /** @@ -71,9 +72,29 @@ int bfd_add_udp4_transport (vlib_main_t * vm, vlib_buffer_t * b, * * @return 1 on success, 0 on failure */ -int bfd_add_udp6_transport (vlib_main_t * vm, vlib_buffer_t * b, +int bfd_add_udp6_transport (vlib_main_t * vm, u32 bi, const struct bfd_session_s *bs, int is_echo); +/** + * @brief transport packet over udpv4 + * + * @param is_echo 1 if this is echo packet, 0 if control frame + * + * @return 1 on success, 0 on failure + */ +int bfd_transport_udp4 (vlib_main_t * vm, u32 bi, + const struct bfd_session_s *bs); + +/** + * @brief transport packet over udpv6 + * + * @param is_echo 1 if this is echo packet, 0 if control frame + * + * @return 1 on success, 0 on failure + */ +int bfd_transport_udp6 (vlib_main_t * vm, u32 bi, + const struct bfd_session_s *bs); + /** * @brief check if the bfd udp layer is echo-capable at this time * -- cgit 1.2.3-korg From 88fc83eb716bf07f4634de6de5b569f795a56418 Mon Sep 
17 00:00:00 2001 From: Neale Ranns Date: Wed, 5 Apr 2017 08:11:14 -0700 Subject: BFD-FIB interactions - single-hop BFD: attach a delegate to the appropriate adjacency - multi-hop BFD [not supported yet]: attach a delegate to the FIB entry. adjacency/fib_entry state tracks the BFD session state. when the state is down the object does not contribute forwarding hence and hence dependent objects will not use it. For example, if a route is ECMP via two adjacencies and one of them is BFD down, then only the other is used to forward (i.e. we don't drop half the traffic). Change-Id: I0ef53e20e73b067001a132cd0a3045408811a822 Signed-off-by: Neale Ranns --- src/vnet.am | 7 +- src/vnet/adj/adj.c | 51 +++- src/vnet/adj/adj.h | 15 ++ src/vnet/adj/adj_bfd.c | 184 +++++++++++++ src/vnet/adj/adj_delegate.c | 144 ++++++++++ src/vnet/adj/adj_delegate.h | 104 ++++++++ src/vnet/bfd/bfd_main.c | 23 ++ src/vnet/bfd/bfd_main.h | 44 ++++ src/vnet/bfd/bfd_udp.h | 8 +- src/vnet/fib/fib_attached_export.c | 75 +++--- src/vnet/fib/fib_attached_export.h | 4 +- src/vnet/fib/fib_bfd.c | 197 ++++++++++++++ src/vnet/fib/fib_entry.c | 50 ++-- src/vnet/fib/fib_entry.h | 1 + src/vnet/fib/fib_entry_cover.c | 45 ---- src/vnet/fib/fib_entry_cover.h | 5 - src/vnet/fib/fib_entry_delegate.c | 100 +++++++ src/vnet/fib/fib_entry_delegate.h | 33 +++ src/vnet/fib/fib_path.c | 52 ++-- src/vnet/fib/fib_test.c | 522 +++++++++++++++++++++++++++++++++++++ src/vnet/fib/ip4_fib.c | 16 +- src/vnet/fib/ip6_fib.c | 21 +- test/framework.py | 2 +- test/test_bfd.py | 104 +++++++- test/vpp_papi_provider.py | 5 +- 25 files changed, 1662 insertions(+), 150 deletions(-) create mode 100644 src/vnet/adj/adj_bfd.c create mode 100644 src/vnet/adj/adj_delegate.c create mode 100644 src/vnet/adj/adj_delegate.h create mode 100644 src/vnet/fib/fib_bfd.c (limited to 'src/vnet/bfd') diff --git a/src/vnet.am b/src/vnet.am index 1d52ae10..643ae92e 100644 --- a/src/vnet.am +++ b/src/vnet.am @@ -936,7 +936,8 @@ libvnet_la_SOURCES += \ 
vnet/fib/fib_path.c \ vnet/fib/fib_path_ext.c \ vnet/fib/fib_urpf_list.c \ - vnet/fib/fib_attached_export.c + vnet/fib/fib_attached_export.c \ + vnet/fib/fib_bfd.c nobase_include_HEADERS += \ vnet/fib/fib.h \ @@ -962,7 +963,9 @@ libvnet_la_SOURCES += \ vnet/adj/adj_l2.c \ vnet/adj/adj_nsh.c \ vnet/adj/adj.c \ - vnet/adj/rewrite.c + vnet/adj/rewrite.c \ + vnet/adj/adj_bfd.c \ + vnet/adj/adj_delegate.c nobase_include_HEADERS += \ vnet/adj/adj.h \ diff --git a/src/vnet/adj/adj.c b/src/vnet/adj/adj.c index 7cf9e9d0..90182006 100644 --- a/src/vnet/adj/adj.c +++ b/src/vnet/adj/adj.c @@ -18,6 +18,7 @@ #include #include #include +#include #include /* Adjacency packet/byte counters indexed by adjacency index. */ @@ -57,13 +58,14 @@ adj_alloc (fib_protocol_t proto) vlib_validate_combined_counter(&adjacency_counters, adj_get_index(adj)); - adj->rewrite_header.sw_if_index = ~0; - adj->lookup_next_index = 0; - fib_node_init(&adj->ia_node, FIB_NODE_TYPE_ADJ); + adj->ia_nh_proto = proto; adj->ia_flags = 0; + adj->rewrite_header.sw_if_index = ~0; + adj->lookup_next_index = 0; + adj->ia_delegates = NULL; ip4_main.lookup_main.adjacency_heap = adj_pool; ip6_main.lookup_main.adjacency_heap = adj_pool; @@ -122,11 +124,19 @@ format_ip_adjacency (u8 * s, va_list * args) if (fiaf & FORMAT_IP_ADJACENCY_DETAIL) { + adj_delegate_type_t adt; + adj_delegate_t *aed; vlib_counter_t counts; vlib_get_combined_counter(&adjacency_counters, adj_index, &counts); s = format (s, "\n counts:[%Ld:%Ld]", counts.packets, counts.bytes); s = format (s, "\n locks:%d", adj->ia_node.fn_locks); + s = format(s, "\n delegates:\n "); + FOR_EACH_ADJ_DELEGATE(adj, adt, aed, + { + s = format(s, " %U\n", format_adj_deletegate, aed); + }); + s = format(s, "\n children:\n "); s = fib_node_children_format(adj->ia_node.fn_children, s); } @@ -173,7 +183,11 @@ adj_last_lock_gone (ip_adjacency_t *adj) adj_mcast_remove(adj->ia_nh_proto, adj->rewrite_header.sw_if_index); break; - default: + case IP_LOOKUP_NEXT_DROP: + case 
IP_LOOKUP_NEXT_PUNT: + case IP_LOOKUP_NEXT_LOCAL: + case IP_LOOKUP_NEXT_ICMP_ERROR: + case IP_LOOKUP_N_NEXT: /* * type not stored in any DB from which we need to remove it */ @@ -183,6 +197,8 @@ adj_last_lock_gone (ip_adjacency_t *adj) vlib_worker_thread_barrier_release(vm); fib_node_deinit(&adj->ia_node); + ASSERT(0 == vec_len(adj->ia_delegates)); + vec_free(adj->ia_delegates); pool_put(adj_pool, adj); } @@ -351,6 +367,33 @@ adj_get_sw_if_index (adj_index_t ai) return (adj->rewrite_header.sw_if_index); } +/** + * @brief Return true if the adjacency is 'UP', i.e. can be used for forwarding + * 0 is down, !0 is up. + */ +int +adj_is_up (adj_index_t ai) +{ + const adj_delegate_t *aed; + + aed = adj_delegate_get(adj_get(ai), ADJ_DELEGATE_BFD); + + if (NULL == aed) + { + /* + * no BFD tracking - resolved + */ + return (!0); + } + else + { + /* + * defer to the state of the BFD tracking + */ + return (ADJ_BFD_STATE_UP == aed->ad_bfd_state); + } +} + /** * @brief Return the rewrite string of the adjacency */ diff --git a/src/vnet/adj/adj.h b/src/vnet/adj/adj.h index af7730f7..32997c91 100644 --- a/src/vnet/adj/adj.h +++ b/src/vnet/adj/adj.h @@ -255,6 +255,15 @@ typedef struct ip_adjacency_t_ /* Rewrite in second/third cache lines */ vnet_declare_rewrite (VLIB_BUFFER_PRE_DATA_SIZE); + + /** + * more control plane members that do not fit on the first cacheline + */ + /** + * A sorted vector of delegates + */ + struct adj_delegate_t_ *ia_delegates; + } ip_adjacency_t; STATIC_ASSERT ((STRUCT_OFFSET_OF (ip_adjacency_t, cacheline0) == 0), @@ -307,6 +316,12 @@ extern vnet_link_t adj_get_link_type (adj_index_t ai); */ extern u32 adj_get_sw_if_index (adj_index_t ai); +/** + * @brief Return true if the adjacency is 'UP', i.e. can be used for forwarding. + * 0 is down, !0 is up. 
+ */ +extern int adj_is_up (adj_index_t ai); + /** * @brief Return the link type of the adjacency */ diff --git a/src/vnet/adj/adj_bfd.c b/src/vnet/adj/adj_bfd.c new file mode 100644 index 00000000..3d294c46 --- /dev/null +++ b/src/vnet/adj/adj_bfd.c @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include + +static adj_bfd_state_t +adj_bfd_bfd_state_to_fib (bfd_state_e bstate) +{ + switch (bstate) + { + case BFD_STATE_up: + return (ADJ_BFD_STATE_UP); + case BFD_STATE_down: + case BFD_STATE_admin_down: + case BFD_STATE_init: + return (ADJ_BFD_STATE_DOWN); + } + return (ADJ_BFD_STATE_DOWN); +} + +static void +adj_bfd_update_walk (adj_index_t ai) +{ + /* + * initiate a backwalk of dependent children + * to notify of the state change of this adj. 
+ */ + fib_node_back_walk_ctx_t ctx = { + .fnbw_reason = FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE, + }; + fib_walk_sync(FIB_NODE_TYPE_ADJ, ai, &ctx); +} + +/** + * @brief Callback function registered with BFD module to receive notifications + * of the CRUD of BFD sessions + * would be static but for the fact it's called from the unit-tests + */ +void +adj_bfd_notify (bfd_listen_event_e event, + const bfd_session_t *session) +{ + const bfd_udp_key_t *key; + fib_protocol_t fproto; + adj_delegate_t *aed; + adj_index_t ai; + + if (BFD_HOP_TYPE_SINGLE != session->hop_type) + { + /* + * multi-hop BFD sessions attach directly to the FIB entry + * single-hop sessions to the associated adjacency. + */ + return; + } + + key = &session->udp.key; + + fproto = (ip46_address_is_ip4 (&key->peer_addr) ? + FIB_PROTOCOL_IP4: + FIB_PROTOCOL_IP6); + + /* + * find the adj that corresponds to the BFD session. + */ + ai = adj_nbr_add_or_lock(fproto, + fib_proto_to_link(fproto), + &key->peer_addr, + key->sw_if_index); + + switch (event) + { + case BFD_LISTEN_EVENT_CREATE: + /* + * The creation of a new session + */ + if ((ADJ_INDEX_INVALID != ai) && + (aed = adj_delegate_get(adj_get(ai), + ADJ_DELEGATE_BFD))) + { + /* + * already got state for this adj + */ + } + else + { + /* + * lock the adj. add the delegate. + * Locking the adj prevents it being removed and thus maintains + * the BFD derived states + */ + adj_lock(ai); + + aed = adj_delegate_find_or_add(adj_get(ai), ADJ_DELEGATE_BFD); + + /* + * pretend the session is up and skip the walk. + * If we set it down then we get traffic loss on new children. + * if we walk then we lose traffic for existing children. Wait + * for the first BFD UP/DOWN before we let the session's state + * influence forwarding. 
+ */ + aed->ad_bfd_state = ADJ_BFD_STATE_UP; + aed->ad_bfd_index = session->bs_idx; + } + break; + + case BFD_LISTEN_EVENT_UPDATE: + /* + * state change up/down + */ + aed = adj_delegate_get(adj_get(ai), ADJ_DELEGATE_BFD); + + if (NULL != aed) + { + aed->ad_bfd_state = adj_bfd_bfd_state_to_fib(session->local_state); + adj_bfd_update_walk(ai); + } + /* + * else + * not an adj with BFD state + */ + break; + + case BFD_LISTEN_EVENT_DELETE: + /* + * session has been removed. + */ + + if (adj_delegate_get(adj_get(ai), ADJ_DELEGATE_BFD)) + { + /* + * has an associated BFD tracking delegate + * remove the BFD tracking delegate, update children, then + * unlock the adj + */ + adj_delegate_remove(adj_get(ai), ADJ_DELEGATE_BFD); + + adj_bfd_update_walk(ai); + adj_unlock(ai); + } + /* + * else + * no BFD associated state + */ + break; + } + + /* + * unlock match of the add-or-lock at the start + */ + adj_unlock(ai); +} + +static clib_error_t * +adj_bfd_main_init (vlib_main_t * vm) +{ + clib_error_t * error = NULL; + + if ((error = vlib_call_init_function (vm, bfd_main_init))) + return (error); + + bfd_register_listener(adj_bfd_notify); + + return (error); +} + +VLIB_INIT_FUNCTION (adj_bfd_main_init); diff --git a/src/vnet/adj/adj_delegate.c b/src/vnet/adj/adj_delegate.c new file mode 100644 index 00000000..701b36e2 --- /dev/null +++ b/src/vnet/adj/adj_delegate.c @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +static adj_delegate_t * +adj_delegate_find_i (const ip_adjacency_t *adj, + adj_delegate_type_t type, + u32 *index) +{ + adj_delegate_t *delegate; + int ii; + + ii = 0; + vec_foreach(delegate, adj->ia_delegates) + { + if (delegate->ad_type == type) + { + if (NULL != index) + *index = ii; + + return (delegate); + } + else + { + ii++; + } + } + + return (NULL); +} + +adj_delegate_t * +adj_delegate_get (const ip_adjacency_t *adj, + adj_delegate_type_t type) +{ + return (adj_delegate_find_i(adj, type, NULL)); +} + +void +adj_delegate_remove (ip_adjacency_t *adj, + adj_delegate_type_t type) +{ + adj_delegate_t *aed; + u32 index = ~0; + + aed = adj_delegate_find_i(adj, type, &index); + + ASSERT(NULL != aed); + + vec_del1(adj->ia_delegates, index); +} + +static int +adj_delegate_cmp_for_sort (void * v1, + void * v2) +{ + adj_delegate_t *delegate1 = v1, *delegate2 = v2; + + return (delegate1->ad_type - delegate2->ad_type); +} + +static void +adj_delegate_init (ip_adjacency_t *adj, + adj_delegate_type_t type) + +{ + adj_delegate_t delegate = { + .ad_adj_index = adj_get_index(adj), + .ad_type = type, + }; + + vec_add1(adj->ia_delegates, delegate); + vec_sort_with_function(adj->ia_delegates, + adj_delegate_cmp_for_sort); +} + +adj_delegate_t * +adj_delegate_find_or_add (ip_adjacency_t *adj, + adj_delegate_type_t adt) +{ + adj_delegate_t *delegate; + + delegate = adj_delegate_get(adj, adt); + + if (NULL == delegate) + { + adj_delegate_init(adj, adt); + } + + return (adj_delegate_get(adj, adt)); +} + +/** + * typedef for printing a delegate + */ +typedef u8 * (*adj_delegate_format_t)(const adj_delegate_t *aed, + u8 *s); + +/** + * Print a delegate that represents BFD tracking + */ +static u8 * +adj_delegate_fmt_bfd (const adj_delegate_t *aed, + u8 *s) +{ + s = format(s, "BFD:[state:%d index:%d]", + aed->ad_bfd_state, + 
aed->ad_bfd_index); + + return (s); +} + +/** + * A delegate type to formatter map + */ +static adj_delegate_format_t aed_formatters[] = +{ + [ADJ_DELEGATE_BFD] = adj_delegate_fmt_bfd, +}; + +u8 * +format_adj_deletegate (u8 * s, va_list * args) +{ + adj_delegate_t *aed; + + aed = va_arg (*args, adj_delegate_t *); + + return (aed_formatters[aed->ad_type](aed, s)); +} diff --git a/src/vnet/adj/adj_delegate.h b/src/vnet/adj/adj_delegate.h new file mode 100644 index 00000000..17651203 --- /dev/null +++ b/src/vnet/adj/adj_delegate.h @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ADJ_DELEGATE_T__ +#define __ADJ_DELEGATE_T__ + +#include + +/** + * Delegate types + */ +typedef enum adj_delegate_type_t_ { + /** + * BFD session state + */ + ADJ_DELEGATE_BFD, +} adj_delegate_type_t; + +#define FOR_EACH_ADJ_DELEGATE(_adj, _adt, _aed, _body) \ +{ \ + for (_adt = ADJ_DELEGATE_BFD; \ + _adt <= ADJ_DELEGATE_BFD; \ + _adt++) \ + { \ + _aed = adj_delegate_get(_adj, _adt); \ + if (NULL != _aed) { \ + _body; \ + } \ + } \ +} + +/** + * Distillation of the BFD session states into a go/no-go for using + * the associated tracked adjacency + */ +typedef enum adj_bfd_state_t_ +{ + ADJ_BFD_STATE_DOWN, + ADJ_BFD_STATE_UP, +} adj_bfd_state_t; + +/** + * A Delegate is a means to implement the Delegation design pattern; + * the extension of an object's functionality through the composition of, + * and delegation to, other objects. + * These 'other' objects are delegates. Delegates are thus attached to + * ADJ objects to extend their functionality. + */ +typedef struct adj_delegate_t_ +{ + /** + * The ADJ entry object to which the delegate is attached + */ + adj_index_t ad_adj_index; + + /** + * The delegate type + */ + adj_delegate_type_t ad_type; + + /** + * A union of data for the different delegate types + */ + union + { + /** + * BFD delegate data + */ + struct { + /** + * BFD session state + */ + adj_bfd_state_t ad_bfd_state; + /** + * BFD session index + */ + u32 ad_bfd_index; + }; + }; +} adj_delegate_t; + +extern void adj_delegate_remove(ip_adjacency_t *adj, + adj_delegate_type_t type); + +extern adj_delegate_t *adj_delegate_find_or_add(ip_adjacency_t *adj, + adj_delegate_type_t fdt); +extern adj_delegate_t *adj_delegate_get(const ip_adjacency_t *adj, + adj_delegate_type_t type); + +extern u8 *format_adj_deletegate(u8 * s, va_list * args); + +#endif diff --git a/src/vnet/bfd/bfd_main.c b/src/vnet/bfd/bfd_main.c index 2b70a20c..66b31ce5 100644 --- a/src/vnet/bfd/bfd_main.c +++ b/src/vnet/bfd/bfd_main.c @@ -101,6 +101,7 @@ bfd_set_defaults 
(bfd_main_t * bm, bfd_session_t * bs) bs->local_diag = BFD_DIAG_CODE_no_diag; bs->remote_state = BFD_STATE_down; bs->remote_discr = 0; + bs->hop_type = BFD_HOP_TYPE_SINGLE; bs->config_desired_min_tx_usec = BFD_DEFAULT_DESIRED_MIN_TX_USEC; bs->config_desired_min_tx_clocks = bm->default_desired_min_tx_clocks; bs->effective_desired_min_tx_clocks = bm->default_desired_min_tx_clocks; @@ -387,6 +388,17 @@ bfd_set_remote_required_min_echo_rx (bfd_main_t * bm, bfd_session_t * bs, } } +static void +bfd_notify_listeners (bfd_main_t * bm, + bfd_listen_event_e event, const bfd_session_t * bs) +{ + bfd_notify_fn_t *fn; + vec_foreach (fn, bm->listeners) + { + (*fn) (event, bs); + } +} + void bfd_session_start (bfd_main_t * bm, bfd_session_t * bs) { @@ -396,6 +408,7 @@ bfd_session_start (bfd_main_t * bm, bfd_session_t * bs) bfd_recalc_tx_interval (bm, bs); vlib_process_signal_event (bm->vlib_main, bm->bfd_process_node_index, BFD_EVENT_NEW_SESSION, bs->bs_idx); + bfd_notify_listeners (bm, BFD_LISTEN_EVENT_CREATE, bs); } void @@ -533,6 +546,7 @@ bfd_on_state_change (bfd_main_t * bm, bfd_session_t * bs, u64 now, bfd_set_timer (bm, bs, now, handling_wakeup); break; } + bfd_notify_listeners (bm, BFD_LISTEN_EVENT_UPDATE, bs); } static void @@ -1121,6 +1135,14 @@ bfd_hw_interface_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (bfd_hw_interface_up_down); +void +bfd_register_listener (bfd_notify_fn_t fn) +{ + bfd_main_t *bm = &bfd_main; + + vec_add1 (bm->listeners, fn); +} + /* * setup function */ @@ -1180,6 +1202,7 @@ bfd_get_session (bfd_main_t * bm, bfd_transport_e t) void bfd_put_session (bfd_main_t * bm, bfd_session_t * bs) { + bfd_notify_listeners (bm, BFD_LISTEN_EVENT_DELETE, bs); if (bs->auth.curr_key) { --bs->auth.curr_key->use_count; diff --git a/src/vnet/bfd/bfd_main.h b/src/vnet/bfd/bfd_main.h index d722a552..93adac3d 100644 --- a/src/vnet/bfd/bfd_main.h +++ b/src/vnet/bfd/bfd_main.h @@ -66,6 +66,20 @@ typedef enum #undef F } 
bfd_poll_state_e; +/** + * hop types + */ +#define foreach_bfd_hop(F) \ + F (SINGLE, "single") \ + F (MULTI, "multi") \ + +typedef enum +{ +#define F(sym, str) BFD_HOP_TYPE_##sym, + foreach_bfd_hop (F) +#undef F +} bfd_hop_type_e; + typedef struct bfd_session_s { /** index in bfd_main.sessions pool */ @@ -77,6 +91,9 @@ typedef struct bfd_session_s /** remote session state */ bfd_state_e remote_state; + /** BFD hop type */ + bfd_hop_type_e hop_type; + /** local diagnostics */ bfd_diag_code_e local_diag; @@ -220,6 +237,26 @@ typedef struct bfd_session_s }; } bfd_session_t; +/** + * listener events + */ +#define foreach_bfd_listen_event(F) \ + F (CREATE, "sesion-created") \ + F (UPDATE, "session-updated") \ + F (DELETE, "session-deleted") + +typedef enum +{ +#define F(sym, str) BFD_LISTEN_EVENT_##sym, + foreach_bfd_listen_event (F) +#undef F +} bfd_listen_event_e; + +/** + * session notification callback function type + */ +typedef void (*bfd_notify_fn_t) (bfd_listen_event_e, const bfd_session_t *); + typedef struct { /** pool of bfd sessions context data */ @@ -259,6 +296,8 @@ typedef struct /** hashmap - index in pool auth_keys by conf_key_id */ u32 *auth_key_by_conf_key_id; + /** A vector of callback notification functions */ + bfd_notify_fn_t *listeners; } bfd_main_t; extern bfd_main_t bfd_main; @@ -345,6 +384,11 @@ const char *bfd_poll_state_string (bfd_poll_state_e state); */ #define BFD_REQUIRED_MIN_RX_USEC_WHILE_ECHO USEC_PER_SECOND +/** + * Register a callback function to receive session notifications. 
+ */ +void bfd_register_listener (bfd_notify_fn_t fn); + #endif /* __included_bfd_main_h__ */ /* diff --git a/src/vnet/bfd/bfd_udp.h b/src/vnet/bfd/bfd_udp.h index a4adbadf..87868104 100644 --- a/src/vnet/bfd/bfd_udp.h +++ b/src/vnet/bfd/bfd_udp.h @@ -27,8 +27,12 @@ /* *INDENT-OFF* */ /** identifier of BFD session based on UDP transport only */ typedef CLIB_PACKED (struct { - /** interface to which the session is tied */ - u32 sw_if_index; + union { + /** interface to which the session is tied - single-hop */ + u32 sw_if_index; + /** the FIB index the peer is in - multi-hop*/ + u32 fib_index; + }; /** local address */ ip46_address_t local_addr; /** peer address */ diff --git a/src/vnet/fib/fib_attached_export.c b/src/vnet/fib/fib_attached_export.c index 715e63e7..cc8ebc86 100644 --- a/src/vnet/fib/fib_attached_export.c +++ b/src/vnet/fib/fib_attached_export.c @@ -514,67 +514,52 @@ fib_attached_export_cover_update (fib_entry_t *fib_entry) } u8* -fib_ae_import_format (fib_entry_t *fib_entry, +fib_ae_import_format (fib_node_index_t impi, u8* s) { - fib_entry_delegate_t *fed; - - fed = fib_entry_delegate_get(fib_entry, - FIB_ENTRY_DELEGATE_ATTACHED_IMPORT); - - if (NULL != fed) - { - fib_node_index_t *index; - fib_ae_import_t *import; - - import = pool_elt_at_index(fib_ae_import_pool, fed->fd_index); + fib_node_index_t *index; + fib_ae_import_t *import; - s = format(s, "\n Attached-Import:%d:[", (import - fib_ae_import_pool)); - s = format(s, "export-prefix:%U ", format_fib_prefix, &import->faei_prefix); - s = format(s, "export-entry:%d ", import->faei_export_entry); - s = format(s, "export-sibling:%d ", import->faei_export_sibling); - s = format(s, "exporter:%d ", import->faei_exporter); - s = format(s, "export-fib:%d ", import->faei_export_fib); + import = pool_elt_at_index(fib_ae_import_pool, impi); - s = format(s, "import-entry:%d ", import->faei_import_entry); - s = format(s, "import-fib:%d ", import->faei_import_fib); + s = format(s, "\n Attached-Import:%d:[", 
(import - fib_ae_import_pool)); + s = format(s, "export-prefix:%U ", format_fib_prefix, &import->faei_prefix); + s = format(s, "export-entry:%d ", import->faei_export_entry); + s = format(s, "export-sibling:%d ", import->faei_export_sibling); + s = format(s, "exporter:%d ", import->faei_exporter); + s = format(s, "export-fib:%d ", import->faei_export_fib); + + s = format(s, "import-entry:%d ", import->faei_import_entry); + s = format(s, "import-fib:%d ", import->faei_import_fib); - s = format(s, "importeds:["); - vec_foreach(index, import->faei_importeds) - { - s = format(s, "%d, ", *index); - } - s = format(s, "]]"); + s = format(s, "importeds:["); + vec_foreach(index, import->faei_importeds) + { + s = format(s, "%d, ", *index); } + s = format(s, "]]"); return (s); } u8* -fib_ae_export_format (fib_entry_t *fib_entry, +fib_ae_export_format (fib_node_index_t expi, u8* s) { - fib_entry_delegate_t *fed; - - fed = fib_entry_delegate_get(fib_entry, - FIB_ENTRY_DELEGATE_ATTACHED_EXPORT); - - if (NULL != fed) - { - fib_node_index_t *index; - fib_ae_export_t *export; + fib_node_index_t *index; + fib_ae_export_t *export; - export = pool_elt_at_index(fib_ae_export_pool, fed->fd_list); + export = pool_elt_at_index(fib_ae_export_pool, expi); - s = format(s, "\n Attached-Export:%d:[", (export - fib_ae_export_pool)); - s = format(s, "export-entry:%d ", export->faee_ei); + s = format(s, "\n Attached-Export:%d:[", (export - fib_ae_export_pool)); + s = format(s, "export-entry:%d ", export->faee_ei); - s = format(s, "importers:["); - vec_foreach(index, export->faee_importers) - { - s = format(s, "%d, ", *index); - } - s = format(s, "]]"); + s = format(s, "importers:["); + vec_foreach(index, export->faee_importers) + { + s = format(s, "%d, ", *index); } + s = format(s, "]]"); + return (s); } diff --git a/src/vnet/fib/fib_attached_export.h b/src/vnet/fib/fib_attached_export.h index fa28a6e1..d4c2b57c 100644 --- a/src/vnet/fib/fib_attached_export.h +++ 
b/src/vnet/fib/fib_attached_export.h @@ -51,7 +51,7 @@ extern void fib_attached_export_covered_removed(fib_entry_t *cover, extern void fib_attached_export_cover_change(fib_entry_t *fib_entry); extern void fib_attached_export_cover_update(fib_entry_t *fib_entry); -extern u8* fib_ae_import_format(fib_entry_t *fib_entry, u8*s); -extern u8* fib_ae_export_format(fib_entry_t *fib_entry, u8*s); +extern u8* fib_ae_import_format(fib_node_index_t impi, u8*s); +extern u8* fib_ae_export_format(fib_node_index_t expi, u8*s); #endif diff --git a/src/vnet/fib/fib_bfd.c b/src/vnet/fib/fib_bfd.c new file mode 100644 index 00000000..e5affb8d --- /dev/null +++ b/src/vnet/fib/fib_bfd.c @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include +#include + +static fib_bfd_state_t +fib_bfd_bfd_state_to_fib (bfd_state_e bstate) +{ + switch (bstate) + { + case BFD_STATE_up: + return (FIB_BFD_STATE_UP); + case BFD_STATE_down: + case BFD_STATE_admin_down: + case BFD_STATE_init: + return (FIB_BFD_STATE_DOWN); + } + return (FIB_BFD_STATE_DOWN); +} + +static void +fib_bfd_update_walk (fib_node_index_t fei) +{ + /* + * initiate a backwalk of dependent children + * to notify of the state change of this entry. 
+ */ + fib_node_back_walk_ctx_t ctx = { + .fnbw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE, + }; + fib_walk_sync(FIB_NODE_TYPE_ENTRY, fei, &ctx); +} + +/** + * @brief Callback function registered with BFD module to receive notifications + * of the CRUD of BFD sessions + * would be static but for the fact it's called from the unit-tests + */ +void +fib_bfd_notify (bfd_listen_event_e event, + const bfd_session_t *session) +{ + fib_entry_delegate_t *fed; + const bfd_udp_key_t *key; + fib_node_index_t fei; + + if (BFD_HOP_TYPE_MULTI != session->hop_type) + { + /* + * multi-hop BFD sessions attach directly to the FIB entry + * single-hop adj to the associate adjacency. + */ + return; + } + + key = &session->udp.key; + + fib_prefix_t pfx = { + .fp_addr = key->peer_addr, + .fp_proto = (ip46_address_is_ip4 (&key->peer_addr) ? + FIB_PROTOCOL_IP4: + FIB_PROTOCOL_IP6), + .fp_len = (ip46_address_is_ip4 (&key->peer_addr) ? + 32: + 128), + }; + + /* + * get the FIB entry + */ + fei = fib_table_lookup_exact_match(key->fib_index, &pfx); + + switch (event) + { + case BFD_LISTEN_EVENT_CREATE: + /* + * The creation of a new session + */ + if ((FIB_NODE_INDEX_INVALID != fei) && + (fed = fib_entry_delegate_get(fib_entry_get(fei), + FIB_ENTRY_DELEGATE_BFD))) + { + /* + * already got state for this entry + */ + } + else + { + /* + * source and lock the entry. add the delegate + */ + fei = fib_table_entry_special_add(key->fib_index, + &pfx, + FIB_SOURCE_RR, + FIB_ENTRY_FLAG_NONE, + ADJ_INDEX_INVALID); + fib_entry_lock(fei); + + fed = fib_entry_delegate_find_or_add(fib_entry_get(fei), + FIB_ENTRY_DELEGATE_BFD); + + /* + * pretend the session is up and skip the walk. + * If we set it down then we get traffic loss on new children. + * if we walk then we lose traffic for existing children. Wait + * for the first BFD UP/DOWN before we let the session's state + * influence forwarding. 
+ */ + fed->fd_bfd_state = FIB_BFD_STATE_UP; + } + break; + + case BFD_LISTEN_EVENT_UPDATE: + /* + * state change up/down + */ + ASSERT(FIB_NODE_INDEX_INVALID != fei); + + fed = fib_entry_delegate_get(fib_entry_get(fei), + FIB_ENTRY_DELEGATE_BFD); + + if (NULL != fed) + { + fed->fd_bfd_state = fib_bfd_bfd_state_to_fib(session->local_state); + fib_bfd_update_walk(fei); + } + /* + * else + * no BFD state + */ + break; + + case BFD_LISTEN_EVENT_DELETE: + /* + * session has been removed. + */ + if (FIB_NODE_INDEX_INVALID == fei) + { + /* + * no FIB entry + */ + } + else if (fib_entry_delegate_get(fib_entry_get(fei), + FIB_ENTRY_DELEGATE_BFD)) + { + /* + * has an associated BFD tracking delegate + * unsource the entry and remove the BFD tracking delegate + */ + fib_entry_delegate_remove(fib_entry_get(fei), + FIB_ENTRY_DELEGATE_BFD); + fib_bfd_update_walk(fei); + + fib_table_entry_special_remove(key->fib_index, + &pfx, + FIB_SOURCE_RR); + fib_entry_unlock(fei); + } + /* + * else + * no BFD associated state + */ + break; + } +} + +static clib_error_t * +fib_bfd_main_init (vlib_main_t * vm) +{ + clib_error_t * error = NULL; + + if ((error = vlib_call_init_function (vm, bfd_main_init))) + return (error); + + bfd_register_listener(fib_bfd_notify); + + return (error); +} + +VLIB_INIT_FUNCTION (fib_bfd_main_init); diff --git a/src/vnet/fib/fib_entry.c b/src/vnet/fib/fib_entry.c index 6ac5461d..dac1fce9 100644 --- a/src/vnet/fib/fib_entry.c +++ b/src/vnet/fib/fib_entry.c @@ -99,7 +99,6 @@ format_fib_entry (u8 * s, va_list * args) fib_entry_src_t *src; fib_node_index_t fei; fib_source_t source; - u32 n_covered; int level; fei = va_arg (*args, fib_node_index_t); @@ -143,14 +142,6 @@ format_fib_entry (u8 * s, va_list * args) } })); - n_covered = fib_entry_cover_get_size(fib_entry); - if (n_covered > 0) { - s = format(s, "\n tracking %d covered: ", n_covered); - s = fib_entry_cover_list_format(fib_entry, s); - } - s = fib_ae_import_format(fib_entry, s); - s = 
fib_ae_export_format(fib_entry, s); - s = format (s, "\n forwarding: "); } else @@ -179,20 +170,17 @@ format_fib_entry (u8 * s, va_list * args) fib_entry_delegate_type_t fdt; fib_entry_delegate_t *fed; - FOR_EACH_DELEGATE_CHAIN(fib_entry, fdt, fed, + s = format (s, " Delegates:\n"); + FOR_EACH_DELEGATE(fib_entry, fdt, fed, { - s = format(s, " %U-chain\n %U", - format_fib_forw_chain_type, - fib_entry_delegate_type_to_chain_type(fdt), - format_dpo_id, &fed->fd_dpo, 2); - s = format(s, "\n"); + s = format(s, " %U\n", format_fib_entry_deletegate, fed); }); } } if (level >= FIB_ENTRY_FORMAT_DETAIL2) { - s = format(s, "\nchildren:"); + s = format(s, " Children:"); s = fib_node_children_format(fib_entry->fe_node.fn_children, s); } @@ -1339,6 +1327,36 @@ fib_entry_get_best_source (fib_node_index_t entry_index) return (fib_entry_src_get_source(bsrc)); } +/** + * Return !0 if the entry is resolved, i.e. will return a valid forwarding + * chain + */ +int +fib_entry_is_resolved (fib_node_index_t fib_entry_index) +{ + fib_entry_delegate_t *fed; + fib_entry_t *fib_entry; + + fib_entry = fib_entry_get(fib_entry_index); + + fed = fib_entry_delegate_get(fib_entry, FIB_ENTRY_DELEGATE_BFD); + + if (NULL == fed) + { + /* + * no BFD tracking - resolved + */ + return (!0); + } + else + { + /* + * defer to the state of the BFD tracking + */ + return (FIB_BFD_STATE_UP == fed->fd_bfd_state); + } +} + static int fib_ip4_address_compare (const ip4_address_t * a1, const ip4_address_t * a2) diff --git a/src/vnet/fib/fib_entry.h b/src/vnet/fib/fib_entry.h index 12fa9eb4..a3f75e60 100644 --- a/src/vnet/fib/fib_entry.h +++ b/src/vnet/fib/fib_entry.h @@ -525,6 +525,7 @@ extern int fib_entry_is_sourced(fib_node_index_t fib_entry_index, fib_source_t source); extern fib_node_index_t fib_entry_get_path_list(fib_node_index_t fib_entry_index); +extern int fib_entry_is_resolved(fib_node_index_t fib_entry_index); extern void fib_entry_module_init(void); diff --git a/src/vnet/fib/fib_entry_cover.c 
b/src/vnet/fib/fib_entry_cover.c index 147c5daa..814df578 100644 --- a/src/vnet/fib/fib_entry_cover.c +++ b/src/vnet/fib/fib_entry_cover.c @@ -106,51 +106,6 @@ fib_entry_cover_walk (fib_entry_t *cover, &ctx); } -u32 -fib_entry_cover_get_size (fib_entry_t *cover) -{ - fib_entry_delegate_t *fed; - - fed = fib_entry_delegate_get(cover, FIB_ENTRY_DELEGATE_COVERED); - - if (NULL == fed) - return (0); - - return (fib_node_list_get_size(fed->fd_list)); -} - -typedef struct fib_entry_cover_list_format_ctx_t_ { - u8 *s; -} fib_entry_cover_list_format_ctx_t; - -static int -fib_entry_covered_list_format_one (fib_entry_t *cover, - fib_node_index_t covered, - void *args) -{ - fib_entry_cover_list_format_ctx_t * ctx = args; - - ctx->s = format(ctx->s, "%d, ", covered); - - /* continue */ - return (1); -} - -u8* -fib_entry_cover_list_format (fib_entry_t *fib_entry, - u8 *s) -{ - fib_entry_cover_list_format_ctx_t ctx = { - .s = s, - }; - - fib_entry_cover_walk(fib_entry, - fib_entry_covered_list_format_one, - &ctx); - - return (ctx.s); -} - static int fib_entry_cover_change_one (fib_entry_t *cover, fib_node_index_t covered, diff --git a/src/vnet/fib/fib_entry_cover.h b/src/vnet/fib/fib_entry_cover.h index fbbbc211..500d5b33 100644 --- a/src/vnet/fib/fib_entry_cover.h +++ b/src/vnet/fib/fib_entry_cover.h @@ -39,9 +39,4 @@ extern void fib_entry_cover_change_notify(fib_node_index_t cover_index, fib_node_index_t covered_index); extern void fib_entry_cover_update_notify(fib_entry_t *cover); -extern u32 fib_entry_cover_get_size(fib_entry_t *cover); - -extern u8* fib_entry_cover_list_format(fib_entry_t *fib_entry, - u8 *s); - #endif diff --git a/src/vnet/fib/fib_entry_delegate.c b/src/vnet/fib/fib_entry_delegate.c index 70840b16..41af14f2 100644 --- a/src/vnet/fib/fib_entry_delegate.c +++ b/src/vnet/fib/fib_entry_delegate.c @@ -15,6 +15,7 @@ #include #include +#include static fib_entry_delegate_t * fib_entry_delegate_find_i (const fib_entry_t *fib_entry, @@ -149,8 +150,107 @@ 
fib_entry_delegate_type_to_chain_type (fib_entry_delegate_type_t fdt) case FIB_ENTRY_DELEGATE_COVERED: case FIB_ENTRY_DELEGATE_ATTACHED_IMPORT: case FIB_ENTRY_DELEGATE_ATTACHED_EXPORT: + case FIB_ENTRY_DELEGATE_BFD: break; } ASSERT(0); return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4); } + +/** + * typedef for printing a delegate + */ +typedef u8 * (*fib_entry_delegate_format_t)(const fib_entry_delegate_t *fed, + u8 *s); + +/** + * Print a delegate that represents a forwarding chain + */ +static u8 * +fib_entry_delegate_fmt_fwd_chain (const fib_entry_delegate_t *fed, + u8 *s) +{ + s = format(s, "%U-chain\n %U", + format_fib_forw_chain_type, + fib_entry_delegate_type_to_chain_type(fed->fd_type), + format_dpo_id, &fed->fd_dpo, 2); + + return (s); +} + +/** + * Print a delegate that represents cover tracking + */ +static u8 * +fib_entry_delegate_fmt_covered (const fib_entry_delegate_t *fed, + u8 *s) +{ + s = format(s, "covered:["); + s = fib_node_children_format(fed->fd_list, s); + s = format(s, "]"); + + return (s); +} + +/** + * Print a delegate that represents attached-import tracking + */ +static u8 * +fib_entry_delegate_fmt_import (const fib_entry_delegate_t *fed, + u8 *s) +{ + s = format(s, "import:%U", fib_ae_import_format, fed->fd_index); + + return (s); +} + +/** + * Print a delegate that represents attached-export tracking + */ +static u8 * +fib_entry_delegate_fmt_export (const fib_entry_delegate_t *fed, + u8 *s) +{ + s = format(s, "export:%U", fib_ae_export_format, fed->fd_index); + + return (s); +} + +/** + * Print a delegate that represents BFD tracking + */ +static u8 * +fib_entry_delegate_fmt_bfd (const fib_entry_delegate_t *fed, + u8 *s) +{ + s = format(s, "BFD:%d", fed->fd_bfd_state); + + return (s); +} + +/** + * A delegate type to formatter map + */ +static fib_entry_delegate_format_t fed_formatters[] = +{ + [FIB_ENTRY_DELEGATE_CHAIN_UNICAST_IP4] = fib_entry_delegate_fmt_fwd_chain, + [FIB_ENTRY_DELEGATE_CHAIN_UNICAST_IP6] = fib_entry_delegate_fmt_fwd_chain, 
+ [FIB_ENTRY_DELEGATE_CHAIN_MPLS_EOS] = fib_entry_delegate_fmt_fwd_chain, + [FIB_ENTRY_DELEGATE_CHAIN_MPLS_NON_EOS] = fib_entry_delegate_fmt_fwd_chain, + [FIB_ENTRY_DELEGATE_CHAIN_ETHERNET] = fib_entry_delegate_fmt_fwd_chain, + [FIB_ENTRY_DELEGATE_CHAIN_NSH] = fib_entry_delegate_fmt_fwd_chain, + [FIB_ENTRY_DELEGATE_COVERED] = fib_entry_delegate_fmt_covered, + [FIB_ENTRY_DELEGATE_ATTACHED_IMPORT] = fib_entry_delegate_fmt_import, + [FIB_ENTRY_DELEGATE_ATTACHED_EXPORT] = fib_entry_delegate_fmt_export, + [FIB_ENTRY_DELEGATE_BFD] = fib_entry_delegate_fmt_bfd, +}; + +u8 * +format_fib_entry_deletegate (u8 * s, va_list * args) +{ + fib_entry_delegate_t *fed; + + fed = va_arg (*args, fib_entry_delegate_t *); + + return (fed_formatters[fed->fd_type](fed, s)); +} diff --git a/src/vnet/fib/fib_entry_delegate.h b/src/vnet/fib/fib_entry_delegate.h index d9183c5f..333d357c 100644 --- a/src/vnet/fib/fib_entry_delegate.h +++ b/src/vnet/fib/fib_entry_delegate.h @@ -42,6 +42,10 @@ typedef enum fib_entry_delegate_type_t_ { * to their respective cover */ FIB_ENTRY_DELEGATE_COVERED, + /** + * BFD session state + */ + FIB_ENTRY_DELEGATE_BFD, /** * Attached import/export functionality */ @@ -61,6 +65,28 @@ typedef enum fib_entry_delegate_type_t_ { } \ } \ } +#define FOR_EACH_DELEGATE(_entry, _fdt, _fed, _body) \ +{ \ + for (_fdt = FIB_ENTRY_DELEGATE_CHAIN_UNICAST_IP4; \ + _fdt <= FIB_ENTRY_DELEGATE_ATTACHED_EXPORT; \ + _fdt++) \ + { \ + _fed = fib_entry_delegate_get(_entry, _fdt); \ + if (NULL != _fed) { \ + _body; \ + } \ + } \ +} + +/** + * Distillation of the BFD session states into a go/no-go for using + * the associated tracked FIB entry + */ +typedef enum fib_bfd_state_t_ +{ + FIB_BFD_STATE_UP, + FIB_BFD_STATE_DOWN, +} fib_bfd_state_t; /** * A Delagate is a means to implmenet the Delagation design pattern; the extension of an @@ -103,6 +129,11 @@ typedef struct fib_entry_delegate_t_ * For the cover tracking. 
The node list; */ fib_node_list_t fd_list; + + /** + * BFD state + */ + fib_bfd_state_t fd_bfd_state; }; } fib_entry_delegate_t; @@ -122,4 +153,6 @@ extern fib_forward_chain_type_t fib_entry_delegate_type_to_chain_type( extern fib_entry_delegate_type_t fib_entry_chain_type_to_delegate_type( fib_forward_chain_type_t type); +extern u8 *format_fib_entry_deletegate(u8 * s, va_list * args); + #endif diff --git a/src/vnet/fib/fib_path.c b/src/vnet/fib/fib_path.c index 928a9d43..6b202a97 100644 --- a/src/vnet/fib/fib_path.c +++ b/src/vnet/fib/fib_path.c @@ -558,12 +558,6 @@ fib_path_attached_next_hop_set (fib_path_t *path) * resolve directly via the adjacnecy discribed by the * interface and next-hop */ - if (!vnet_sw_interface_is_admin_up(vnet_get_main(), - path->attached_next_hop.fp_interface)) - { - path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED; - } - dpo_set(&path->fp_dpo, DPO_ADJACENCY, fib_proto_to_dpo(path->fp_nh_proto), @@ -578,6 +572,13 @@ fib_path_attached_next_hop_set (fib_path_t *path) path->fp_sibling = adj_child_add(path->fp_dpo.dpoi_index, FIB_NODE_TYPE_PATH, fib_path_get_index(path)); + + if (!vnet_sw_interface_is_admin_up(vnet_get_main(), + path->attached_next_hop.fp_interface) || + !adj_is_up(path->fp_dpo.dpoi_index)) + { + path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED; + } } /* @@ -653,6 +654,19 @@ fib_path_recursive_adj_update (fib_path_t *path, load_balance_map_path_state_change(fib_path_get_index(path)); } } + /* + * check for over-riding factors on the FIB entry itself + */ + if (!fib_entry_is_resolved(path->fp_via_fib)) + { + path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED; + dpo_copy(&via_dpo, drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto))); + + /* + * PIC edge trigger. 
let the load-balance maps know + */ + load_balance_map_path_state_change(fib_path_get_index(path)); + } /* * update the path's contributed DPO @@ -855,15 +869,16 @@ FIXME comment vnet_get_main(), path->attached_next_hop.fp_interface); - if (if_is_up) - { - path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED; - } - ai = fib_path_attached_next_hop_get_adj( path, fib_proto_to_link(path->fp_nh_proto)); + path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED; + if (if_is_up && adj_is_up(ai)) + { + path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED; + } + dpo_set(&path->fp_dpo, DPO_ADJACENCY, fib_proto_to_dpo(path->fp_nh_proto), ai); @@ -1684,11 +1699,11 @@ fib_path_contribute_urpf (fib_node_index_t path_index, { fib_path_t *path; - if (!fib_path_is_resolved(path_index)) - return; - path = fib_path_get(path_index); + /* + * resolved and unresolved paths contribute to the RPF list. + */ switch (path->fp_type) { case FIB_PATH_TYPE_ATTACHED_NEXT_HOP: @@ -1700,7 +1715,14 @@ fib_path_contribute_urpf (fib_node_index_t path_index, break; case FIB_PATH_TYPE_RECURSIVE: - fib_entry_contribute_urpf(path->fp_via_fib, urpf); + if (FIB_NODE_INDEX_INVALID != path->fp_via_fib) + { + /* + * there's unresolved due to constraints, and there's unresolved + * due to ain't go no via. can't do nowt w'out via. + */ + fib_entry_contribute_urpf(path->fp_via_fib, urpf); + } break; case FIB_PATH_TYPE_EXCLUSIVE: diff --git a/src/vnet/fib/fib_test.c b/src/vnet/fib/fib_test.c index 92141ddf..3c9b8a38 100644 --- a/src/vnet/fib/fib_test.c +++ b/src/vnet/fib/fib_test.c @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -33,6 +34,11 @@ #include #include +/* + * Add debugs for passing tests + */ +static int fib_test_do_debug; + #define FIB_TEST_I(_cond, _comment, _args...) 
\ ({ \ int _evald = (_cond); \ @@ -40,6 +46,9 @@ fformat(stderr, "FAIL:%d: " _comment "\n", \ __LINE__, ##_args); \ } else { \ + if (fib_test_do_debug) \ + fformat(stderr, "PASS:%d: " _comment "\n", \ + __LINE__, ##_args); \ } \ _evald; \ }) @@ -6735,6 +6744,509 @@ fib_test_walk (void) return (0); } +/* + * declaration of the otherwise static callback functions + */ +void fib_bfd_notify (bfd_listen_event_e event, + const bfd_session_t *session); +void adj_bfd_notify (bfd_listen_event_e event, + const bfd_session_t *session); + +/** + * Test BFD session interaction with FIB + */ +static int +fib_test_bfd (void) +{ + fib_node_index_t fei; + test_main_t *tm; + int n_feis; + + /* via 10.10.10.1 */ + ip46_address_t nh_10_10_10_1 = { + .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a01), + }; + /* via 10.10.10.2 */ + ip46_address_t nh_10_10_10_2 = { + .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a02), + }; + /* via 10.10.10.10 */ + ip46_address_t nh_10_10_10_10 = { + .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a0a), + }; + n_feis = fib_entry_pool_size(); + + tm = &test_main; + + /* + * add interface routes. we'll assume this works. 
it's tested elsewhere + */ + fib_prefix_t pfx_10_10_10_10_s_24 = { + .fp_len = 24, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = nh_10_10_10_10, + }; + + fib_table_entry_update_one_path(0, &pfx_10_10_10_10_s_24, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_ATTACHED), + FIB_PROTOCOL_IP4, + NULL, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, // weight + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + + fib_prefix_t pfx_10_10_10_10_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = nh_10_10_10_10, + }; + fib_table_entry_update_one_path(0, &pfx_10_10_10_10_s_32, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_LOCAL), + FIB_PROTOCOL_IP4, + NULL, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, // weight + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + + /* + * A BFD session via a neighbour we do not yet know + */ + bfd_session_t bfd_10_10_10_1 = { + .udp = { + .key = { + .fib_index = 0, + .peer_addr = nh_10_10_10_1, + }, + }, + .hop_type = BFD_HOP_TYPE_MULTI, + .local_state = BFD_STATE_init, + }; + + fib_bfd_notify (BFD_LISTEN_EVENT_CREATE, &bfd_10_10_10_1); + + /* + * A new entry will be created that forwards via the adj + */ + adj_index_t ai_10_10_10_1 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, + VNET_LINK_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index); + fib_prefix_t pfx_10_10_10_1_s_32 = { + .fp_addr = nh_10_10_10_1, + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + }; + fib_test_lb_bucket_t adj_o_10_10_10_1 = { + .type = FT_LB_ADJ, + .adj = { + .adj = ai_10_10_10_1, + }, + }; + + fei = fib_table_lookup_exact_match(0, &pfx_10_10_10_1_s_32); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 1, + &adj_o_10_10_10_1), + "BFD sourced %U via %U", + format_fib_prefix, &pfx_10_10_10_1_s_32, + format_ip_adjacency, ai_10_10_10_1, FORMAT_IP_ADJACENCY_NONE); + + /* + * Delete the BFD session. 
Expect the fib_entry to be removed + */ + fib_bfd_notify (BFD_LISTEN_EVENT_DELETE, &bfd_10_10_10_1); + + fei = fib_table_lookup_exact_match(0, &pfx_10_10_10_1_s_32); + FIB_TEST(FIB_NODE_INDEX_INVALID == fei, + "BFD sourced %U removed", + format_fib_prefix, &pfx_10_10_10_1_s_32); + + /* + * Add the BFD source back + */ + fib_bfd_notify (BFD_LISTEN_EVENT_CREATE, &bfd_10_10_10_1); + + /* + * source the entry via the ADJ fib + */ + fei = fib_table_entry_update_one_path(0, + &pfx_10_10_10_1_s_32, + FIB_SOURCE_ADJ, + FIB_ENTRY_FLAG_ATTACHED, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + + /* + * Delete the BFD session. Expect the fib_entry to remain + */ + fib_bfd_notify (BFD_LISTEN_EVENT_DELETE, &bfd_10_10_10_1); + + fei = fib_table_lookup_exact_match(0, &pfx_10_10_10_1_s_32); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 1, + &adj_o_10_10_10_1), + "BFD sourced %U remains via %U", + format_fib_prefix, &pfx_10_10_10_1_s_32, + format_ip_adjacency, ai_10_10_10_1, FORMAT_IP_ADJACENCY_NONE); + + /* + * Add the BFD source back + */ + fib_bfd_notify (BFD_LISTEN_EVENT_CREATE, &bfd_10_10_10_1); + + /* + * Create another ADJ FIB + */ + fib_prefix_t pfx_10_10_10_2_s_32 = { + .fp_addr = nh_10_10_10_2, + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + }; + fib_table_entry_update_one_path(0, + &pfx_10_10_10_2_s_32, + FIB_SOURCE_ADJ, + FIB_ENTRY_FLAG_ATTACHED, + FIB_PROTOCOL_IP4, + &nh_10_10_10_2, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + /* + * A BFD session for the new ADJ FIB + */ + bfd_session_t bfd_10_10_10_2 = { + .udp = { + .key = { + .fib_index = 0, + .peer_addr = nh_10_10_10_2, + }, + }, + .hop_type = BFD_HOP_TYPE_MULTI, + .local_state = BFD_STATE_init, + }; + + fib_bfd_notify (BFD_LISTEN_EVENT_CREATE, &bfd_10_10_10_2); + + /* + * remove the adj-fib source whilst the session is present + * then add it 
back + */ + fib_table_entry_delete(0, &pfx_10_10_10_2_s_32, FIB_SOURCE_ADJ); + fib_table_entry_update_one_path(0, + &pfx_10_10_10_2_s_32, + FIB_SOURCE_ADJ, + FIB_ENTRY_FLAG_ATTACHED, + FIB_PROTOCOL_IP4, + &nh_10_10_10_2, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + + /* + * Before adding a recursive via the BFD tracked ADJ-FIBs, + * bring one of the sessions UP, leave the other down + */ + bfd_10_10_10_1.local_state = BFD_STATE_up; + fib_bfd_notify (BFD_LISTEN_EVENT_UPDATE, &bfd_10_10_10_1); + bfd_10_10_10_2.local_state = BFD_STATE_down; + fib_bfd_notify (BFD_LISTEN_EVENT_UPDATE, &bfd_10_10_10_2); + + /* + * A recursive prefix via both of the ADJ FIBs + */ + fib_prefix_t pfx_200_0_0_0_s_24 = { + .fp_proto = FIB_PROTOCOL_IP4, + .fp_len = 32, + .fp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0xc8000000), + }, + }; + const dpo_id_t *dpo_10_10_10_1, *dpo_10_10_10_2; + + dpo_10_10_10_1 = + fib_entry_contribute_ip_forwarding( + fib_table_lookup_exact_match(0, &pfx_10_10_10_1_s_32)); + dpo_10_10_10_2 = + fib_entry_contribute_ip_forwarding( + fib_table_lookup_exact_match(0, &pfx_10_10_10_2_s_32)); + + fib_test_lb_bucket_t lb_o_10_10_10_1 = { + .type = FT_LB_O_LB, + .lb = { + .lb = dpo_10_10_10_1->dpoi_index, + }, + }; + fib_test_lb_bucket_t lb_o_10_10_10_2 = { + .type = FT_LB_O_LB, + .lb = { + .lb = dpo_10_10_10_2->dpoi_index, + }, + }; + + /* + * A prefix via the adj-fib that is BFD down => DROP + */ + fei = fib_table_entry_path_add(0, + &pfx_200_0_0_0_s_24, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_2, + ~0, // recursive + 0, // default fib index + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "%U resolves via drop", + format_fib_prefix, &pfx_200_0_0_0_s_24); + + /* + * add a path via the UP BFD adj-fib. + * we expect that the DOWN BFD ADJ FIB is not used. 
+ */ + fei = fib_table_entry_path_add(0, + &pfx_200_0_0_0_s_24, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + ~0, // recursive + 0, // default fib index + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 1, + &lb_o_10_10_10_1), + "Recursive %U only UP BFD adj-fibs", + format_fib_prefix, &pfx_200_0_0_0_s_24); + + /* + * Send a BFD state change to UP - both sessions are now up + * the recursive prefix should LB over both + */ + bfd_10_10_10_2.local_state = BFD_STATE_up; + fib_bfd_notify (BFD_LISTEN_EVENT_UPDATE, &bfd_10_10_10_2); + + + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 2, + &lb_o_10_10_10_1, + &lb_o_10_10_10_2), + "Recursive %U via both UP BFD adj-fibs", + format_fib_prefix, &pfx_200_0_0_0_s_24); + + /* + * Send a BFD state change to DOWN + * the recursive prefix should exclude the down + */ + bfd_10_10_10_2.local_state = BFD_STATE_down; + fib_bfd_notify (BFD_LISTEN_EVENT_UPDATE, &bfd_10_10_10_2); + + + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 1, + &lb_o_10_10_10_1), + "Recursive %U via only UP", + format_fib_prefix, &pfx_200_0_0_0_s_24); + + /* + * Delete the BFD session while it is in the DOWN state. + * FIB should consider the entry's state as back up + */ + fib_bfd_notify (BFD_LISTEN_EVENT_DELETE, &bfd_10_10_10_2); + + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 2, + &lb_o_10_10_10_1, + &lb_o_10_10_10_2), + "Recursive %U via both UP BFD adj-fibs post down session delete", + format_fib_prefix, &pfx_200_0_0_0_s_24); + + /* + * Delete the BFD other session while it is in the UP state. 
+ */ + fib_bfd_notify (BFD_LISTEN_EVENT_DELETE, &bfd_10_10_10_1); + + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 2, + &lb_o_10_10_10_1, + &lb_o_10_10_10_2), + "Recursive %U via both UP BFD adj-fibs post up session delete", + format_fib_prefix, &pfx_200_0_0_0_s_24); + + /* + * cleaup + */ + fib_table_entry_delete(0, &pfx_200_0_0_0_s_24, FIB_SOURCE_API); + fib_table_entry_delete(0, &pfx_10_10_10_1_s_32, FIB_SOURCE_ADJ); + fib_table_entry_delete(0, &pfx_10_10_10_2_s_32, FIB_SOURCE_ADJ); + + fib_table_entry_delete(0, &pfx_10_10_10_10_s_32, FIB_SOURCE_INTERFACE); + fib_table_entry_delete(0, &pfx_10_10_10_10_s_24, FIB_SOURCE_INTERFACE); + + adj_unlock(ai_10_10_10_1); + /* + * test no-one left behind + */ + FIB_TEST((n_feis == fib_entry_pool_size()), "Entries gone"); + FIB_TEST(0 == adj_nbr_db_size(), "All adjacencies removed"); + + /* + * Single-hop BFD tests + */ + bfd_10_10_10_1.hop_type = BFD_HOP_TYPE_SINGLE; + bfd_10_10_10_1.udp.key.sw_if_index = tm->hw[0]->sw_if_index; + + adj_bfd_notify(BFD_LISTEN_EVENT_CREATE, &bfd_10_10_10_1); + + ai_10_10_10_1 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, + VNET_LINK_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index); + /* + * whilst the BFD session is not signalled, the adj is up + */ + FIB_TEST(adj_is_up(ai_10_10_10_1), "Adj state up on uninit session"); + + /* + * bring the BFD session up + */ + bfd_10_10_10_1.local_state = BFD_STATE_up; + adj_bfd_notify(BFD_LISTEN_EVENT_UPDATE, &bfd_10_10_10_1); + FIB_TEST(adj_is_up(ai_10_10_10_1), "Adj state up on UP session"); + + /* + * bring the BFD session down + */ + bfd_10_10_10_1.local_state = BFD_STATE_down; + adj_bfd_notify(BFD_LISTEN_EVENT_UPDATE, &bfd_10_10_10_1); + FIB_TEST(!adj_is_up(ai_10_10_10_1), "Adj state down on DOWN session"); + + + /* + * add an attached next hop FIB entry via the down adj + */ + fib_prefix_t pfx_5_5_5_5_s_32 = { + .fp_addr = { + .ip4 = { + .as_u32 = clib_host_to_net_u32(0x05050505), + }, + }, + .fp_len = 32, + .fp_proto = 
FIB_PROTOCOL_IP4, + }; + + fei = fib_table_entry_path_add(0, + &pfx_5_5_5_5_s_32, + FIB_SOURCE_CLI, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "%U resolves via drop", + format_fib_prefix, &pfx_5_5_5_5_s_32); + + /* + * Add a path via an ADJ that is up + */ + adj_index_t ai_10_10_10_2 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, + VNET_LINK_IP4, + &nh_10_10_10_2, + tm->hw[0]->sw_if_index); + + fib_test_lb_bucket_t adj_o_10_10_10_2 = { + .type = FT_LB_ADJ, + .adj = { + .adj = ai_10_10_10_2, + }, + }; + adj_o_10_10_10_1.adj.adj = ai_10_10_10_1; + + fei = fib_table_entry_path_add(0, + &pfx_5_5_5_5_s_32, + FIB_SOURCE_CLI, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_2, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 1, + &adj_o_10_10_10_2), + "BFD sourced %U via %U", + format_fib_prefix, &pfx_5_5_5_5_s_32, + format_ip_adjacency, ai_10_10_10_2, FORMAT_IP_ADJACENCY_NONE); + + /* + * Bring up the down session - should now LB + */ + bfd_10_10_10_1.local_state = BFD_STATE_up; + adj_bfd_notify(BFD_LISTEN_EVENT_UPDATE, &bfd_10_10_10_1); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 2, + &adj_o_10_10_10_1, + &adj_o_10_10_10_2), + "BFD sourced %U via noth adjs", + format_fib_prefix, &pfx_5_5_5_5_s_32); + + /* + * remove the BFD session state from the adj + */ + adj_bfd_notify(BFD_LISTEN_EVENT_DELETE, &bfd_10_10_10_1); + + /* + * clean-up + */ + fib_table_entry_delete(0, &pfx_5_5_5_5_s_32, FIB_SOURCE_CLI); + adj_unlock(ai_10_10_10_1); + adj_unlock(ai_10_10_10_2); + + /* + * test no-one left behind + */ + FIB_TEST((n_feis == fib_entry_pool_size()), "Entries gone"); + FIB_TEST(0 == adj_nbr_db_size(), "All adjacencies removed"); + 
return (0); +} + static int lfib_test (void) { @@ -7119,6 +7631,11 @@ fib_test (vlib_main_t * vm, res = 0; fib_test_mk_intf(4); + if (unformat (input, "debug")) + { + fib_test_do_debug = 1; + } + if (unformat (input, "ip")) { res += fib_test_v4(); @@ -7140,6 +7657,10 @@ fib_test (vlib_main_t * vm, { res += fib_test_walk(); } + else if (unformat (input, "bfd")) + { + res += fib_test_bfd(); + } else { /* @@ -7151,6 +7672,7 @@ fib_test (vlib_main_t * vm, res += fib_test_v4(); res += fib_test_v6(); res += fib_test_ae(); + res += fib_test_bfd(); res += fib_test_label(); res += lfib_test(); } diff --git a/src/vnet/fib/ip4_fib.c b/src/vnet/fib/ip4_fib.c index 98d4e52f..b03186e8 100644 --- a/src/vnet/fib/ip4_fib.c +++ b/src/vnet/fib/ip4_fib.c @@ -477,12 +477,15 @@ static void ip4_fib_table_show_one (ip4_fib_t *fib, vlib_main_t * vm, ip4_address_t *address, - u32 mask_len) + u32 mask_len, + int detail) { vlib_cli_output(vm, "%U", format_fib_entry, ip4_fib_table_lookup(fib, address, mask_len), - FIB_ENTRY_FORMAT_DETAIL); + (detail ? 
+ FIB_ENTRY_FORMAT_DETAIL2 : + FIB_ENTRY_FORMAT_DETAIL)); } static clib_error_t * @@ -496,6 +499,7 @@ ip4_show_fib (vlib_main_t * vm, ip4_address_t matching_address; u32 matching_mask = 32; int i, table_id = -1, fib_index = ~0; + int detail = 0; verbose = 1; matching = 0; @@ -506,6 +510,9 @@ ip4_show_fib (vlib_main_t * vm, || unformat (input, "sum")) verbose = 0; + else if (unformat (input, "detail") || unformat (input, "det")) + detail = 1; + else if (unformat (input, "mtrie")) mtrie = 1; @@ -563,7 +570,8 @@ ip4_show_fib (vlib_main_t * vm, } else { - ip4_fib_table_show_one(fib, vm, &matching_address, matching_mask); + ip4_fib_table_show_one(fib, vm, &matching_address, + matching_mask, detail); } })); @@ -717,7 +725,7 @@ ip4_show_fib (vlib_main_t * vm, /* *INDENT-OFF* */ VLIB_CLI_COMMAND (ip4_show_fib_command, static) = { .path = "show ip fib", - .short_help = "show ip fib [summary] [table ] [index ] [[/]] [mtrie]", + .short_help = "show ip fib [summary] [table ] [index ] [[/]] [mtrie] [detail]", .function = ip4_show_fib, }; /* *INDENT-ON* */ diff --git a/src/vnet/fib/ip6_fib.c b/src/vnet/fib/ip6_fib.c index 0ee029d3..00297140 100644 --- a/src/vnet/fib/ip6_fib.c +++ b/src/vnet/fib/ip6_fib.c @@ -560,12 +560,15 @@ static void ip6_fib_table_show_one (ip6_fib_t *fib, vlib_main_t * vm, ip6_address_t *address, - u32 mask_len) + u32 mask_len, + int detail) { vlib_cli_output(vm, "%U", format_fib_entry, ip6_fib_table_lookup(fib->index, address, mask_len), - FIB_ENTRY_FORMAT_DETAIL); + (detail ? 
+ FIB_ENTRY_FORMAT_DETAIL2: + FIB_ENTRY_FORMAT_DETAIL)); } typedef struct { @@ -573,8 +576,9 @@ typedef struct { u64 count_by_prefix_length[129]; } count_routes_in_fib_at_prefix_length_arg_t; -static void count_routes_in_fib_at_prefix_length -(BVT(clib_bihash_kv) * kvp, void *arg) +static void +count_routes_in_fib_at_prefix_length (BVT(clib_bihash_kv) * kvp, + void *arg) { count_routes_in_fib_at_prefix_length_arg_t * ap = arg; int mask_width; @@ -600,6 +604,7 @@ ip6_show_fib (vlib_main_t * vm, ip6_address_t matching_address; u32 mask_len = 128; int table_id = -1, fib_index = ~0; + int detail = 0; verbose = 1; matching = 0; @@ -610,6 +615,10 @@ ip6_show_fib (vlib_main_t * vm, unformat (input, "summary") || unformat (input, "sum")) verbose = 0; + + else if (unformat (input, "detail") || + unformat (input, "det")) + detail = 1; else if (unformat (input, "%U/%d", unformat_ip6_address, &matching_address, &mask_len)) @@ -667,7 +676,7 @@ ip6_show_fib (vlib_main_t * vm, } else { - ip6_fib_table_show_one(fib, vm, &matching_address, mask_len); + ip6_fib_table_show_one(fib, vm, &matching_address, mask_len, detail); } })); @@ -771,7 +780,7 @@ ip6_show_fib (vlib_main_t * vm, /* *INDENT-OFF* */ VLIB_CLI_COMMAND (ip6_show_fib_command, static) = { .path = "show ip6 fib", - .short_help = "show ip6 fib [summary] [table ] [index ] [[/]]", + .short_help = "show ip6 fib [summary] [table ] [index ] [[/]] [detail]", .function = ip6_show_fib, }; /* *INDENT-ON* */ diff --git a/test/framework.py b/test/framework.py index fbd21d23..ce70af2e 100644 --- a/test/framework.py +++ b/test/framework.py @@ -359,7 +359,7 @@ class VppTestCase(unittest.TestCase): self._testMethodDoc)) if not self.vpp_dead: self.logger.debug(self.vapi.cli("show trace")) - self.logger.info(self.vapi.ppcli("show interfaces")) + self.logger.info(self.vapi.ppcli("show interface")) self.logger.info(self.vapi.ppcli("show hardware")) self.logger.info(self.vapi.ppcli("show error")) self.logger.info(self.vapi.ppcli("show run")) 
diff --git a/test/test_bfd.py b/test/test_bfd.py index c9d0abdd..e8f8f338 100644 --- a/test/test_bfd.py +++ b/test/test_bfd.py @@ -16,9 +16,10 @@ from scapy.layers.inet6 import IPv6 from bfd import VppBFDAuthKey, BFD, BFDAuthType, VppBFDUDPSession, \ BFDDiagCode, BFDState, BFD_vpp_echo from framework import VppTestCase, VppTestRunner, running_extended_tests -from vpp_pg_interface import CaptureTimeoutError +from vpp_pg_interface import CaptureTimeoutError, is_ipv6_misc from util import ppp from vpp_papi_provider import UnexpectedApiReturnValueError +from vpp_ip_route import VppIpRoute, VppRoutePath USEC_IN_SEC = 1000000 @@ -1582,6 +1583,107 @@ class BFD6TestCase(VppTestCase): self.test_session.send_packet() +class BFDFIBTestCase(VppTestCase): + """ BFD-FIB interactions (IPv6) """ + + vpp_session = None + test_session = None + + def setUp(self): + super(BFDFIBTestCase, self).setUp() + self.create_pg_interfaces(range(1)) + + self.vapi.want_bfd_events() + self.pg0.enable_capture() + + for i in self.pg_interfaces: + i.admin_up() + i.config_ip6() + i.configure_ipv6_neighbors() + + def tearDown(self): + if not self.vpp_dead: + self.vapi.want_bfd_events(enable_disable=0) + + super(BFDFIBTestCase, self).tearDown() + + @staticmethod + def pkt_is_not_data_traffic(p): + """ not data traffic implies BFD or the usual IPv6 ND/RA""" + if p.haslayer(BFD) or is_ipv6_misc(p): + return True + return False + + def test_session_with_fib(self): + """ BFD-FIB interactions """ + + # packets to match against both of the routes + p = [(Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) / + IPv6(src="3001::1", dst="2001::1") / + UDP(sport=1234, dport=1234) / + Raw('\xa5' * 100)), + (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) / + IPv6(src="3001::1", dst="2002::1") / + UDP(sport=1234, dport=1234) / + Raw('\xa5' * 100))] + + # A recursive and a non-recursive route via a next-hop that + # will have a BFD session + ip_2001_s_64 = VppIpRoute(self, "2001::", 64, + 
[VppRoutePath(self.pg0.remote_ip6, + self.pg0.sw_if_index, + is_ip6=1)], + is_ip6=1) + ip_2002_s_64 = VppIpRoute(self, "2002::", 64, + [VppRoutePath(self.pg0.remote_ip6, + 0xffffffff, + is_ip6=1)], + is_ip6=1) + ip_2001_s_64.add_vpp_config() + ip_2002_s_64.add_vpp_config() + + # bring the session up now the routes are present + self.vpp_session = VppBFDUDPSession(self, + self.pg0, + self.pg0.remote_ip6, + af=AF_INET6) + self.vpp_session.add_vpp_config() + self.vpp_session.admin_up() + self.test_session = BFDTestSession(self, self.pg0, AF_INET6) + + # session is up - traffic passes + bfd_session_up(self) + + self.pg0.add_stream(p) + self.pg_start() + for packet in p: + captured = self.pg0.wait_for_packet( + 1, + filter_out_fn=self.pkt_is_not_data_traffic) + self.assertEqual(captured[IPv6].dst, + packet[IPv6].dst) + + # session is up - traffic is dropped + bfd_session_down(self) + + self.pg0.add_stream(p) + self.pg_start() + with self.assertRaises(CaptureTimeoutError): + self.pg0.wait_for_packet(1, self.pkt_is_not_data_traffic) + + # session is up - traffic passes + bfd_session_up(self) + + self.pg0.add_stream(p) + self.pg_start() + for packet in p: + captured = self.pg0.wait_for_packet( + 1, + filter_out_fn=self.pkt_is_not_data_traffic) + self.assertEqual(captured[IPv6].dst, + packet[IPv6].dst) + + class BFDSHA1TestCase(VppTestCase): """Bidirectional Forwarding Detection (BFD) (SHA1 auth) """ diff --git a/test/vpp_papi_provider.py b/test/vpp_papi_provider.py index 7f9e2ae1..e8025dff 100644 --- a/test/vpp_papi_provider.py +++ b/test/vpp_papi_provider.py @@ -152,8 +152,9 @@ class VppPapiProvider(object): raise UnexpectedApiReturnValueError(msg) elif self._expect_api_retval == self._zero: if hasattr(reply, 'retval') and reply.retval != expected_retval: - msg = "API call failed, expected zero return value instead "\ - "of %d in %s" % (expected_retval, repr(reply)) + msg = "API call failed, expected %d return value instead "\ + "of %d in %s" % (expected_retval, 
reply.retval, + repr(reply)) self.test_class.logger.info(msg) raise UnexpectedApiReturnValueError(msg) else: -- cgit 1.2.3-korg From 86326daeaa10c5ce4a8aa0b6d97c75a3bbb73493 Mon Sep 17 00:00:00 2001 From: Klement Sekera Date: Mon, 24 Apr 2017 02:30:53 +0000 Subject: BFD: disable gcc6 warnings in helper macros Change-Id: Ibec3f1a2619d593accd8c560fb29d39d5617e16a Signed-off-by: Klement Sekera --- src/vnet/bfd/bfd_cli.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'src/vnet/bfd') diff --git a/src/vnet/bfd/bfd_cli.c b/src/vnet/bfd/bfd_cli.c index 44e671c5..b2cd8df2 100644 --- a/src/vnet/bfd/bfd_cli.c +++ b/src/vnet/bfd/bfd_cli.c @@ -385,8 +385,19 @@ static const unsigned optional = 0; have_##n = 1; \ } +#if __GNUC__ >= 6 +#define PRAGMA_STR1 \ + _Pragma ("GCC diagnostic ignored \"-Wtautological-compare\""); +#define PRAGMA_STR2 _Pragma ("GCC diagnostic pop"); +#else +#define PRAGMA_STR1 +#define PRAGMA_STR2 +#endif + #define CHECK_MANDATORY(t, n, s, r, ...) \ + PRAGMA_STR1 \ if (mandatory == r && !have_##n) \ + PRAGMA_STR2 \ { \ ret = clib_error_return (0, "Required parameter `%s' missing.", s); \ goto out; \ -- cgit 1.2.3-korg From 11b8dbf78af49d270a0e72abe7dea73eec30d85f Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Mon, 24 Apr 2017 10:46:54 -0400 Subject: "autoreply" flag: autogenerate standard xxx_reply_t messages Change-Id: I72298aaae7d172082ece3a8edea4217c11b28d79 Signed-off-by: Dave Barach --- src/examples/sample-plugin/sample/sample.api | 10 +- src/plugins/acl/acl.api | 60 +--- src/plugins/dpdk/api/dpdk.api | 35 +- src/plugins/flowperpkt/flowperpkt.api | 23 +- .../export-vxlan-gpe/vxlan_gpe_ioam_export.api | 10 +- src/plugins/ioam/export/ioam_export.api | 10 +- src/plugins/ioam/ip6/ioam_cache.api | 10 +- src/plugins/ioam/lib-pot/pot.api | 34 +- src/plugins/ioam/lib-trace/trace.api | 26 +- src/plugins/ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api | 82 +---- src/plugins/lb/lb.api | 21 +- src/plugins/memif/memif.api | 12 +- src/plugins/snat/snat.api | 
88 +---- src/tools/vppapigen/gram.y | 3 +- src/tools/vppapigen/lex.c | 57 +++- src/tools/vppapigen/lex.h | 1 + src/tools/vppapigen/node.c | 5 + src/tools/vppapigen/node.h | 2 + src/vlibmemory/memclnt.api | 7 +- src/vlibmemory/memory_vlib.c | 8 +- src/vnet/bfd/bfd.api | 132 +------- src/vnet/classify/classify.api | 37 +-- src/vnet/cop/cop.api | 28 +- src/vnet/devices/af_packet/af_packet.api | 12 +- src/vnet/devices/netmap/netmap.api | 24 +- src/vnet/devices/virtio/vhost_user.api | 24 +- src/vnet/dhcp/dhcp.api | 38 +-- src/vnet/flow/flow.api | 32 +- src/vnet/interface.api | 108 +----- src/vnet/ip/ip.api | 108 +----- src/vnet/ipsec/ipsec.api | 224 ++----------- src/vnet/l2/l2.api | 96 +----- src/vnet/l2tp/l2tp.api | 28 +- src/vnet/lisp-cp/lisp.api | 164 +-------- src/vnet/lisp-cp/one.api | 185 +---------- src/vnet/lisp-gpe/lisp_gpe.api | 48 +-- src/vnet/map/map.api | 22 +- src/vnet/mpls/mpls.api | 26 +- src/vnet/session/session.api | 68 +--- src/vnet/span/span.api | 10 +- src/vnet/sr/sr.api | 60 +--- src/vnet/unix/tap.api | 12 +- src/vnet/vxlan/vxlan.api | 12 +- src/vpp/api/vpe.api | 367 ++------------------- 44 files changed, 271 insertions(+), 2098 deletions(-) (limited to 'src/vnet/bfd') diff --git a/src/examples/sample-plugin/sample/sample.api b/src/examples/sample-plugin/sample/sample.api index f99cdb38..d565c0b1 100644 --- a/src/examples/sample-plugin/sample/sample.api +++ b/src/examples/sample-plugin/sample/sample.api @@ -16,7 +16,7 @@ /* Define a simple binary API to control the feature */ -define sample_macswap_enable_disable { +autoreply define sample_macswap_enable_disable { /* Client identifier, set from api_main.my_client_index */ u32 client_index; @@ -29,11 +29,3 @@ define sample_macswap_enable_disable { /* Interface handle */ u32 sw_if_index; }; - -define sample_macswap_enable_disable_reply { - /* From the request */ - u32 context; - - /* Return value, zero means all OK */ - i32 retval; -}; diff --git a/src/plugins/acl/acl.api b/src/plugins/acl/acl.api 
index d981338d..3b334113 100644 --- a/src/plugins/acl/acl.api +++ b/src/plugins/acl/acl.api @@ -161,24 +161,13 @@ define acl_add_replace_reply @param acl_index - ACL index to delete */ -manual_print define acl_del +autoreply manual_print define acl_del { u32 client_index; u32 context; u32 acl_index; }; -/** \brief Reply to delete the ACL - @param context - returned sender context, to match reply w/ request - @param retval 0 - no error -*/ - -define acl_del_reply -{ - u32 context; - i32 retval; -}; - /* acl_interface_add_del(_reply) to be deprecated in lieu of acl_interface_set_acl_list */ /** \brief Use acl_interface_set_acl_list instead Append/remove an ACL index to/from the list of ACLs checked for an interface @@ -190,7 +179,7 @@ define acl_del_reply @param acl_index - index of ACL for the operation */ -manual_print define acl_interface_add_del +autoreply manual_print define acl_interface_add_del { u32 client_index; u32 context; @@ -204,17 +193,6 @@ manual_print define acl_interface_add_del u32 acl_index; }; -/** \brief Reply to alter the ACL list - @param context - returned sender context, to match reply w/ request - @param retval 0 - no error -*/ - -define acl_interface_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set the vector of input/output ACLs checked for an interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -224,7 +202,7 @@ define acl_interface_add_del_reply @param acls - vector of ACL indices */ -manual_print define acl_interface_set_acl_list +autoreply manual_print define acl_interface_set_acl_list { u32 client_index; u32 context; @@ -239,12 +217,6 @@ manual_print define acl_interface_set_acl_list @param retval 0 - no error */ -define acl_interface_set_acl_list_reply -{ - u32 context; - i32 retval; -}; - /** \brief Dump the specific ACL contents or all of the ACLs' contents @param client_index - opaque cookie to identify the sender @param context - sender 
context, to match reply w/ request @@ -341,24 +313,13 @@ define macip_acl_add_reply @param acl_index - MACIP ACL index to delete */ -manual_print define macip_acl_del +autoreply manual_print define macip_acl_del { u32 client_index; u32 context; u32 acl_index; }; -/** \brief Reply to delete the MACIP ACL - @param context - returned sender context, to match reply w/ request - @param retval 0 - no error -*/ - -define macip_acl_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Add or delete a MACIP ACL to/from interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -367,7 +328,7 @@ define macip_acl_del_reply @param acl_index - MACIP ACL index */ -manual_print define macip_acl_interface_add_del +autoreply manual_print define macip_acl_interface_add_del { u32 client_index; u32 context; @@ -377,17 +338,6 @@ manual_print define macip_acl_interface_add_del u32 acl_index; }; -/** \brief Reply to apply/unapply the MACIP ACL - @param context - returned sender context, to match reply w/ request - @param retval 0 - no error -*/ - -define macip_acl_interface_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Dump one or all defined MACIP ACLs @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/plugins/dpdk/api/dpdk.api b/src/plugins/dpdk/api/dpdk.api index 21215d45..d43f8a36 100644 --- a/src/plugins/dpdk/api/dpdk.api +++ b/src/plugins/dpdk/api/dpdk.api @@ -21,7 +21,7 @@ @param pipe - pipe ID within its subport @param profile - pipe profile ID */ -define sw_interface_set_dpdk_hqos_pipe { +autoreply define sw_interface_set_dpdk_hqos_pipe { u32 client_index; u32 context; u32 sw_if_index; @@ -30,15 +30,6 @@ define sw_interface_set_dpdk_hqos_pipe { u32 profile; }; -/** \brief DPDK interface HQoS pipe profile set reply - @param context - sender context, to match reply w/ request - @param retval - request 
return code -*/ -define sw_interface_set_dpdk_hqos_pipe_reply { - u32 context; - i32 retval; -}; - /** \brief DPDK interface HQoS subport parameters set request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -49,7 +40,7 @@ define sw_interface_set_dpdk_hqos_pipe_reply { @param tc_rate - subport traffic class 0 .. 3 rates (measured in bytes/second) @param tc_period - enforcement period for rates (measured in milliseconds) */ -define sw_interface_set_dpdk_hqos_subport { +autoreply define sw_interface_set_dpdk_hqos_subport { u32 client_index; u32 context; u32 sw_if_index; @@ -60,15 +51,6 @@ define sw_interface_set_dpdk_hqos_subport { u32 tc_period; }; -/** \brief DPDK interface HQoS subport parameters set reply - @param context - sender context, to match reply w/ request - @param retval - request return code -*/ -define sw_interface_set_dpdk_hqos_subport_reply { - u32 context; - i32 retval; -}; - /** \brief DPDK interface HQoS tctbl entry set request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -77,7 +59,7 @@ define sw_interface_set_dpdk_hqos_subport_reply { @param tc - traffic class (0 .. 3) @param queue - traffic class queue (0 .. 
3) */ -define sw_interface_set_dpdk_hqos_tctbl { +autoreply define sw_interface_set_dpdk_hqos_tctbl { u32 client_index; u32 context; u32 sw_if_index; @@ -86,18 +68,9 @@ define sw_interface_set_dpdk_hqos_tctbl { u32 queue; }; -/** \brief DPDK interface HQoS tctbl entry set reply - @param context - sender context, to match reply w/ request - @param retval - request return code -*/ -define sw_interface_set_dpdk_hqos_tctbl_reply { - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") * End: */ - \ No newline at end of file + diff --git a/src/plugins/flowperpkt/flowperpkt.api b/src/plugins/flowperpkt/flowperpkt.api index 1cf62c54..3ff92dca 100644 --- a/src/plugins/flowperpkt/flowperpkt.api +++ b/src/plugins/flowperpkt/flowperpkt.api @@ -12,7 +12,7 @@ @param is_ipv6 - if non-zero the address is ipv6, else ipv4 @param sw_if_index - index of the interface */ -manual_print define flowperpkt_tx_interface_add_del +autoreply manual_print define flowperpkt_tx_interface_add_del { /* Client identifier, set from api_main.my_client_index */ u32 client_index; @@ -28,20 +28,7 @@ manual_print define flowperpkt_tx_interface_add_del u32 sw_if_index; }; -/** \brief Reply to enable/disable per-packet IPFIX recording messages - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define flowperpkt_tx_interface_add_del_reply -{ - /* From the request */ - u32 context; - - /* Return value, zero means all OK */ - i32 retval; -}; - -define flowperpkt_params +autoreply define flowperpkt_params { u32 client_index; u32 context; @@ -51,9 +38,3 @@ define flowperpkt_params u32 active_timer; /* ~0 is off, 0 is default */ u32 passive_timer; /* ~0 is off, 0 is default */ }; - -define flowperpkt_params_reply -{ - u32 context; - i32 retval; -}; diff --git a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api index 7b17c3f7..caa97e6e 100644 --- 
a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api +++ b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api @@ -16,7 +16,7 @@ /* Define a simple binary API to control the feature */ -define vxlan_gpe_ioam_export_enable_disable { +autoreply define vxlan_gpe_ioam_export_enable_disable { /* Client identifier, set from api_main.my_client_index */ u32 client_index; @@ -32,11 +32,3 @@ define vxlan_gpe_ioam_export_enable_disable { /* Src ip address */ }; - -define vxlan_gpe_ioam_export_enable_disable_reply { - /* From the request */ - u32 context; - - /* Return value, zero means all OK */ - i32 retval; -}; \ No newline at end of file diff --git a/src/plugins/ioam/export/ioam_export.api b/src/plugins/ioam/export/ioam_export.api index f22d9fc8..bb830561 100644 --- a/src/plugins/ioam/export/ioam_export.api +++ b/src/plugins/ioam/export/ioam_export.api @@ -16,7 +16,7 @@ /* Define a simple binary API to control the feature */ -define ioam_export_ip6_enable_disable { +autoreply define ioam_export_ip6_enable_disable { /* Client identifier, set from api_main.my_client_index */ u32 client_index; @@ -32,11 +32,3 @@ define ioam_export_ip6_enable_disable { /* Src ip address */ }; - -define ioam_export_ip6_enable_disable_reply { - /* From the request */ - u32 context; - - /* Return value, zero means all OK */ - i32 retval; -}; diff --git a/src/plugins/ioam/ip6/ioam_cache.api b/src/plugins/ioam/ip6/ioam_cache.api index de50d57d..dd9c0186 100644 --- a/src/plugins/ioam/ip6/ioam_cache.api +++ b/src/plugins/ioam/ip6/ioam_cache.api @@ -16,7 +16,7 @@ /* API to control ioam caching */ -define ioam_cache_ip6_enable_disable { +autoreply define ioam_cache_ip6_enable_disable { /* Client identifier, set from api_main.my_client_index */ u32 client_index; @@ -27,11 +27,3 @@ define ioam_cache_ip6_enable_disable { u8 is_disable; }; - -define ioam_cache_ip6_enable_disable_reply { - /* From the request */ - u32 context; - - /* Return value, zero means all OK */ - i32 retval; -}; diff 
--git a/src/plugins/ioam/lib-pot/pot.api b/src/plugins/ioam/lib-pot/pot.api index fa2fc126..c377cde0 100644 --- a/src/plugins/ioam/lib-pot/pot.api +++ b/src/plugins/ioam/lib-pot/pot.api @@ -27,7 +27,7 @@ @param list_name_len - length of the name of this profile list @param list_name - name of this profile list */ -define pot_profile_add { +autoreply define pot_profile_add { u32 client_index; u32 context; u8 id; @@ -42,22 +42,12 @@ define pot_profile_add { u8 list_name[0]; }; -/** \brief Proof of Transit profile add / del response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define pot_profile_add_reply { - u32 context; - i32 retval; -}; - - /** \brief Proof of Transit(POT): Activate POT profile in the list @param id - id of the profile @param list_name_len - length of the name of this profile list @param list_name - name of this profile list */ -define pot_profile_activate { +autoreply define pot_profile_activate { u32 client_index; u32 context; u8 id; @@ -65,37 +55,19 @@ define pot_profile_activate { u8 list_name[0]; }; -/** \brief Proof of Transit profile activate response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define pot_profile_activate_reply { - u32 context; - i32 retval; -}; - /** \brief Delete POT Profile @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param list_name_len - length of the name of the profile list @param list_name - name of profile list to delete */ -define pot_profile_del { +autoreply define pot_profile_del { u32 client_index; u32 context; u8 list_name_len; u8 list_name[0]; }; -/** \brief Proof of Transit profile add / del response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define pot_profile_del_reply { - u32 context; - i32 retval; -}; - /** \brief Show POT Profiles @param 
client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/plugins/ioam/lib-trace/trace.api b/src/plugins/ioam/lib-trace/trace.api index cb958325..2f45c6e2 100644 --- a/src/plugins/ioam/lib-trace/trace.api +++ b/src/plugins/ioam/lib-trace/trace.api @@ -22,7 +22,7 @@ @param trace_tsp- Timestamp resolution @param app_data - Application specific opaque */ -define trace_profile_add { +autoreply define trace_profile_add { u32 client_index; u32 context; u8 trace_type; @@ -32,37 +32,15 @@ define trace_profile_add { u32 app_data; }; -/** \brief Trace profile add / del response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define trace_profile_add_reply { - u32 context; - i32 retval; -}; - - - /** \brief Delete trace Profile @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request */ -define trace_profile_del { +autoreply define trace_profile_del { u32 client_index; u32 context; }; -/** \brief Trace profile add / del response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define trace_profile_del_reply { - u32 context; - i32 retval; -}; - - - /** \brief Show trace Profile @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api b/src/plugins/ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api index 056529a4..a6761f07 100644 --- a/src/plugins/ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api +++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api @@ -24,7 +24,7 @@ @param trace_enable - iOAM Trace enabled or not flag */ -define vxlan_gpe_ioam_enable { +autoreply define vxlan_gpe_ioam_enable { u32 client_index; u32 context; u16 id; @@ -33,38 +33,18 @@ define vxlan_gpe_ioam_enable { u8 trace_enable; }; -/** \brief iOAM Over 
VxLAN-GPE - Set iOAM transport for VXLAN-GPE reply - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define vxlan_gpe_ioam_enable_reply { - u32 context; - i32 retval; -}; - - /** \brief iOAM for VxLAN-GPE disable @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param id - profile id */ -define vxlan_gpe_ioam_disable +autoreply define vxlan_gpe_ioam_disable { u32 client_index; u32 context; u16 id; }; -/** \brief vxlan_gpe_ioam disable response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define vxlan_gpe_ioam_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief Enable iOAM for a VNI (VXLAN-GPE) @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -73,7 +53,7 @@ define vxlan_gpe_ioam_disable_reply @param remote - IPv4/6 Address of the remote VTEP */ -define vxlan_gpe_ioam_vni_enable { +autoreply define vxlan_gpe_ioam_vni_enable { u32 client_index; u32 context; u32 vni; @@ -82,18 +62,6 @@ define vxlan_gpe_ioam_vni_enable { u8 is_ipv6; }; -/** \brief Reply to enable iOAM for a VNI (VXLAN-GPE) - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param retval - return value for request - -*/ -define vxlan_gpe_ioam_vni_enable_reply { - u32 client_index; - u32 context; - i32 retval; -}; - /** \brief Disable iOAM for a VNI (VXLAN-GPE) @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -102,7 +70,7 @@ define vxlan_gpe_ioam_vni_enable_reply { @param remote - IPv4/6 Address of the remote VTEP */ -define vxlan_gpe_ioam_vni_disable { +autoreply define vxlan_gpe_ioam_vni_disable { u32 client_index; u32 context; u32 vni; @@ -111,19 +79,6 @@ define vxlan_gpe_ioam_vni_disable { 
u8 is_ipv6; }; -/** \brief Reply to disable iOAM for a VNI (VXLAN-GPE) - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param retval - return value for request - -*/ -define vxlan_gpe_ioam_vni_disable_reply { - u32 client_index; - u32 context; - i32 retval; -}; - - /** \brief Enable iOAM for a VXLAN-GPE transit @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -131,7 +86,7 @@ define vxlan_gpe_ioam_vni_disable_reply { @param outer_fib_index- FIB index */ -define vxlan_gpe_ioam_transit_enable { +autoreply define vxlan_gpe_ioam_transit_enable { u32 client_index; u32 context; u32 outer_fib_index; @@ -139,18 +94,6 @@ define vxlan_gpe_ioam_transit_enable { u8 is_ipv6; }; -/** \brief Reply to enable iOAM for VXLAN-GPE transit - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param retval - return value for request - -*/ -define vxlan_gpe_ioam_transit_enable_reply { - u32 client_index; - u32 context; - i32 retval; -}; - /** \brief Disable iOAM for VXLAN-GPE transit @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -158,7 +101,7 @@ define vxlan_gpe_ioam_transit_enable_reply { @param outer_fib_index- FIB index */ -define vxlan_gpe_ioam_transit_disable { +autoreply define vxlan_gpe_ioam_transit_disable { u32 client_index; u32 context; u32 outer_fib_index; @@ -166,16 +109,3 @@ define vxlan_gpe_ioam_transit_disable { u8 is_ipv6; }; -/** \brief Reply to disable iOAM for VXLAN-GPE transit - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param retval - return value for request - -*/ -define vxlan_gpe_ioam_transit_disable_reply { - u32 client_index; - u32 context; - i32 retval; -}; - - diff --git a/src/plugins/lb/lb.api 
b/src/plugins/lb/lb.api index 39ee3c8f..32cc669b 100644 --- a/src/plugins/lb/lb.api +++ b/src/plugins/lb/lb.api @@ -8,7 +8,7 @@ @param flow_timeout - Time in seconds after which, if no packet is received for a given flow, the flow is removed from the established flow table. */ -define lb_conf +autoreply define lb_conf { u32 client_index; u32 context; @@ -18,11 +18,6 @@ define lb_conf u32 flow_timeout; }; -define lb_conf_reply { - u32 context; - i32 retval; -}; - /** \brief Add a virtual address (or prefix) @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -33,7 +28,7 @@ define lb_conf_reply { for this VIP (must be power of 2). @param is_del - The VIP should be removed. */ -define lb_add_del_vip { +autoreply define lb_add_del_vip { u32 client_index; u32 context; u8 ip_prefix[16]; @@ -43,11 +38,6 @@ define lb_add_del_vip { u8 is_del; }; -define lb_add_del_vip_reply { - u32 context; - i32 retval; -}; - /** \brief Add an application server for a given VIP @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -56,7 +46,7 @@ define lb_add_del_vip_reply { @param as_address - The application server address (IPv4 in lower order 32 bits). @param is_del - The AS should be removed. 
*/ -define lb_add_del_as { +autoreply define lb_add_del_as { u32 client_index; u32 context; u8 vip_ip_prefix[16]; @@ -64,8 +54,3 @@ define lb_add_del_as { u8 as_address[16]; u8 is_del; }; - -define lb_add_del_as_reply { - u32 context; - i32 retval; -}; diff --git a/src/plugins/memif/memif.api b/src/plugins/memif/memif.api index 6f946421..95e016c3 100644 --- a/src/plugins/memif/memif.api +++ b/src/plugins/memif/memif.api @@ -57,7 +57,7 @@ define memif_create_reply @param context - sender context, to match reply w/ request @param sw_if_index - software index of the interface to delete */ -define memif_delete +autoreply define memif_delete { u32 client_index; u32 context; @@ -65,16 +65,6 @@ define memif_delete u32 sw_if_index; }; -/** \brief Delete host-interface response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define memif_delete_reply -{ - u32 context; - i32 retval; -}; - /** \brief Memory interface details structure @param context - sender context, to match reply w/ request (memif_dump) @param sw_if_index - index of the interface diff --git a/src/plugins/snat/snat.api b/src/plugins/snat/snat.api index 9689f5f9..573b6753 100644 --- a/src/plugins/snat/snat.api +++ b/src/plugins/snat/snat.api @@ -29,7 +29,7 @@ @param vrf_id - VRF id of tenant, ~0 means independent of VRF @param is_add - 1 if add, 0 if delete */ -define snat_add_address_range { +autoreply define snat_add_address_range { u32 client_index; u32 context; u8 is_ip4; @@ -39,15 +39,6 @@ define snat_add_address_range { u8 is_add; }; -/** \brief Add S-NAT address range reply - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define snat_add_address_range_reply { - u32 context; - i32 retval; -}; - /** \brief Dump S-NAT addresses @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -77,7 +68,7 @@ define snat_address_details { @param 
is_inside - 1 if inside, 0 if outside @param sw_if_index - software index of the interface */ -define snat_interface_add_del_feature { +autoreply define snat_interface_add_del_feature { u32 client_index; u32 context; u8 is_add; @@ -85,15 +76,6 @@ define snat_interface_add_del_feature { u32 sw_if_index; }; -/** \brief Enable/disable S-NAT feature on the interface reply - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define snat_interface_add_del_feature_reply { - u32 context; - i32 retval; -}; - /** \brief Dump interfaces with S-NAT feature @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -130,7 +112,7 @@ define snat_interface_details { used) @param vfr_id - VRF ID */ -define snat_add_static_mapping { +autoreply define snat_add_static_mapping { u32 client_index; u32 context; u8 is_add; @@ -145,15 +127,6 @@ define snat_add_static_mapping { u32 vrf_id; }; -/** \brief Add/delete S-NAT static mapping reply - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define snat_add_static_mapping_reply { - u32 context; - i32 retval; -}; - /** \brief Dump S-NAT static mappings @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -257,21 +230,12 @@ define snat_show_config_reply @param context - sender context, to match reply w/ request @param worker_mask - S-NAT workers mask */ -define snat_set_workers { +autoreply define snat_set_workers { u32 client_index; u32 context; u64 worker_mask; }; -/** \brief Set S-NAT workers reply - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define snat_set_workers_reply { - u32 context; - i32 retval; -}; - /** \brief Dump S-NAT workers @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -300,7 +264,7 @@ 
define snat_worker_details { @param is_add - 1 if add, 0 if delete @param sw_if_index - software index of the interface */ -define snat_add_del_interface_addr { +autoreply define snat_add_del_interface_addr { u32 client_index; u32 context; u8 is_add; @@ -308,15 +272,6 @@ define snat_add_del_interface_addr { u32 sw_if_index; }; -/** \brief Add/delete S-NAT pool address from specific interfce reply - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define snat_add_del_interface_addr_reply { - u32 context; - i32 retval; -}; - /** \brief Dump S-NAT pool addresses interfaces @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -342,7 +297,7 @@ define snat_interface_addr_details { @param src_port - source port number @param enable - 1 if enable, 0 if disable */ -define snat_ipfix_enable_disable { +autoreply define snat_ipfix_enable_disable { u32 client_index; u32 context; u32 domain_id; @@ -350,15 +305,6 @@ define snat_ipfix_enable_disable { u8 enable; }; -/** \brief Enable/disable S-NAT IPFIX logging reply - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define snat_ipfix_enable_disable_reply { - u32 context; - i32 retval; -}; - /** \brief Dump S-NAT users @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -437,7 +383,7 @@ define snat_user_session_details { @param out_addr - outside IP address @param out_addr - outside IP address prefix length */ -define snat_add_det_map { +autoreply define snat_add_det_map { u32 client_index; u32 context; u8 is_add; @@ -449,15 +395,6 @@ define snat_add_det_map { u8 out_plen; }; -/** \brief Add/delete S-NAT deterministic mapping reply - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define snat_add_det_map_reply { - u32 context; - i32 retval; -}; - /** \brief Get 
outside address and port range from inside address @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -556,7 +493,7 @@ define snat_det_map_details { @param tcp_transitory - TCP transitory timeout (default 240sec) @param icmp - ICMP timeout (default 60sec) */ -define snat_det_set_timeouts { +autoreply define snat_det_set_timeouts { u32 client_index; u32 context; u32 udp; @@ -565,15 +502,6 @@ define snat_det_set_timeouts { u32 icmp; }; -/** \brief Set values of timeouts for deterministic NAT reply - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define snat_det_set_timeouts_reply { - u32 context; - i32 retval; -}; - /** \brief Get values of timeouts for deterministic NAT (seconds) @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/tools/vppapigen/gram.y b/src/tools/vppapigen/gram.y index de26af8d..9cea6023 100644 --- a/src/tools/vppapigen/gram.y +++ b/src/tools/vppapigen/gram.y @@ -38,7 +38,7 @@ void generate (YYSTYPE); %token NAME RPAR LPAR SEMI LBRACK RBRACK NUMBER PRIMTYPE BARF %token TPACKED DEFINE LCURLY RCURLY STRING UNION %token HELPER_STRING COMMA -%token NOVERSION MANUAL_PRINT MANUAL_ENDIAN TYPEONLY DONT_TRACE +%token NOVERSION MANUAL_PRINT MANUAL_ENDIAN TYPEONLY DONT_TRACE AUTOREPLY %% @@ -64,6 +64,7 @@ flag: | MANUAL_ENDIAN {$$ = $1;} | DONT_TRACE {$$ = $1;} | TYPEONLY {$$ = $1;} + | AUTOREPLY {$$ = $1;} ; defn: DEFINE NAME LCURLY defbody RCURLY SEMI diff --git a/src/tools/vppapigen/lex.c b/src/tools/vppapigen/lex.c index 733942ad..e6358143 100644 --- a/src/tools/vppapigen/lex.c +++ b/src/tools/vppapigen/lex.c @@ -27,6 +27,9 @@ #include "lex.h" #include "node.h" #include "tools/vppapigen/gram.h" +#include +#include +#include FILE *ifp, *ofp, *pythonfp, *jsonfp; char *vlib_app_name = "vpp"; @@ -38,6 +41,9 @@ int current_filename_allocated; unsigned long 
input_crc; unsigned long message_crc; int yydebug; +char *push_input_fifo; +char saved_ungetc_char; +char have_ungetc_char; /* * lexer variable definitions @@ -469,9 +475,50 @@ static char namebuf [MAXNAME]; static inline char getc_char (FILE *ifp) { + char rv; + + if (have_ungetc_char) { + have_ungetc_char = 0; + return saved_ungetc_char; + } + + if (clib_fifo_elts (push_input_fifo)) { + clib_fifo_sub1(push_input_fifo, rv); + return (rv & 0x7f); + } return ((char)(getc(ifp) & 0x7f)); } +u32 fe (char *fifo) +{ + return clib_fifo_elts (fifo); +} + +static inline void +ungetc_char (char c, FILE *ifp) +{ + saved_ungetc_char = c; + have_ungetc_char = 1; +} + +void autoreply (void *np_arg) +{ + static u8 *s; + node_t *np = (node_t *)np_arg; + int i; + + vec_reset_length (s); + + s = format (0, " define %s_reply\n", (char *)(np->data[0])); + s = format (s, "{\n"); + s = format (s, " u32 context;\n"); + s = format (s, " i32 retval;\n"); + s = format (s, "};\n"); + + for (i = 0; i < vec_len (s); i++) + clib_fifo_add1 (push_input_fifo, s[i]); +} + /* * yylex (well, yylex_1: The real yylex below does crc-hackery) */ @@ -595,7 +642,7 @@ static int yylex_1 (void) return (EOF); if (!isalnum (c) && c != '_') { - ungetc (c, ifp); + ungetc_char (c, ifp); namebuf [nameidx] = 0; the_lexer_state = START_STATE; return (name_check (namebuf, &yylval)); @@ -616,7 +663,7 @@ static int yylex_1 (void) return (EOF); if (!isdigit (c)) { - ungetc (c, ifp); + ungetc_char (c, ifp); namebuf [nameidx] = 0; the_lexer_state = START_STATE; yylval = (void *) atol(namebuf); @@ -889,6 +936,7 @@ int yylex (void) case MANUAL_ENDIAN: code = 276; break; case TYPEONLY: code = 278; break; case DONT_TRACE: code = 279; break; + case AUTOREPLY: code = 280; break; case EOF: code = ~0; break; /* hysterical compatibility */ @@ -929,6 +977,7 @@ static struct keytab { } keytab [] = /* Keep the table sorted, binary search used below! 
*/ { + {"autoreply", NODE_AUTOREPLY}, {"define", NODE_DEFINE}, {"dont_trace", NODE_DONT_TRACE}, {"f64", NODE_F64}, @@ -1005,6 +1054,10 @@ static int name_check (const char *s, YYSTYPE *token_value) *token_value = (YYSTYPE) NODE_FLAG_DONT_TRACE; return(DONT_TRACE); + case NODE_AUTOREPLY: + *token_value = (YYSTYPE) NODE_FLAG_AUTOREPLY; + return(AUTOREPLY); + case NODE_NOVERSION: return(NOVERSION); diff --git a/src/tools/vppapigen/lex.h b/src/tools/vppapigen/lex.h index a0fdc735..275cf685 100644 --- a/src/tools/vppapigen/lex.h +++ b/src/tools/vppapigen/lex.h @@ -24,6 +24,7 @@ extern int yylex (void); extern void yyerror (char *); extern int yyparse (void); +extern void autoreply (void *); #ifndef YYSTYPE #define YYSTYPE void * diff --git a/src/tools/vppapigen/node.c b/src/tools/vppapigen/node.c index 359ac9c9..9f234037 100644 --- a/src/tools/vppapigen/node.c +++ b/src/tools/vppapigen/node.c @@ -1050,6 +1050,11 @@ YYSTYPE set_flags(YYSTYPE a1, YYSTYPE a2) flags = (int)(uword) a1; np->flags |= flags; + + /* Generate a foo_reply_t right here */ + if (flags & NODE_FLAG_AUTOREPLY) + autoreply(np); + return (a2); } /* diff --git a/src/tools/vppapigen/node.h b/src/tools/vppapigen/node.h index 297d6036..65bd5d10 100644 --- a/src/tools/vppapigen/node.h +++ b/src/tools/vppapigen/node.h @@ -53,6 +53,7 @@ enum node_subclass { /* WARNING: indices must match the vft... 
*/ NODE_MANUAL_PRINT, NODE_MANUAL_ENDIAN, NODE_DONT_TRACE, + NODE_AUTOREPLY, }; enum passid { @@ -84,6 +85,7 @@ typedef struct node_ { #define NODE_FLAG_MANUAL_ENDIAN (1<<1) #define NODE_FLAG_TYPEONLY (1<<3) #define NODE_FLAG_DONT_TRACE (1<<4) +#define NODE_FLAG_AUTOREPLY (1<<5) typedef struct node_vft_ { void (*print)(struct node_ *); diff --git a/src/vlibmemory/memclnt.api b/src/vlibmemory/memclnt.api index c38b483c..32e51407 100644 --- a/src/vlibmemory/memclnt.api +++ b/src/vlibmemory/memclnt.api @@ -72,7 +72,7 @@ define memclnt_read_timeout { /* * RPC */ -define rpc_call { +autoreply define rpc_call { u32 client_index; u32 context; u64 function; @@ -82,11 +82,6 @@ define rpc_call { u8 data[0]; }; -define rpc_reply { - i32 retval; - u32 context; -}; - /* * Lookup message-ID base by name */ diff --git a/src/vlibmemory/memory_vlib.c b/src/vlibmemory/memory_vlib.c index 7a536ee8..43574dea 100644 --- a/src/vlibmemory/memory_vlib.c +++ b/src/vlibmemory/memory_vlib.c @@ -1275,7 +1275,7 @@ VLIB_CLI_COMMAND (cli_show_api_plugin_command, static) = { static void vl_api_rpc_call_t_handler (vl_api_rpc_call_t * mp) { - vl_api_rpc_reply_t *rmp; + vl_api_rpc_call_reply_t *rmp; int (*fp) (void *); i32 rv = 0; vlib_main_t *vm = vlib_get_main (); @@ -1305,7 +1305,7 @@ vl_api_rpc_call_t_handler (vl_api_rpc_call_t * mp) if (q) { rmp = vl_msg_api_alloc_as_if_client (sizeof (*rmp)); - rmp->_vl_msg_id = ntohs (VL_API_RPC_REPLY); + rmp->_vl_msg_id = ntohs (VL_API_RPC_CALL_REPLY); rmp->context = mp->context; rmp->retval = rv; vl_msg_api_send_shmem (q, (u8 *) & rmp); @@ -1318,7 +1318,7 @@ vl_api_rpc_call_t_handler (vl_api_rpc_call_t * mp) } static void -vl_api_rpc_reply_t_handler (vl_api_rpc_reply_t * mp) +vl_api_rpc_call_reply_t_handler (vl_api_rpc_call_reply_t * mp) { clib_warning ("unimplemented"); } @@ -1415,7 +1415,7 @@ vl_api_trace_plugin_msg_ids_t_handler (vl_api_trace_plugin_msg_ids_t * mp) #define foreach_rpc_api_msg \ _(RPC_CALL,rpc_call) \ -_(RPC_REPLY,rpc_reply) 
+_(RPC_CALL_REPLY,rpc_call_reply) #define foreach_plugin_trace_msg \ _(TRACE_PLUGIN_MSG_IDS,trace_plugin_msg_ids) diff --git a/src/vnet/bfd/bfd.api b/src/vnet/bfd/bfd.api index 2cdcfad3..7bcaa4c3 100644 --- a/src/vnet/bfd/bfd.api +++ b/src/vnet/bfd/bfd.api @@ -18,43 +18,23 @@ @param context - sender context, to match reply w/ request @param sw_if_index - interface to use as echo source */ -define bfd_udp_set_echo_source +autoreply define bfd_udp_set_echo_source { u32 client_index; u32 context; u32 sw_if_index; }; -/** \brief Set BFD feature response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define bfd_udp_set_echo_source_reply -{ - u32 context; - i32 retval; -}; - /** \brief Delete BFD echo source @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request */ -define bfd_udp_del_echo_source +autoreply define bfd_udp_del_echo_source { u32 client_index; u32 context; }; -/** \brief Delete BFD echo source response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define bfd_udp_del_echo_source_reply -{ - u32 context; - i32 retval; -}; - /** \brief Add UDP BFD session on interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -69,7 +49,7 @@ define bfd_udp_del_echo_source_reply @param bfd_key_id - key id sent out in BFD packets (if is_authenticated) @param conf_key_id - id of already configured key (if is_authenticated) */ -define bfd_udp_add +autoreply define bfd_udp_add { u32 client_index; u32 context; @@ -85,16 +65,6 @@ define bfd_udp_add u32 conf_key_id; }; -/** \brief Add UDP BFD session response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define bfd_udp_add_reply -{ - u32 context; - i32 retval; -}; - /** \brief Modify UDP BFD session on 
interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -106,7 +76,7 @@ define bfd_udp_add_reply @param is_ipv6 - local_addr, peer_addr are IPv6 if non-zero, otherwise IPv4 @param detect_mult - detect multiplier (# of packets missed before connection goes down) */ -define bfd_udp_mod +autoreply define bfd_udp_mod { u32 client_index; u32 context; @@ -119,16 +89,6 @@ define bfd_udp_mod u8 detect_mult; }; -/** \brief Modify UDP BFD session response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define bfd_udp_mod_reply -{ - u32 context; - i32 retval; -}; - /** \brief Delete UDP BFD session on interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -137,7 +97,7 @@ define bfd_udp_mod_reply @param peer_addr - peer address @param is_ipv6 - local_addr, peer_addr are IPv6 if non-zero, otherwise IPv4 */ -define bfd_udp_del +autoreply define bfd_udp_del { u32 client_index; u32 context; @@ -147,16 +107,6 @@ define bfd_udp_del u8 is_ipv6; }; -/** \brief Delete UDP BFD session response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define bfd_udp_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Get all BFD sessions @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -206,7 +156,7 @@ define bfd_udp_session_details @param is_ipv6 - local_addr, peer_addr are IPv6 if non-zero, otherwise IPv4 @param admin_up_down - set the admin state, 1 = up, 0 = down */ -define bfd_udp_session_set_flags +autoreply define bfd_udp_session_set_flags { u32 client_index; u32 context; @@ -217,23 +167,13 @@ define bfd_udp_session_set_flags u8 admin_up_down; }; -/** \brief Reply to bfd_udp_session_set_flags - @param context - sender context which was 
passed in the request - @param retval - return code of the set flags request -*/ -define bfd_udp_session_set_flags_reply -{ - u32 context; - i32 retval; -}; - /** \brief Register for BFD events @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param enable_disable - 1 => register for events, 0 => cancel registration @param pid - sender's pid */ -define want_bfd_events +autoreply define want_bfd_events { u32 client_index; u32 context; @@ -241,16 +181,6 @@ define want_bfd_events u32 pid; }; -/** \brief Reply for BFD events registration - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define want_bfd_events_reply -{ - u32 context; - i32 retval; -}; - /** \brief BFD UDP - add/replace key to configuration @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -259,7 +189,7 @@ define want_bfd_events_reply @param auth_type - authentication type (RFC 5880/4.1/Auth Type) @param key - key data */ -define bfd_auth_set_key +autoreply define bfd_auth_set_key { u32 client_index; u32 context; @@ -269,16 +199,6 @@ define bfd_auth_set_key u8 key[20]; }; -/** \brief BFD UDP - add/replace key reply - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define bfd_auth_set_key_reply -{ - u32 context; - i32 retval; -}; - /** \brief BFD UDP - delete key from configuration @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -286,23 +206,13 @@ define bfd_auth_set_key_reply @param key_len - length of key (must be non-zero) @param key - key data */ -define bfd_auth_del_key +autoreply define bfd_auth_del_key { u32 client_index; u32 context; u32 conf_key_id; }; -/** \brief BFD UDP - delete key reply - @param context - returned sender context, to match reply w/ request - @param retval - 
return code -*/ -define bfd_auth_del_key_reply -{ - u32 context; - i32 retval; -}; - /** \brief Get a list of configured authentication keys @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -338,7 +248,7 @@ define bfd_auth_keys_details @param bfd_key_id - key id sent out in BFD packets @param conf_key_id - id of already configured key */ -define bfd_udp_auth_activate +autoreply define bfd_udp_auth_activate { u32 client_index; u32 context; @@ -351,16 +261,6 @@ define bfd_udp_auth_activate u32 conf_key_id; }; -/** \brief BFD UDP - activate/change authentication reply - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define bfd_udp_auth_activate_reply -{ - u32 context; - i32 retval; -}; - /** \brief BFD UDP - deactivate authentication @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -370,7 +270,7 @@ define bfd_udp_auth_activate_reply @param is_ipv6 - local_addr, peer_addr are IPv6 if non-zero, otherwise IPv4 @param is_delayed - change is applied once peer applies the change (on first received non-authenticated packet) */ -define bfd_udp_auth_deactivate +autoreply define bfd_udp_auth_deactivate { u32 client_index; u32 context; @@ -381,16 +281,6 @@ define bfd_udp_auth_deactivate u8 is_delayed; }; -/** \brief BFD UDP - deactivate authentication reply - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define bfd_udp_auth_deactivate_reply -{ - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") diff --git a/src/vnet/classify/classify.api b/src/vnet/classify/classify.api index 51ebd6c8..cacb9bed 100644 --- a/src/vnet/classify/classify.api +++ b/src/vnet/classify/classify.api @@ -92,7 +92,7 @@ define classify_add_del_table_reply VRF id if action is 1 or 2. 
@param match[] - for add, match value for session, required */ -define classify_add_del_session +autoreply define classify_add_del_session { u32 client_index; u32 context; @@ -106,16 +106,6 @@ define classify_add_del_session u8 match[0]; }; -/** \brief Classify add / del session response - @param context - sender context, to match reply w/ request - @param retval - return code for the add/del session request -*/ -define classify_add_del_session_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set/unset policer classify interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -127,7 +117,7 @@ define classify_add_del_session_reply Note: User is recommeneded to use just one valid table_index per call. (ip4_table_index, ip6_table_index, or l2_table_index) */ -define policer_classify_set_interface +autoreply define policer_classify_set_interface { u32 client_index; u32 context; @@ -138,16 +128,6 @@ define policer_classify_set_interface u8 is_add; }; -/** \brief Set/unset policer classify interface response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define policer_classify_set_interface_reply -{ - u32 context; - i32 retval; -}; - /** \brief Get list of policer classify interfaces and tables @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -308,7 +288,7 @@ define classify_session_details Note: User is recommeneded to use just one valid table_index per call. 
(ip4_table_index, ip6_table_index, or l2_table_index) */ -define flow_classify_set_interface { +autoreply define flow_classify_set_interface { u32 client_index; u32 context; u32 sw_if_index; @@ -317,15 +297,6 @@ define flow_classify_set_interface { u8 is_add; }; -/** \brief Set/unset flow classify interface response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define flow_classify_set_interface_reply { - u32 context; - i32 retval; -}; - /** \brief Get list of flow classify interfaces and tables @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -353,4 +324,4 @@ define flow_classify_details { * eval: (c-set-style "gnu") * End: */ - \ No newline at end of file + diff --git a/src/vnet/cop/cop.api b/src/vnet/cop/cop.api index b34dae80..69316001 100644 --- a/src/vnet/cop/cop.api +++ b/src/vnet/cop/cop.api @@ -20,7 +20,7 @@ @param enable_disable - 1 => enable, 0 => disable */ -define cop_interface_enable_disable +autoreply define cop_interface_enable_disable { u32 client_index; u32 context; @@ -28,17 +28,6 @@ define cop_interface_enable_disable u8 enable_disable; }; -/** \brief cop: interface enable/disable junk filtration reply - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ - -define cop_interface_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief cop: enable/disable whitelist filtration features on an interface Note: the supplied fib_id must match in order to remove the feature! 
@@ -51,7 +40,7 @@ define cop_interface_enable_disable_reply @param default_cop - 1 => enable non-ip4, non-ip6 filtration 0=> disable it */ -define cop_whitelist_enable_disable +autoreply define cop_whitelist_enable_disable { u32 client_index; u32 context; @@ -62,17 +51,6 @@ define cop_whitelist_enable_disable u8 default_cop; }; -/** \brief cop: interface enable/disable junk filtration reply - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ - -define cop_whitelist_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief get_node_graph - get a copy of the vpp node graph including the current set of graph arcs. @@ -85,4 +63,4 @@ define cop_whitelist_enable_disable_reply * eval: (c-set-style "gnu") * End: */ - \ No newline at end of file + diff --git a/src/vnet/devices/af_packet/af_packet.api b/src/vnet/devices/af_packet/af_packet.api index 9fb2a207..8d40ad60 100644 --- a/src/vnet/devices/af_packet/af_packet.api +++ b/src/vnet/devices/af_packet/af_packet.api @@ -46,7 +46,7 @@ define af_packet_create_reply @param context - sender context, to match reply w/ request @param host_if_name - interface name */ -define af_packet_delete +autoreply define af_packet_delete { u32 client_index; u32 context; @@ -54,16 +54,6 @@ define af_packet_delete u8 host_if_name[64]; }; -/** \brief Delete host-interface response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define af_packet_delete_reply -{ - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") diff --git a/src/vnet/devices/netmap/netmap.api b/src/vnet/devices/netmap/netmap.api index 377ccffd..8dc698b9 100644 --- a/src/vnet/devices/netmap/netmap.api +++ b/src/vnet/devices/netmap/netmap.api @@ -22,7 +22,7 @@ @param is_pipe - is pipe @param is_master - 0=slave, 1=master */ -define netmap_create +autoreply define netmap_create { u32 client_index; u32 context; @@ -34,22 +34,12 
@@ define netmap_create u8 is_master; }; -/** \brief Create netmap response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define netmap_create_reply -{ - u32 context; - i32 retval; -}; - /** \brief Delete netmap @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param netmap_if_name - interface name */ -define netmap_delete +autoreply define netmap_delete { u32 client_index; u32 context; @@ -57,16 +47,6 @@ define netmap_delete u8 netmap_if_name[64]; }; -/** \brief Delete netmap response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define netmap_delete_reply -{ - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") diff --git a/src/vnet/devices/virtio/vhost_user.api b/src/vnet/devices/virtio/vhost_user.api index 4f604e45..df7ce7ab 100644 --- a/src/vnet/devices/virtio/vhost_user.api +++ b/src/vnet/devices/virtio/vhost_user.api @@ -53,7 +53,7 @@ define create_vhost_user_if_reply @param sock_filename - unix socket filename, used to speak with frontend @param operation_mode - polling=0, interrupt=1, or adaptive=2 */ -define modify_vhost_user_if +autoreply define modify_vhost_user_if { u32 client_index; u32 context; @@ -65,36 +65,16 @@ define modify_vhost_user_if u8 operation_mode; }; -/** \brief vhost-user interface modify response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define modify_vhost_user_if_reply -{ - u32 context; - i32 retval; -}; - /** \brief vhost-user interface delete request @param client_index - opaque cookie to identify the sender */ -define delete_vhost_user_if +autoreply define delete_vhost_user_if { u32 client_index; u32 context; u32 sw_if_index; }; -/** \brief vhost-user interface delete response - @param context - sender context, to match reply w/ request - 
@param retval - return code for the request -*/ -define delete_vhost_user_if_reply -{ - u32 context; - i32 retval; -}; - /** \brief Vhost-user interface details structure (fix this) @param sw_if_index - index of the interface @param interface_name - name of interface diff --git a/src/vnet/dhcp/dhcp.api b/src/vnet/dhcp/dhcp.api index 2db85a79..eb0b070d 100644 --- a/src/vnet/dhcp/dhcp.api +++ b/src/vnet/dhcp/dhcp.api @@ -24,7 +24,7 @@ @param dhcp_server[] - server address @param dhcp_src_address[] - */ -define dhcp_proxy_config +autoreply define dhcp_proxy_config { u32 client_index; u32 context; @@ -36,16 +36,6 @@ define dhcp_proxy_config u8 dhcp_src_address[16]; }; -/** \brief DHCP Proxy config response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define dhcp_proxy_config_reply -{ - u32 context; - i32 retval; -}; - /** \brief DHCP Proxy set / unset vss request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -55,7 +45,7 @@ define dhcp_proxy_config_reply @param is_ipv6 - ip6 if non-zero, else ip4 @param is_add - set vss if non-zero, else delete */ -define dhcp_proxy_set_vss +autoreply define dhcp_proxy_set_vss { u32 client_index; u32 context; @@ -66,16 +56,6 @@ define dhcp_proxy_set_vss u8 is_add; }; -/** \brief DHCP proxy set / unset vss response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define dhcp_proxy_set_vss_reply -{ - u32 context; - i32 retval; -}; - /** \brief DHCP Client config add / del request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -86,7 +66,7 @@ define dhcp_proxy_set_vss_reply via dhcp_compl_event API message if non-zero @param pid - sender's pid */ -define dhcp_client_config +autoreply define dhcp_client_config { u32 client_index; u32 context; @@ -97,16 +77,6 @@ define 
dhcp_client_config u32 pid; }; -/** \brief DHCP Client config response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define dhcp_client_config_reply -{ - u32 context; - i32 retval; -}; - /** \brief Tell client about a DHCP completion event @param client_index - opaque cookie to identify the sender @param pid - client pid registered to receive notification @@ -162,4 +132,4 @@ manual_endian manual_print define dhcp_proxy_details * Local Variables: * eval: (c-set-style "gnu") * End: - */ \ No newline at end of file + */ diff --git a/src/vnet/flow/flow.api b/src/vnet/flow/flow.api index 0e0f99bf..1c5e8c5c 100644 --- a/src/vnet/flow/flow.api +++ b/src/vnet/flow/flow.api @@ -24,7 +24,7 @@ @param template_interval - number of seconds after which to resend template @param udp_checksum - UDP checksum calculation enable flag */ -define set_ipfix_exporter +autoreply define set_ipfix_exporter { u32 client_index; u32 context; @@ -37,15 +37,6 @@ define set_ipfix_exporter u8 udp_checksum; }; -/** \brief Reply to IPFIX exporter configure request - @param context - sender context which was passed in the request -*/ -define set_ipfix_exporter_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPFIX exporter dump request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -84,22 +75,13 @@ define ipfix_exporter_details @param domain_id - domain ID reported in IPFIX messages for classify stream @param src_port - source port of UDP session for classify stream */ -define set_ipfix_classify_stream { +autoreply define set_ipfix_classify_stream { u32 client_index; u32 context; u32 domain_id; u16 src_port; }; -/** \brief IPFIX classify stream configure response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define set_ipfix_classify_stream_reply { - u32 context; - i32 retval; -}; - /** \brief 
IPFIX classify stream dump request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -127,7 +109,7 @@ define ipfix_classify_stream_details { @param ip_version - version of IP used in the classifier table @param transport_protocol - transport protocol used in the classifier table or 255 for unspecified */ -define ipfix_classify_table_add_del { +autoreply define ipfix_classify_table_add_del { u32 client_index; u32 context; u32 table_id; @@ -136,14 +118,6 @@ define ipfix_classify_table_add_del { u8 is_add; }; -/** \brief IPFIX add classifier table response - @param context - sender context which was passed in the request -*/ -define ipfix_classify_table_add_del_reply { - u32 context; - i32 retval; -}; - /** \brief IPFIX classify tables dump request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/interface.api b/src/vnet/interface.api index 85fd73fb..9df63f18 100644 --- a/src/vnet/interface.api +++ b/src/vnet/interface.api @@ -6,7 +6,7 @@ @param link_up_down - Oper state sent on change event, not used in config. 
@param deleted - interface was deleted */ -define sw_interface_set_flags +autoreply define sw_interface_set_flags { u32 client_index; u32 context; @@ -17,23 +17,13 @@ define sw_interface_set_flags u8 deleted; }; -/** \brief Reply to sw_interface_set_flags - @param context - sender context which was passed in the request - @param retval - return code of the set flags request -*/ -define sw_interface_set_flags_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set interface MTU @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - index of the interface to set MTU on @param mtu - MTU */ -define sw_interface_set_mtu +autoreply define sw_interface_set_mtu { u32 client_index; u32 context; @@ -41,23 +31,13 @@ define sw_interface_set_mtu u16 mtu; }; -/** \brief Reply to sw_interface_set_mtu - @param context - sender context which was passed in the request - @param retval - return code of the set flags request -*/ -define sw_interface_set_mtu_reply -{ - u32 context; - i32 retval; -}; - /** \brief Register for interface events @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param enable_disable - 1 => register for events, 0 => cancel registration @param pid - sender's pid */ -define want_interface_events +autoreply define want_interface_events { u32 client_index; u32 context; @@ -65,16 +45,6 @@ define want_interface_events u32 pid; }; -/** \brief Reply for interface events registration - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define want_interface_events_reply -{ - u32 context; - i32 retval; -}; - /** \brief Interface details structure (fix this) @param sw_if_index - index of the interface @param sup_sw_if_index - index of parent interface if any, else same as sw_if_index @@ -184,7 +154,7 @@ define sw_interface_dump @param address_length - address length 
in bytes, 4 for ip4, 16 for ip6 @param address - array of address bytes */ -define sw_interface_add_del_address +autoreply define sw_interface_add_del_address { u32 client_index; u32 context; @@ -196,16 +166,6 @@ define sw_interface_add_del_address u8 address[16]; }; -/** \brief Reply to sw_interface_add_del_address - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define sw_interface_add_del_address_reply -{ - u32 context; - i32 retval; -}; - /** \brief Associate the specified interface with a fib table @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -213,7 +173,7 @@ define sw_interface_add_del_address_reply @param is_ipv6 - if non-zero ipv6, else ipv4 @param vrf_id - fib table/vrd id to associate the interface with */ -define sw_interface_set_table +autoreply define sw_interface_set_table { u32 client_index; u32 context; @@ -222,16 +182,6 @@ define sw_interface_set_table u32 vrf_id; }; -/** \brief Reply to sw_interface_set_table - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define sw_interface_set_table_reply -{ - u32 context; - i32 retval; -}; - /** \brief Get VRF id assigned to interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -280,7 +230,7 @@ define vnet_interface_counters @param unnumbered_sw_if_index - interface which will use the address @param is_add - if non-zero set the association, else unset it */ -define sw_interface_set_unnumbered +autoreply define sw_interface_set_unnumbered { u32 client_index; u32 context; @@ -289,38 +239,18 @@ define sw_interface_set_unnumbered u8 is_add; }; -/** \brief Set unnumbered interface add / del response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define sw_interface_set_unnumbered_reply -{ - 
u32 context; - i32 retval; -}; - /** \brief Clear interface statistics @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - index of the interface to clear statistics */ -define sw_interface_clear_stats +autoreply define sw_interface_clear_stats { u32 client_index; u32 context; u32 sw_if_index; }; -/** \brief Reply to sw_interface_clear_stats - @param context - sender context which was passed in the request - @param retval - return code of the set flags request -*/ -define sw_interface_clear_stats_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set / clear software interface tag @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -328,7 +258,7 @@ define sw_interface_clear_stats_reply @param add_del - 1 = add, 0 = delete @param tag - an ascii tag */ -define sw_interface_tag_add_del +autoreply define sw_interface_tag_add_del { u32 client_index; u32 context; @@ -337,23 +267,13 @@ define sw_interface_tag_add_del u8 tag[64]; }; -/** \brief Reply to set / clear software interface tag - @param context - sender context which was passed in the request - @param retval - return code for the request -*/ -define sw_interface_tag_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set an interface's MAC address @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - the interface whose MAC will be set @param mac_addr - the new MAC address */ -define sw_interface_set_mac_address +autoreply define sw_interface_set_mac_address { u32 client_index; u32 context; @@ -361,16 +281,6 @@ define sw_interface_set_mac_address u8 mac_address[6]; }; -/** \brief Reply to setting an interface MAC address request - @param context - sender context which was passed in the request - @param retval - return code for the request -*/ -define 
sw_interface_set_mac_address_reply -{ - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") diff --git a/src/vnet/ip/ip.api b/src/vnet/ip/ip.api index 6af1714f..7097a130 100644 --- a/src/vnet/ip/ip.api +++ b/src/vnet/ip/ip.api @@ -136,7 +136,7 @@ define ip_neighbor_details { @param mac_address - l2 address of the neighbor @param dst_address - ip4 or ip6 address of the neighbor */ -define ip_neighbor_add_del +autoreply define ip_neighbor_add_del { u32 client_index; u32 context; @@ -150,16 +150,6 @@ define ip_neighbor_add_del u8 dst_address[16]; }; -/** \brief Reply for IP Neighbor add / delete request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ip_neighbor_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set the ip flow hash config for a fib request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -172,7 +162,7 @@ define ip_neighbor_add_del_reply @param proto -if non-zero include proto in flow hash @param reverse - if non-zero include reverse in flow hash */ -define set_ip_flow_hash +autoreply define set_ip_flow_hash { u32 client_index; u32 context; @@ -186,16 +176,6 @@ define set_ip_flow_hash u8 reverse; }; -/** \brief Set the ip flow hash config for a fib response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define set_ip_flow_hash_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 router advertisement config request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -213,7 +193,7 @@ define set_ip_flow_hash_reply @param initial_count - @param initial_interval - */ -define sw_interface_ip6nd_ra_config +autoreply define sw_interface_ip6nd_ra_config { u32 client_index; u32 context; @@ -233,16 +213,6 @@ define sw_interface_ip6nd_ra_config u32 
initial_interval; }; -/** \brief IPv6 router advertisement config response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define sw_interface_ip6nd_ra_config_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 router advertisement prefix config request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -272,7 +242,7 @@ define sw_interface_ip6nd_ra_config_reply preferred [ADDRCONF]. A value of all one bits (0xffffffff) represents infinity. */ -define sw_interface_ip6nd_ra_prefix +autoreply define sw_interface_ip6nd_ra_prefix { u32 client_index; u32 context; @@ -289,16 +259,6 @@ define sw_interface_ip6nd_ra_prefix u32 pref_lifetime; }; -/** \brief IPv6 router advertisement prefix config response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define sw_interface_ip6nd_ra_prefix_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 ND proxy config @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -306,7 +266,7 @@ define sw_interface_ip6nd_ra_prefix_reply @param address - The address of the host for which to proxy for @param is_add - Adding or deleting */ -define ip6nd_proxy_add_del +autoreply define ip6nd_proxy_add_del { u32 client_index; u32 context; @@ -315,16 +275,6 @@ define ip6nd_proxy_add_del u8 address[16]; }; -/** \brief IPv6 ND proxy response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define ip6nd_proxy_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 ND proxy details returned after request @param context - sender context, to match reply w/ request @param retval - return code for the request @@ -355,7 +305,7 @@ define ip6nd_proxy_dump @param sw_if_index - interface used to reach neighbor @param 
enable - if non-zero enable ip6 on interface, else disable */ -define sw_interface_ip6_enable_disable +autoreply define sw_interface_ip6_enable_disable { u32 client_index; u32 context; @@ -363,23 +313,13 @@ define sw_interface_ip6_enable_disable u8 enable; /* set to true if enable */ }; -/** \brief IPv6 interface enable / disable response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define sw_interface_ip6_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 set link local address on interface request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - interface to set link local on @param address[] - the new link local address */ -define sw_interface_ip6_set_link_local_address +autoreply define sw_interface_ip6_set_link_local_address { u32 client_index; u32 context; @@ -387,16 +327,6 @@ define sw_interface_ip6_set_link_local_address u8 address[16]; }; -/** \brief IPv6 set link local address on interface response - @param context - sender context, to match reply w/ request - @param retval - error code for the request -*/ -define sw_interface_ip6_set_link_local_address_reply -{ - u32 context; - i32 retval; -}; - /** \brief Add / del route request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -422,7 +352,7 @@ define sw_interface_ip6_set_link_local_address_reply @param next_hop_out_label_stack - the next-hop output label stack, outer most first @param next_hop_via_label - The next-hop is a resolved via a local label */ -define ip_add_del_route +autoreply define ip_add_del_route { u32 client_index; u32 context; @@ -452,16 +382,6 @@ define ip_add_del_route u32 next_hop_out_label_stack[next_hop_n_out_labels]; }; -/** \brief Reply for add / del route request - @param context - returned sender context, to match reply w/ 
request - @param retval - return code -*/ -define ip_add_del_route_reply -{ - u32 context; - i32 retval; -}; - /** \brief Add / del route request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -470,7 +390,7 @@ define ip_add_del_route_reply FIXME */ -define ip_mroute_add_del +autoreply define ip_mroute_add_del { u32 client_index; u32 context; @@ -488,16 +408,6 @@ define ip_mroute_add_del u8 src_address[16]; }; -/** \brief Reply for add / del mroute request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ip_mroute_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Dump IP multicast fib table @param client_index - opaque cookie to identify the sender */ diff --git a/src/vnet/ipsec/ipsec.api b/src/vnet/ipsec/ipsec.api index ef090f84..203c5272 100644 --- a/src/vnet/ipsec/ipsec.api +++ b/src/vnet/ipsec/ipsec.api @@ -20,7 +20,7 @@ @param spd_id - SPD instance id (control plane allocated) */ -define ipsec_spd_add_del +autoreply define ipsec_spd_add_del { u32 client_index; u32 context; @@ -28,17 +28,6 @@ define ipsec_spd_add_del u32 spd_id; }; -/** \brief Reply for IPsec: Add/delete Security Policy Database entry - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ - -define ipsec_spd_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPsec: Add/delete SPD from interface @param client_index - opaque cookie to identify the sender @@ -49,7 +38,7 @@ define ipsec_spd_add_del_reply */ -define ipsec_interface_add_del_spd +autoreply define ipsec_interface_add_del_spd { u32 client_index; u32 context; @@ -59,17 +48,6 @@ define ipsec_interface_add_del_spd u32 spd_id; }; -/** \brief Reply for IPsec: Add/delete SPD from interface - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ - -define ipsec_interface_add_del_spd_reply 
-{ - u32 context; - i32 retval; -}; - /** \brief IPsec: Add/delete Security Policy Database entry See RFC 4301, 4.4.1.1 on how to match packet to selectors @@ -95,7 +73,7 @@ define ipsec_interface_add_del_spd_reply */ -define ipsec_spd_add_del_entry +autoreply define ipsec_spd_add_del_entry { u32 client_index; u32 context; @@ -125,17 +103,6 @@ define ipsec_spd_add_del_entry u32 sa_id; }; -/** \brief Reply for IPsec: Add/delete Security Policy Database entry - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ - -define ipsec_spd_add_del_entry_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPsec: Add/delete Security Association Database entry @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -167,7 +134,7 @@ define ipsec_spd_add_del_entry_reply IPsec tunnel address copy mode (to support GDOI) */ -define ipsec_sad_add_del_entry +autoreply define ipsec_sad_add_del_entry { u32 client_index; u32 context; @@ -195,17 +162,6 @@ define ipsec_sad_add_del_entry u8 tunnel_dst_address[16]; }; -/** \brief Reply for IPsec: Add/delete Security Association Database entry - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ - -define ipsec_sad_add_del_entry_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPsec: Update Security Association keys @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -219,7 +175,7 @@ define ipsec_sad_add_del_entry_reply @param integrity_key - integrity keying material */ -define ipsec_sa_set_key +autoreply define ipsec_sa_set_key { u32 client_index; u32 context; @@ -233,17 +189,6 @@ define ipsec_sa_set_key u8 integrity_key[128]; }; -/** \brief Reply for IPsec: Update Security Association keys - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ - 
-define ipsec_sa_set_key_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Add/delete profile @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -251,7 +196,7 @@ define ipsec_sa_set_key_reply @param name - IKEv2 profile name @param is_add - Add IKEv2 profile if non-zero, else delete */ -define ikev2_profile_add_del +autoreply define ikev2_profile_add_del { u32 client_index; u32 context; @@ -260,16 +205,6 @@ define ikev2_profile_add_del u8 is_add; }; -/** \brief Reply for IKEv2: Add/delete profile - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_profile_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Set IKEv2 profile authentication method @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -280,7 +215,7 @@ define ikev2_profile_add_del_reply @param data_len - Authentication data length @param data - Authentication data (for rsa-sig cert file path) */ -define ikev2_profile_set_auth +autoreply define ikev2_profile_set_auth { u32 client_index; u32 context; @@ -292,16 +227,6 @@ define ikev2_profile_set_auth u8 data[0]; }; -/** \brief Reply for IKEv2: Set IKEv2 profile authentication method - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_profile_set_auth_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Set IKEv2 profile local/remote identification @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -312,7 +237,7 @@ define ikev2_profile_set_auth_reply @param data_len - Identification data length @param data - Identification data */ -define ikev2_profile_set_id +autoreply define ikev2_profile_set_id { u32 client_index; u32 context; @@ -324,16 +249,6 @@ define ikev2_profile_set_id u8 
data[0]; }; -/** \brief Reply for IKEv2: - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_profile_set_id_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Set IKEv2 profile traffic selector parameters @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -346,7 +261,7 @@ define ikev2_profile_set_id_reply @param start_addr - The smallest address included in traffic selector @param end_addr - The largest address included in traffic selector */ -define ikev2_profile_set_ts +autoreply define ikev2_profile_set_ts { u32 client_index; u32 context; @@ -360,23 +275,13 @@ define ikev2_profile_set_ts u32 end_addr; }; -/** \brief Reply for IKEv2: Set IKEv2 profile traffic selector parameters - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_profile_set_ts_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Set IKEv2 local RSA private key @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param key_file - Key file absolute path */ -define ikev2_set_local_key +autoreply define ikev2_set_local_key { u32 client_index; u32 context; @@ -384,16 +289,6 @@ define ikev2_set_local_key u8 key_file[256]; }; -/** \brief Reply for IKEv2: Set IKEv2 local key - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_set_local_key_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Set IKEv2 responder interface and IP address @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -402,7 +297,7 @@ define ikev2_set_local_key_reply @param sw_if_index - interface index @param address - interface address */ -define ikev2_set_responder +autoreply define ikev2_set_responder { u32 
client_index; u32 context; @@ -412,17 +307,6 @@ define ikev2_set_responder u8 address[4]; }; -/** \brief Reply for IKEv2: Set IKEv2 responder interface and IP address - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_set_responder_reply -{ - u32 context; - i32 retval; -}; - - /** \brief IKEv2: Set IKEv2 IKE transforms in SA_INIT proposal (RFC 7296) @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -434,7 +318,7 @@ define ikev2_set_responder_reply @param dh_group - Diffie-Hellman group */ -define ikev2_set_ike_transforms +autoreply define ikev2_set_ike_transforms { u32 client_index; u32 context; @@ -446,16 +330,6 @@ define ikev2_set_ike_transforms u32 dh_group; }; -/** \brief Reply for IKEv2: Set IKEv2 IKE transforms - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_set_ike_transforms_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Set IKEv2 ESP transforms in SA_INIT proposal (RFC 7296) @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -467,7 +341,7 @@ define ikev2_set_ike_transforms_reply @param dh_group - Diffie-Hellman group */ -define ikev2_set_esp_transforms +autoreply define ikev2_set_esp_transforms { u32 client_index; u32 context; @@ -479,16 +353,6 @@ define ikev2_set_esp_transforms u32 dh_group; }; -/** \brief Reply for IKEv2: Set IKEv2 ESP transforms - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_set_esp_transforms_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Set Child SA lifetime, limited by time and/or data @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -500,7 +364,7 @@ define ikev2_set_esp_transforms_reply 
@param lifetime_maxdata - SA maximum life time in bytes (0 to disable) */ -define ikev2_set_sa_lifetime +autoreply define ikev2_set_sa_lifetime { u32 client_index; u32 context; @@ -512,16 +376,6 @@ define ikev2_set_sa_lifetime u64 lifetime_maxdata; }; -/** \brief Reply for IKEv2: Set Child SA lifetime - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_set_sa_lifetime_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Initiate the SA_INIT exchange @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -529,7 +383,7 @@ define ikev2_set_sa_lifetime_reply @param name - IKEv2 profile name */ -define ikev2_initiate_sa_init +autoreply define ikev2_initiate_sa_init { u32 client_index; u32 context; @@ -537,16 +391,6 @@ define ikev2_initiate_sa_init u8 name[64]; }; -/** \brief Reply for IKEv2: Initiate the SA_INIT exchange - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_initiate_sa_init_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Initiate the delete IKE SA exchange @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -554,7 +398,7 @@ define ikev2_initiate_sa_init_reply @param ispi - IKE SA initiator SPI */ -define ikev2_initiate_del_ike_sa +autoreply define ikev2_initiate_del_ike_sa { u32 client_index; u32 context; @@ -562,16 +406,6 @@ define ikev2_initiate_del_ike_sa u64 ispi; }; -/** \brief Reply for IKEv2: Initiate the delete IKE SA exchange - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_initiate_del_ike_sa_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Initiate the delete Child SA exchange @param client_index - opaque cookie to identify the sender @param context - sender context, to match 
reply w/ request @@ -579,7 +413,7 @@ define ikev2_initiate_del_ike_sa_reply @param ispi - Child SA initiator SPI */ -define ikev2_initiate_del_child_sa +autoreply define ikev2_initiate_del_child_sa { u32 client_index; u32 context; @@ -587,16 +421,6 @@ define ikev2_initiate_del_child_sa u32 ispi; }; -/** \brief Reply for IKEv2: Initiate the delete Child SA exchange - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_initiate_del_child_sa_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Initiate the rekey Child SA exchange @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -604,7 +428,7 @@ define ikev2_initiate_del_child_sa_reply @param ispi - Child SA initiator SPI */ -define ikev2_initiate_rekey_child_sa +autoreply define ikev2_initiate_rekey_child_sa { u32 client_index; u32 context; @@ -612,16 +436,6 @@ define ikev2_initiate_rekey_child_sa u32 ispi; }; -/** \brief Reply for IKEv2: Initiate the rekey Child SA exchange - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_initiate_rekey_child_sa_reply -{ - u32 context; - i32 retval; -}; - /** \brief Dump ipsec policy database data @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -682,4 +496,4 @@ define ipsec_spd_details { * eval: (c-set-style "gnu") * End: */ - \ No newline at end of file + diff --git a/src/vnet/l2/l2.api b/src/vnet/l2/l2.api index c23eebec..db42d635 100644 --- a/src/vnet/l2/l2.api +++ b/src/vnet/l2/l2.api @@ -70,66 +70,36 @@ define l2_fib_table_dump @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request */ -define l2_fib_clear_table +autoreply define l2_fib_clear_table { u32 client_index; u32 context; }; -/** \brief L2 fib clear table response - @param 
context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2_fib_clear_table_reply -{ - u32 context; - i32 retval; -}; - /** \brief L2 FIB flush bridge domain entries @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param bd_id - the entry's bridge domain id */ -define l2fib_flush_bd +autoreply define l2fib_flush_bd { u32 client_index; u32 context; u32 bd_id; }; -/** \brief L2 FIB flush bridge domain entries response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2fib_flush_bd_reply -{ - u32 context; - i32 retval; -}; - /** \brief L2 FIB flush interface entries @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param bd_id - the entry's bridge domain id */ -define l2fib_flush_int +autoreply define l2fib_flush_int { u32 client_index; u32 context; u32 sw_if_index; }; -/** \brief L2 FIB flush interface entries response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2fib_flush_int_reply -{ - u32 context; - i32 retval; -}; - /** \brief L2 FIB add entry request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -140,7 +110,7 @@ define l2fib_flush_int_reply @param static_mac - @param filter_mac - */ -define l2fib_add_del +autoreply define l2fib_add_del { u32 client_index; u32 context; @@ -153,16 +123,6 @@ define l2fib_add_del u8 bvi_mac; }; -/** \brief L2 FIB add entry response - @param context - sender context, to match reply w/ request - @param retval - return code for the add l2fib entry request -*/ -define l2fib_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set L2 flags request !!! 
TODO - need more info, feature bits in l2_input.h @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -196,7 +156,7 @@ define l2_flags_reply @param bd_id - the bridge domain to create @param mac_age - mac aging time in min, 0 for disabled */ -define bridge_domain_set_mac_age +autoreply define bridge_domain_set_mac_age { u32 client_index; u32 context; @@ -204,16 +164,6 @@ define bridge_domain_set_mac_age u8 mac_age; }; -/** \brief Set bridge domain response - @param context - sender context, to match reply w/ request - @param retval - return code for the set l2 bits request -*/ -define bridge_domain_set_mac_age_reply -{ - u32 context; - i32 retval; -}; - /** \brief L2 bridge domain add or delete request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -226,7 +176,7 @@ define bridge_domain_set_mac_age_reply @param mac_age - mac aging time in min, 0 for disabled @param is_add - add or delete flag */ -define bridge_domain_add_del +autoreply define bridge_domain_add_del { u32 client_index; u32 context; @@ -240,16 +190,6 @@ define bridge_domain_add_del u8 is_add; }; -/** \brief L2 bridge domain add or delete response - @param context - sender context, to match reply w/ request - @param retval - return code for the set bridge flags request -*/ -define bridge_domain_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief L2 bridge domain request operational state details @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -337,7 +277,7 @@ define bridge_flags_reply @param tag1 - Needed for any push or translate vtr op @param tag2 - Needed for any push 2 or translate x-2 vtr ops */ -define l2_interface_vlan_tag_rewrite +autoreply define l2_interface_vlan_tag_rewrite { u32 client_index; u32 context; @@ -348,16 +288,6 @@ define l2_interface_vlan_tag_rewrite u32 
tag2; // second pushed tag }; -/** \brief L2 interface vlan tag rewrite response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2_interface_vlan_tag_rewrite_reply -{ - u32 context; - i32 retval; -}; - /** \brief L2 interface pbb tag rewrite configure request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -370,7 +300,7 @@ define l2_interface_vlan_tag_rewrite_reply @param b_vlanid - B-tag vlanid, needed for any push or translate qinq vtr op @param i_sid - I-tag service id, needed for any push or translate qinq vtr op */ -define l2_interface_pbb_tag_rewrite +autoreply define l2_interface_pbb_tag_rewrite { u32 client_index; u32 context; @@ -383,16 +313,6 @@ define l2_interface_pbb_tag_rewrite u32 i_sid; }; -/** \brief L2 interface pbb tag rewrite response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2_interface_pbb_tag_rewrite_reply -{ - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") diff --git a/src/vnet/l2tp/l2tp.api b/src/vnet/l2tp/l2tp.api index 5a5a5a48..4587a807 100644 --- a/src/vnet/l2tp/l2tp.api +++ b/src/vnet/l2tp/l2tp.api @@ -52,7 +52,7 @@ define l2tpv3_create_tunnel_reply u32 sw_if_index; }; -define l2tpv3_set_tunnel_cookies +autoreply define l2tpv3_set_tunnel_cookies { u32 client_index; u32 context; @@ -61,16 +61,6 @@ define l2tpv3_set_tunnel_cookies u64 new_remote_cookie; }; -/** \brief L2TP tunnel set cookies response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2tpv3_set_tunnel_cookies_reply -{ - u32 context; - i32 retval; -}; - define sw_if_l2tpv3_tunnel_details { u32 context; @@ -91,7 +81,7 @@ define sw_if_l2tpv3_tunnel_dump u32 context; }; -define l2tpv3_interface_enable_disable +autoreply define 
l2tpv3_interface_enable_disable { u32 client_index; u32 context; @@ -99,13 +89,7 @@ define l2tpv3_interface_enable_disable u32 sw_if_index; }; -define l2tpv3_interface_enable_disable_reply -{ - u32 context; - i32 retval; -}; - -define l2tpv3_set_lookup_key +autoreply define l2tpv3_set_lookup_key { u32 client_index; u32 context; @@ -113,12 +97,6 @@ define l2tpv3_set_lookup_key u8 key; }; -define l2tpv3_set_lookup_key_reply -{ - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") diff --git a/src/vnet/lisp-cp/lisp.api b/src/vnet/lisp-cp/lisp.api index a50a5ccb..8bed71b3 100644 --- a/src/vnet/lisp-cp/lisp.api +++ b/src/vnet/lisp-cp/lisp.api @@ -59,7 +59,7 @@ define lisp_add_del_locator_set_reply @param priority - priority of the lisp locator @param weight - weight of the lisp locator */ -define lisp_add_del_locator +autoreply define lisp_add_del_locator { u32 client_index; u32 context; @@ -70,16 +70,6 @@ define lisp_add_del_locator u8 weight; }; -/** \brief Reply for locator add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_add_del_locator_reply -{ - u32 context; - i32 retval; -}; - /** \brief add or delete lisp eid-table @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -98,7 +88,7 @@ define lisp_add_del_locator_reply HMAC_SHA_256_128 2 @param key - secret key */ -define lisp_add_del_local_eid +autoreply define lisp_add_del_local_eid { u32 client_index; u32 context; @@ -112,16 +102,6 @@ define lisp_add_del_local_eid u8 key[64]; }; -/** \brief Reply for local_eid add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_add_del_local_eid_reply -{ - u32 context; - i32 retval; -}; - /** \brief Add/delete map server @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request 
@@ -129,7 +109,7 @@ define lisp_add_del_local_eid_reply @param is_ipv6 - if non-zero the address is ipv6, else ipv4 @param ip_address - map server IP address */ -define lisp_add_del_map_server +autoreply define lisp_add_del_map_server { u32 client_index; u32 context; @@ -138,16 +118,6 @@ define lisp_add_del_map_server u8 ip_address[16]; }; -/** \brief Reply for lisp_add_del_map_server - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_add_del_map_server_reply -{ - u32 context; - i32 retval; -}; - /** \brief add or delete map-resolver @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -155,7 +125,7 @@ define lisp_add_del_map_server_reply @param is_ipv6 - if non-zero the address is ipv6, else ipv4 @param ip_address - array of address bytes */ -define lisp_add_del_map_resolver +autoreply define lisp_add_del_map_resolver { u32 client_index; u32 context; @@ -164,45 +134,25 @@ define lisp_add_del_map_resolver u8 ip_address[16]; }; -/** \brief Reply for map_resolver add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_add_del_map_resolver_reply -{ - u32 context; - i32 retval; -}; - /** \brief enable or disable LISP feature @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_en - enable protocol if non-zero, else disable */ -define lisp_enable_disable +autoreply define lisp_enable_disable { u32 client_index; u32 context; u8 is_en; }; -/** \brief Reply for gpe enable/disable - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief configure or disable LISP PITR node @param client_index - opaque cookie to identify the sender @param context - sender context, to match 
reply w/ request @param ls_name - locator set name @param is_add - add locator set if non-zero, else disable pitr */ -define lisp_pitr_set_locator_set +autoreply define lisp_pitr_set_locator_set { u32 client_index; u32 context; @@ -210,16 +160,6 @@ define lisp_pitr_set_locator_set u8 ls_name[64]; }; -/** \brief Reply for lisp_pitr_set_locator_set - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_pitr_set_locator_set_reply -{ - u32 context; - i32 retval; -}; - /** \brief configure or disable use of PETR @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -227,7 +167,7 @@ define lisp_pitr_set_locator_set_reply @param address - PETR IP address @param is_add - add locator set if non-zero, else disable pitr */ -define lisp_use_petr +autoreply define lisp_use_petr { u32 client_index; u32 context; @@ -236,16 +176,6 @@ define lisp_use_petr u8 is_add; }; -/** \brief Reply for lisp_pitr_set_locator_set - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_use_petr_reply -{ - u32 context; - i32 retval; -}; - /** \brief Request for LISP PETR status @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -298,45 +228,25 @@ define show_lisp_rloc_probe_state_reply @param context - sender context, to match reply w/ request @param is_enable - enable if non-zero; disable otherwise */ -define lisp_rloc_probe_enable_disable +autoreply define lisp_rloc_probe_enable_disable { u32 client_index; u32 context; u8 is_enabled; }; -/** \brief Reply for lisp_rloc_probe_enable_disable - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_rloc_probe_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief enable/disable LISP map-register @param client_index - 
opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_enable - enable if non-zero; disable otherwise */ -define lisp_map_register_enable_disable +autoreply define lisp_map_register_enable_disable { u32 client_index; u32 context; u8 is_enabled; }; -/** \brief Reply for lisp_map_register_enable_disable - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_map_register_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief Get state of LISP map-register @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -366,23 +276,13 @@ define show_lisp_map_register_state_reply 0 - destination only 1 - source/destaination */ -define lisp_map_request_mode +autoreply define lisp_map_request_mode { u32 client_index; u32 context; u8 mode; }; -/** \brief Reply for lisp_map_request_mode - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_map_request_mode_reply -{ - u32 context; - i32 retval; -}; - /** \brief Request for LISP map-request mode @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -430,7 +330,7 @@ typeonly manual_endian manual_print define remote_locator @param rloc_num - number of remote locators @param rlocs - remote locator records */ -manual_print manual_endian define lisp_add_del_remote_mapping +autoreply manual_print manual_endian define lisp_add_del_remote_mapping { u32 client_index; u32 context; @@ -448,16 +348,6 @@ manual_print manual_endian define lisp_add_del_remote_mapping vl_api_remote_locator_t rlocs[rloc_num]; }; -/** \brief Reply for lisp_add_del_remote_mapping - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_add_del_remote_mapping_reply -{ - u32 context; - i32 
retval; -}; - /** \brief add or delete LISP adjacency adjacency @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -470,7 +360,7 @@ define lisp_add_del_remote_mapping_reply @param reid - remote EID @param leid - local EID */ -define lisp_add_del_adjacency +autoreply define lisp_add_del_adjacency { u32 client_index; u32 context; @@ -483,23 +373,13 @@ define lisp_add_del_adjacency u8 leid_len; }; -/** \brief Reply for lisp_add_del_adjacency - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_add_del_adjacency_reply -{ - u32 context; - i32 retval; -}; - /** \brief add or delete map request itr rlocs @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_add - add address if non-zero, else delete @param locator_set_name - locator set name */ -define lisp_add_del_map_request_itr_rlocs +autoreply define lisp_add_del_map_request_itr_rlocs { u32 client_index; u32 context; @@ -512,12 +392,6 @@ define lisp_add_del_map_request_itr_rlocs @param retval - return code */ -define lisp_add_del_map_request_itr_rlocs_reply -{ - u32 context; - i32 retval; -}; - /** \brief map/unmap vni/bd_index to vrf @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -525,7 +399,7 @@ define lisp_add_del_map_request_itr_rlocs_reply @param dp_table - virtual network id/bridge domain index @param vrf - vrf */ -define lisp_eid_table_add_del_map +autoreply define lisp_eid_table_add_del_map { u32 client_index; u32 context; @@ -535,16 +409,6 @@ define lisp_eid_table_add_del_map u8 is_l2; }; -/** \brief Reply for lisp_eid_table_add_del_map - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_eid_table_add_del_map_reply -{ - u32 context; - i32 retval; -}; - /** \brief 
Request for map lisp locator status @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/lisp-cp/one.api b/src/vnet/lisp-cp/one.api index ca82f694..2fa1edf6 100644 --- a/src/vnet/lisp-cp/one.api +++ b/src/vnet/lisp-cp/one.api @@ -59,7 +59,7 @@ define one_add_del_locator_set_reply @param priority - priority of the locator @param weight - weight of the locator */ -define one_add_del_locator +autoreply define one_add_del_locator { u32 client_index; u32 context; @@ -70,16 +70,6 @@ define one_add_del_locator u8 weight; }; -/** \brief Reply for locator add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_add_del_locator_reply -{ - u32 context; - i32 retval; -}; - /** \brief add or delete ONE eid-table @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -98,7 +88,7 @@ define one_add_del_locator_reply HMAC_SHA_256_128 2 @param key - secret key */ -define one_add_del_local_eid +autoreply define one_add_del_local_eid { u32 client_index; u32 context; @@ -112,16 +102,6 @@ define one_add_del_local_eid u8 key[64]; }; -/** \brief Reply for local_eid add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_add_del_local_eid_reply -{ - u32 context; - i32 retval; -}; - /** \brief Add/delete map server @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -129,7 +109,7 @@ define one_add_del_local_eid_reply @param is_ipv6 - if non-zero the address is ipv6, else ipv4 @param ip_address - map server IP address */ -define one_add_del_map_server +autoreply define one_add_del_map_server { u32 client_index; u32 context; @@ -138,16 +118,6 @@ define one_add_del_map_server u8 ip_address[16]; }; -/** \brief Reply for one_add_del_map_server - 
@param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_add_del_map_server_reply -{ - u32 context; - i32 retval; -}; - /** \brief add or delete map-resolver @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -155,7 +125,7 @@ define one_add_del_map_server_reply @param is_ipv6 - if non-zero the address is ipv6, else ipv4 @param ip_address - array of address bytes */ -define one_add_del_map_resolver +autoreply define one_add_del_map_resolver { u32 client_index; u32 context; @@ -164,45 +134,25 @@ define one_add_del_map_resolver u8 ip_address[16]; }; -/** \brief Reply for map_resolver add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_add_del_map_resolver_reply -{ - u32 context; - i32 retval; -}; - /** \brief enable or disable ONE feature @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_en - enable protocol if non-zero, else disable */ -define one_enable_disable +autoreply define one_enable_disable { u32 client_index; u32 context; u8 is_en; }; -/** \brief Reply for gpe enable/disable - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief configure or disable ONE PITR node @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param ls_name - locator set name @param is_add - add locator set if non-zero, else disable pitr */ -define one_pitr_set_locator_set +autoreply define one_pitr_set_locator_set { u32 client_index; u32 context; @@ -210,16 +160,6 @@ define one_pitr_set_locator_set u8 ls_name[64]; }; -/** \brief Reply for one_pitr_set_locator_set - @param context - returned sender context, to match reply 
w/ request - @param retval - return code -*/ -define one_pitr_set_locator_set_reply -{ - u32 context; - i32 retval; -}; - /** \brief configure or disable use of PETR @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -227,7 +167,7 @@ define one_pitr_set_locator_set_reply @param address - PETR IP address @param is_add - add locator set if non-zero, else disable PETR */ -define one_use_petr +autoreply define one_use_petr { u32 client_index; u32 context; @@ -236,16 +176,6 @@ define one_use_petr u8 is_add; }; -/** \brief Reply for one_use_petr - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_use_petr_reply -{ - u32 context; - i32 retval; -}; - /** \brief Request for ONE PETR status @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -298,45 +228,25 @@ define show_one_rloc_probe_state_reply @param context - sender context, to match reply w/ request @param is_enable - enable if non-zero; disable otherwise */ -define one_rloc_probe_enable_disable +autoreply define one_rloc_probe_enable_disable { u32 client_index; u32 context; u8 is_enabled; }; -/** \brief Reply for one_rloc_probe_enable_disable - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_rloc_probe_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief enable/disable ONE map-register @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_enable - enable if non-zero; disable otherwise */ -define one_map_register_enable_disable +autoreply define one_map_register_enable_disable { u32 client_index; u32 context; u8 is_enabled; }; -/** \brief Reply for one_map_register_enable_disable - @param context - returned sender context, to match reply w/ request - @param retval 
- return code -*/ -define one_map_register_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief Get state of ONE map-register @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -366,23 +276,13 @@ define show_one_map_register_state_reply 0 - destination only 1 - source/destaination */ -define one_map_request_mode +autoreply define one_map_request_mode { u32 client_index; u32 context; u8 mode; }; -/** \brief Reply for one_map_request_mode - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_map_request_mode_reply -{ - u32 context; - i32 retval; -}; - /** \brief Request for ONE map-request mode @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -430,7 +330,7 @@ typeonly manual_endian manual_print define one_remote_locator @param rloc_num - number of remote locators @param rlocs - remote locator records */ -manual_print manual_endian define one_add_del_remote_mapping +autoreply manual_print manual_endian define one_add_del_remote_mapping { u32 client_index; u32 context; @@ -448,16 +348,6 @@ manual_print manual_endian define one_add_del_remote_mapping vl_api_one_remote_locator_t rlocs[rloc_num]; }; -/** \brief Reply for one_add_del_remote_mapping - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_add_del_remote_mapping_reply -{ - u32 context; - i32 retval; -}; - /** \brief add or delete ONE adjacency adjacency @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -470,7 +360,7 @@ define one_add_del_remote_mapping_reply @param reid - remote EID @param leid - local EID */ -define one_add_del_adjacency +autoreply define one_add_del_adjacency { u32 client_index; u32 context; @@ -483,23 +373,13 @@ define one_add_del_adjacency u8 
leid_len; }; -/** \brief Reply for one_add_del_adjacency - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_add_del_adjacency_reply -{ - u32 context; - i32 retval; -}; - /** \brief add or delete map request itr rlocs @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_add - add address if non-zero, else delete @param locator_set_name - locator set name */ -define one_add_del_map_request_itr_rlocs +autoreply define one_add_del_map_request_itr_rlocs { u32 client_index; u32 context; @@ -507,17 +387,6 @@ define one_add_del_map_request_itr_rlocs u8 locator_set_name[64]; }; -/** \brief Reply for one_add_del_map_request_itr_rlocs - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ - -define one_add_del_map_request_itr_rlocs_reply -{ - u32 context; - i32 retval; -}; - /** \brief map/unmap vni/bd_index to vrf @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -525,7 +394,7 @@ define one_add_del_map_request_itr_rlocs_reply @param dp_table - virtual network id/bridge domain index @param vrf - vrf */ -define one_eid_table_add_del_map +autoreply define one_eid_table_add_del_map { u32 client_index; u32 context; @@ -535,16 +404,6 @@ define one_eid_table_add_del_map u8 is_l2; }; -/** \brief Reply for one_eid_table_add_del_map - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_eid_table_add_del_map_reply -{ - u32 context; - i32 retval; -}; - /** \brief Request for map one locator status @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -901,31 +760,19 @@ define one_stats_details u32 bytes; }; -define one_stats_flush +autoreply define one_stats_flush { u32 client_index; u32 context; }; 
-define one_stats_flush_reply -{ - u32 context; - i32 retval; -}; - -define one_stats_enable_disable +autoreply define one_stats_enable_disable { u32 client_index; u32 context; u8 is_en; }; -define one_stats_enable_disable_reply -{ - u32 context; - i32 retval; -}; - define show_one_stats_enable_disable { u32 client_index; diff --git a/src/vnet/lisp-gpe/lisp_gpe.api b/src/vnet/lisp-gpe/lisp_gpe.api index 43a6a6cd..f79d18c1 100644 --- a/src/vnet/lisp-gpe/lisp_gpe.api +++ b/src/vnet/lisp-gpe/lisp_gpe.api @@ -43,7 +43,7 @@ typeonly manual_print manual_endian define gpe_locator @param loc_num - number of locators @param locs - array of remote locators */ -manual_print manual_endian define gpe_add_del_fwd_entry +autoreply manual_print manual_endian define gpe_add_del_fwd_entry { u32 client_index; u32 context; @@ -60,44 +60,24 @@ manual_print manual_endian define gpe_add_del_fwd_entry vl_api_gpe_locator_t locs[loc_num]; }; -/** \brief Reply for gpe_fwd_entry add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define gpe_add_del_fwd_entry_reply -{ - u32 context; - i32 retval; -}; - /** \brief enable or disable gpe protocol @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_en - enable protocol if non-zero, else disable */ -define gpe_enable_disable +autoreply define gpe_enable_disable { u32 client_index; u32 context; u8 is_en; }; -/** \brief Reply for gpe enable/disable - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define gpe_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief add or delete gpe_iface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_add - add address if non-zero, else delete */ -define gpe_add_del_iface +autoreply define gpe_add_del_iface { u32 client_index; u32 
context; @@ -107,16 +87,6 @@ define gpe_add_del_iface u32 vni; }; -/** \brief Reply for gpe_iface add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define gpe_add_del_iface_reply -{ - u32 context; - i32 retval; -}; - define gpe_fwd_entries_get { u32 client_index; @@ -163,23 +133,13 @@ manual_endian manual_print define gpe_fwd_entry_path_details @param context - sender context, to match reply w/ request @param mode - LISP (value 0) or VXLAN (value 1) */ -define gpe_set_encap_mode +autoreply define gpe_set_encap_mode { u32 client_index; u32 context; u8 mode; }; -/** \brief Reply for set_encap_mode - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define gpe_set_encap_mode_reply -{ - u32 context; - i32 retval; -}; - /** \brief get GPE encapsulation mode @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/map/map.api b/src/vnet/map/map.api index 4e4be85e..d68f13f0 100644 --- a/src/vnet/map/map.api +++ b/src/vnet/map/map.api @@ -62,22 +62,13 @@ define map_add_domain_reply @param context - sender context, to match reply w/ request @param index - MAP Domain index */ -define map_del_domain +autoreply define map_del_domain { u32 client_index; u32 context; u32 index; }; -/** \brief Reply for MAP domain del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define map_del_domain_reply -{ - u32 context; - i32 retval; -}; /** \brief Add or Delete MAP rule from a domain (Only used for shared IPv4 per subscriber) @param client_index - opaque cookie to identify the sender @@ -87,7 +78,7 @@ define map_del_domain_reply @param ip6_dst - MAP CE IPv6 address @param psid - Rule PSID */ -define map_add_del_rule +autoreply define map_add_del_rule { u32 client_index; u32 context; @@ -97,15 +88,6 @@ define map_add_del_rule u16 psid; }; 
-/** \brief Reply for MAP rule add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define map_add_del_rule_reply -{ - u32 context; - i32 retval; -}; /** \brief Get list of map domains @param client_index - opaque cookie to identify the sender diff --git a/src/vnet/mpls/mpls.api b/src/vnet/mpls/mpls.api index a1e1270a..c8a3ffb7 100644 --- a/src/vnet/mpls/mpls.api +++ b/src/vnet/mpls/mpls.api @@ -26,7 +26,7 @@ @param mb_address_length - Length of IP prefix @param mb_address[16] - IP prefix/ */ -define mpls_ip_bind_unbind +autoreply define mpls_ip_bind_unbind { u32 client_index; u32 context; @@ -40,16 +40,6 @@ define mpls_ip_bind_unbind u8 mb_address[16]; }; -/** \brief Reply for MPLS IP bind/unbind request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define mpls_ip_bind_unbind_reply -{ - u32 context; - i32 retval; -}; - /** \brief MPLS tunnel Add / del route @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -172,7 +162,7 @@ manual_endian manual_print define mpls_tunnel_details @param mr_next_hop_out_label_stack - the next-hop output label stack, outer most first @param next_hop_via_label - The next-hop is a resolved via a local label */ -define mpls_route_add_del +autoreply define mpls_route_add_del { u32 client_index; u32 context; @@ -199,16 +189,6 @@ define mpls_route_add_del u32 mr_next_hop_out_label_stack[mr_next_hop_n_out_labels]; }; -/** \brief Reply for MPLS route add / del request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define mpls_route_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Dump MPLS fib table @param client_index - opaque cookie to identify the sender */ @@ -240,4 +220,4 @@ manual_endian manual_print define mpls_fib_details * eval: (c-set-style "gnu") * End: */ - \ No newline at 
end of file + diff --git a/src/vnet/session/session.api b/src/vnet/session/session.api index e207e46f..4aef09da 100644 --- a/src/vnet/session/session.api +++ b/src/vnet/session/session.api @@ -49,26 +49,17 @@ define application_attach_reply { @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request */ - define application_detach { +autoreply define application_detach { u32 client_index; u32 context; }; - /** \brief detach reply - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define application_detach_reply { - u32 context; - i32 retval; -}; - /** \brief vpp->client, please map an additional shared memory segment @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param segment_name - */ -define map_another_segment { +autoreply define map_another_segment { u32 client_index; u32 context; u32 segment_size; @@ -83,7 +74,7 @@ define map_another_segment { "tcp://::/0/80" [ipv6] etc. @param options - socket options, fifo sizes, etc. */ -define bind_uri { +autoreply define bind_uri { u32 client_index; u32 context; u32 accept_cookie; @@ -97,7 +88,7 @@ define bind_uri { "tcp://::/0/80" [ipv6], etc. @param options - socket options, fifo sizes, etc. 
*/ -define unbind_uri { +autoreply define unbind_uri { u32 client_index; u32 context; u8 uri[128]; @@ -122,24 +113,6 @@ define connect_uri { u64 options[16]; }; -/** \brief Bind reply - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define bind_uri_reply { - u32 context; - i32 retval; -}; - -/** \brief unbind reply - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define unbind_uri_reply { - u32 context; - i32 retval; -}; - /** \brief vpp->client, connect reply @param context - sender context, to match reply w/ request @param retval - return code for the request @@ -165,15 +138,6 @@ define connect_uri_reply { u8 segment_name[128]; }; -/** \brief client->vpp - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define map_another_segment_reply { - u32 context; - i32 retval; -}; - /** \brief vpp->client, accept this session @param context - sender context, to match reply w/ request @param listener_handle - tells client which listener this pertains to @@ -290,7 +254,7 @@ define bind_sock { @param context - sender context, to match reply w/ request @param handle - bind handle obtained from bind reply */ -define unbind_sock { +autoreply define unbind_sock { u32 client_index; u32 context; u64 handle; @@ -339,15 +303,6 @@ define bind_sock_reply { u8 segment_name[128]; }; -/** \brief unbind reply - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define unbind_sock_reply { - u32 context; - i32 retval; -}; - /** \brief vpp/server->client, connect reply @param context - sender context, to match reply w/ request @param retval - return code for the request @@ -378,23 +333,14 @@ define connect_sock_reply { @param context - sender context, to match reply w/ request @param is_enable - disable session layer if 0, enable otherwise */ -define 
session_enable_disable { +autoreply define session_enable_disable { u32 client_index; u32 context; u8 is_enable; }; -/** \brief Reply for session enable/disable - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define session_enable_disable_reply { - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") * End: - */ \ No newline at end of file + */ diff --git a/src/vnet/span/span.api b/src/vnet/span/span.api index 4babdd83..914fd8d0 100644 --- a/src/vnet/span/span.api +++ b/src/vnet/span/span.api @@ -21,7 +21,7 @@ @param sw_if_index_to - interface where the traffic is mirrored @param state - 0 = disabled, 1 = rx enabled, 2 = tx enabled, 3 tx & rx enabled */ -define sw_interface_span_enable_disable { +autoreply define sw_interface_span_enable_disable { u32 client_index; u32 context; u32 sw_if_index_from; @@ -29,14 +29,6 @@ define sw_interface_span_enable_disable { u8 state; }; -/** \brief Reply to SPAN enable/disable request - @param context - sender context which was passed in the request -*/ -define sw_interface_span_enable_disable_reply { - u32 context; - i32 retval; -}; - /** \brief SPAN dump request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/sr/sr.api b/src/vnet/sr/sr.api index 5feadcb0..9e900741 100644 --- a/src/vnet/sr/sr.api +++ b/src/vnet/sr/sr.api @@ -25,7 +25,7 @@ @param fib_table FIB table in which we should install the localsid entry @param nh_addr Next Hop IPv4/IPv6 address. Only for L2/L3 xconnect. 
*/ -define sr_localsid_add_del +autoreply define sr_localsid_add_del { u32 client_index; u32 context; @@ -39,16 +39,6 @@ define sr_localsid_add_del u8 nh_addr[16]; }; -/** \brief IPv6 SR LocalSID add/del request response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define sr_localsid_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 SR policy add @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -59,7 +49,7 @@ define sr_localsid_add_del_reply @param fib_table is the VRF where to install the FIB entry for the BSID @param segments is a vector of IPv6 address composing the segment list */ -define sr_policy_add +autoreply define sr_policy_add { u32 client_index; u32 context; @@ -72,16 +62,6 @@ define sr_policy_add u8 segments[0]; }; -/** \brief IPv6 SR Policy add request response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define sr_policy_add_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 SR policy modification @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -94,7 +74,7 @@ define sr_policy_add_reply @param weight is the weight of the sid list. optional. @param is_encap Mode. Encapsulation or SRH insertion. 
*/ -define sr_policy_mod +autoreply define sr_policy_mod { u32 client_index; u32 context; @@ -108,23 +88,13 @@ define sr_policy_mod u8 segments[0]; }; -/** \brief IPv6 SR Policy modification request response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define sr_policy_mod_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 SR policy deletion @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param bsid is the bindingSID of the SR Policy @param index is the index of the SR policy */ -define sr_policy_del +autoreply define sr_policy_del { u32 client_index; u32 context; @@ -132,16 +102,6 @@ define sr_policy_del u32 sr_policy_index; }; -/** \brief IPv6 SR Policy deletion request response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define sr_policy_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 SR steering add/del @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -154,7 +114,7 @@ define sr_policy_del_reply @param sw_if_index is the incoming interface for L2 traffic @param traffic_type describes the type of traffic */ -define sr_steering_add_del +autoreply define sr_steering_add_del { u32 client_index; u32 context; @@ -168,16 +128,6 @@ define sr_steering_add_del u8 traffic_type; }; -/** \brief IPv6 SR steering add/del request response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define sr_steering_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Dump the list of SR LocalSIDs @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/unix/tap.api b/src/vnet/unix/tap.api index 1fd0bb09..d9fba371 100644 --- 
a/src/vnet/unix/tap.api +++ b/src/vnet/unix/tap.api @@ -93,23 +93,13 @@ define tap_modify_reply @param context - sender context, to match reply w/ request @param sw_if_index - interface index of existing tap interface */ -define tap_delete +autoreply define tap_delete { u32 client_index; u32 context; u32 sw_if_index; }; -/** \brief Reply for tap delete request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define tap_delete_reply -{ - u32 context; - i32 retval; -}; - /** \brief Dump tap interfaces request */ define sw_interface_tap_dump { diff --git a/src/vnet/vxlan/vxlan.api b/src/vnet/vxlan/vxlan.api index 048220fb..6c331a58 100644 --- a/src/vnet/vxlan/vxlan.api +++ b/src/vnet/vxlan/vxlan.api @@ -61,7 +61,7 @@ define vxlan_tunnel_details @param is_ipv6 - if non-zero, enable ipv6-vxlan-bypass, else ipv4-vxlan-bypass @param enable - if non-zero enable, else disable */ -define sw_interface_set_vxlan_bypass +autoreply define sw_interface_set_vxlan_bypass { u32 client_index; u32 context; @@ -69,13 +69,3 @@ define sw_interface_set_vxlan_bypass u8 is_ipv6; u8 enable; }; - -/** \brief Interface set vxlan-bypass response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define sw_interface_set_vxlan_bypass_reply -{ - u32 context; - i32 retval; -}; \ No newline at end of file diff --git a/src/vpp/api/vpe.api b/src/vpp/api/vpe.api index a4ba180d..7c07c822 100644 --- a/src/vpp/api/vpe.api +++ b/src/vpp/api/vpe.api @@ -80,7 +80,7 @@ define create_vlan_subif_reply @param sw_if_index - index of the interface @param enable - if non-zero enable, else disable */ -define sw_interface_set_mpls_enable +autoreply define sw_interface_set_mpls_enable { u32 client_index; u32 context; @@ -88,16 +88,6 @@ define sw_interface_set_mpls_enable u8 enable; }; -/** \brief Reply for MPLS state on an interface - @param context - returned sender context, to match reply w/ request - 
@param retval - return code -*/ -define sw_interface_set_mpls_enable_reply -{ - u32 context; - i32 retval; -}; - /** \brief Proxy ARP add / del request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -106,7 +96,7 @@ define sw_interface_set_mpls_enable_reply @param low_address[4] - Low address of the Proxy ARP range @param hi_address[4] - High address of the Proxy ARP range */ -define proxy_arp_add_del +autoreply define proxy_arp_add_del { u32 client_index; u32 context; @@ -116,23 +106,13 @@ define proxy_arp_add_del u8 hi_address[4]; }; -/** \brief Reply for proxy arp add / del request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define proxy_arp_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Proxy ARP add / del request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - Which interface to enable / disable Proxy Arp on @param enable_disable - 1 to enable Proxy ARP on interface, 0 to disable */ -define proxy_arp_intfc_enable_disable +autoreply define proxy_arp_intfc_enable_disable { u32 client_index; u32 context; @@ -141,23 +121,13 @@ define proxy_arp_intfc_enable_disable u8 enable_disable; }; -/** \brief Reply for Proxy ARP interface enable / disable request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define proxy_arp_intfc_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief Reset VRF (remove all routes etc) request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_ipv6 - 1 for IPv6 neighbor, 0 for IPv4 @param vrf_id - ID of th FIB table / VRF to reset */ -define reset_vrf +autoreply define reset_vrf { u32 client_index; u32 context; @@ -165,16 +135,6 @@ define reset_vrf u32 
vrf_id; }; -/** \brief Reply for Reset VRF request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define reset_vrf_reply -{ - u32 context; - i32 retval; -}; - /** \brief Is Address Reachable request - DISABLED @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -200,7 +160,7 @@ define is_address_reachable @param enable_disable - 1 = enable stats, 0 = disable @param pid - pid of process requesting stats updates */ -define want_stats +autoreply define want_stats { u32 client_index; u32 context; @@ -208,16 +168,6 @@ define want_stats u32 pid; }; -/** \brief Reply for Want Stats request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define want_stats_reply -{ - u32 context; - i32 retval; -}; - typeonly manual_print manual_endian define ip4_fib_counter { u32 address; @@ -331,7 +281,7 @@ define oam_event @param enable_disable- enable if non-zero, else disable @param pid - pid of the requesting process */ -define want_oam_events +autoreply define want_oam_events { u32 client_index; u32 context; @@ -339,16 +289,6 @@ define want_oam_events u32 pid; }; -/** \brief Want OAM events response - @param context - sender context, to match reply w/ request - @param retval - return code for the want oam stats request -*/ -define want_oam_events_reply -{ - u32 context; - i32 retval; -}; - /** \brief OAM add / del target request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -357,7 +297,7 @@ define want_oam_events_reply @param dst_address[] - destination address of the target @param is_add - add target if non-zero, else delete */ -define oam_add_del +autoreply define oam_add_del { u32 client_index; u32 context; @@ -367,23 +307,13 @@ define oam_add_del u8 is_add; }; -/** \brief OAM add / del target response - @param context - sender 
context, to match reply w/ request - @param retval - return code of the request -*/ -define oam_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Reset fib table request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param vrf_id - vrf/table id of the fib table to reset @param is_ipv6 - an ipv6 fib to reset if non-zero, else ipv4 */ -define reset_fib +autoreply define reset_fib { u32 client_index; u32 context; @@ -391,16 +321,6 @@ define reset_fib u8 is_ipv6; }; -/** \brief Reset fib response - @param context - sender context, to match reply w/ request - @param retval - return code for the reset bfib request -*/ -define reset_fib_reply -{ - u32 context; - i32 retval; -}; - /** \brief Create loopback interface request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -458,23 +378,13 @@ define create_loopback_instance_reply @param context - sender context, to match reply w/ request @param sw_if_index - sw index of the interface that was created */ -define delete_loopback +autoreply define delete_loopback { u32 client_index; u32 context; u32 sw_if_index; }; -/** \brief Delete loopback interface response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define delete_loopback_reply -{ - u32 context; - i32 retval; -}; - /** \brief Control ping from client to api server request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -543,7 +453,7 @@ define cli_inband_reply @param is_ipv6 - neighbor limit if non-zero, else ARP limit @param arp_neighbor_limit - the new limit, defaults are ~ 50k */ -define set_arp_neighbor_limit +autoreply define set_arp_neighbor_limit { u32 client_index; u32 context; @@ -551,16 +461,6 @@ define set_arp_neighbor_limit u32 arp_neighbor_limit; }; -/** \brief Set 
max allowed ARP or ip6 neighbor entries response - @param context - sender context, to match reply w/ request - @param retval - return code for request -*/ -define set_arp_neighbor_limit_reply -{ - u32 context; - i32 retval; -}; - /** \brief L2 interface patch add / del request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -568,7 +468,7 @@ define set_arp_neighbor_limit_reply @param tx_sw_if_index - transmit side interface @param is_add - if non-zero set up the interface patch, else remove it */ -define l2_patch_add_del +autoreply define l2_patch_add_del { u32 client_index; u32 context; @@ -577,23 +477,13 @@ define l2_patch_add_del u8 is_add; }; -/** \brief L2 interface patch add / del response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2_patch_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Interface set vpath request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - interface used to reach neighbor @param enable - if non-zero enable, else disable */ -define sw_interface_set_vpath +autoreply define sw_interface_set_vpath { u32 client_index; u32 context; @@ -601,16 +491,6 @@ define sw_interface_set_vpath u8 enable; }; -/** \brief Interface set vpath response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define sw_interface_set_vpath_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set L2 XConnect between two interfaces request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -618,7 +498,7 @@ define sw_interface_set_vpath_reply @param tx_sw_if_index - Transmit interface index @param enable - enable xconnect if not 0, else set to L3 mode */ -define 
sw_interface_set_l2_xconnect +autoreply define sw_interface_set_l2_xconnect { u32 client_index; u32 context; @@ -627,16 +507,6 @@ define sw_interface_set_l2_xconnect u8 enable; }; -/** \brief Set L2 XConnect response - @param context - sender context, to match reply w/ request - @param retval - L2 XConnect request return code -*/ -define sw_interface_set_l2_xconnect_reply -{ - u32 context; - i32 retval; -}; - /** \brief Interface bridge mode request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -646,7 +516,7 @@ define sw_interface_set_l2_xconnect_reply @param shg - Shared horizon group, for bridge mode only @param enable - Enable beige mode if not 0, else set to L3 mode */ -define sw_interface_set_l2_bridge +autoreply define sw_interface_set_l2_bridge { u32 client_index; u32 context; @@ -657,16 +527,6 @@ define sw_interface_set_l2_bridge u8 enable; }; -/** \brief Interface bridge mode response - @param context - sender context, to match reply w/ request - @param retval - Bridge mode request return code -*/ -define sw_interface_set_l2_bridge_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set bridge domain ip to mac entry request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -676,7 +536,7 @@ define sw_interface_set_l2_bridge_reply @param mac_address - MAC address @param */ -define bd_ip_mac_add_del +autoreply define bd_ip_mac_add_del { u32 client_index; u32 context; @@ -687,16 +547,6 @@ define bd_ip_mac_add_del u8 mac_address[6]; }; -/** \brief Set bridge domain ip to mac entry response - @param context - sender context, to match reply w/ request - @param retval - return code for the set bridge flags request -*/ -define bd_ip_mac_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set/unset the classification table for an interface request @param client_index - opaque cookie to identify the sender 
@param context - sender context, to match reply w/ request @@ -704,7 +554,7 @@ define bd_ip_mac_add_del_reply @param sw_if_index - interface to associate with the table @param table_index - index of the table, if ~0 unset the table */ -define classify_set_interface_ip_table +autoreply define classify_set_interface_ip_table { u32 client_index; u32 context; @@ -713,16 +563,6 @@ define classify_set_interface_ip_table u32 table_index; /* ~0 => off */ }; -/** \brief Set/unset interface classification table response - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define classify_set_interface_ip_table_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set/unset l2 classification tables for an interface request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -731,7 +571,7 @@ define classify_set_interface_ip_table_reply @param ip6_table_index - ip6 index @param other_table_index - other index */ -define classify_set_interface_l2_tables +autoreply define classify_set_interface_l2_tables { u32 client_index; u32 context; @@ -743,16 +583,6 @@ define classify_set_interface_l2_tables u8 is_input; }; -/** \brief Set/unset l2 classification tables for an interface response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define classify_set_interface_l2_tables_reply -{ - u32 context; - i32 retval; -}; - /** \brief Get node index using name request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -809,7 +639,7 @@ define add_node_next_reply @param sw_if_index - interface to enable/disable filtering on @param enable_disable - if non-zero enable filtering, else disable */ -define l2_interface_efp_filter +autoreply define l2_interface_efp_filter { u32 client_index; u32 context; @@ -817,16 +647,6 @@ define l2_interface_efp_filter 
u32 enable_disable; }; -/** \brief L2 interface ethernet flow point filtering response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2_interface_efp_filter_reply -{ - u32 context; - i32 retval; -}; - define create_subif { u32 client_index; @@ -882,7 +702,7 @@ define show_version_reply }; /* Gross kludge, DGMS */ -define interface_name_renumber +autoreply define interface_name_renumber { u32 client_index; u32 context; @@ -890,12 +710,6 @@ define interface_name_renumber u32 new_show_dev_instance; }; -define interface_name_renumber_reply -{ - u32 context; - i32 retval; -}; - /** \brief Register for ip4 arp resolution events @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -903,7 +717,7 @@ define interface_name_renumber_reply @param pid - sender's pid @param address - the exact ip4 address of interest */ -define want_ip4_arp_events +autoreply define want_ip4_arp_events { u32 client_index; u32 context; @@ -912,16 +726,6 @@ define want_ip4_arp_events u32 address; }; -/** \brief Reply for interface events registration - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define want_ip4_arp_events_reply -{ - u32 context; - i32 retval; -}; - /** \brief Tell client about an ip4 arp resolution event @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -949,7 +753,7 @@ define ip4_arp_event @param pid - sender's pid @param address - the exact ip6 address of interest */ -define want_ip6_nd_events +autoreply define want_ip6_nd_events { u32 client_index; u32 context; @@ -958,16 +762,6 @@ define want_ip6_nd_events u8 address[16]; }; -/** \brief Reply for ip6 nd resolution events registration - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define 
want_ip6_nd_events_reply -{ - u32 context; - i32 retval; -}; - /** \brief Tell client about an ip6 nd resolution or mac/ip event @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -999,7 +793,7 @@ define ip6_nd_event Note: User is recommeneded to use just one valid table_index per call. (ip4_table_index, ip6_table_index, or l2_table_index) */ -define input_acl_set_interface +autoreply define input_acl_set_interface { u32 client_index; u32 context; @@ -1010,16 +804,6 @@ define input_acl_set_interface u8 is_add; }; -/** \brief Set/unset input ACL interface response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define input_acl_set_interface_reply -{ - u32 context; - i32 retval; -}; - define get_node_graph { u32 client_index; @@ -1048,7 +832,7 @@ define get_node_graph_reply @param pow_enable - Proof of Work enabled or not flag @param trace_enable - iOAM Trace enabled or not flag */ -define ioam_enable +autoreply define ioam_enable { u32 client_index; u32 context; @@ -1060,38 +844,18 @@ define ioam_enable u32 node_id; }; -/** \brief iOAM Trace profile add / del response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define ioam_enable_reply -{ - u32 context; - i32 retval; -}; - /** \brief iOAM disable @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param index - MAP Domain index */ -define ioam_disable +autoreply define ioam_disable { u32 client_index; u32 context; u16 id; }; -/** \brief iOAM disable response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define ioam_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief Query relative index via node names @param client_index - opaque cookie to identify the sender @param context - 
sender context, to match reply w/ request @@ -1149,7 +913,7 @@ define pg_create_interface_reply @param count - number of packets to be captured @param pcap_file - pacp file name to store captured packets */ -define pg_capture +autoreply define pg_capture { u32 client_index; u32 context; @@ -1160,23 +924,13 @@ define pg_capture u8 pcap_file_name[pcap_name_length]; }; -/** \brief PacketGenerator capture packets response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define pg_capture_reply -{ - u32 context; - i32 retval; -}; - /** \brief Enable / disable packet generator request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_enabled - 1 if enabling streams, 0 if disabling @param stream - stream name to be enable/disabled, if not specified handle all streams */ -define pg_enable_disable +autoreply define pg_enable_disable { u32 client_index; u32 context; @@ -1185,16 +939,6 @@ define pg_enable_disable u8 stream_name[stream_name_length]; }; -/** \brief Reply for enable / disable packet generator - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define pg_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief Configure IP source and L4 port-range check @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -1208,7 +952,7 @@ define pg_enable_disable_reply @param vrf_id - fib table/vrf id to associate the source and port-range check with @note To specify a single port set low_port and high_port entry the same */ -define ip_source_and_port_range_check_add_del +autoreply define ip_source_and_port_range_check_add_del { u32 client_index; u32 context; @@ -1222,16 +966,6 @@ define ip_source_and_port_range_check_add_del u32 vrf_id; }; -/** \brief Configure IP source and L4 port-range check reply - @param 
context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ip_source_and_port_range_check_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set interface source and L4 port-range request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -1239,7 +973,7 @@ define ip_source_and_port_range_check_add_del_reply @param tcp_vrf_id - VRF associated with source and TCP port-range check @param udp_vrf_id - VRF associated with source and TCP port-range check */ -define ip_source_and_port_range_check_interface_add_del +autoreply define ip_source_and_port_range_check_interface_add_del { u32 client_index; u32 context; @@ -1251,36 +985,17 @@ define ip_source_and_port_range_check_interface_add_del u32 udp_out_vrf_id; }; -/** \brief Set interface source and L4 port-range response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define ip_source_and_port_range_check_interface_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Delete sub interface request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - sw index of the interface that was created by create_subif */ -define delete_subif { +autoreply define delete_subif { u32 client_index; u32 context; u32 sw_if_index; }; -/** \brief Delete sub interface response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define delete_subif_reply { - u32 context; - i32 retval; -}; - /** \brief Punt traffic to the host @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -1289,7 +1004,7 @@ define delete_subif_reply { @param l4_protocol - L4 protocol to be punted, only UDP (0x11) is supported @param l4_port - TCP/UDP port to be punted 
*/ -define punt { +autoreply define punt { u32 client_index; u32 context; u8 is_add; @@ -1298,23 +1013,13 @@ define punt { u16 l4_port; }; -/** \brief Reply to the punt request - @param context - sender context which was passed in the request - @param retval - return code of punt request -*/ -define punt_reply -{ - u32 context; - i32 retval; -}; - /** \brief Feature path enable/disable request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - the interface @param enable - 1 = on, 0 = off */ -define feature_enable_disable { +autoreply define feature_enable_disable { u32 client_index; u32 context; u32 sw_if_index; @@ -1323,16 +1028,6 @@ define feature_enable_disable { u8 feature_name[64]; }; -/** \brief Reply to the eature path enable/disable request - @param context - sender context which was passed in the request - @param retval - return code for the request -*/ -define feature_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") -- cgit 1.2.3-korg From f3bcdbf071c98ed676591bd22c3d3f8601009fa8 Mon Sep 17 00:00:00 2001 From: Klement Sekera Date: Tue, 2 May 2017 07:38:01 +0200 Subject: BFD: don't crash if interface is deleted Instead, drop the BFD session associated with it.. 
Change-Id: Ie09877d5c94844be2e833900d9dde7f23edaf8cd Signed-off-by: Klement Sekera --- src/vnet/bfd/bfd_udp.c | 75 ++++++++++++++++++++++++++++---------------------- test/test_bfd.py | 30 ++++++++++++++++++++ 2 files changed, 72 insertions(+), 33 deletions(-) (limited to 'src/vnet/bfd') diff --git a/src/vnet/bfd/bfd_udp.c b/src/vnet/bfd/bfd_udp.c index ebee590b..346c5495 100644 --- a/src/vnet/bfd/bfd_udp.c +++ b/src/vnet/bfd/bfd_udp.c @@ -177,11 +177,11 @@ bfd_udp_get_echo_src_ip4 (ip4_address_t * addr) if (ia->address_length <= 31) { addr->as_u32 = clib_host_to_net_u32 (x->as_u32); - /* - * flip the last bit to get a different address, might be network, - * we don't care ... - */ - addr->as_u32 ^= 1; + /* + * flip the last bit to get a different address, might be network, + * we don't care ... + */ + addr->as_u32 ^= 1; addr->as_u32 = clib_net_to_host_u32 (addr->as_u32); return 1; } @@ -221,9 +221,9 @@ bfd_udp_get_echo_src_ip6 (ip6_address_t * addr) } void -bfd_udp_get_echo_source (int *is_set, u32 * sw_if_index, int *have_usable_ip4, - ip4_address_t * ip4, int *have_usable_ip6, - ip6_address_t * ip6) +bfd_udp_get_echo_source (int *is_set, u32 * sw_if_index, + int *have_usable_ip4, ip4_address_t * ip4, + int *have_usable_ip6, ip6_address_t * ip6) { if (bfd_udp_main.echo_source_is_set) { @@ -239,8 +239,8 @@ bfd_udp_get_echo_source (int *is_set, u32 * sw_if_index, int *have_usable_ip4, } int -bfd_add_udp4_transport (vlib_main_t * vm, u32 bi, - const bfd_session_t * bs, int is_echo) +bfd_add_udp4_transport (vlib_main_t * vm, u32 bi, const bfd_session_t * bs, + int is_echo) { const bfd_udp_session_t *bus = &bs->udp; const bfd_udp_key_t *key = &bus->key; @@ -294,8 +294,8 @@ bfd_add_udp4_transport (vlib_main_t * vm, u32 bi, } int -bfd_add_udp6_transport (vlib_main_t * vm, u32 bi, - const bfd_session_t * bs, int is_echo) +bfd_add_udp6_transport (vlib_main_t * vm, u32 bi, const bfd_session_t * bs, + int is_echo) { const bfd_udp_session_t *bus = &bs->udp; const 
bfd_udp_key_t *key = &bus->key; @@ -1200,8 +1200,8 @@ bfd_udp_input (vlib_main_t * vm, vlib_node_runtime_t * rt, b0->current_data = 0; b0->current_length = 0; memset (vnet_buffer (b0), 0, sizeof (*vnet_buffer (b0))); - bfd_init_final_control_frame (vm, b0, bfd_udp_main.bfd_main, - bs, 0); + bfd_init_final_control_frame (vm, b0, bfd_udp_main.bfd_main, bs, + 0); if (is_ipv6) { vlib_node_increment_counter (vm, bfd_udp6_input_node.index, @@ -1440,29 +1440,38 @@ VLIB_REGISTER_NODE (bfd_udp_echo6_input_node, static) = { /* *INDENT-ON* */ static clib_error_t * -bfd_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags) +bfd_udp_sw_if_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_create) { - // vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index); - if (!(flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)) - { - /* TODO */ - } - return 0; -} - -VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (bfd_sw_interface_up_down); - -static clib_error_t * -bfd_hw_interface_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) -{ - if (flags & VNET_HW_INTERFACE_FLAG_LINK_UP) - { - /* TODO */ - } + bfd_session_t **to_be_freed = NULL; + BFD_DBG ("sw_if_add_del called, sw_if_index=%u, is_create=%u", sw_if_index, + is_create); + if (!is_create) + { + bfd_session_t *bs; + pool_foreach (bs, bfd_udp_main.bfd_main->sessions, + { + if (bs->transport != BFD_TRANSPORT_UDP4 && + bs->transport != BFD_TRANSPORT_UDP6) + { + continue;} + if (bs->udp.key.sw_if_index != sw_if_index) + { + continue;} + vec_add1 (to_be_freed, bs);} + ); + } + bfd_session_t **bs; + vec_foreach (bs, to_be_freed) + { + clib_warning ("removal of sw_if_index=%u forces removal of bfd session " + "with bs_idx=%u", sw_if_index, (*bs)->bs_idx); + bfd_session_set_flags (*bs, 0); + bfd_udp_del_session_internal (*bs); + } return 0; } -VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (bfd_hw_interface_up_down); +VNET_SW_INTERFACE_ADD_DEL_FUNCTION (bfd_udp_sw_if_add_del); /* * setup function diff --git a/test/test_bfd.py 
b/test/test_bfd.py index 0923d36d..be42cdad 100644 --- a/test/test_bfd.py +++ b/test/test_bfd.py @@ -17,6 +17,7 @@ from bfd import VppBFDAuthKey, BFD, BFDAuthType, VppBFDUDPSession, \ BFDDiagCode, BFDState, BFD_vpp_echo from framework import VppTestCase, VppTestRunner, running_extended_tests from vpp_pg_interface import CaptureTimeoutError, is_ipv6_misc +from vpp_lo_interface import VppLoInterface from util import ppp from vpp_papi_provider import UnexpectedApiReturnValueError from vpp_ip_route import VppIpRoute, VppRoutePath @@ -1403,6 +1404,20 @@ class BFD4TestCase(VppTestCase): self.assert_equal(count, 0, "number of packets received") self.assert_equal(len(events), 0, "number of events received") + def test_intf_deleted(self): + """ interface with bfd session deleted """ + intf = VppLoInterface(self, 0) + intf.config_ip4() + intf.admin_up() + sw_if_index = intf.sw_if_index + vpp_session = VppBFDUDPSession(self, intf, intf.remote_ip4) + vpp_session.add_vpp_config() + vpp_session.admin_up() + intf.remove_vpp_config() + e = self.vapi.wait_for_event(1, "bfd_udp_session_details") + self.assert_equal(e.sw_if_index, sw_if_index, "sw_if_index") + self.assertFalse(vpp_session.query_vpp_config()) + @unittest.skipUnless(running_extended_tests(), "part of extended tests") class BFD6TestCase(VppTestCase): @@ -1597,6 +1612,21 @@ class BFD6TestCase(VppTestCase): self.test_session.send_packet() self.assertTrue(echo_seen, "No echo packets received") + def test_intf_deleted(self): + """ interface with bfd session deleted """ + intf = VppLoInterface(self, 0) + intf.config_ip6() + intf.admin_up() + sw_if_index = intf.sw_if_index + vpp_session = VppBFDUDPSession( + self, intf, intf.remote_ip6, af=AF_INET6) + vpp_session.add_vpp_config() + vpp_session.admin_up() + intf.remove_vpp_config() + e = self.vapi.wait_for_event(1, "bfd_udp_session_details") + self.assert_equal(e.sw_if_index, sw_if_index, "sw_if_index") + self.assertFalse(vpp_session.query_vpp_config()) + 
@unittest.skipUnless(running_extended_tests(), "part of extended tests") class BFDFIBTestCase(VppTestCase): -- cgit 1.2.3-korg From 0f68c79a9d8533d492c8221f924b14cf4e222136 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Wed, 26 Apr 2017 13:05:05 +0200 Subject: Add crc32c inline function, allows compilation on 32-bit systems 32-bit code still can use crc32c instructions, but it operates on 32 registers Change-Id: I9bb6b0b59635d6ea6a753584676ebcf59c8f6584 Signed-off-by: Damjan Marion --- src/plugins/acl/bihash_40_8.h | 9 ++----- src/plugins/lb/lbhash.h | 2 +- src/vnet/bfd/bfd_main.c | 2 +- src/vppinfra.am | 1 + src/vppinfra/bihash_24_8.h | 7 ++---- src/vppinfra/bihash_8_8.h | 7 ++---- src/vppinfra/crc32.h | 58 +++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 67 insertions(+), 19 deletions(-) create mode 100644 src/vppinfra/crc32.h (limited to 'src/vnet/bfd') diff --git a/src/plugins/acl/bihash_40_8.h b/src/plugins/acl/bihash_40_8.h index ba3dfbea..1dfb6a1e 100644 --- a/src/plugins/acl/bihash_40_8.h +++ b/src/plugins/acl/bihash_40_8.h @@ -24,6 +24,7 @@ #include #include #include +#include typedef struct { @@ -44,13 +45,7 @@ static inline u64 clib_bihash_hash_40_8 (const clib_bihash_kv_40_8_t * v) { #if __SSE4_2__ - u32 value = 0; - value = _mm_crc32_u64 (value, v->key[0]); - value = _mm_crc32_u64 (value, v->key[1]); - value = _mm_crc32_u64 (value, v->key[2]); - value = _mm_crc32_u64 (value, v->key[3]); - value = _mm_crc32_u64 (value, v->key[4]); - return value; + return clib_crc32c ((u8 *) v->key, 40); #else u64 tmp = v->key[0] ^ v->key[1] ^ v->key[2] ^ v->key[3] ^ v->key[4]; return clib_xxhash (tmp); diff --git a/src/plugins/lb/lbhash.h b/src/plugins/lb/lbhash.h index ca3cc143..c514fb57 100644 --- a/src/plugins/lb/lbhash.h +++ b/src/plugins/lb/lbhash.h @@ -101,7 +101,7 @@ void lb_hash_free(lb_hash_t *h) vec_free(mem); } -#if __SSE4_2__ +#if __SSE4_2__ && !defined (__i386__) static_always_inline u32 lb_hash_hash(u64 k0, u64 k1, u64 k2, u64 k3, u64 k4) 
{ diff --git a/src/vnet/bfd/bfd_main.c b/src/vnet/bfd/bfd_main.c index 66b31ce5..b58a5132 100644 --- a/src/vnet/bfd/bfd_main.c +++ b/src/vnet/bfd/bfd_main.c @@ -39,7 +39,7 @@ static u64 bfd_calc_echo_checksum (u32 discriminator, u64 expire_time, u32 secret) { u64 checksum = 0; -#if __SSE4_2__ +#if __SSE4_2__ && !defined (__i386__) checksum = _mm_crc32_u64 (0, discriminator); checksum = _mm_crc32_u64 (checksum, expire_time); checksum = _mm_crc32_u64 (checksum, secret); diff --git a/src/vppinfra.am b/src/vppinfra.am index fed1981e..3939d3ce 100644 --- a/src/vppinfra.am +++ b/src/vppinfra.am @@ -168,6 +168,7 @@ nobase_include_HEADERS = \ vppinfra/cache.h \ vppinfra/clib.h \ vppinfra/cpu.h \ + vppinfra/crc32.h \ vppinfra/dlist.h \ vppinfra/elf.h \ vppinfra/elf_clib.h \ diff --git a/src/vppinfra/bihash_24_8.h b/src/vppinfra/bihash_24_8.h index 353f06bf..655dab80 100644 --- a/src/vppinfra/bihash_24_8.h +++ b/src/vppinfra/bihash_24_8.h @@ -24,6 +24,7 @@ #include #include #include +#include typedef struct { @@ -44,11 +45,7 @@ static inline u64 clib_bihash_hash_24_8 (const clib_bihash_kv_24_8_t * v) { #if __SSE4_2__ - u32 value = 0; - value = _mm_crc32_u64 (value, v->key[0]); - value = _mm_crc32_u64 (value, v->key[1]); - value = _mm_crc32_u64 (value, v->key[2]); - return value; + return clib_crc32c ((u8 *) v->key, 24); #else u64 tmp = v->key[0] ^ v->key[1] ^ v->key[2]; return clib_xxhash (tmp); diff --git a/src/vppinfra/bihash_8_8.h b/src/vppinfra/bihash_8_8.h index d70da596..b5c17461 100644 --- a/src/vppinfra/bihash_8_8.h +++ b/src/vppinfra/bihash_8_8.h @@ -24,10 +24,7 @@ #include #include #include - -#if __SSE4_2__ -#include -#endif +#include /** 8 octet key, 8 octet key value pair */ typedef struct @@ -55,7 +52,7 @@ clib_bihash_hash_8_8 (clib_bihash_kv_8_8_t * v) { /* Note: to torture-test linear scan, make this fn return a constant */ #if __SSE4_2__ - return _mm_crc32_u64 (0, v->key); + return clib_crc32c ((u8 *) & v->key, 8); #else return clib_xxhash (v->key); #endif 
diff --git a/src/vppinfra/crc32.h b/src/vppinfra/crc32.h new file mode 100644 index 00000000..abb2953f --- /dev/null +++ b/src/vppinfra/crc32.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __included_crc32_h__ +#define __included_crc32_h__ + +#if __SSE4_2__ +#include + +static_always_inline u32 +clib_crc32c (u8 * s, int len) +{ + u32 v = 0; + +#if __x86_64__ + for (; len >= 8; len -= 8, s += 8) + v = _mm_crc32_u64 (v, *((u64 *) s)); +#else + /* workaround weird GCC bug when using _mm_crc32_u32 + which happens with -O2 optimization */ + volatile ("":::"memory"); +#endif + + for (; len >= 4; len -= 4, s += 4) + v = _mm_crc32_u32 (v, *((u32 *) s)); + + for (; len >= 2; len -= 2, s += 2) + v = _mm_crc32_u16 (v, *((u16 *) s)); + + for (; len >= 1; len -= 1, s += 1) + v = _mm_crc32_u8 (v, *((u16 *) s)); + + return v; +} + +#endif + +#endif /* __included_crc32_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ -- cgit 1.2.3-korg From 072401e8096c648b91f958bd911f64ce24fecff9 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Thu, 13 Jul 2017 18:53:27 +0200 Subject: Introduce l{2,3,4}_hdr_offset fields in the buffer metadata To save space in the first cacheline following is changed: - total_length_not_including_first_buffer moved to the 2nd cacheline. 
This field is used only when VLIB_BUFFER_TOTAL_LENGTH_VALID and VLIB_BUFFER_NEXT_PRESENT are both set. - free_list_index is now stored in 4bits inside flags, which allows up to 16 free lists. In case we need more we can store index in the 2nd cachelin Change-Id: Ic8521350819391af470d31d3fa1013e67ecb7681 Signed-off-by: Damjan Marion --- src/plugins/dpdk/device/node.c | 8 ++++++- src/vlib/buffer.c | 16 ++++++++----- src/vlib/buffer.h | 40 +++++++++++++++++--------------- src/vlib/buffer_funcs.h | 50 +++++++++++++++++++++++++++++----------- src/vnet/bfd/bfd_udp.c | 4 ++-- src/vnet/buffer.h | 14 +++-------- src/vnet/dhcp/dhcp4_proxy_node.c | 2 +- src/vnet/dhcp/dhcp6_proxy_node.c | 2 +- src/vnet/ethernet/ethernet.h | 3 +-- src/vnet/ethernet/node.c | 23 ++++++++---------- src/vnet/ip/ip4_forward.c | 6 ++--- src/vnet/ip/ip6_forward.c | 6 ++--- src/vnet/ip/ip6_neighbor.c | 19 +++++++-------- src/vnet/l2/l2_bvi.h | 2 +- src/vnet/lisp-cp/control.c | 2 +- src/vnet/replication.c | 6 ++--- 16 files changed, 111 insertions(+), 92 deletions(-) (limited to 'src/vnet/bfd') diff --git a/src/plugins/dpdk/device/node.c b/src/plugins/dpdk/device/node.c index 69acc529..74fb8da1 100644 --- a/src/plugins/dpdk/device/node.c +++ b/src/plugins/dpdk/device/node.c @@ -208,7 +208,13 @@ dpdk_process_subseq_segs (vlib_main_t * vm, vlib_buffer_t * b, mb_seg = mb->next; b_chain = b; - while ((mb->nb_segs > 1) && (nb_seg < mb->nb_segs)) + if (mb->nb_segs < 2) + return; + + b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID; + b->total_length_not_including_first_buffer = 0; + + while (nb_seg < mb->nb_segs) { ASSERT (mb_seg != 0); diff --git a/src/vlib/buffer.c b/src/vlib/buffer.c index b2a095cf..53b60c16 100644 --- a/src/vlib/buffer.c +++ b/src/vlib/buffer.c @@ -72,8 +72,8 @@ format_vlib_buffer (u8 * s, va_list * args) uword indent = format_get_indent (s); s = format (s, "current data %d, length %d, free-list %d, clone-count %u", - b->current_data, b->current_length, b->free_list_index, - b->n_add_refs); + 
b->current_data, b->current_length, + vlib_buffer_get_free_list_index (b), b->n_add_refs); if (b->flags & VLIB_BUFFER_TOTAL_LENGTH_VALID) s = format (s, ", totlen-nifb %d", @@ -163,10 +163,14 @@ vlib_validate_buffer_helper (vlib_main_t * vm, vlib_buffer_main_t *bm = vm->buffer_main; vlib_buffer_free_list_t *fl; - if (pool_is_free_index (bm->buffer_free_list_pool, b->free_list_index)) - return format (0, "unknown free list 0x%x", b->free_list_index); + if (pool_is_free_index + (bm->buffer_free_list_pool, vlib_buffer_get_free_list_index (b))) + return format (0, "unknown free list 0x%x", + vlib_buffer_get_free_list_index (b)); - fl = pool_elt_at_index (bm->buffer_free_list_pool, b->free_list_index); + fl = + pool_elt_at_index (bm->buffer_free_list_pool, + vlib_buffer_get_free_list_index (b)); if ((signed) b->current_data < (signed) -VLIB_BUFFER_PRE_DATA_SIZE) return format (0, "current data %d before pre-data", b->current_data); @@ -388,7 +392,7 @@ vlib_buffer_create_free_list_helper (vlib_main_t * vm, f->name = clib_mem_is_vec (name) ? name : format (0, "%s", name); /* Setup free buffer template. */ - f->buffer_init_template.free_list_index = f->index; + vlib_buffer_set_free_list_index (&f->buffer_init_template, f->index); f->buffer_init_template.n_add_refs = 0; if (is_public) diff --git a/src/vlib/buffer.h b/src/vlib/buffer.h index b20538b7..c810db4e 100644 --- a/src/vlib/buffer.h +++ b/src/vlib/buffer.h @@ -72,6 +72,7 @@ typedef struct the end of this buffer. */ u32 flags; /**< buffer flags: +
VLIB_BUFFER_FREE_LIST_INDEX_MASK: bits used to store free list index,
VLIB_BUFFER_IS_TRACED: trace this buffer.
VLIB_BUFFER_NEXT_PRESENT: this is a multi-chunk buffer.
VLIB_BUFFER_TOTAL_LENGTH_VALID: as it says @@ -82,28 +83,26 @@ typedef struct set to avoid adding it to a flow report
VLIB_BUFFER_FLAG_USER(n): user-defined bit N */ -#define VLIB_BUFFER_IS_TRACED (1 << 0) -#define VLIB_BUFFER_LOG2_NEXT_PRESENT (1) + +/* any change to the following line requres update of + * vlib_buffer_get_free_list_index(...) and + * vlib_buffer_set_free_list_index(...) functions */ +#define VLIB_BUFFER_FREE_LIST_INDEX_MASK ((1 << 4) - 1) + +#define VLIB_BUFFER_IS_TRACED (1 << 4) +#define VLIB_BUFFER_LOG2_NEXT_PRESENT (5) #define VLIB_BUFFER_NEXT_PRESENT (1 << VLIB_BUFFER_LOG2_NEXT_PRESENT) -#define VLIB_BUFFER_IS_RECYCLED (1 << 2) -#define VLIB_BUFFER_TOTAL_LENGTH_VALID (1 << 3) -#define VLIB_BUFFER_REPL_FAIL (1 << 4) -#define VLIB_BUFFER_RECYCLE (1 << 5) -#define VLIB_BUFFER_FLOW_REPORT (1 << 6) -#define VLIB_BUFFER_EXT_HDR_VALID (1 << 7) +#define VLIB_BUFFER_IS_RECYCLED (1 << 6) +#define VLIB_BUFFER_TOTAL_LENGTH_VALID (1 << 7) +#define VLIB_BUFFER_REPL_FAIL (1 << 8) +#define VLIB_BUFFER_RECYCLE (1 << 9) +#define VLIB_BUFFER_FLOW_REPORT (1 << 10) +#define VLIB_BUFFER_EXT_HDR_VALID (1 << 11) /* User defined buffer flags. */ #define LOG2_VLIB_BUFFER_FLAG_USER(n) (32 - (n)) #define VLIB_BUFFER_FLAG_USER(n) (1 << LOG2_VLIB_BUFFER_FLAG_USER(n)) - u32 free_list_index; /**< Buffer free list that this buffer was - allocated from and will be freed to. - */ - - u32 total_length_not_including_first_buffer; - /**< Only valid for first buffer in chain. Current length plus - total length given here give total number of bytes in buffer chain. - */ STRUCT_MARK (template_end); u32 next_buffer; /**< Next buffer for this linked-list of buffers. @@ -128,7 +127,7 @@ typedef struct Before allocating any of it, discussion required! */ - u32 opaque[8]; /**< Opaque data used by sub-graphs for their own purposes. + u32 opaque[10]; /**< Opaque data used by sub-graphs for their own purposes. See .../vnet/vnet/buffer.h */ CLIB_CACHE_LINE_ALIGN_MARK (cacheline1); @@ -137,7 +136,12 @@ typedef struct if VLIB_PACKET_IS_TRACED flag is set. 
*/ u32 recycle_count; /**< Used by L2 path recycle code */ - u32 opaque2[14]; /**< More opaque data, currently unused */ + + u32 total_length_not_including_first_buffer; + /**< Only valid for first buffer in chain. Current length plus + total length given here give total number of bytes in buffer chain. + */ + u32 opaque2[13]; /**< More opaque data, currently unused */ /***** end of second cache line */ CLIB_CACHE_LINE_ALIGN_MARK (cacheline2); diff --git a/src/vlib/buffer_funcs.h b/src/vlib/buffer_funcs.h index 97442e12..1aaac0b2 100644 --- a/src/vlib/buffer_funcs.h +++ b/src/vlib/buffer_funcs.h @@ -106,12 +106,15 @@ uword vlib_buffer_length_in_chain_slow_path (vlib_main_t * vm, always_inline uword vlib_buffer_length_in_chain (vlib_main_t * vm, vlib_buffer_t * b) { - uword l = b->current_length + b->total_length_not_including_first_buffer; - if (PREDICT_FALSE ((b->flags & (VLIB_BUFFER_NEXT_PRESENT - | VLIB_BUFFER_TOTAL_LENGTH_VALID)) - == VLIB_BUFFER_NEXT_PRESENT)) - return vlib_buffer_length_in_chain_slow_path (vm, b); - return l; + uword len = b->current_length; + + if (PREDICT_TRUE ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0)) + return len; + + if (PREDICT_TRUE (b->flags & VLIB_BUFFER_TOTAL_LENGTH_VALID)) + return len + b->total_length_not_including_first_buffer; + + return vlib_buffer_length_in_chain_slow_path (vm, b); } /** \brief Get length in bytes of the buffer index buffer chain @@ -261,6 +264,24 @@ vlib_buffer_round_size (u32 size) return round_pow2 (size, sizeof (vlib_buffer_t)); } +always_inline u32 +vlib_buffer_get_free_list_index (vlib_buffer_t * b) +{ + return b->flags & VLIB_BUFFER_FREE_LIST_INDEX_MASK; +} + +always_inline void +vlib_buffer_set_free_list_index (vlib_buffer_t * b, u32 index) +{ + /* if there is an need for more free lists we should consider + storig data in the 2nd cacheline */ + ASSERT (VLIB_BUFFER_FREE_LIST_INDEX_MASK & 1); + ASSERT (index <= VLIB_BUFFER_FREE_LIST_INDEX_MASK); + + b->flags &= ~VLIB_BUFFER_FREE_LIST_INDEX_MASK; + 
b->flags |= index & VLIB_BUFFER_FREE_LIST_INDEX_MASK; +} + /** \brief Allocate buffers from specific freelist into supplied array @param vm - (vlib_main_t *) vlib main data structure pointer @@ -381,7 +402,7 @@ vlib_buffer_get_buffer_free_list (vlib_main_t * vm, vlib_buffer_t * b, vlib_buffer_main_t *bm = vm->buffer_main; u32 i; - *index = i = b->free_list_index; + *index = i = vlib_buffer_get_free_list_index (b); return pool_elt_at_index (bm->buffer_free_list_pool, i); } @@ -569,7 +590,8 @@ vlib_buffer_clone (vlib_main_t * vm, u32 src_buffer, u32 * buffers, } n_buffers = vlib_buffer_alloc_from_free_list (vm, buffers, n_buffers, - s->free_list_index); + vlib_buffer_get_free_list_index + (s)); if (PREDICT_FALSE (n_buffers == 0)) { buffers[0] = src_buffer; @@ -581,7 +603,8 @@ vlib_buffer_clone (vlib_main_t * vm, u32 src_buffer, u32 * buffers, vlib_buffer_t *d = vlib_get_buffer (vm, buffers[i]); d->current_data = s->current_data; d->current_length = head_end_offset; - d->free_list_index = s->free_list_index; + vlib_buffer_set_free_list_index (d, + vlib_buffer_get_free_list_index (s)); d->total_length_not_including_first_buffer = s->total_length_not_including_first_buffer + s->current_length - head_end_offset; @@ -615,7 +638,8 @@ vlib_buffer_attach_clone (vlib_main_t * vm, vlib_buffer_t * head, vlib_buffer_t * tail) { ASSERT ((head->flags & VLIB_BUFFER_NEXT_PRESENT) == 0); - ASSERT (head->free_list_index == tail->free_list_index); + ASSERT (vlib_buffer_get_free_list_index (head) == + vlib_buffer_get_free_list_index (tail)); head->flags |= VLIB_BUFFER_NEXT_PRESENT; head->flags &= ~VLIB_BUFFER_TOTAL_LENGTH_VALID; @@ -791,7 +815,7 @@ vlib_buffer_init_for_free_list (vlib_buffer_t * dst, CLIB_CACHE_LINE_BYTES * 2); /* Make sure buffer template is sane. 
*/ - ASSERT (fl->index == fl->buffer_init_template.free_list_index); + ASSERT (fl->index == vlib_buffer_get_free_list_index (src)); clib_memcpy (STRUCT_MARK_PTR (dst, template_start), STRUCT_MARK_PTR (src, template_start), @@ -806,7 +830,6 @@ vlib_buffer_init_for_free_list (vlib_buffer_t * dst, _(current_data); _(current_length); _(flags); - _(free_list_index); #undef _ ASSERT (dst->total_length_not_including_first_buffer == 0); ASSERT (dst->n_add_refs == 0); @@ -832,7 +855,7 @@ vlib_buffer_init_two_for_free_list (vlib_buffer_t * dst0, vlib_buffer_t *src = &fl->buffer_init_template; /* Make sure buffer template is sane. */ - ASSERT (fl->index == fl->buffer_init_template.free_list_index); + ASSERT (fl->index == vlib_buffer_get_free_list_index (src)); clib_memcpy (STRUCT_MARK_PTR (dst0, template_start), STRUCT_MARK_PTR (src, template_start), @@ -853,7 +876,6 @@ vlib_buffer_init_two_for_free_list (vlib_buffer_t * dst0, _(current_data); _(current_length); _(flags); - _(free_list_index); #undef _ ASSERT (dst0->total_length_not_including_first_buffer == 0); diff --git a/src/vnet/bfd/bfd_udp.c b/src/vnet/bfd/bfd_udp.c index 346c5495..06b843c6 100644 --- a/src/vnet/bfd/bfd_udp.c +++ b/src/vnet/bfd/bfd_udp.c @@ -843,7 +843,7 @@ bfd_udp4_find_headers (vlib_buffer_t * b, ip4_header_t ** ip4, udp_header_t ** udp) { /* sanity check first */ - const i32 start = vnet_buffer (b)->ip.start_of_ip_header; + const i32 start = vnet_buffer (b)->l3_hdr_offset; if (start < 0 && start < sizeof (b->pre_data)) { BFD_ERR ("Start of ip header is before pre_data, ignoring"); @@ -1000,7 +1000,7 @@ bfd_udp6_find_headers (vlib_buffer_t * b, ip6_header_t ** ip6, udp_header_t ** udp) { /* sanity check first */ - const i32 start = vnet_buffer (b)->ip.start_of_ip_header; + const i32 start = vnet_buffer (b)->l3_hdr_offset; if (start < 0 && start < sizeof (b->pre_data)) { BFD_ERR ("Start of ip header is before pre_data, ignoring"); diff --git a/src/vnet/buffer.h b/src/vnet/buffer.h index 
9aba34da..8647db00 100644 --- a/src/vnet/buffer.h +++ b/src/vnet/buffer.h @@ -71,7 +71,6 @@ #define VNET_BUFFER_SPAN_CLONE (1 << LOG2_VNET_BUFFER_SPAN_CLONE) #define foreach_buffer_opaque_union_subtype \ -_(ethernet) \ _(ip) \ _(swt) \ _(l2) \ @@ -100,16 +99,12 @@ _(tcp) typedef struct { u32 sw_if_index[VLIB_N_RX_TX]; + i16 l2_hdr_offset; + i16 l3_hdr_offset; + i16 l4_hdr_offset; union { - /* Ethernet. */ - struct - { - /* Saved value of current header by ethernet-input. */ - i32 start_of_ethernet_header; - } ethernet; - /* IP4/6 buffer opaque. */ struct { @@ -143,9 +138,6 @@ typedef struct u8 code; u32 data; } icmp; - - /* IP header offset from vlib_buffer.data - saved by ip*_local nodes */ - i32 start_of_ip_header; }; } ip; diff --git a/src/vnet/dhcp/dhcp4_proxy_node.c b/src/vnet/dhcp/dhcp4_proxy_node.c index 26e1e65c..1b59cdea 100644 --- a/src/vnet/dhcp/dhcp4_proxy_node.c +++ b/src/vnet/dhcp/dhcp4_proxy_node.c @@ -231,7 +231,7 @@ dhcp_proxy_to_server_input (vlib_main_t * vm, o = (dhcp_option_t *) (((uword) o) + (o->length + 2)); } - fl = vlib_buffer_get_free_list (vm, b0->free_list_index); + fl = vlib_buffer_get_free_list (vm, vlib_buffer_get_free_list_index (b0)); // start write at (option*)o, some packets have padding if (((u8 *)o - (u8 *)b0->data + VPP_DHCP_OPTION82_SIZE) > fl->n_data_bytes) { diff --git a/src/vnet/dhcp/dhcp6_proxy_node.c b/src/vnet/dhcp/dhcp6_proxy_node.c index 885313a5..e109cc4c 100644 --- a/src/vnet/dhcp/dhcp6_proxy_node.c +++ b/src/vnet/dhcp/dhcp6_proxy_node.c @@ -306,7 +306,7 @@ dhcpv6_proxy_to_server_input (vlib_main_t * vm, copy_ip6_address(&r1->link_addr, ia0); link_address_set: - fl = vlib_buffer_get_free_list (vm, b0->free_list_index); + fl = vlib_buffer_get_free_list (vm, vlib_buffer_get_free_list_index (b0)); if ((b0->current_length+sizeof(*id1)+sizeof(*vss1)+sizeof(*cmac)) > fl->n_data_bytes) diff --git a/src/vnet/ethernet/ethernet.h b/src/vnet/ethernet/ethernet.h index dcc656a7..2fc5b804 100644 --- a/src/vnet/ethernet/ethernet.h 
+++ b/src/vnet/ethernet/ethernet.h @@ -344,8 +344,7 @@ ethernet_setup_node (vlib_main_t * vm, u32 node_index) always_inline ethernet_header_t * ethernet_buffer_get_header (vlib_buffer_t * b) { - return (void *) - (b->data + vnet_buffer (b)->ethernet.start_of_ethernet_header); + return (void *) (b->data + vnet_buffer (b)->l2_hdr_offset); } /** Returns the number of VLAN headers in the current Ethernet frame in the diff --git a/src/vnet/ethernet/node.c b/src/vnet/ethernet/node.c index d9fdff48..421d501a 100755 --- a/src/vnet/ethernet/node.c +++ b/src/vnet/ethernet/node.c @@ -101,7 +101,7 @@ parse_header (ethernet_input_variant_t variant, e0 = (void *) (b0->data + b0->current_data); - vnet_buffer (b0)->ethernet.start_of_ethernet_header = b0->current_data; + vnet_buffer (b0)->l2_hdr_offset = b0->current_data; vlib_buffer_advance (b0, sizeof (e0[0])); @@ -205,9 +205,7 @@ identify_subint (vnet_hw_interface_t * hi, if (!(*is_l2)) { ethernet_header_t *e0; - e0 = - (void *) (b0->data + - vnet_buffer (b0)->ethernet.start_of_ethernet_header); + e0 = (void *) (b0->data + vnet_buffer (b0)->l2_hdr_offset); if (!(ethernet_address_cast (e0->dst_address))) { @@ -238,7 +236,7 @@ determine_next_node (ethernet_main_t * em, { *next0 = em->l2_next; // record the L2 len and reset the buffer so the L2 header is preserved - u32 eth_start = vnet_buffer (b0)->ethernet.start_of_ethernet_header; + u32 eth_start = vnet_buffer (b0)->l2_hdr_offset; vnet_buffer (b0)->l2.l2_len = b0->current_data - eth_start; ASSERT (vnet_buffer (b0)->l2.l2_len == ethernet_buffer_header_size (b0)); @@ -424,10 +422,8 @@ ethernet_input_inline (vlib_main_t * vm, cached_is_l2 = is_l20 = subint0->flags & SUBINT_CONFIG_L2; } - vnet_buffer (b0)->ethernet.start_of_ethernet_header = - b0->current_data; - vnet_buffer (b1)->ethernet.start_of_ethernet_header = - b1->current_data; + vnet_buffer (b0)->l2_hdr_offset = b0->current_data; + vnet_buffer (b1)->l2_hdr_offset = b1->current_data; if (PREDICT_TRUE (is_l20 != 0)) { @@ 
-519,9 +515,9 @@ ethernet_input_inline (vlib_main_t * vm, { len0 = vlib_buffer_length_in_chain (vm, b0) + b0->current_data - - vnet_buffer (b0)->ethernet.start_of_ethernet_header; + - vnet_buffer (b0)->l2_hdr_offset; len1 = vlib_buffer_length_in_chain (vm, b1) + b1->current_data - - vnet_buffer (b1)->ethernet.start_of_ethernet_header; + - vnet_buffer (b1)->l2_hdr_offset; stats_n_packets += 2; stats_n_bytes += len0 + len1; @@ -646,8 +642,7 @@ ethernet_input_inline (vlib_main_t * vm, cached_is_l2 = is_l20 = subint0->flags & SUBINT_CONFIG_L2; } - vnet_buffer (b0)->ethernet.start_of_ethernet_header = - b0->current_data; + vnet_buffer (b0)->l2_hdr_offset = b0->current_data; if (PREDICT_TRUE (is_l20 != 0)) { @@ -710,7 +705,7 @@ ethernet_input_inline (vlib_main_t * vm, { len0 = vlib_buffer_length_in_chain (vm, b0) + b0->current_data - - vnet_buffer (b0)->ethernet.start_of_ethernet_header; + - vnet_buffer (b0)->l2_hdr_offset; stats_n_packets += 1; stats_n_bytes += len0; diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index 8263e01c..b8dfa847 100755 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -1585,8 +1585,8 @@ ip4_local_inline (vlib_main_t * vm, ip0 = vlib_buffer_get_current (p0); ip1 = vlib_buffer_get_current (p1); - vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data; - vnet_buffer (p1)->ip.start_of_ip_header = p1->current_data; + vnet_buffer (p0)->l3_hdr_offset = p0->current_data; + vnet_buffer (p1)->l3_hdr_offset = p1->current_data; sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX]; @@ -1788,7 +1788,7 @@ ip4_local_inline (vlib_main_t * vm, ip0 = vlib_buffer_get_current (p0); - vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data; + vnet_buffer (p0)->l3_hdr_offset = p0->current_data; sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c index 4b574b9a..2b8c2bd2 100644 --- 
a/src/vnet/ip/ip6_forward.c +++ b/src/vnet/ip/ip6_forward.c @@ -1362,8 +1362,8 @@ ip6_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) ip0 = vlib_buffer_get_current (p0); ip1 = vlib_buffer_get_current (p1); - vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data; - vnet_buffer (p1)->ip.start_of_ip_header = p1->current_data; + vnet_buffer (p0)->l3_hdr_offset = p0->current_data; + vnet_buffer (p1)->l3_hdr_offset = p1->current_data; type0 = lm->builtin_protocol_by_ip_protocol[ip0->protocol]; type1 = lm->builtin_protocol_by_ip_protocol[ip1->protocol]; @@ -1493,7 +1493,7 @@ ip6_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) ip0 = vlib_buffer_get_current (p0); - vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data; + vnet_buffer (p0)->l3_hdr_offset = p0->current_data; type0 = lm->builtin_protocol_by_ip_protocol[ip0->protocol]; next0 = lm->local_next_by_ip_protocol[ip0->protocol]; diff --git a/src/vnet/ip/ip6_neighbor.c b/src/vnet/ip/ip6_neighbor.c index b8f6f9b1..68a8cbbc 100644 --- a/src/vnet/ip/ip6_neighbor.c +++ b/src/vnet/ip/ip6_neighbor.c @@ -1479,9 +1479,8 @@ icmp6_router_solicitation (vlib_main_t * vm, sizeof (icmp6_router_advertisement_header_t); vlib_buffer_add_data (vm, - p0->free_list_index, - bi0, - (void *) &rh, + vlib_buffer_get_free_list_index + (p0), bi0, (void *) &rh, sizeof (icmp6_router_advertisement_header_t)); @@ -1499,9 +1498,8 @@ icmp6_router_solicitation (vlib_main_t * vm, eth_if0->address, 6); vlib_buffer_add_data (vm, - p0->free_list_index, - bi0, - (void *) &h, + vlib_buffer_get_free_list_index + (p0), bi0, (void *) &h, sizeof (icmp6_neighbor_discovery_ethernet_link_layer_address_option_t)); @@ -1525,9 +1523,8 @@ icmp6_router_solicitation (vlib_main_t * vm, sizeof (icmp6_neighbor_discovery_mtu_option_t); vlib_buffer_add_data (vm, - p0->free_list_index, - bi0, - (void *) &h, + vlib_buffer_get_free_list_index + (p0), bi0, (void *) &h, sizeof (icmp6_neighbor_discovery_mtu_option_t)); 
} @@ -1579,7 +1576,7 @@ icmp6_router_solicitation (vlib_main_t * vm, payload_length += sizeof( icmp6_neighbor_discovery_prefix_information_option_t); vlib_buffer_add_data (vm, - p0->free_list_index, + vlib_buffer_get_free_list_index (p0), bi0, (void *)&h, sizeof(icmp6_neighbor_discovery_prefix_information_option_t)); @@ -2326,7 +2323,7 @@ ip6_neighbor_send_mldpv2_report (u32 sw_if_index) num_addr_records++; vlib_buffer_add_data - (vm, b0->free_list_index, bo0, + (vm, vlib_buffer_get_free_list_index (b0), bo0, (void *)&rr, sizeof(icmp6_multicast_address_record_t)); payload_length += sizeof( icmp6_multicast_address_record_t); diff --git a/src/vnet/l2/l2_bvi.h b/src/vnet/l2/l2_bvi.h index e21a1616..662ec402 100644 --- a/src/vnet/l2/l2_bvi.h +++ b/src/vnet/l2/l2_bvi.h @@ -57,7 +57,7 @@ l2_to_bvi (vlib_main_t * vlib_main, } /* Save L2 header position which may be changed due to packet replication */ - vnet_buffer (b0)->ethernet.start_of_ethernet_header = b0->current_data; + vnet_buffer (b0)->l2_hdr_offset = b0->current_data; /* Strip L2 header */ l2_len = vnet_buffer (b0)->l2.l2_len; diff --git a/src/vnet/lisp-cp/control.c b/src/vnet/lisp-cp/control.c index 22b5c82c..d8a1372d 100644 --- a/src/vnet/lisp-cp/control.c +++ b/src/vnet/lisp-cp/control.c @@ -3706,7 +3706,7 @@ send_map_reply (lisp_cp_main_t * lcm, u32 mi, ip_address_t * dst, static void find_ip_header (vlib_buffer_t * b, u8 ** ip_hdr) { - const i32 start = vnet_buffer (b)->ip.start_of_ip_header; + const i32 start = vnet_buffer (b)->l3_hdr_offset; if (start < 0 && start < -sizeof (b->pre_data)) { *ip_hdr = 0; diff --git a/src/vnet/replication.c b/src/vnet/replication.c index 1c6f28d2..0fdca0bf 100644 --- a/src/vnet/replication.c +++ b/src/vnet/replication.c @@ -43,12 +43,12 @@ replication_prep (vlib_main_t * vm, ctx_id = ctx - rm->contexts[thread_index]; /* Save state from vlib buffer */ - ctx->saved_free_list_index = b0->free_list_index; + ctx->saved_free_list_index = vlib_buffer_get_free_list_index (b0); 
ctx->current_data = b0->current_data; /* Set up vlib buffer hooks */ b0->recycle_count = ctx_id; - b0->free_list_index = rm->recycle_list_index; + vlib_buffer_set_free_list_index (b0, rm->recycle_list_index); b0->flags |= VLIB_BUFFER_RECYCLE; /* Save feature state */ @@ -129,7 +129,7 @@ replication_recycle (vlib_main_t * vm, vlib_buffer_t * b0, u32 is_last) * This is the last replication in the list. * Restore original buffer free functionality. */ - b0->free_list_index = ctx->saved_free_list_index; + vlib_buffer_set_free_list_index (b0, ctx->saved_free_list_index); b0->flags &= ~VLIB_BUFFER_RECYCLE; /* Free context back to its pool */ -- cgit 1.2.3-korg From 213b5aae860c2a9d5de8d4d070d0d2091af699f5 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Thu, 13 Jul 2017 21:19:27 +0200 Subject: vnet_buffer_t flags cleanup Change-Id: I123eccea98abafeb31f25d2a162501e2eded60d4 Signed-off-by: Damjan Marion --- src/plugins/dpdk/device/init.c | 7 +++--- src/plugins/gtpu/gtpu_decap.c | 18 ++++++++-------- src/plugins/ixge/ixge.c | 12 +++++------ src/vnet/bfd/bfd_udp.c | 4 ++-- src/vnet/buffer.h | 49 +++++++++++++++++++++--------------------- src/vnet/ethernet/ethernet.h | 14 ++++++------ src/vnet/handoff.c | 2 +- src/vnet/ip/icmp4.c | 6 +++--- src/vnet/ip/ip4_forward.c | 38 ++++++++++++++++---------------- src/vnet/ip/ip6_forward.c | 44 +++++++++++++++++++++---------------- src/vnet/ip/ip6_neighbor.c | 4 ++-- src/vnet/session/node.c | 2 +- src/vnet/span/node.c | 4 ++-- src/vnet/tcp/tcp_output.c | 8 +++---- src/vnet/vxlan-gpe/decap.c | 18 ++++++++-------- src/vnet/vxlan/decap.c | 18 ++++++++-------- 16 files changed, 127 insertions(+), 121 deletions(-) (limited to 'src/vnet/bfd') diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c index 2ec1664b..7ca3d358 100755 --- a/src/plugins/dpdk/device/init.c +++ b/src/plugins/dpdk/device/init.c @@ -196,8 +196,8 @@ dpdk_lib_init (dpdk_main_t * dm) "dpdk rx"); if (dm->conf->enable_tcp_udp_checksum) - 
dm->buffer_flags_template &= ~(IP_BUFFER_L4_CHECKSUM_CORRECT - | IP_BUFFER_L4_CHECKSUM_COMPUTED); + dm->buffer_flags_template &= ~(VNET_BUFFER_F_L4_CHECKSUM_CORRECT + | VNET_BUFFER_F_L4_CHECKSUM_COMPUTED); /* vlib_buffer_t template */ vec_validate_aligned (dm->buffer_templates, tm->n_vlib_mains - 1, @@ -1544,7 +1544,8 @@ dpdk_init (vlib_main_t * vm) /* Default vlib_buffer_t flags, DISABLES tcp/udp checksumming... */ dm->buffer_flags_template = (VLIB_BUFFER_TOTAL_LENGTH_VALID | VLIB_BUFFER_EXT_HDR_VALID - | IP_BUFFER_L4_CHECKSUM_COMPUTED | IP_BUFFER_L4_CHECKSUM_CORRECT); + | VNET_BUFFER_F_L4_CHECKSUM_COMPUTED | + VNET_BUFFER_F_L4_CHECKSUM_CORRECT); dm->stat_poll_interval = DPDK_STATS_POLL_INTERVAL; dm->link_state_poll_interval = DPDK_LINK_POLL_INTERVAL; diff --git a/src/plugins/gtpu/gtpu_decap.c b/src/plugins/gtpu/gtpu_decap.c index fc74e7cb..de235889 100644 --- a/src/plugins/gtpu/gtpu_decap.c +++ b/src/plugins/gtpu/gtpu_decap.c @@ -982,7 +982,7 @@ ip_gtpu_bypass_inline (vlib_main_t * vm, } flags0 = b0->flags; - good_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; /* Don't verify UDP checksum for packets with explicit zero checksum. 
*/ good_udp0 |= udp0->checksum == 0; @@ -998,14 +998,14 @@ ip_gtpu_bypass_inline (vlib_main_t * vm, /* Verify UDP checksum */ if (PREDICT_FALSE (!good_udp0)) { - if ((flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED) == 0) + if ((flags0 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0) { if (is_ip4) flags0 = ip4_tcp_udp_validate_checksum (vm, b0); else flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, b0); good_udp0 = - (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; } } @@ -1064,7 +1064,7 @@ ip_gtpu_bypass_inline (vlib_main_t * vm, } flags1 = b1->flags; - good_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + good_udp1 = (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; /* Don't verify UDP checksum for packets with explicit zero checksum. */ good_udp1 |= udp1->checksum == 0; @@ -1080,14 +1080,14 @@ ip_gtpu_bypass_inline (vlib_main_t * vm, /* Verify UDP checksum */ if (PREDICT_FALSE (!good_udp1)) { - if ((flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED) == 0) + if ((flags1 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0) { if (is_ip4) flags1 = ip4_tcp_udp_validate_checksum (vm, b1); else flags1 = ip6_tcp_udp_icmp_validate_checksum (vm, b1); good_udp1 = - (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; } } @@ -1182,7 +1182,7 @@ ip_gtpu_bypass_inline (vlib_main_t * vm, } flags0 = b0->flags; - good_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; /* Don't verify UDP checksum for packets with explicit zero checksum. 
*/ good_udp0 |= udp0->checksum == 0; @@ -1198,14 +1198,14 @@ ip_gtpu_bypass_inline (vlib_main_t * vm, /* Verify UDP checksum */ if (PREDICT_FALSE (!good_udp0)) { - if ((flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED) == 0) + if ((flags0 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0) { if (is_ip4) flags0 = ip4_tcp_udp_validate_checksum (vm, b0); else flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, b0); good_udp0 = - (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; } } diff --git a/src/plugins/ixge/ixge.c b/src/plugins/ixge/ixge.c index 628d6d71..e0150f41 100644 --- a/src/plugins/ixge/ixge.c +++ b/src/plugins/ixge/ixge.c @@ -656,11 +656,11 @@ ixge_rx_next_and_error_from_status_x1 (ixge_device_t * xd, f0 = ((s02 & (IXGE_RX_DESCRIPTOR_STATUS2_IS_TCP_CHECKSUMMED | IXGE_RX_DESCRIPTOR_STATUS2_IS_UDP_CHECKSUMMED)) - ? IP_BUFFER_L4_CHECKSUM_COMPUTED : 0); + ? VNET_BUFFER_F_L4_CHECKSUM_COMPUTED : 0); f0 |= ((s02 & (IXGE_RX_DESCRIPTOR_STATUS2_TCP_CHECKSUM_ERROR | IXGE_RX_DESCRIPTOR_STATUS2_UDP_CHECKSUM_ERROR)) - ? 0 : IP_BUFFER_L4_CHECKSUM_CORRECT); + ? 0 : VNET_BUFFER_F_L4_CHECKSUM_CORRECT); *error0 = e0; *next0 = n0; @@ -715,17 +715,17 @@ ixge_rx_next_and_error_from_status_x2 (ixge_device_t * xd, f0 = ((s02 & (IXGE_RX_DESCRIPTOR_STATUS2_IS_TCP_CHECKSUMMED | IXGE_RX_DESCRIPTOR_STATUS2_IS_UDP_CHECKSUMMED)) - ? IP_BUFFER_L4_CHECKSUM_COMPUTED : 0); + ? VNET_BUFFER_F_L4_CHECKSUM_COMPUTED : 0); f1 = ((s12 & (IXGE_RX_DESCRIPTOR_STATUS2_IS_TCP_CHECKSUMMED | IXGE_RX_DESCRIPTOR_STATUS2_IS_UDP_CHECKSUMMED)) - ? IP_BUFFER_L4_CHECKSUM_COMPUTED : 0); + ? VNET_BUFFER_F_L4_CHECKSUM_COMPUTED : 0); f0 |= ((s02 & (IXGE_RX_DESCRIPTOR_STATUS2_TCP_CHECKSUM_ERROR | IXGE_RX_DESCRIPTOR_STATUS2_UDP_CHECKSUM_ERROR)) - ? 0 : IP_BUFFER_L4_CHECKSUM_CORRECT); + ? 0 : VNET_BUFFER_F_L4_CHECKSUM_CORRECT); f1 |= ((s12 & (IXGE_RX_DESCRIPTOR_STATUS2_TCP_CHECKSUM_ERROR | IXGE_RX_DESCRIPTOR_STATUS2_UDP_CHECKSUM_ERROR)) - ? 0 : IP_BUFFER_L4_CHECKSUM_CORRECT); + ? 
0 : VNET_BUFFER_F_L4_CHECKSUM_CORRECT); *flags0 = f0; *flags1 = f1; diff --git a/src/vnet/bfd/bfd_udp.c b/src/vnet/bfd/bfd_udp.c index 06b843c6..533d98d6 100644 --- a/src/vnet/bfd/bfd_udp.c +++ b/src/vnet/bfd/bfd_udp.c @@ -246,7 +246,7 @@ bfd_add_udp4_transport (vlib_main_t * vm, u32 bi, const bfd_session_t * bs, const bfd_udp_key_t *key = &bus->key; vlib_buffer_t *b = vlib_get_buffer (vm, bi); - b->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; + b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; vnet_buffer (b)->ip.adj_index[VLIB_RX] = bus->adj_index; vnet_buffer (b)->ip.adj_index[VLIB_TX] = bus->adj_index; vnet_buffer (b)->sw_if_index[VLIB_RX] = 0; @@ -301,7 +301,7 @@ bfd_add_udp6_transport (vlib_main_t * vm, u32 bi, const bfd_session_t * bs, const bfd_udp_key_t *key = &bus->key; vlib_buffer_t *b = vlib_get_buffer (vm, bi); - b->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; + b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; vnet_buffer (b)->ip.adj_index[VLIB_RX] = bus->adj_index; vnet_buffer (b)->ip.adj_index[VLIB_TX] = bus->adj_index; vnet_buffer (b)->sw_if_index[VLIB_RX] = 0; diff --git a/src/vnet/buffer.h b/src/vnet/buffer.h index 8647db00..52dada30 100644 --- a/src/vnet/buffer.h +++ b/src/vnet/buffer.h @@ -42,33 +42,32 @@ #include -/* VLIB buffer flags for ip4/ip6 packets. Set by input interfaces for ip4/ip6 - tcp/udp packets with hardware computed checksums. */ -#define LOG2_IP_BUFFER_L4_CHECKSUM_COMPUTED LOG2_VLIB_BUFFER_FLAG_USER(1) -#define LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT LOG2_VLIB_BUFFER_FLAG_USER(2) -#define IP_BUFFER_L4_CHECKSUM_COMPUTED (1 << LOG2_IP_BUFFER_L4_CHECKSUM_COMPUTED) -#define IP_BUFFER_L4_CHECKSUM_CORRECT (1 << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT) - -/* VLAN header flags. - * These bits are zeroed in vlib_buffer_init_for_free_list() - * meaning wherever the buffer comes from they have a reasonable - * value (eg, if ip4/ip6 generates the packet.) 
- */ -#define LOG2_ETH_BUFFER_VLAN_2_DEEP LOG2_VLIB_BUFFER_FLAG_USER(3) -#define LOG2_ETH_BUFFER_VLAN_1_DEEP LOG2_VLIB_BUFFER_FLAG_USER(4) -#define ETH_BUFFER_VLAN_2_DEEP (1 << LOG2_ETH_BUFFER_VLAN_2_DEEP) -#define ETH_BUFFER_VLAN_1_DEEP (1 << LOG2_ETH_BUFFER_VLAN_1_DEEP) -#define ETH_BUFFER_VLAN_BITS (ETH_BUFFER_VLAN_1_DEEP | \ - ETH_BUFFER_VLAN_2_DEEP) - -#define LOG2_BUFFER_HANDOFF_NEXT_VALID LOG2_VLIB_BUFFER_FLAG_USER(6) -#define BUFFER_HANDOFF_NEXT_VALID (1 << LOG2_BUFFER_HANDOFF_NEXT_VALID) +#define foreach_vnet_buffer_field \ + _( 1, L4_CHECKSUM_COMPUTED) \ + _( 2, L4_CHECKSUM_CORRECT) \ + _( 3, VLAN_2_DEEP) \ + _( 4, VLAN_1_DEEP) \ + _( 6, HANDOFF_NEXT_VALID) \ + _( 7, LOCALLY_ORIGINATED) \ + _( 8, SPAN_CLONE) + +#define VNET_BUFFER_FLAGS_VLAN_BITS \ + (VNET_BUFFER_F_VLAN_1_DEEP | VNET_BUFFER_F_VLAN_2_DEEP) + +enum +{ +#define _(bit, name) VNET_BUFFER_F_##name = (1 << LOG2_VLIB_BUFFER_FLAG_USER(bit)), + foreach_vnet_buffer_field +#undef _ +}; -#define LOG2_VNET_BUFFER_LOCALLY_ORIGINATED LOG2_VLIB_BUFFER_FLAG_USER(7) -#define VNET_BUFFER_LOCALLY_ORIGINATED (1 << LOG2_VNET_BUFFER_LOCALLY_ORIGINATED) +enum +{ +#define _(bit, name) VNET_BUFFER_F_LOG2_##name = LOG2_VLIB_BUFFER_FLAG_USER(bit), + foreach_vnet_buffer_field +#undef _ +}; -#define LOG2_VNET_BUFFER_SPAN_CLONE LOG2_VLIB_BUFFER_FLAG_USER(8) -#define VNET_BUFFER_SPAN_CLONE (1 << LOG2_VNET_BUFFER_SPAN_CLONE) #define foreach_buffer_opaque_union_subtype \ _(ip) \ diff --git a/src/vnet/ethernet/ethernet.h b/src/vnet/ethernet/ethernet.h index 2fc5b804..d9ab8c10 100644 --- a/src/vnet/ethernet/ethernet.h +++ b/src/vnet/ethernet/ethernet.h @@ -352,7 +352,7 @@ ethernet_buffer_get_header (vlib_buffer_t * b) * the number of headers is not known. 
*/ #define ethernet_buffer_get_vlan_count(b) ( \ - ((b)->flags & ETH_BUFFER_VLAN_BITS) >> LOG2_ETH_BUFFER_VLAN_1_DEEP \ + ((b)->flags & VNET_BUFFER_FLAGS_VLAN_BITS) >> VNET_BUFFER_F_LOG2_VLAN_1_DEEP \ ) /** Sets the number of VLAN headers in the current Ethernet frame in the @@ -360,8 +360,8 @@ ethernet_buffer_get_header (vlib_buffer_t * b) * the number of headers is not known. */ #define ethernet_buffer_set_vlan_count(b, v) ( \ - (b)->flags = ((b)->flags & ~ETH_BUFFER_VLAN_BITS) | \ - (((v) << LOG2_ETH_BUFFER_VLAN_1_DEEP) & ETH_BUFFER_VLAN_BITS) \ + (b)->flags = ((b)->flags & ~VNET_BUFFER_FLAGS_VLAN_BITS) | \ + (((v) << VNET_BUFFER_F_LOG2_VLAN_1_DEEP) & VNET_BUFFER_FLAGS_VLAN_BITS) \ ) /** Adjusts the vlan count by the delta in 'v' */ @@ -372,10 +372,10 @@ ethernet_buffer_get_header (vlib_buffer_t * b) /** Adjusts the vlan count by the header size byte delta in 'v' */ #define ethernet_buffer_adjust_vlan_count_by_bytes(b, v) ( \ - (b)->flags = ((b)->flags & ~ETH_BUFFER_VLAN_BITS) | (( \ - ((b)->flags & ETH_BUFFER_VLAN_BITS) + \ - ((v) << (LOG2_ETH_BUFFER_VLAN_1_DEEP - 2)) \ - ) & ETH_BUFFER_VLAN_BITS) \ + (b)->flags = ((b)->flags & ~VNET_BUFFER_FLAGS_VLAN_BITS) | (( \ + ((b)->flags & VNET_BUFFER_FLAGS_VLAN_BITS) + \ + ((v) << (VNET_BUFFER_F_LOG2_VLAN_1_DEEP - 2)) \ + ) & VNET_BUFFER_FLAGS_VLAN_BITS) \ ) /** diff --git a/src/vnet/handoff.c b/src/vnet/handoff.c index 9f3c93b4..81cb9f55 100644 --- a/src/vnet/handoff.c +++ b/src/vnet/handoff.c @@ -130,7 +130,7 @@ worker_handoff_node_fn (vlib_main_t * vm, /* if input node did not specify next index, then packet should go to eternet-input */ - if (PREDICT_FALSE ((b0->flags & BUFFER_HANDOFF_NEXT_VALID) == 0)) + if (PREDICT_FALSE ((b0->flags & VNET_BUFFER_F_HANDOFF_NEXT_VALID) == 0)) vnet_buffer (b0)->handoff.next_index = HANDOFF_DISPATCH_NEXT_ETHERNET_INPUT; else if (vnet_buffer (b0)->handoff.next_index == diff --git a/src/vnet/ip/icmp4.c b/src/vnet/ip/icmp4.c index c3afff72..bbeab32b 100644 --- a/src/vnet/ip/icmp4.c +++ 
b/src/vnet/ip/icmp4.c @@ -336,8 +336,8 @@ ip4_icmp_echo_request (vlib_main_t * vm, ASSERT (ip0->checksum == ip4_header_checksum (ip0)); ASSERT (ip1->checksum == ip4_header_checksum (ip1)); - p0->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; - p1->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; + p0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; + p1->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; } while (n_left_from > 0 && n_left_to_next > 0) @@ -392,7 +392,7 @@ ip4_icmp_echo_request (vlib_main_t * vm, ASSERT (ip0->checksum == ip4_header_checksum (ip0)); - p0->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; + p0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; } vlib_put_next_frame (vm, node, next, n_left_to_next); diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index b8dfa847..8dd927d4 100755 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -1509,15 +1509,15 @@ ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0) udp0 = (void *) (ip0 + 1); if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0) { - p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED - | IP_BUFFER_L4_CHECKSUM_CORRECT); + p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED + | VNET_BUFFER_F_L4_CHECKSUM_CORRECT); return p0->flags; } sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0); - p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED - | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT)); + p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED + | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT)); return p0->flags; } @@ -1629,8 +1629,8 @@ ip4_local_inline (vlib_main_t * vm, flags0 = p0->flags; flags1 = p1->flags; - good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; - good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + good_tcp_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; + good_tcp_udp1 = (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; udp0 = ip4_next_header (ip0); udp1 = ip4_next_header (ip1); @@ -1657,19 +1657,19 @@ ip4_local_inline 
(vlib_main_t * vm, if (is_tcp_udp0) { if (is_tcp_udp0 - && !(flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED)) + && !(flags0 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)) flags0 = ip4_tcp_udp_validate_checksum (vm, p0); good_tcp_udp0 = - (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; good_tcp_udp0 |= is_udp0 && udp0->checksum == 0; } if (is_tcp_udp1) { if (is_tcp_udp1 - && !(flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED)) + && !(flags1 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)) flags1 = ip4_tcp_udp_validate_checksum (vm, p1); good_tcp_udp1 = - (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; good_tcp_udp1 |= is_udp1 && udp1->checksum == 0; } } @@ -1817,7 +1817,7 @@ ip4_local_inline (vlib_main_t * vm, flags0 = p0->flags; - good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + good_tcp_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; udp0 = ip4_next_header (ip0); @@ -1837,10 +1837,10 @@ ip4_local_inline (vlib_main_t * vm, if (is_tcp_udp0) { if (is_tcp_udp0 - && !(flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED)) + && !(flags0 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)) flags0 = ip4_tcp_udp_validate_checksum (vm, p0); good_tcp_udp0 = - (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; good_tcp_udp0 |= is_udp0 && udp0->checksum == 0; } } @@ -2428,7 +2428,7 @@ ip4_rewrite_inline (vlib_main_t * vm, /* Decrement TTL & update checksum. Works either endian, so no need for byte swap. 
*/ - if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED))) + if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))) { i32 ttl0 = ip0->ttl; @@ -2461,9 +2461,9 @@ ip4_rewrite_inline (vlib_main_t * vm, } else { - p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED; + p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED; } - if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_LOCALLY_ORIGINATED))) + if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))) { i32 ttl1 = ip1->ttl; @@ -2496,7 +2496,7 @@ ip4_rewrite_inline (vlib_main_t * vm, } else { - p1->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED; + p1->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED; } /* Rewrite packet header and updates lengths. */ @@ -2614,7 +2614,7 @@ ip4_rewrite_inline (vlib_main_t * vm, next0 = IP4_REWRITE_NEXT_DROP; /* drop on error */ /* Decrement TTL & update checksum. */ - if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED))) + if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))) { i32 ttl0 = ip0->ttl; @@ -2648,7 +2648,7 @@ ip4_rewrite_inline (vlib_main_t * vm, } else { - p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED; + p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED; } if (do_counters) diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c index 2b8c2bd2..604e1492 100644 --- a/src/vnet/ip/ip6_forward.c +++ b/src/vnet/ip/ip6_forward.c @@ -1284,15 +1284,15 @@ ip6_tcp_udp_icmp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0) udp0 = (void *) (ip0 + 1); if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0) { - p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED - | IP_BUFFER_L4_CHECKSUM_CORRECT); + p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED + | VNET_BUFFER_F_L4_CHECKSUM_CORRECT); return p0->flags; } sum16 = ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip0, &bogus_length); - p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED - | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT)); + p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED + | ((sum16 == 
0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT)); return p0->flags; } @@ -1374,8 +1374,10 @@ ip6_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) flags0 = p0->flags; flags1 = p1->flags; - good_l4_checksum0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; - good_l4_checksum1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + good_l4_checksum0 = + (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; + good_l4_checksum1 = + (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; len_diff0 = 0; len_diff1 = 0; @@ -1414,19 +1416,21 @@ ip6_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) if (PREDICT_FALSE (type0 != IP_BUILTIN_PROTOCOL_UNKNOWN && !good_l4_checksum0 - && !(flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))) + && !(flags0 & + VNET_BUFFER_F_L4_CHECKSUM_COMPUTED))) { flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, p0); good_l4_checksum0 = - (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; } if (PREDICT_FALSE (type1 != IP_BUILTIN_PROTOCOL_UNKNOWN && !good_l4_checksum1 - && !(flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))) + && !(flags1 & + VNET_BUFFER_F_L4_CHECKSUM_COMPUTED))) { flags1 = ip6_tcp_udp_icmp_validate_checksum (vm, p1); good_l4_checksum1 = - (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; } error0 = error1 = IP6_ERROR_UNKNOWN_PROTOCOL; @@ -1500,7 +1504,8 @@ ip6_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) flags0 = p0->flags; - good_l4_checksum0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + good_l4_checksum0 = + (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; len_diff0 = 0; if (PREDICT_TRUE (IP_PROTOCOL_UDP == ip6_locate_header (p0, ip0, @@ -1522,11 +1527,12 @@ ip6_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) if (PREDICT_FALSE (type0 != IP_BUILTIN_PROTOCOL_UNKNOWN && !good_l4_checksum0 - && !(flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))) + && !(flags0 & + 
VNET_BUFFER_F_L4_CHECKSUM_COMPUTED))) { flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, p0); good_l4_checksum0 = - (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; } error0 = IP6_ERROR_UNKNOWN_PROTOCOL; @@ -2019,7 +2025,7 @@ ip6_rewrite_inline (vlib_main_t * vm, error0 = error1 = IP6_ERROR_NONE; next0 = next1 = IP6_REWRITE_NEXT_DROP; - if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED))) + if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))) { i32 hop_limit0 = ip0->hop_limit; @@ -2046,9 +2052,9 @@ ip6_rewrite_inline (vlib_main_t * vm, } else { - p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED; + p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED; } - if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_LOCALLY_ORIGINATED))) + if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))) { i32 hop_limit1 = ip1->hop_limit; @@ -2075,7 +2081,7 @@ ip6_rewrite_inline (vlib_main_t * vm, } else { - p1->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED; + p1->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED; } adj0 = adj_get (adj_index0); adj1 = adj_get (adj_index1); @@ -2186,7 +2192,7 @@ ip6_rewrite_inline (vlib_main_t * vm, next0 = IP6_REWRITE_NEXT_DROP; /* Check hop limit */ - if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED))) + if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))) { i32 hop_limit0 = ip0->hop_limit; @@ -2212,7 +2218,7 @@ ip6_rewrite_inline (vlib_main_t * vm, } else { - p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED; + p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED; } /* Guess we are only writing on simple Ethernet header. 
*/ diff --git a/src/vnet/ip/ip6_neighbor.c b/src/vnet/ip/ip6_neighbor.c index 68a8cbbc..e8eebd4e 100644 --- a/src/vnet/ip/ip6_neighbor.c +++ b/src/vnet/ip/ip6_neighbor.c @@ -1649,7 +1649,7 @@ icmp6_router_solicitation (vlib_main_t * vm, adj_index0; } } - p0->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; + p0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; radv_info->n_solicitations_dropped += is_dropped; radv_info->n_solicitations_rcvd += is_solicitation; @@ -2348,7 +2348,7 @@ ip6_neighbor_send_mldpv2_report (u32 sw_if_index) vnet_main.local_interface_sw_if_index; vnet_buffer (b0)->ip.adj_index[VLIB_TX] = radv_info->mcast_adj_index; - b0->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; + b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; vlib_node_t *node = vlib_get_node_by_name (vm, (u8 *) "ip6-rewrite-mcast"); diff --git a/src/vnet/session/node.c b/src/vnet/session/node.c index 56e62637..983b78b8 100644 --- a/src/vnet/session/node.c +++ b/src/vnet/session/node.c @@ -256,7 +256,7 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node, b0 = vlib_get_buffer (vm, bi0); b0->error = 0; b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID - | VNET_BUFFER_LOCALLY_ORIGINATED; + | VNET_BUFFER_F_LOCALLY_ORIGINATED; b0->current_data = 0; b0->total_length_not_including_first_buffer = 0; diff --git a/src/vnet/span/node.c b/src/vnet/span/node.c index 2a43b6e3..3a461b0a 100644 --- a/src/vnet/span/node.c +++ b/src/vnet/span/node.c @@ -77,7 +77,7 @@ span_mirror (vlib_main_t * vm, vlib_node_runtime_t * node, u32 sw_if_index0, return; /* Don't do it again */ - if (PREDICT_FALSE (b0->flags & VNET_BUFFER_SPAN_CLONE)) + if (PREDICT_FALSE (b0->flags & VNET_BUFFER_F_SPAN_CLONE)) return; /* *INDENT-OFF* */ @@ -92,7 +92,7 @@ span_mirror (vlib_main_t * vm, vlib_node_runtime_t * node, u32 sw_if_index0, if (PREDICT_TRUE(c0 != 0)) { vnet_buffer (c0)->sw_if_index[VLIB_TX] = i; - c0->flags |= VNET_BUFFER_SPAN_CLONE; + c0->flags |= VNET_BUFFER_F_SPAN_CLONE; to_mirror_next[0] = vlib_get_buffer_index 
(vm, c0); mirror_frames[i]->n_vectors++; if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c index b418e8ba..f34eb797 100644 --- a/src/vnet/tcp/tcp_output.c +++ b/src/vnet/tcp/tcp_output.c @@ -585,7 +585,7 @@ tcp_enqueue_to_ip_lookup (vlib_main_t * vm, vlib_buffer_t * b, u32 bi, u32 *to_next, next_index; vlib_frame_t *f; - b->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; + b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; b->error = 0; /* Default FIB for now */ @@ -847,7 +847,7 @@ tcp_enqueue_to_output (vlib_main_t * vm, vlib_buffer_t * b, u32 bi, u8 is_ip4) u32 *to_next, next_index; vlib_frame_t *f; - b->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; + b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; b->error = 0; /* Decide where to send the packet */ @@ -1563,7 +1563,7 @@ tcp46_output_inline (vlib_main_t * vm, vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0; vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0; - b0->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; + b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; done: b0->error = node->errors[error0]; if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) @@ -1728,7 +1728,7 @@ tcp46_send_reset_inline (vlib_main_t * vm, vlib_node_runtime_t * node, done: b0->error = node->errors[error0]; - b0->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; + b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { th0 = vlib_buffer_get_current (b0); diff --git a/src/vnet/vxlan-gpe/decap.c b/src/vnet/vxlan-gpe/decap.c index 075b0f51..1b3a8b00 100644 --- a/src/vnet/vxlan-gpe/decap.c +++ b/src/vnet/vxlan-gpe/decap.c @@ -857,7 +857,7 @@ ip_vxlan_gpe_bypass_inline (vlib_main_t * vm, } flags0 = b0->flags; - good_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; /* Don't verify UDP checksum for packets with explicit zero checksum. 
*/ good_udp0 |= udp0->checksum == 0; @@ -873,14 +873,14 @@ ip_vxlan_gpe_bypass_inline (vlib_main_t * vm, /* Verify UDP checksum */ if (PREDICT_FALSE (!good_udp0)) { - if ((flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED) == 0) + if ((flags0 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0) { if (is_ip4) flags0 = ip4_tcp_udp_validate_checksum (vm, b0); else flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, b0); good_udp0 = - (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; } } @@ -939,7 +939,7 @@ ip_vxlan_gpe_bypass_inline (vlib_main_t * vm, } flags1 = b1->flags; - good_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + good_udp1 = (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; /* Don't verify UDP checksum for packets with explicit zero checksum. */ good_udp1 |= udp1->checksum == 0; @@ -955,14 +955,14 @@ ip_vxlan_gpe_bypass_inline (vlib_main_t * vm, /* Verify UDP checksum */ if (PREDICT_FALSE (!good_udp1)) { - if ((flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED) == 0) + if ((flags1 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0) { if (is_ip4) flags1 = ip4_tcp_udp_validate_checksum (vm, b1); else flags1 = ip6_tcp_udp_icmp_validate_checksum (vm, b1); good_udp1 = - (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; } } @@ -1055,7 +1055,7 @@ ip_vxlan_gpe_bypass_inline (vlib_main_t * vm, } flags0 = b0->flags; - good_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; /* Don't verify UDP checksum for packets with explicit zero checksum. 
*/ good_udp0 |= udp0->checksum == 0; @@ -1071,14 +1071,14 @@ ip_vxlan_gpe_bypass_inline (vlib_main_t * vm, /* Verify UDP checksum */ if (PREDICT_FALSE (!good_udp0)) { - if ((flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED) == 0) + if ((flags0 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0) { if (is_ip4) flags0 = ip4_tcp_udp_validate_checksum (vm, b0); else flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, b0); good_udp0 = - (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; } } diff --git a/src/vnet/vxlan/decap.c b/src/vnet/vxlan/decap.c index 2acb1f6f..0dc89d3f 100644 --- a/src/vnet/vxlan/decap.c +++ b/src/vnet/vxlan/decap.c @@ -916,7 +916,7 @@ ip_vxlan_bypass_inline (vlib_main_t * vm, } flags0 = b0->flags; - good_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; /* Don't verify UDP checksum for packets with explicit zero checksum. */ good_udp0 |= udp0->checksum == 0; @@ -932,14 +932,14 @@ ip_vxlan_bypass_inline (vlib_main_t * vm, /* Verify UDP checksum */ if (PREDICT_FALSE (!good_udp0)) { - if ((flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED) == 0) + if ((flags0 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0) { if (is_ip4) flags0 = ip4_tcp_udp_validate_checksum (vm, b0); else flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, b0); good_udp0 = - (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; } } @@ -998,7 +998,7 @@ ip_vxlan_bypass_inline (vlib_main_t * vm, } flags1 = b1->flags; - good_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + good_udp1 = (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; /* Don't verify UDP checksum for packets with explicit zero checksum. 
*/ good_udp1 |= udp1->checksum == 0; @@ -1014,14 +1014,14 @@ ip_vxlan_bypass_inline (vlib_main_t * vm, /* Verify UDP checksum */ if (PREDICT_FALSE (!good_udp1)) { - if ((flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED) == 0) + if ((flags1 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0) { if (is_ip4) flags1 = ip4_tcp_udp_validate_checksum (vm, b1); else flags1 = ip6_tcp_udp_icmp_validate_checksum (vm, b1); good_udp1 = - (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; } } @@ -1116,7 +1116,7 @@ ip_vxlan_bypass_inline (vlib_main_t * vm, } flags0 = b0->flags; - good_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; /* Don't verify UDP checksum for packets with explicit zero checksum. */ good_udp0 |= udp0->checksum == 0; @@ -1132,14 +1132,14 @@ ip_vxlan_bypass_inline (vlib_main_t * vm, /* Verify UDP checksum */ if (PREDICT_FALSE (!good_udp0)) { - if ((flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED) == 0) + if ((flags0 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0) { if (is_ip4) flags0 = ip4_tcp_udp_validate_checksum (vm, b0); else flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, b0); good_udp0 = - (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; } } -- cgit 1.2.3-korg