From 239790fd91b3f62e5eda1042a97f9216fe59856e Mon Sep 17 00:00:00 2001 From: Klement Sekera Date: Thu, 16 Feb 2017 10:53:53 +0100 Subject: BFD: echo function Change-Id: Ib1e301d62b687d4e42434239e7cd412065c28da0 Signed-off-by: Klement Sekera --- src/vnet/bfd/bfd.api | 38 ++- src/vnet/bfd/bfd_api.c | 31 ++- src/vnet/bfd/bfd_api.h | 26 +- src/vnet/bfd/bfd_debug.h | 2 +- src/vnet/bfd/bfd_main.c | 689 +++++++++++++++++++++++++++++++++-------------- src/vnet/bfd/bfd_main.h | 94 +++++-- src/vnet/bfd/bfd_udp.c | 286 +++++++++++++++----- src/vnet/bfd/bfd_udp.h | 16 +- 8 files changed, 864 insertions(+), 318 deletions(-) (limited to 'src') diff --git a/src/vnet/bfd/bfd.api b/src/vnet/bfd/bfd.api index f307ed2a193..93bf0fb90e8 100644 --- a/src/vnet/bfd/bfd.api +++ b/src/vnet/bfd/bfd.api @@ -13,29 +13,43 @@ * limitations under the License. */ -/** \brief Configure BFD feature +/** \brief Set BFD echo source @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request - @param slow_timer - slow timer (seconds) - @param min_tx - desired min tx interval - @param min_rx - desired min rx interval - @param detect_mult - desired detection multiplier + @param sw_if_index - interface to use as echo source */ -define bfd_set_config +define bfd_udp_set_echo_source +{ + u32 client_index; + u32 context; + u32 sw_if_index; +}; + +/** \brief Set BFD feature response + @param context - sender context, to match reply w/ request + @param retval - return code for the request +*/ +define bfd_udp_set_echo_source_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Delete BFD echo source + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define bfd_udp_del_echo_source { u32 client_index; u32 context; - u32 slow_timer; - u32 min_tx; - u32 min_rx; - u8 detect_mult; }; -/** \brief Configure BFD feature response +/** \brief Delete BFD feature response @param context - sender context, to match reply w/ request @param retval - return code for the request */ -define bfd_set_config_reply +define bfd_udp_del_echo_source_reply { u32 context; i32 retval; diff --git a/src/vnet/bfd/bfd_api.c b/src/vnet/bfd/bfd_api.c index af70f0ec2d5..6632eae4c91 100644 --- a/src/vnet/bfd/bfd_api.c +++ b/src/vnet/bfd/bfd_api.c @@ -54,7 +54,9 @@ _ (BFD_AUTH_DEL_KEY, bfd_auth_del_key) \ _ (BFD_AUTH_KEYS_DUMP, bfd_auth_keys_dump) \ _ (BFD_UDP_AUTH_ACTIVATE, bfd_udp_auth_activate) \ - _ (BFD_UDP_AUTH_DEACTIVATE, bfd_udp_auth_deactivate) + _ (BFD_UDP_AUTH_DEACTIVATE, bfd_udp_auth_deactivate) \ + _ (BFD_UDP_SET_ECHO_SOURCE, bfd_udp_set_echo_source) \ + _ (BFD_UDP_DEL_ECHO_SOURCE, bfd_udp_del_echo_source) pub_sub_handler (bfd_events, BFD_EVENTS); @@ -314,6 +316,33 @@ vl_api_bfd_udp_auth_deactivate_t_handler (vl_api_bfd_udp_auth_deactivate_t * REPLY_MACRO (VL_API_BFD_UDP_AUTH_DEACTIVATE_REPLY); } +static void +vl_api_bfd_udp_set_echo_source_t_handler (vl_api_bfd_udp_set_echo_source_t * + mp) +{ + vl_api_bfd_udp_set_echo_source_reply_t *rmp; + int rv; + + VALIDATE_SW_IF_INDEX (mp); + + rv = bfd_udp_set_echo_source (clib_net_to_host_u32 (mp->sw_if_index)); + + BAD_SW_IF_INDEX_LABEL; + REPLY_MACRO (VL_API_BFD_UDP_SET_ECHO_SOURCE_REPLY); +} + +static void +vl_api_bfd_udp_del_echo_source_t_handler (vl_api_bfd_udp_del_echo_source_t * + mp) +{ + vl_api_bfd_udp_del_echo_source_reply_t *rmp; + int rv; + + rv = bfd_udp_del_echo_source (); + + REPLY_MACRO (VL_API_BFD_UDP_DEL_ECHO_SOURCE_REPLY); +} + /* * bfd_api_hookup * Add vpe's API message handlers to the table. diff --git a/src/vnet/bfd/bfd_api.h b/src/vnet/bfd/bfd_api.h index f4486a79196..63d4a62e2e3 100644 --- a/src/vnet/bfd/bfd_api.h +++ b/src/vnet/bfd/bfd_api.h @@ -24,6 +24,17 @@ #include #include +#define foreach_bfd_transport(F) \ + F (UDP4, "ip4-rewrite") \ + F (UDP6, "ip6-rewrite") + +typedef enum +{ +#define F(t, n) BFD_TRANSPORT_##t, + foreach_bfd_transport (F) +#undef F +} bfd_transport_e; + vnet_api_error_t bfd_udp_add_session (u32 sw_if_index, const ip46_address_t * local_addr, const ip46_address_t * peer_addr, @@ -31,12 +42,11 @@ bfd_udp_add_session (u32 sw_if_index, const ip46_address_t * local_addr, u8 detect_mult, u8 is_authenticated, u32 conf_key_id, u8 bfd_key_id); -vnet_api_error_t bfd_udp_mod_session (u32 sw_if_index, - const ip46_address_t * local_addr, - const ip46_address_t * peer_addr, - u32 desired_min_tx_usec, - u32 required_min_rx_usec, - u8 detect_mult); +vnet_api_error_t +bfd_udp_mod_session (u32 sw_if_index, const ip46_address_t * local_addr, + const ip46_address_t * peer_addr, + u32 desired_min_tx_usec, u32 required_min_rx_usec, + u8 detect_mult); vnet_api_error_t bfd_udp_del_session (u32 sw_if_index, const ip46_address_t * local_addr, @@ -63,6 +73,10 @@ vnet_api_error_t bfd_udp_auth_deactivate (u32 sw_if_index, const ip46_address_t * peer_addr, u8 is_delayed); +vnet_api_error_t bfd_udp_set_echo_source (u32 loopback_sw_if_index); + +vnet_api_error_t bfd_udp_del_echo_source (); + #endif /* __included_bfd_api_h__ */ /* diff --git a/src/vnet/bfd/bfd_debug.h b/src/vnet/bfd/bfd_debug.h index a06e934f560..3017352e515 100644 --- a/src/vnet/bfd/bfd_debug.h +++ b/src/vnet/bfd/bfd_debug.h @@ -20,7 +20,7 @@ #define __included_bfd_debug_h__ /* controls debug prints */ -#define BFD_DEBUG (0) +#define BFD_DEBUG (1) #if BFD_DEBUG #define BFD_DEBUG_FILE_DEF \ diff --git a/src/vnet/bfd/bfd_main.c b/src/vnet/bfd/bfd_main.c index c0fd18dfe57..29c40458bf5 100644 --- a/src/vnet/bfd/bfd_main.c +++ b/src/vnet/bfd/bfd_main.c @@ -17,17 +17,37 @@ * @brief BFD nodes implementation */ +#if WITH_LIBSSL > 0 +#include +#endif + +#if __SSE4_2__ +#include +#endif + #include #include #include +#include #include #include #include #include #include -#if WITH_LIBSSL > 0 -#include + +static u64 +bfd_calc_echo_checksum (u32 discriminator, u64 expire_time, u32 secret) +{ + u64 checksum = 0; +#if __SSE4_2__ + checksum = _mm_crc32_u64 (0, discriminator); + checksum = _mm_crc32_u64 (checksum, expire_time); + checksum = _mm_crc32_u64 (checksum, secret); +#else + checksum = clib_xxhash (discriminator ^ expire_time ^ secret); #endif + return checksum; +} static u64 bfd_usec_to_clocks (const bfd_main_t * bm, u64 us) @@ -35,6 +55,12 @@ bfd_usec_to_clocks (const bfd_main_t * bm, u64 us) return bm->cpu_cps * ((f64) us / USEC_PER_SECOND); } +static u32 +bfd_clocks_to_usec (const bfd_main_t * bm, u64 clocks) +{ + return (clocks / bm->cpu_cps) * USEC_PER_SECOND; +} + static vlib_node_registration_t bfd_process_node; /* set to 0 here, real values filled at startup */ @@ -81,17 +107,19 @@ bfd_set_defaults (bfd_main_t * bm, bfd_session_t * bs) bs->local_state = BFD_STATE_down; bs->local_diag = BFD_DIAG_CODE_no_diag; bs->remote_state = BFD_STATE_down; - bs->local_demand = 0; bs->remote_discr = 0; - bs->config_desired_min_tx_usec = BFD_DEFAULT_DESIRED_MIN_TX_US; + bs->config_desired_min_tx_usec = BFD_DEFAULT_DESIRED_MIN_TX_USEC; bs->config_desired_min_tx_clocks = bm->default_desired_min_tx_clocks; bs->effective_desired_min_tx_clocks = bm->default_desired_min_tx_clocks; bs->remote_min_rx_usec = 1; bs->remote_min_rx_clocks = bfd_usec_to_clocks (bm, bs->remote_min_rx_usec); + bs->remote_min_echo_rx_usec = 0; + bs->remote_min_echo_rx_clocks = 0; bs->remote_demand = 0; bs->auth.remote_seq_number = 0; bs->auth.remote_seq_number_known = 0; bs->auth.local_seq_number = random_u32 (&bm->random_seed); + bs->echo_secret = random_u32 (&bm->random_seed); } static void @@ -119,68 +147,90 @@ bfd_set_state (bfd_main_t * bm, bfd_session_t * bs, } } -static void -bfd_recalc_tx_interval (bfd_main_t * bm, bfd_session_t * bs) +static const char * +bfd_poll_state_string (bfd_poll_state_e state) { - if (!bs->local_demand) + switch (state) { - bs->transmit_interval_clocks = - clib_max (bs->effective_desired_min_tx_clocks, - bs->remote_min_rx_clocks); +#define F(x) \ + case BFD_POLL_##x: \ + return "BFD_POLL_" #x; + foreach_bfd_poll_state (F) +#undef F } - else + return "UNKNOWN"; +} + +static void +bfd_set_poll_state (bfd_session_t * bs, bfd_poll_state_e state) +{ + if (bs->poll_state != state) { - /* TODO */ + BFD_DBG ("Setting poll state=%s, bs_idx=%u", + bfd_poll_state_string (state), bs->bs_idx); + bs->poll_state = state; } - BFD_DBG ("Recalculated transmit interval %lu clocks/%.2fs", - bs->transmit_interval_clocks, - bs->transmit_interval_clocks / bm->cpu_cps); +} + +static void +bfd_recalc_tx_interval (bfd_main_t * bm, bfd_session_t * bs) +{ + bs->transmit_interval_clocks = + clib_max (bs->effective_desired_min_tx_clocks, bs->remote_min_rx_clocks); + BFD_DBG ("Recalculated transmit interval " BFD_CLK_FMT, + BFD_CLK_PRN (bs->transmit_interval_clocks)); +} + +static void +bfd_recalc_echo_tx_interval (bfd_main_t * bm, bfd_session_t * bs) +{ + bs->echo_transmit_interval_clocks = + clib_max (bs->effective_desired_min_tx_clocks, + bs->remote_min_echo_rx_clocks); + BFD_DBG ("Recalculated echo transmit interval " BFD_CLK_FMT, + BFD_CLK_PRN (bs->echo_transmit_interval_clocks)); } static void bfd_calc_next_tx (bfd_main_t * bm, bfd_session_t * bs, u64 now) { - if (!bs->local_demand) + if (bs->local_detect_mult > 1) { - if (bs->local_detect_mult > 1) - { - /* common case - 75-100% of transmit interval */ - bs->tx_timeout_clocks = bs->last_tx_clocks + - (1 - .25 * (random_f64 (&bm->random_seed))) * - bs->transmit_interval_clocks; - if (bs->tx_timeout_clocks < now) - { - /* huh, we've missed it already, transmit now */ - BFD_DBG ("Missed %lu transmit events (now is %lu, calc " - "tx_timeout is %lu)", - (now - bs->tx_timeout_clocks) / - bs->transmit_interval_clocks, - now, bs->tx_timeout_clocks); - bs->tx_timeout_clocks = now; - } - } - else + /* common case - 75-100% of transmit interval */ + bs->tx_timeout_clocks = bs->last_tx_clocks + + (1 - .25 * (random_f64 (&bm->random_seed))) * + bs->transmit_interval_clocks; + if (bs->tx_timeout_clocks < now) { - /* special case - 75-90% of transmit interval */ - bs->tx_timeout_clocks = - bs->last_tx_clocks + - (.9 - .15 * (random_f64 (&bm->random_seed))) * - bs->transmit_interval_clocks; - if (bs->tx_timeout_clocks < now) - { - /* huh, we've missed it already, transmit now */ - BFD_DBG ("Missed %lu transmit events (now is %lu, calc " - "tx_timeout is %lu)", - (now - bs->tx_timeout_clocks) / - bs->transmit_interval_clocks, - now, bs->tx_timeout_clocks); - bs->tx_timeout_clocks = now; - } + /* + * the timeout is in the past, which means that either remote + * demand mode was set or performance/clock issues ... + */ + BFD_DBG ("Missed %lu transmit events (now is %lu, calc " + "tx_timeout is %lu)", + (now - bs->tx_timeout_clocks) / + bs->transmit_interval_clocks, now, bs->tx_timeout_clocks); + bs->tx_timeout_clocks = now; } } else { - /* TODO */ + /* special case - 75-90% of transmit interval */ + bs->tx_timeout_clocks = bs->last_tx_clocks + + (.9 - .15 * (random_f64 (&bm->random_seed))) * + bs->transmit_interval_clocks; + if (bs->tx_timeout_clocks < now) + { + /* + * the timeout is in the past, which means that either remote + * demand mode was set or performance/clock issues ... + */ + BFD_DBG ("Missed %lu transmit events (now is %lu, calc " + "tx_timeout is %lu)", + (now - bs->tx_timeout_clocks) / + bs->transmit_interval_clocks, now, bs->tx_timeout_clocks); + bs->tx_timeout_clocks = now; + } } if (bs->tx_timeout_clocks) { @@ -191,24 +241,33 @@ bfd_calc_next_tx (bfd_main_t * bm, bfd_session_t * bs, u64 now) } } +static void +bfd_calc_next_echo_tx (bfd_main_t * bm, bfd_session_t * bs, u64 now) +{ + bs->echo_tx_timeout_clocks = + bs->echo_last_tx_clocks + bs->echo_transmit_interval_clocks; + if (bs->echo_tx_timeout_clocks < now) + { + /* huh, we've missed it already, transmit now */ + BFD_DBG ("Missed %lu echo transmit events (now is %lu, calc tx_timeout " + "is %lu)", + (now - bs->echo_tx_timeout_clocks) / + bs->echo_transmit_interval_clocks, + now, bs->echo_tx_timeout_clocks); + bs->echo_tx_timeout_clocks = now; + } + BFD_DBG ("Next echo transmit in %lu clocks/%.02fs@%lu", + bs->echo_tx_timeout_clocks - now, + (bs->echo_tx_timeout_clocks - now) / bm->cpu_cps, + bs->echo_tx_timeout_clocks); +} + static void bfd_recalc_detection_time (bfd_main_t * bm, bfd_session_t * bs) { - if (!bs->local_demand) - { - /* asynchronous mode */ - bs->detection_time_clocks = - bs->remote_detect_mult * - clib_max (bs->effective_required_min_rx_clocks, - bs->remote_desired_min_tx_clocks); - } - else - { - /* demand mode */ - bs->detection_time_clocks = - bs->local_detect_mult * clib_max (bs->config_desired_min_tx_clocks, - bs->remote_min_rx_clocks); - } + bs->detection_time_clocks = + bs->remote_detect_mult * clib_max (bs->effective_required_min_rx_clocks, + bs->remote_desired_min_tx_clocks); BFD_DBG ("Recalculated detection time %lu clocks/%.2fs", bs->detection_time_clocks, bs->detection_time_clocks / bm->cpu_cps); @@ -220,25 +279,37 @@ bfd_set_timer (bfd_main_t * bm, bfd_session_t * bs, u64 now, { u64 next = 0; u64 rx_timeout = 0; + u64 tx_timeout = 0; if (BFD_STATE_up == bs->local_state) { rx_timeout = bs->last_rx_clocks + bs->detection_time_clocks; } - if (bs->tx_timeout_clocks && rx_timeout) + if (BFD_STATE_up != bs->local_state || !bs->remote_demand || + BFD_POLL_NOT_NEEDED != bs->poll_state) { - next = clib_min (bs->tx_timeout_clocks, rx_timeout); + tx_timeout = bs->tx_timeout_clocks; } - else if (bs->tx_timeout_clocks) + if (tx_timeout && rx_timeout) { - next = bs->tx_timeout_clocks; + next = clib_min (tx_timeout, rx_timeout); + } + else if (tx_timeout) + { + next = tx_timeout; } else if (rx_timeout) { next = rx_timeout; } - BFD_DBG ("bs_idx=%u, tx_timeout=%lu, rx_timeout=%lu, next=%s", bs->bs_idx, - bs->tx_timeout_clocks, rx_timeout, - next == bs->tx_timeout_clocks ? "tx" : "rx"); + if (bs->echo && next > bs->echo_tx_timeout_clocks) + { + next = bs->echo_tx_timeout_clocks; + } + BFD_DBG ("bs_idx=%u, tx_timeout=%lu, echo_tx_timeout=%lu, rx_timeout=%lu, " + "next=%s", + bs->bs_idx, tx_timeout, bs->echo_tx_timeout_clocks, rx_timeout, + next == tx_timeout + ? "tx" : (next == bs->echo_tx_timeout_clocks ? "echo tx" : "rx")); /* sometimes the wheel expires an event a bit sooner than requested, account for that here */ if (next && (now + bm->wheel_inaccuracy > bs->wheel_time_clocks || @@ -271,6 +342,7 @@ bfd_set_effective_desired_min_tx (bfd_main_t * bm, BFD_CLK_PRN (bs->effective_desired_min_tx_clocks)); bfd_recalc_detection_time (bm, bs); bfd_recalc_tx_interval (bm, bs); + bfd_recalc_echo_tx_interval (bm, bs); bfd_calc_next_tx (bm, bs, now); } @@ -287,25 +359,40 @@ bfd_set_effective_required_min_rx (bfd_main_t * bm, static void bfd_set_remote_required_min_rx (bfd_main_t * bm, bfd_session_t * bs, - u64 now, - u32 remote_required_min_rx_usec, - int handling_wakeup) + u64 now, u32 remote_required_min_rx_usec) { - bs->remote_min_rx_usec = remote_required_min_rx_usec; - bs->remote_min_rx_clocks = - bfd_usec_to_clocks (bm, remote_required_min_rx_usec); - BFD_DBG ("Set remote min rx to " BFD_CLK_FMT, - BFD_CLK_PRN (bs->remote_min_rx_clocks)); - bfd_recalc_detection_time (bm, bs); - bfd_recalc_tx_interval (bm, bs); - bfd_calc_next_tx (bm, bs, now); - bfd_set_timer (bm, bs, now, handling_wakeup); + if (bs->remote_min_rx_usec != remote_required_min_rx_usec) + { + bs->remote_min_rx_usec = remote_required_min_rx_usec; + bs->remote_min_rx_clocks = + bfd_usec_to_clocks (bm, remote_required_min_rx_usec); + BFD_DBG ("Set remote min rx to " BFD_CLK_FMT, + BFD_CLK_PRN (bs->remote_min_rx_clocks)); + bfd_recalc_detection_time (bm, bs); + bfd_recalc_tx_interval (bm, bs); + } +} + +static void +bfd_set_remote_required_min_echo_rx (bfd_main_t * bm, bfd_session_t * bs, + u64 now, + u32 remote_required_min_echo_rx_usec) +{ + if (bs->remote_min_echo_rx_usec != remote_required_min_echo_rx_usec) + { + bs->remote_min_echo_rx_usec = remote_required_min_echo_rx_usec; + bs->remote_min_echo_rx_clocks = + bfd_usec_to_clocks (bm, bs->remote_min_echo_rx_usec); + BFD_DBG ("Set remote min echo rx to " BFD_CLK_FMT, + BFD_CLK_PRN (bs->remote_min_echo_rx_clocks)); + bfd_recalc_echo_tx_interval (bm, bs); + } } void bfd_session_start (bfd_main_t * bm, bfd_session_t * bs) { - BFD_DBG ("%U", format_bfd_session, bs); + BFD_DBG ("\nStarting session: %U", format_bfd_session, bs); bfd_recalc_tx_interval (bm, bs); vlib_process_signal_event (bm->vlib_main, bm->bfd_process_node_index, BFD_EVENT_NEW_SESSION, bs->bs_idx); @@ -418,11 +505,12 @@ static void bfd_on_state_change (bfd_main_t * bm, bfd_session_t * bs, u64 now, int handling_wakeup) { - BFD_DBG ("State changed: %U", format_bfd_session, bs); + BFD_DBG ("\nState changed: %U", format_bfd_session, bs); bfd_event (bm, bs); switch (bs->local_state) { case BFD_STATE_admin_down: + bs->echo = 0; bfd_set_effective_desired_min_tx (bm, bs, now, clib_max (bs->config_desired_min_tx_clocks, @@ -432,6 +520,7 @@ bfd_on_state_change (bfd_main_t * bm, bfd_session_t * bs, u64 now, bfd_set_timer (bm, bs, now, handling_wakeup); break; case BFD_STATE_down: + bs->echo = 0; bfd_set_effective_desired_min_tx (bm, bs, now, clib_max (bs->config_desired_min_tx_clocks, @@ -441,6 +530,7 @@ bfd_on_state_change (bfd_main_t * bm, bfd_session_t * bs, u64 now, bfd_set_timer (bm, bs, now, handling_wakeup); break; case BFD_STATE_init: + bs->echo = 0; bfd_set_effective_desired_min_tx (bm, bs, now, bs->config_desired_min_tx_clocks); bfd_set_timer (bm, bs, now, handling_wakeup); @@ -448,7 +538,7 @@ bfd_on_state_change (bfd_main_t * bm, bfd_session_t * bs, u64 now, case BFD_STATE_up: bfd_set_effective_desired_min_tx (bm, bs, now, bs->config_desired_min_tx_clocks); - if (POLL_NOT_NEEDED == bs->poll_state) + if (BFD_POLL_NOT_NEEDED == bs->poll_state) { bfd_set_effective_required_min_rx (bm, bs, now, bs->config_required_min_rx_clocks); @@ -462,13 +552,14 @@ static void bfd_on_config_change (vlib_main_t * vm, vlib_node_runtime_t * rt, bfd_main_t * bm, bfd_session_t * bs, u64 now) { - if (bs->remote_demand) - { - /* TODO - initiate poll sequence here */ - } - else + /* + * if remote demand mode is set and we need to do a poll, set the next + * timeout so that the session wakes up immediately + */ + if (bs->remote_demand && BFD_POLL_NEEDED == bs->poll_state && + bs->poll_state_start_or_timeout_clocks < now) { - /* asynchronous - poll is part of periodic - nothing to do here */ + bs->tx_timeout_clocks = now; } bfd_recalc_detection_time (bm, bs); bfd_set_timer (bm, bs, now, 0); @@ -482,27 +573,36 @@ bfd_add_transport_layer (vlib_main_t * vm, vlib_buffer_t * b, { case BFD_TRANSPORT_UDP4: BFD_DBG ("Transport bfd via udp4, bs_idx=%u", bs->bs_idx); - bfd_add_udp4_transport (vm, b, bs); + bfd_add_udp4_transport (vm, b, bs, 0 /* is_echo */ ); break; case BFD_TRANSPORT_UDP6: BFD_DBG ("Transport bfd via udp6, bs_idx=%u", bs->bs_idx); - bfd_add_udp6_transport (vm, b, bs); + bfd_add_udp6_transport (vm, b, bs, 0 /* is_echo */ ); break; } } -static vlib_buffer_t * -bfd_create_frame_to_next_node (vlib_main_t * vm, bfd_session_t * bs) +static int +bfd_echo_add_transport_layer (vlib_main_t * vm, vlib_buffer_t * b, + bfd_session_t * bs) { - u32 bi; - if (vlib_buffer_alloc (vm, &bi, 1) != 1) + switch (bs->transport) { - clib_warning ("buffer allocation failure"); - return NULL; + case BFD_TRANSPORT_UDP4: + BFD_DBG ("Transport bfd echo via udp4, bs_idx=%u", bs->bs_idx); + return bfd_add_udp4_transport (vm, b, bs, 1 /* is_echo */ ); + break; + case BFD_TRANSPORT_UDP6: + BFD_DBG ("Transport bfd echo via udp6, bs_idx=%u", bs->bs_idx); + return bfd_add_udp6_transport (vm, b, bs, 1 /* is_echo */ ); + break; } + return 0; +} - vlib_buffer_t *b = vlib_get_buffer (vm, bi); - ASSERT (b->current_data == 0); +static void +bfd_create_frame_to_next_node (vlib_main_t * vm, bfd_session_t * bs, u32 bi) +{ vlib_frame_t *f = vlib_get_frame_to_node (vm, bfd_node_index_by_transport[bs->transport]); @@ -510,9 +610,7 @@ bfd_create_frame_to_next_node (vlib_main_t * vm, bfd_session_t * bs) u32 *to_next = vlib_frame_vector_args (f); to_next[0] = bi; f->n_vectors = 1; - vlib_put_frame_to_node (vm, bfd_node_index_by_transport[bs->transport], f); - return b; } #if WITH_LIBSSL > 0 @@ -583,45 +681,118 @@ bfd_add_auth_section (vlib_buffer_t * b, bfd_session_t * bs) } } +static int +bfd_is_echo_possible (bfd_session_t * bs) +{ + if (BFD_STATE_up == bs->local_state && BFD_STATE_up == bs->remote_state && + bs->remote_min_echo_rx_usec > 0) + { + switch (bs->transport) + { + case BFD_TRANSPORT_UDP4: + return bfd_udp_is_echo_available (BFD_TRANSPORT_UDP4); + case BFD_TRANSPORT_UDP6: + return bfd_udp_is_echo_available (BFD_TRANSPORT_UDP6); + } + } + return 0; +} + static void -bfd_init_control_frame (vlib_buffer_t * b, bfd_session_t * bs) +bfd_init_control_frame (bfd_main_t * bm, bfd_session_t * bs, + vlib_buffer_t * b) { bfd_pkt_t *pkt = vlib_buffer_get_current (b); - u32 bfd_length = 0; bfd_length = sizeof (bfd_pkt_t); memset (pkt, 0, sizeof (*pkt)); bfd_pkt_set_version (pkt, 1); bfd_pkt_set_diag_code (pkt, bs->local_diag); bfd_pkt_set_state (pkt, bs->local_state); - if (bs->local_demand && BFD_STATE_up == bs->local_state && - BFD_STATE_up == bs->remote_state) - { - bfd_pkt_set_demand (pkt); - } pkt->head.detect_mult = bs->local_detect_mult; pkt->head.length = clib_host_to_net_u32 (bfd_length); pkt->my_disc = bs->local_discr; pkt->your_disc = bs->remote_discr; pkt->des_min_tx = clib_host_to_net_u32 (bs->config_desired_min_tx_usec); - pkt->req_min_rx = clib_host_to_net_u32 (bs->config_required_min_rx_usec); + if (bs->echo) + { + pkt->req_min_rx = + clib_host_to_net_u32 (bfd_clocks_to_usec + (bm, bs->effective_required_min_rx_clocks)); + } + else + { + pkt->req_min_rx = + clib_host_to_net_u32 (bs->config_required_min_rx_usec); + } pkt->req_min_echo_rx = clib_host_to_net_u32 (1); b->current_length = bfd_length; } +static void +bfd_send_echo (vlib_main_t * vm, vlib_node_runtime_t * rt, + bfd_main_t * bm, bfd_session_t * bs, u64 now, + int handling_wakeup) +{ + if (!bfd_is_echo_possible (bs)) + { + BFD_DBG ("\nSwitching off echo function: %U", format_bfd_session, bs); + bs->echo = 0; + return; + } + /* sometimes the wheel expires an event a bit sooner than requested, account + for that here */ + if (now + bm->wheel_inaccuracy >= bs->echo_tx_timeout_clocks) + { + BFD_DBG ("\nSending echo packet: %U", format_bfd_session, bs); + u32 bi; + if (vlib_buffer_alloc (vm, &bi, 1) != 1) + { + clib_warning ("buffer allocation failure"); + return; + } + vlib_buffer_t *b = vlib_get_buffer (vm, bi); + ASSERT (b->current_data == 0); + bfd_echo_pkt_t *pkt = vlib_buffer_get_current (b); + memset (pkt, 0, sizeof (*pkt)); + pkt->discriminator = bs->local_discr; + pkt->expire_time_clocks = + now + bs->echo_transmit_interval_clocks * bs->local_detect_mult; + pkt->checksum = + bfd_calc_echo_checksum (bs->local_discr, pkt->expire_time_clocks, + bs->echo_secret); + b->current_length = sizeof (*pkt); + if (!bfd_echo_add_transport_layer (vm, b, bs)) + { + BFD_ERR ("cannot send echo packet out, turning echo off"); + bs->echo = 0; + vlib_buffer_free_one (vm, bi); + return; + } + bs->echo_last_tx_clocks = now; + bfd_calc_next_echo_tx (bm, bs, now); + bfd_create_frame_to_next_node (vm, bs, bi); + } + else + { + BFD_DBG + ("No need to send echo packet now, now is %lu, tx_timeout is %lu", + now, bs->echo_tx_timeout_clocks); + } +} + static void bfd_send_periodic (vlib_main_t * vm, vlib_node_runtime_t * rt, bfd_main_t * bm, bfd_session_t * bs, u64 now, int handling_wakeup) { - if (!bs->remote_min_rx_usec) + if (!bs->remote_min_rx_usec && BFD_POLL_NOT_NEEDED == bs->poll_state) { - BFD_DBG - ("bfd.RemoteMinRxInterval is zero, not sending periodic control " - "frame"); + BFD_DBG ("Remote min rx interval is zero, not sending periodic control " + "frame"); return; } - if (POLL_NOT_NEEDED == bs->poll_state && bs->remote_demand && + if (BFD_POLL_NOT_NEEDED == bs->poll_state && bs->remote_demand && BFD_STATE_up == bs->local_state && BFD_STATE_up == bs->remote_state) { /* @@ -630,33 +801,52 @@ bfd_send_periodic (vlib_main_t * vm, vlib_node_runtime_t * rt, * bfd.SessionState is Up, and bfd.RemoteSessionState is Up) and a Poll * Sequence is not being transmitted. */ - BFD_DBG ("bfd.RemoteDemand is non-zero, not sending periodic control " - "frame"); + BFD_DBG ("Remote demand is set, not sending periodic control frame"); return; } /* sometimes the wheel expires an event a bit sooner than requested, account for that here */ if (now + bm->wheel_inaccuracy >= bs->tx_timeout_clocks) { - BFD_DBG ("Send periodic control frame for bs_idx=%lu: %U", bs->bs_idx, - format_bfd_session, bs); - vlib_buffer_t *b = bfd_create_frame_to_next_node (vm, bs); - if (!b) + BFD_DBG ("\nSending periodic control frame: %U", format_bfd_session, + bs); + u32 bi; + if (vlib_buffer_alloc (vm, &bi, 1) != 1) { + clib_warning ("buffer allocation failure"); return; } - bfd_init_control_frame (b, bs); - if (POLL_NOT_NEEDED != bs->poll_state) + vlib_buffer_t *b = vlib_get_buffer (vm, bi); + ASSERT (b->current_data == 0); + bfd_init_control_frame (bm, bs, b); + switch (bs->poll_state) { - /* here we are either beginning a new poll sequence or retrying .. */ + case BFD_POLL_NEEDED: + if (now < bs->poll_state_start_or_timeout_clocks) + { + BFD_DBG ("Cannot start a poll sequence yet, need to wait " + "for " BFD_CLK_FMT, + BFD_CLK_PRN (bs->poll_state_start_or_timeout_clocks - + now)); + break; + } + bs->poll_state_start_or_timeout_clocks = now; + bfd_set_poll_state (bs, BFD_POLL_IN_PROGRESS); + /* fallthrough */ + case BFD_POLL_IN_PROGRESS: + case BFD_POLL_IN_PROGRESS_AND_QUEUED: bfd_pkt_set_poll (vlib_buffer_get_current (b)); - bs->poll_state = POLL_IN_PROGRESS; BFD_DBG ("Setting poll bit in packet, bs_idx=%u", bs->bs_idx); + break; + case BFD_POLL_NOT_NEEDED: + /* fallthrough */ + break; } bfd_add_auth_section (b, bs); bfd_add_transport_layer (vm, b, bs); bs->last_tx_clocks = now; bfd_calc_next_tx (bm, bs, now); + bfd_create_frame_to_next_node (vm, bs, bi); } else { @@ -664,15 +854,14 @@ bfd_send_periodic (vlib_main_t * vm, vlib_node_runtime_t * rt, ("No need to send control frame now, now is %lu, tx_timeout is %lu", now, bs->tx_timeout_clocks); } - bfd_set_timer (bm, bs, now, handling_wakeup); } void bfd_init_final_control_frame (vlib_main_t * vm, vlib_buffer_t * b, - bfd_session_t * bs) + bfd_main_t * bm, bfd_session_t * bs) { BFD_DBG ("Send final control frame for bs_idx=%lu", bs->bs_idx); - bfd_init_control_frame (b, bs); + bfd_init_control_frame (bm, bs, b); bfd_pkt_set_final (vlib_buffer_get_current (b)); bfd_add_auth_section (b, bs); bfd_add_transport_layer (vm, b, bs); @@ -681,7 +870,7 @@ bfd_init_final_control_frame (vlib_main_t * vm, vlib_buffer_t * b, * RFC allows to include changes in final frame, so if there were any * pending, we already did that, thus we can clear any pending poll needs */ - bs->poll_state = POLL_NOT_NEEDED; + bfd_set_poll_state (bs, BFD_POLL_NOT_NEEDED); } static void @@ -703,7 +892,16 @@ bfd_check_rx_timeout (bfd_main_t * bm, bfd_session_t * bs, u64 now, * since it is no longer required to maintain previous session state) * and then can transmit at its own rate. */ - bfd_set_remote_required_min_rx (bm, bs, now, 1, handling_wakeup); + bfd_set_remote_required_min_rx (bm, bs, now, 1); + } + else if (bs->echo && + bs->echo_last_rx_clocks + + bs->echo_transmit_interval_clocks * bs->local_detect_mult <= + now + bm->wheel_inaccuracy) + { + BFD_DBG ("Echo rx timeout, session goes down"); + bfd_set_diag (bs, BFD_DIAG_CODE_echo_failed); + bfd_set_state (bm, bs, BFD_STATE_down, handling_wakeup); } } @@ -721,10 +919,30 @@ bfd_on_timeout (vlib_main_t * vm, vlib_node_runtime_t * rt, bfd_main_t * bm, bfd_send_periodic (vm, rt, bm, bs, now, 1); break; case BFD_STATE_init: - /* fallthrough */ + bfd_check_rx_timeout (bm, bs, now, 1); + bfd_send_periodic (vm, rt, bm, bs, now, 1); + break; case BFD_STATE_up: bfd_check_rx_timeout (bm, bs, now, 1); + if (BFD_POLL_NOT_NEEDED == bs->poll_state && !bs->echo && + bfd_is_echo_possible (bs)) + { + /* switch on echo function as main detection method now */ + BFD_DBG ("Switching on echo function, bs_idx=%u", bs->bs_idx); + bs->echo = 1; + bs->echo_last_rx_clocks = now; + bs->echo_tx_timeout_clocks = now; + bfd_set_effective_required_min_rx (bm, bs, now, + clib_max + (bm->min_required_min_rx_while_echo_clocks, + bs->config_required_min_rx_clocks)); + bfd_set_poll_state (bs, BFD_POLL_NEEDED); + } bfd_send_periodic (vm, rt, bm, bs, now, 1); + if (bs->echo) + { + bfd_send_echo (vm, rt, bm, bs, now, 1); + } break; } } @@ -822,6 +1040,7 @@ bfd_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) { bfd_session_t *bs = pool_elt_at_index (bm->sessions, bs_idx); bfd_on_timeout (vm, rt, bm, bs, now); + bfd_set_timer (bm, bs, now, 1); } } if (expired) @@ -894,7 +1113,9 @@ bfd_main_init (vlib_main_t * vm) bm->cpu_cps = vm->clib_time.clocks_per_second; BFD_DBG ("cps is %.2f", bm->cpu_cps); bm->default_desired_min_tx_clocks = - bfd_usec_to_clocks (bm, BFD_DEFAULT_DESIRED_MIN_TX_US); + bfd_usec_to_clocks (bm, BFD_DEFAULT_DESIRED_MIN_TX_USEC); + bm->min_required_min_rx_while_echo_clocks = + bfd_usec_to_clocks (bm, BFD_REQUIRED_MIN_RX_USEC_WHILE_ECHO); const u64 now = clib_cpu_time_now (); timing_wheel_init (&bm->wheel, now, bm->cpu_cps); bm->wheel_inaccuracy = 2 << bm->wheel.log2_clocks_per_bin; @@ -912,14 +1133,28 @@ bfd_main_init (vlib_main_t * vm) VLIB_INIT_FUNCTION (bfd_main_init); bfd_session_t * -bfd_get_session (bfd_main_t * bm, bfd_transport_t t) +bfd_get_session (bfd_main_t * bm, bfd_transport_e t) { bfd_session_t *result; pool_get (bm->sessions, result); memset (result, 0, sizeof (*result)); result->bs_idx = result - bm->sessions; result->transport = t; - result->local_discr = random_u32 (&bm->random_seed); + const unsigned limit = 1000; + unsigned counter = 0; + do + { + result->local_discr = random_u32 (&bm->random_seed); + if (counter > limit) + { + clib_warning ("Couldn't allocate unused session discriminator even " + "after %u tries!", limit); + pool_put (bm->sessions, result); + return NULL; + } + ++counter; + } + while (hash_get (bm->session_by_disc, result->local_discr)); bfd_set_defaults (bm, result); hash_set (bm->session_by_disc, result->local_discr, result->bs_idx); return result; @@ -1372,29 +1607,45 @@ bfd_consume_pkt (bfd_main_t * bm, const bfd_pkt_t * pkt, u32 bs_idx) bfd_usec_to_clocks (bm, clib_net_to_host_u32 (pkt->des_min_tx)); bs->remote_detect_mult = pkt->head.detect_mult; bfd_set_remote_required_min_rx (bm, bs, now, - clib_net_to_host_u32 (pkt->req_min_rx), 0); - /* FIXME - If the Required Min Echo RX Interval field is zero, the - transmission of Echo packets, if any, MUST cease. - - If a Poll Sequence is being transmitted by the local system and - the Final (F) bit in the received packet is set, the Poll Sequence - MUST be terminated. - */ + clib_net_to_host_u32 (pkt->req_min_rx)); + bfd_set_remote_required_min_echo_rx (bm, bs, now, + clib_net_to_host_u32 + (pkt->req_min_echo_rx)); /* FIXME 6.8.2 */ /* FIXME 6.8.4 */ - if (bs->poll_state == POLL_IN_PROGRESS && bfd_pkt_get_final (pkt)) + if (bfd_pkt_get_final (pkt)) { - bs->poll_state = POLL_NOT_NEEDED; - BFD_DBG ("Poll sequence terminated, bs_idx=%u", bs->bs_idx); - if (BFD_STATE_up == bs->local_state) + if (BFD_POLL_IN_PROGRESS == bs->poll_state) { - bfd_set_effective_required_min_rx (bm, bs, now, - bs->config_required_min_rx_clocks); - bfd_recalc_detection_time (bm, bs); - bfd_set_timer (bm, bs, now, 0); + BFD_DBG ("Poll sequence terminated, bs_idx=%u", bs->bs_idx); + bfd_set_poll_state (bs, BFD_POLL_NOT_NEEDED); + if (BFD_STATE_up == bs->local_state) + { + bfd_set_effective_required_min_rx (bm, bs, now, + clib_max (bs->echo * + bm->min_required_min_rx_while_echo_clocks, + bs->config_required_min_rx_clocks)); + } + } + else if (BFD_POLL_IN_PROGRESS_AND_QUEUED == bs->poll_state) + { + /* + * next poll sequence must be delayed by at least the round trip + * time, so calculate that here + */ + BFD_DBG ("Next poll sequence can commence in " BFD_CLK_FMT, + BFD_CLK_PRN (now - + bs->poll_state_start_or_timeout_clocks)); + bs->poll_state_start_or_timeout_clocks = + now + (now - bs->poll_state_start_or_timeout_clocks); + BFD_DBG + ("Poll sequence terminated, but another is needed, bs_idx=%u", + bs->bs_idx); + bfd_set_poll_state (bs, BFD_POLL_NEEDED); } } + bfd_calc_next_tx (bm, bs, now); + bfd_set_timer (bm, bs, now, 0); if (BFD_STATE_admin_down == bs->local_state) { BFD_DBG ("Session is admin-down, ignoring packet, bs_idx=%u", @@ -1435,52 +1686,75 @@ bfd_consume_pkt (bfd_main_t * bm, const bfd_pkt_t * pkt, u32 bs_idx) } } -static const char * -bfd_poll_state_string (bfd_poll_state_e state) +int +bfd_consume_echo_pkt (bfd_main_t * bm, vlib_buffer_t * b) { - switch (state) + bfd_echo_pkt_t *pkt = NULL; + if (b->current_length != sizeof (*pkt)) { -#define F(x) \ - case POLL_##x: \ - return "POLL_" #x; - foreach_bfd_poll_state (F) -#undef F + return 0; } - return "UNKNOWN"; + pkt = vlib_buffer_get_current (b); + bfd_session_t *bs = bfd_find_session_by_disc (bm, pkt->discriminator); + if (!bs) + { + return 0; + } + BFD_DBG ("Scanning bfd echo packet, bs_idx=%d", bs->bs_idx); + u64 checksum = + bfd_calc_echo_checksum (bs->local_discr, pkt->expire_time_clocks, + bs->echo_secret); + if (checksum != pkt->checksum) + { + BFD_DBG ("Invalid echo packet, checksum mismatch"); + return 1; + } + u64 now = clib_cpu_time_now (); + if (pkt->expire_time_clocks < now) + { + BFD_DBG ("Stale packet received, expire time %lu < now %lu", + pkt->expire_time_clocks, now); + } + else + { + bs->echo_last_rx_clocks = now; + } + return 1; } u8 * format_bfd_session (u8 * s, va_list * args) { const bfd_session_t *bs = va_arg (*args, bfd_session_t *); - s = format (s, "BFD(%u): bfd.SessionState=%s, " - "bfd.RemoteSessionState=%s, " - "bfd.LocalDiscr=%u, " - "bfd.RemoteDiscr=%u, " - "bfd.LocalDiag=%s, " - "bfd.DesiredMinTxInterval=%u, " - "bfd.RequiredMinRxInterval=%u, " - "bfd.RequiredMinEchoRxInterval=%u, " - "bfd.RemoteMinRxInterval=%u, " - "bfd.DemandMode=%s, " - "bfd.RemoteDemandMode=%s, " - "bfd.DetectMult=%u, " - "Auth: {local-seq-num=%u, " - "remote-seq-num=%u, " - "is-delayed=%s, " - "curr-key=%U, " - "next-key=%U}," - "poll-state: %s", + uword indent = format_get_indent (s); + s = format (s, "bs_idx=%u local-state=%s remote-state=%s\n" + "%Ulocal-discriminator=%u remote-discriminator=%u\n" + "%Ulocal-diag=%s echo-active=%s\n" + "%Udesired-min-tx=%u required-min-rx=%u\n" + "%Urequired-min-echo-rx=%u detect-mult=%u\n" + "%Uremote-min-rx=%u remote-min-echo-rx=%u\n" + "%Uremote-demand=%s poll-state=%s\n" + "%Uauth: local-seq-num=%u remote-seq-num=%u\n" + "%U is-delayed=%s\n" + "%U curr-key=%U\n" + "%U next-key=%U", bs->bs_idx, bfd_state_string (bs->local_state), - bfd_state_string (bs->remote_state), bs->local_discr, - bs->remote_discr, bfd_diag_code_string (bs->local_diag), + bfd_state_string (bs->remote_state), format_white_space, indent, + bs->local_discr, bs->remote_discr, format_white_space, indent, + bfd_diag_code_string (bs->local_diag), + (bs->echo ? "yes" : "no"), format_white_space, indent, bs->config_desired_min_tx_usec, bs->config_required_min_rx_usec, - 1, bs->remote_min_rx_usec, (bs->local_demand ? "yes" : "no"), - (bs->remote_demand ? "yes" : "no"), bs->local_detect_mult, - bs->auth.local_seq_number, bs->auth.remote_seq_number, - (bs->auth.is_delayed ? "yes" : "no"), format_bfd_auth_key, - bs->auth.curr_key, format_bfd_auth_key, bs->auth.next_key, - bfd_poll_state_string (bs->poll_state)); + format_white_space, indent, 1, bs->local_detect_mult, + format_white_space, indent, bs->remote_min_rx_usec, + bs->remote_min_echo_rx_usec, format_white_space, indent, + (bs->remote_demand ? "yes" : "no"), + bfd_poll_state_string (bs->poll_state), format_white_space, + indent, bs->auth.local_seq_number, bs->auth.remote_seq_number, + format_white_space, indent, + (bs->auth.is_delayed ? "yes" : "no"), format_white_space, + indent, format_bfd_auth_key, bs->auth.curr_key, + format_white_space, indent, format_bfd_auth_key, + bs->auth.next_key); return s; } @@ -1537,7 +1811,7 @@ bfd_auth_activate (bfd_session_t * bs, u32 conf_key_id, bs->auth.is_delayed = 0; } ++key->use_count; - BFD_DBG ("Session auth modified: %U", format_bfd_session, bs); + BFD_DBG ("\nSession auth modified: %U", format_bfd_session, bs); return 0; } @@ -1571,7 +1845,7 @@ bfd_auth_deactivate (bfd_session_t * bs, u8 is_delayed) --bs->auth.next_key->use_count; bs->auth.next_key = NULL; } - BFD_DBG ("Session auth modified: %U", format_bfd_session, bs); + BFD_DBG ("\nSession auth modified: %U", format_bfd_session, bs); return 0; #else clib_warning ("SSL missing, cannot deactivate BFD authentication"); @@ -1588,10 +1862,10 @@ bfd_session_set_params (bfd_main_t * bm, bfd_session_t * bs, bs->config_desired_min_tx_usec != desired_min_tx_usec || bs->config_required_min_rx_usec != required_min_rx_usec) { - BFD_DBG ("Changing session params: %U", format_bfd_session, bs); + BFD_DBG ("\nChanging session params: %U", format_bfd_session, bs); switch (bs->poll_state) { - case POLL_NOT_NEEDED: + case BFD_POLL_NOT_NEEDED: if (BFD_STATE_up == bs->local_state || BFD_STATE_init == bs->local_state) { @@ -1599,21 +1873,26 @@ bfd_session_set_params (bfd_main_t * bm, bfd_session_t * bs, if (bs->config_desired_min_tx_usec != desired_min_tx_usec || bs->config_required_min_rx_usec != required_min_rx_usec) { - bs->poll_state = POLL_NEEDED; - BFD_DBG ("Set poll state=%s, bs_idx=%u", - bfd_poll_state_string (bs->poll_state), - bs->bs_idx); + bfd_set_poll_state (bs, BFD_POLL_NEEDED); } } break; - case POLL_NEEDED: - /* nothing to do */ + case BFD_POLL_NEEDED: + case BFD_POLL_IN_PROGRESS_AND_QUEUED: + /* + * nothing to do - will be handled in the future poll which is + * already scheduled for execution + */ break; - case POLL_IN_PROGRESS: - /* can't change params now ... */ - BFD_ERR ("Poll in progress, cannot change params for session with " - "bs_idx=%u", bs->bs_idx); - return VNET_API_ERROR_BFD_EAGAIN; + case BFD_POLL_IN_PROGRESS: + /* poll sequence is not needed for detect multiplier change */ + if (bs->config_desired_min_tx_usec != desired_min_tx_usec || + bs->config_required_min_rx_usec != required_min_rx_usec) + { + BFD_DBG ("Poll in progress, queueing extra poll, bs_idx=%u", + bs->bs_idx); + bfd_set_poll_state (bs, BFD_POLL_IN_PROGRESS_AND_QUEUED); + } } bs->local_detect_mult = detect_mult; @@ -1623,7 +1902,7 @@ bfd_session_set_params (bfd_main_t * bm, bfd_session_t * bs, bs->config_required_min_rx_usec = required_min_rx_usec; bs->config_required_min_rx_clocks = bfd_usec_to_clocks (bm, required_min_rx_usec); - BFD_DBG ("Changed session params: %U", format_bfd_session, bs); + BFD_DBG ("\nChanged session params: %U", format_bfd_session, bs); vlib_process_signal_event (bm->vlib_main, bm->bfd_process_node_index, BFD_EVENT_CONFIG_CHANGED, bs->bs_idx); diff --git a/src/vnet/bfd/bfd_main.h b/src/vnet/bfd/bfd_main.h index 14a54d6f3df..d8063f9de33 100644 --- a/src/vnet/bfd/bfd_main.h +++ b/src/vnet/bfd/bfd_main.h @@ -24,17 +24,6 @@ #include #include -#define foreach_bfd_transport(F) \ - F (UDP4, "ip4-rewrite") \ - F (UDP6, "ip6-rewrite") - -typedef enum -{ -#define F(t, n) BFD_TRANSPORT_##t, - foreach_bfd_transport (F) -#undef F -} bfd_transport_t; - #define foreach_bfd_mode(F) \ F (asynchronous) \ F (demand) @@ -64,14 +53,15 @@ typedef struct bfd_auth_type_e auth_type; } bfd_auth_key_t; -#define foreach_bfd_poll_state(F)\ - F(NOT_NEEDED)\ -F(NEEDED)\ -F(IN_PROGRESS) +#define foreach_bfd_poll_state(F) \ + F (NOT_NEEDED) \ + F (NEEDED) \ + F (IN_PROGRESS) \ + F (IN_PROGRESS_AND_QUEUED) typedef enum { -#define F(x) POLL_##x, +#define F(x) BFD_POLL_##x, foreach_bfd_poll_state (F) #undef F } bfd_poll_state_e; @@ -120,21 +110,27 @@ typedef struct bfd_session_s /* remote min rx interval (clocks) */ u64 remote_min_rx_clocks; + /* remote min echo rx interval (microseconds) */ + u64 remote_min_echo_rx_usec; + + /* remote min echo rx interval (clocks) */ + u64 remote_min_echo_rx_clocks; + /* remote desired min tx interval (clocks) */ u64 remote_desired_min_tx_clocks; /* configured detect multiplier */ u8 local_detect_mult; - /* 1 if in demand mode, 0 otherwise */ - u8 local_demand; - /* 1 if remote system sets demand mode, 0 otherwise */ u8 remote_demand; /* remote detect multiplier */ u8 remote_detect_mult; + /* 1 is echo function is active, 0 otherwise */ + u8 echo; + /* set to value of timer in timing wheel, 0 if never set */ u64 wheel_time_clocks; @@ -150,12 +146,33 @@ typedef struct bfd_session_s /* timestamp of last packet received */ u64 last_rx_clocks; + /* transmit interval for echo packets */ + u64 echo_transmit_interval_clocks; + + /* next time at which to transmit echo packet */ + u64 echo_tx_timeout_clocks; + + /* timestamp of last echo packet transmitted */ + u64 echo_last_tx_clocks; + + /* timestamp of last echo packet received */ + u64 echo_last_rx_clocks; + + /* secret used for calculating/checking checksum of echo packets */ + u32 echo_secret; + /* detection time */ u64 detection_time_clocks; /* state info regarding poll sequence */ bfd_poll_state_e poll_state; + /* + * helper for delayed poll sequence - marks either start of running poll + * sequence or timeout, after which we can start the next poll sequnce + */ + u64 poll_state_start_or_timeout_clocks; + /* authentication information */ struct { @@ -191,7 +208,7 @@ typedef struct bfd_session_s } auth; /* transport type for this session */ - bfd_transport_t transport; + bfd_transport_e transport; /* union of transport-specific data */ union @@ -227,6 +244,9 @@ typedef struct /* default desired min tx in clocks */ u64 default_desired_min_tx_clocks; + /* minimum required min rx while echo function is active - clocks */ + u64 min_required_min_rx_while_echo_clocks; + /* for generating random numbers */ u32 random_seed; @@ -268,36 +288,54 @@ enum BFD_EVENT_CONFIG_CHANGED, } bfd_process_event_e; -u8 *bfd_input_format_trace (u8 * s, va_list * args); +/* echo packet structure */ +/* *INDENT-OFF* */ +typedef CLIB_PACKED (struct { + /* local discriminator */ + u32 discriminator; + /* expire time of this packet - clocks */ + u64 expire_time_clocks; + /* checksum - based on discriminator, local secret and expire time */ + u64 checksum; +}) bfd_echo_pkt_t; +/* *INDENT-ON* */ -bfd_session_t *bfd_get_session (bfd_main_t * bm, bfd_transport_t t); +u8 *bfd_input_format_trace (u8 * s, va_list * args); +bfd_session_t *bfd_get_session (bfd_main_t * bm, bfd_transport_e t); void bfd_put_session (bfd_main_t * bm, bfd_session_t * bs); bfd_session_t *bfd_find_session_by_idx (bfd_main_t * bm, uword bs_idx); bfd_session_t *bfd_find_session_by_disc (bfd_main_t * bm, u32 disc); void bfd_session_start (bfd_main_t * bm, bfd_session_t * bs); void bfd_consume_pkt (bfd_main_t * bm, const bfd_pkt_t * bfd, u32 bs_idx); +int bfd_consume_echo_pkt (bfd_main_t * bm, vlib_buffer_t * b); int bfd_verify_pkt_common (const bfd_pkt_t * pkt); int bfd_verify_pkt_auth (const bfd_pkt_t * pkt, u16 pkt_size, bfd_session_t * bs); void bfd_event (bfd_main_t * bm, bfd_session_t * bs); void bfd_init_final_control_frame (vlib_main_t * vm, vlib_buffer_t * b, - bfd_session_t * bs); + bfd_main_t * bm, bfd_session_t * bs); u8 *format_bfd_session (u8 * s, va_list * args); void bfd_session_set_flags (bfd_session_t * bs, u8 admin_up_down); unsigned bfd_auth_type_supported (bfd_auth_type_e auth_type); vnet_api_error_t bfd_auth_activate (bfd_session_t * bs, u32 conf_key_id, u8 bfd_key_id, u8 is_delayed); vnet_api_error_t bfd_auth_deactivate (bfd_session_t * bs, u8 is_delayed); -vnet_api_error_t -bfd_session_set_params (bfd_main_t * bm, bfd_session_t * bs, - u32 desired_min_tx_usec, - u32 required_min_rx_usec, u8 detect_mult); +vnet_api_error_t bfd_session_set_params (bfd_main_t * bm, bfd_session_t * bs, + u32 desired_min_tx_usec, + u32 required_min_rx_usec, + u8 detect_mult); #define USEC_PER_MS 1000LL #define USEC_PER_SECOND (1000 * USEC_PER_MS) /* default, slow transmission interval for BFD packets, per spec at least 1s */ -#define BFD_DEFAULT_DESIRED_MIN_TX_US USEC_PER_SECOND +#define BFD_DEFAULT_DESIRED_MIN_TX_USEC USEC_PER_SECOND + +/* + * minimum required min rx set locally when echo function is used, per spec + * should be set to at least 1s + */ +#define BFD_REQUIRED_MIN_RX_USEC_WHILE_ECHO USEC_PER_SECOND #endif /* __included_bfd_main_h__ */ diff --git a/src/vnet/bfd/bfd_udp.c b/src/vnet/bfd/bfd_udp.c index 8519009d139..146faad6bfa 100644 --- a/src/vnet/bfd/bfd_udp.c +++ b/src/vnet/bfd/bfd_udp.c @@ -27,6 +27,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -38,6 +41,12 @@ typedef struct /* hashmap - bfd session index by bfd key - used for CLI/API lookup, where * discriminator is unknown */ mhash_t bfd_session_idx_by_bfd_key; + /* convenience variable */ + vnet_main_t *vnet_main; + /* flag indicating whether echo_source_sw_if_index holds a valid value */ + int echo_source_is_set; + /* loopback interface used to get echo source ip */ + u32 echo_source_sw_if_index; } bfd_udp_main_t; static vlib_node_registration_t bfd_udp4_input_node; @@ -47,6 +56,80 @@ static vlib_node_registration_t bfd_udp_echo6_input_node; bfd_udp_main_t bfd_udp_main; +vnet_api_error_t +bfd_udp_set_echo_source (u32 sw_if_index) +{ + vnet_sw_interface_t *sw_if = + vnet_get_sw_interface_safe (bfd_udp_main.vnet_main, + bfd_udp_main.echo_source_sw_if_index); + if (sw_if) + { + bfd_udp_main.echo_source_sw_if_index = sw_if_index; + bfd_udp_main.echo_source_is_set = 1; + return 0; + } + return VNET_API_ERROR_BFD_ENOENT; +} + +vnet_api_error_t +bfd_udp_del_echo_source (u32 sw_if_index) +{ + bfd_udp_main.echo_source_sw_if_index = ~0; + bfd_udp_main.echo_source_is_set = 0; + return 0; +} + +int +bfd_udp_is_echo_available (bfd_transport_e transport) +{ + if (!bfd_udp_main.echo_source_is_set) + { + return 0; + } + /* + * for the echo to work, we need a loopback interface with at least one + * address with netmask length at most 31 (ip4) or 127 (ip6) so that we can + * pick an unused address from that subnet + */ + vnet_sw_interface_t *sw_if = + vnet_get_sw_interface_safe (bfd_udp_main.vnet_main, + bfd_udp_main.echo_source_sw_if_index); + if (sw_if && sw_if->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) + { + if (BFD_TRANSPORT_UDP4 == transport) + { + ip4_main_t *im = &ip4_main; + ip_interface_address_t *ia = NULL; + /* *INDENT-OFF* */ + foreach_ip_interface_address (&im->lookup_main, ia, + bfd_udp_main.echo_source_sw_if_index, + 0 /* honor unnumbered */, ({ + if (ia->address_length <= 31) + { + return 1; + } + })); + /* *INDENT-ON* */ + } + else if (BFD_TRANSPORT_UDP6 == transport) + { + ip6_main_t *im = &ip6_main; + ip_interface_address_t *ia = NULL; + /* *INDENT-OFF* */ + foreach_ip_interface_address (&im->lookup_main, ia, + bfd_udp_main.echo_source_sw_if_index, + 0 /* honor unnumbered */, ({ + if (ia->address_length <= 127) + { + return 1; + } + })); + /* *INDENT-ON* */ + } + } + return 0; +} + static u16 bfd_udp_bs_idx_to_sport (u32 bs_idx) { @@ -61,9 +144,78 @@ bfd_udp_bs_idx_to_sport (u32 bs_idx) return 49152 + bs_idx % (65535 - 49152 + 1); } -void +static void +lol () +{ +} + +int +bfd_udp_get_echo_src_ip4 (ip4_address_t * addr) +{ + if (!bfd_udp_main.echo_source_is_set) + { + BFD_ERR ("cannot find ip4 address, echo source not set"); + return 0; + } + ip_interface_address_t *ia = NULL; + ip4_main_t *im = &ip4_main; + + /* *INDENT-OFF* */ + foreach_ip_interface_address ( + &im->lookup_main, ia, bfd_udp_main.echo_source_sw_if_index, + 0 /* honor unnumbered */, ({ + ip4_address_t *x = + ip_interface_address_get_address (&im->lookup_main, ia); + if (ia->address_length <= 31) + { + addr->as_u32 = clib_host_to_net_u32 (x->as_u32); + /* + * flip the last bit to get a different address, might be network, + * we don't care ... + */ + addr->as_u32 ^= 1; + addr->as_u32 = clib_net_to_host_u32 (addr->as_u32); + return 1; + } + })); + /* *INDENT-ON* */ + BFD_ERR ("cannot find ip4 address, no usable address found"); + return 0; +} + +int +bfd_udp_get_echo_src_ip6 (ip6_address_t * addr) +{ + if (!bfd_udp_main.echo_source_is_set) + { + BFD_ERR ("cannot find ip6 address, echo source not set"); + return 0; + } + ip_interface_address_t *ia = NULL; + ip6_main_t *im = &ip6_main; + + /* *INDENT-OFF* */ + foreach_ip_interface_address ( + &im->lookup_main, ia, bfd_udp_main.echo_source_sw_if_index, + 0 /* honor unnumbered */, ({ + ip6_address_t *x = + ip_interface_address_get_address (&im->lookup_main, ia); + if (ia->address_length <= 127) + { + *addr = *x; + addr->as_u8[15] ^= 1; /* flip the last bit of the address */ + lol (); + return 1; + } + })); + /* *INDENT-ON* */ + BFD_ERR ("cannot find ip6 address, no usable address found"); + return 0; +} + +int bfd_add_udp4_transport (vlib_main_t * vm, vlib_buffer_t * b, - const bfd_session_t * bs) + const bfd_session_t * bs, int is_echo) { const bfd_udp_session_t *bus = &bs->udp; const bfd_udp_key_t *key = &bus->key; @@ -83,12 +235,24 @@ bfd_add_udp4_transport (vlib_main_t * vm, vlib_buffer_t * b, headers->ip4.ip_version_and_header_length = 0x45; headers->ip4.ttl = 255; headers->ip4.protocol = IP_PROTOCOL_UDP; - headers->ip4.src_address.as_u32 = key->local_addr.ip4.as_u32; - headers->ip4.dst_address.as_u32 = key->peer_addr.ip4.as_u32; - headers->udp.src_port = clib_host_to_net_u16 (bfd_udp_bs_idx_to_sport (bs->bs_idx)); - headers->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_bfd4); + if (is_echo) + { + int rv; + if (!(rv = bfd_udp_get_echo_src_ip4 (&headers->ip4.src_address))) + { + return rv; + } + headers->ip4.dst_address.as_u32 = key->local_addr.ip4.as_u32; + headers->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_bfd_echo4); + } + else + { + headers->ip4.src_address.as_u32 = key->local_addr.ip4.as_u32; + headers->ip4.dst_address.as_u32 = key->peer_addr.ip4.as_u32; + headers->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_bfd4); + } /* fix ip length, checksum and udp length */ const u16 ip_length = vlib_buffer_length_in_chain (vm, b); @@ -98,11 +262,12 @@ bfd_add_udp4_transport (vlib_main_t * vm, vlib_buffer_t * b, const u16 udp_length = ip_length - (sizeof (headers->ip4)); headers->udp.length = clib_host_to_net_u16 (udp_length); + return 1; } -void +int bfd_add_udp6_transport (vlib_main_t * vm, vlib_buffer_t * b, - const bfd_session_t * bs) + const bfd_session_t * bs, int is_echo) { const bfd_udp_session_t *bus = &bs->udp; const bfd_udp_key_t *key = &bus->key; @@ -123,14 +288,28 @@ bfd_add_udp6_transport (vlib_main_t * vm, vlib_buffer_t * b, clib_host_to_net_u32 (0x6 << 28); headers->ip6.hop_limit = 255; headers->ip6.protocol = IP_PROTOCOL_UDP; - clib_memcpy (&headers->ip6.src_address, &key->local_addr.ip6, - sizeof (headers->ip6.src_address)); - clib_memcpy (&headers->ip6.dst_address, &key->peer_addr.ip6, - sizeof (headers->ip6.dst_address)); - headers->udp.src_port = clib_host_to_net_u16 (bfd_udp_bs_idx_to_sport (bs->bs_idx)); - headers->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_bfd6); + if (is_echo) + { + int rv; + if (!(rv = bfd_udp_get_echo_src_ip6 (&headers->ip6.src_address))) + { + return rv; + } + clib_memcpy (&headers->ip6.dst_address, &key->local_addr.ip6, + sizeof (headers->ip6.dst_address)); + + headers->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_bfd_echo6); + } + else + { + clib_memcpy (&headers->ip6.src_address, &key->local_addr.ip6, + sizeof (headers->ip6.src_address)); + clib_memcpy (&headers->ip6.dst_address, &key->peer_addr.ip6, + sizeof (headers->ip6.dst_address)); + headers->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_bfd6); + } /* fix ip payload length and udp length */ const u16 udp_length = @@ -147,6 +326,7 @@ bfd_add_udp6_transport (vlib_main_t * vm, vlib_buffer_t * b, { headers->udp.checksum = 0xffff; } + return 1; } static bfd_session_t * @@ -182,12 +362,17 @@ bfd_udp_add_session_internal (bfd_udp_main_t * bum, u32 sw_if_index, bfd_session_t ** bs_out) { /* get a pool entry and if we end up not needing it, give it back */ - bfd_transport_t t = BFD_TRANSPORT_UDP4; + bfd_transport_e t = BFD_TRANSPORT_UDP4; if (!ip46_address_is_ip4 (local_addr)) { t = BFD_TRANSPORT_UDP6; } bfd_session_t *bs = bfd_get_session (bum->bfd_main, t); + if (!bs) + { + bfd_put_session (bum->bfd_main, bs); + return VNET_API_ERROR_BFD_EAGAIN; + } bfd_udp_session_t *bus = &bs->udp; memset (bus, 0, sizeof (*bus)); bfd_udp_key_t *key = &bus->key; @@ -213,6 +398,21 @@ bfd_udp_add_session_internal (bfd_udp_main_t * bum, u32 sw_if_index, BFD_DBG ("adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, VNET_LINK_IP4, %U, %d) " "returns %d", format_ip46_address, &key->peer_addr, IP46_TYPE_ANY, key->sw_if_index, bus->adj_index); + + fib_prefix_t fib_prefix; + memset (&fib_prefix, 0, sizeof (fib_prefix)); + fib_prefix.fp_len = 0; + fib_prefix.fp_proto = FIB_PROTOCOL_IP4; + fib_prefix.fp_addr = key->local_addr; + u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, 0); /* FIXME table id 0? */ + dpo_id_t dpo = DPO_INVALID; + dpo_proto_t dproto; + dproto = fib_proto_to_dpo (fib_prefix.fp_proto); + receive_dpo_add_or_lock (dproto, ~0, NULL, &dpo); + fib_table_entry_special_dpo_update (fib_index, &fib_prefix, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_LOCAL, &dpo); + dpo_reset (&dpo); } else { @@ -234,7 +434,7 @@ bfd_udp_validate_api_input (u32 sw_if_index, const ip46_address_t * peer_addr) { vnet_sw_interface_t *sw_if = - vnet_get_sw_interface (vnet_get_main (), sw_if_index); + vnet_get_sw_interface_safe (bfd_udp_main.vnet_main, sw_if_index); u8 local_ip_valid = 0; ip_interface_address_t *ia = NULL; if (!sw_if) @@ -1001,7 +1201,8 @@ bfd_udp_input (vlib_main_t * vm, vlib_node_runtime_t * rt, const bfd_pkt_t *pkt = vlib_buffer_get_current (b0); if (bfd_pkt_get_poll (pkt)) { - bfd_init_final_control_frame (vm, b0, bs); + bfd_init_final_control_frame (vm, b0, bfd_udp_main.bfd_main, + bs); if (is_ipv6) { vlib_node_increment_counter (vm, bfd_udp6_input_node.index, @@ -1081,44 +1282,6 @@ VLIB_REGISTER_NODE (bfd_udp6_input_node, static) = { }; /* *INDENT-ON* */ -/** - * @brief swap the source and destination IP addresses in the packet - */ -static int -bfd_echo_address_swap (vlib_buffer_t * b, int is_ipv6) -{ - udp_header_t *dummy = NULL; - if (is_ipv6) - { - ip6_header_t *ip6 = NULL; - bfd_udp6_find_headers (b, &ip6, &dummy); - if (!ip6) - { - return 0; - } - ip6_address_t tmp = ip6->dst_address; - ip6->dst_address = ip6->src_address; - ip6->src_address = tmp; - vlib_buffer_advance (b, - (u8 *) ip6 - (u8 *) vlib_buffer_get_current (b)); - } - else - { - ip4_header_t *ip4 = NULL; - bfd_udp4_find_headers (b, &ip4, &dummy); - if (!ip4) - { - return 0; - } - ip4_address_t tmp = ip4->dst_address; - ip4->dst_address = ip4->src_address; - ip4->src_address = tmp; - vlib_buffer_advance (b, - (u8 *) ip4 - (u8 *) vlib_buffer_get_current (b)); - } - return 1; -} - /* * Process a frame of bfd echo packets * Expect 1 packet / frame @@ -1153,7 +1316,12 @@ bfd_udp_echo_input (vlib_main_t * vm, vlib_node_runtime_t * rt, clib_memcpy (t0->data, vlib_buffer_get_current (b0), len); } - if (bfd_echo_address_swap (b0, is_ipv6)) + if (bfd_consume_echo_pkt (bfd_udp_main.bfd_main, b0)) + { + b0->error = rt->errors[BFD_UDP_ERROR_NONE]; + next0 = BFD_UDP_INPUT_NEXT_NORMAL; + } + else { /* loop back the packet */ b0->error = rt->errors[BFD_UDP_ERROR_NONE]; @@ -1169,11 +1337,6 @@ bfd_udp_echo_input (vlib_main_t * vm, vlib_node_runtime_t * rt, } next0 = BFD_UDP_INPUT_NEXT_REPLY; } - else - { - b0->error = rt->errors[BFD_UDP_ERROR_BAD]; - next0 = BFD_UDP_INPUT_NEXT_NORMAL; - } vlib_set_next_frame_buffer (vm, rt, next0, bi0); @@ -1294,6 +1457,7 @@ bfd_udp_init (vlib_main_t * vm) mhash_init (&bfd_udp_main.bfd_session_idx_by_bfd_key, sizeof (uword), sizeof (bfd_udp_key_t)); bfd_udp_main.bfd_main = &bfd_main; + bfd_udp_main.vnet_main = vnet_get_main (); udp_register_dst_port (vm, UDP_DST_PORT_bfd4, bfd_udp4_input_node.index, 1); udp_register_dst_port (vm, UDP_DST_PORT_bfd6, bfd_udp6_input_node.index, 0); udp_register_dst_port (vm, UDP_DST_PORT_bfd_echo4, diff --git a/src/vnet/bfd/bfd_udp.h b/src/vnet/bfd/bfd_udp.h index 502e2314707..ce2ee3cbf48 100644 --- a/src/vnet/bfd/bfd_udp.h +++ b/src/vnet/bfd/bfd_udp.h @@ -22,6 +22,7 @@ #include #include #include +#include /* *INDENT-OFF* */ typedef CLIB_PACKED (struct { @@ -49,10 +50,17 @@ typedef struct struct bfd_session_s; -void bfd_add_udp4_transport (vlib_main_t * vm, vlib_buffer_t * b, - const struct bfd_session_s *bs); -void bfd_add_udp6_transport (vlib_main_t * vm, vlib_buffer_t * b, - const struct bfd_session_s *bs); +int bfd_add_udp4_transport (vlib_main_t * vm, vlib_buffer_t * b, + const struct bfd_session_s *bs, int is_echo); +int bfd_add_udp6_transport (vlib_main_t * vm, vlib_buffer_t * b, + const struct bfd_session_s *bs, int is_echo); + +/** + * @brief check if the bfd udp layer is echo-capable at this time + * + * @return 1 if available, 0 otherwise + */ +int bfd_udp_is_echo_available (bfd_transport_e transport); #endif /* __included_bfd_udp_h__ */ -- cgit 1.2.3-korg