From 98d82ca04ba438cd2ba3c03de6e1e82e4786cd83 Mon Sep 17 00:00:00 2001 From: Klement Sekera Date: Tue, 2 Feb 2021 13:25:40 +0100 Subject: nat: fix EI hairpinning thread safety Avoid doing inter-thread reads without locks by doing a handoff before destination address rewrite. Destination address is read from a session which is possibly owned by a different thread. By splitting the work in two parts with a handoff in the middle, we can do both in a thread safe way. Type: improvement Signed-off-by: Klement Sekera Change-Id: I1c50d188393a610f5564fa230c75771a8065f273 --- src/plugins/nat/nat44-ei/nat44_ei_in2out.c | 266 ++++++++++++++++++++++++++--- 1 file changed, 242 insertions(+), 24 deletions(-) (limited to 'src/plugins/nat/nat44-ei') diff --git a/src/plugins/nat/nat44-ei/nat44_ei_in2out.c b/src/plugins/nat/nat44-ei/nat44_ei_in2out.c index ad0007bb8df..54ed1a92e8b 100644 --- a/src/plugins/nat/nat44-ei/nat44_ei_in2out.c +++ b/src/plugins/nat/nat44-ei/nat44_ei_in2out.c @@ -35,6 +35,7 @@ #include #include #include +#include typedef struct { @@ -108,9 +109,17 @@ typedef enum SNAT_IN2OUT_NEXT_DROP, SNAT_IN2OUT_NEXT_ICMP_ERROR, SNAT_IN2OUT_NEXT_SLOW_PATH, + SNAT_IN2OUT_NEXT_HAIRPINNING_HANDOFF, SNAT_IN2OUT_N_NEXT, } snat_in2out_next_t; +typedef enum +{ + NAT44_IN2OUT_HAIRPINNING_FINISH_NEXT_DROP, + NAT44_IN2OUT_HAIRPINNING_FINISH_NEXT_LOOKUP, + NAT44_IN2OUT_HAIRPINNING_FINISH_N_NEXT, +} nat44_in2out_hairpinnig_finish_next_t; + static inline int snat_not_translate (snat_main_t * sm, vlib_node_runtime_t * node, u32 sw_if_index0, ip4_header_t * ip0, u32 proto0, @@ -167,13 +176,11 @@ nat_not_translate_output_feature (snat_main_t * sm, ip4_header_t * ip0, if (!clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0)) { /* hairpinning */ - /* *INDENT-OFF* */ - pool_foreach (i, sm->output_feature_interfaces) - { - if ((nat_interface_is_inside(i)) && (sw_if_index == i->sw_if_index)) - return 0; - } - /* *INDENT-ON* */ + pool_foreach (i, sm->output_feature_interfaces) + { + if ((nat_interface_is_inside (i)) && (sw_if_index == i->sw_if_index)) + return 0; + } return 1; } @@ -328,7 +335,6 @@ slow_path (snat_main_t * sm, vlib_buffer_t * b0, s->out2in.fib_index = sm->outside_fibs[0].fib_index; break; default: - /* *INDENT-OFF* */ vec_foreach (outside_fib, sm->outside_fibs) { fei = fib_table_lookup (outside_fib->fib_index, &pfx); @@ -341,7 +347,6 @@ slow_path (snat_main_t * sm, vlib_buffer_t * b0, } } } - /* *INDENT-ON* */ break; } s->ext_host_addr.as_u32 = ip0->dst_address.as_u32; @@ -660,6 +665,7 @@ icmp_in2out (snat_main_t *sm, vlib_buffer_t *b0, ip4_header_t *ip0, ip_csum_t sum0; u16 checksum0; u32 next0_tmp; + u32 required_thread_index = thread_index; echo0 = (icmp_echo_header_t *) (icmp0 + 1); @@ -784,8 +790,14 @@ icmp_in2out (snat_main_t *sm, vlib_buffer_t *b0, ip4_header_t *ip0, if (vnet_buffer (b0)->sw_if_index[VLIB_TX] == ~0) { - if (0 != snat_icmp_hairpinning (sm, b0, ip0, icmp0)) + if (0 != snat_icmp_hairpinning (sm, b0, thread_index, ip0, icmp0, + &required_thread_index)) vnet_buffer (b0)->sw_if_index[VLIB_TX] = fib_index; + if (thread_index != required_thread_index) + { + vnet_buffer (b0)->snat.required_thread_index = required_thread_index; + next0 = SNAT_IN2OUT_NEXT_HAIRPINNING_HANDOFF; + } } out: @@ -1623,7 +1635,6 @@ VLIB_NODE_FN (snat_in2out_node) (vlib_main_t * vm, 0); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (snat_in2out_node) = { .name = "nat44-in2out", .vector_size = sizeof (u32), @@ -1643,9 +1654,9 @@ VLIB_REGISTER_NODE (snat_in2out_node) = { [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup", [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath", [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error", + [SNAT_IN2OUT_NEXT_HAIRPINNING_HANDOFF] = "nat44-in2out-hairpinning-handoff-ip4-lookup", }, }; -/* *INDENT-ON* */ VLIB_NODE_FN (snat_in2out_output_node) (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -1655,7 +1666,6 @@ VLIB_NODE_FN (snat_in2out_output_node) (vlib_main_t * vm, 1); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (snat_in2out_output_node) = { .name = "nat44-in2out-output", .vector_size = sizeof (u32), @@ -1675,9 +1685,9 @@ VLIB_REGISTER_NODE (snat_in2out_output_node) = { [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output", [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath", [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error", + [SNAT_IN2OUT_NEXT_HAIRPINNING_HANDOFF] = "nat44-in2out-hairpinning-handoff-interface-output", }, }; -/* *INDENT-ON* */ VLIB_NODE_FN (snat_in2out_slowpath_node) (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -1687,7 +1697,6 @@ VLIB_NODE_FN (snat_in2out_slowpath_node) (vlib_main_t * vm, 0); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = { .name = "nat44-in2out-slowpath", .vector_size = sizeof (u32), @@ -1707,9 +1716,9 @@ VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = { [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup", [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath", [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error", + [SNAT_IN2OUT_NEXT_HAIRPINNING_HANDOFF] = "nat44-in2out-hairpinning-handoff-ip4-lookup", }, }; -/* *INDENT-ON* */ VLIB_NODE_FN (snat_in2out_output_slowpath_node) (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -1719,7 +1728,6 @@ VLIB_NODE_FN (snat_in2out_output_slowpath_node) (vlib_main_t * vm, 1); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (snat_in2out_output_slowpath_node) = { .name = "nat44-in2out-output-slowpath", .vector_size = sizeof (u32), @@ -1739,15 +1747,16 @@ VLIB_REGISTER_NODE (snat_in2out_output_slowpath_node) = { [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output", [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath", [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error", + [SNAT_IN2OUT_NEXT_HAIRPINNING_HANDOFF] = "nat44-in2out-hairpinning-handoff-interface-output", }, }; -/* *INDENT-ON* */ VLIB_NODE_FN (snat_in2out_fast_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { u32 n_left_from, *from, *to_next; + u32 thread_index = vm->thread_index; snat_in2out_next_t next_index; snat_main_t *sm = &snat_main; int is_hairpinning = 0; @@ -1780,7 +1789,7 @@ VLIB_NODE_FN (snat_in2out_fast_node) (vlib_main_t * vm, ip4_address_t sm0_addr; u16 sm0_port; u32 sm0_fib_index; - + u32 required_thread_index = thread_index; /* speculatively enqueue b0 to the current next frame */ bi0 = from[0]; @@ -1896,8 +1905,16 @@ VLIB_NODE_FN (snat_in2out_fast_node) (vlib_main_t * vm, } /* Hairpinning */ - is_hairpinning = snat_hairpinning (vm, node, sm, b0, ip0, udp0, tcp0, - proto0, 0 /* do_trace */); + is_hairpinning = snat_hairpinning ( + vm, node, sm, thread_index, b0, ip0, udp0, tcp0, proto0, + 0 /* do_trace */, &required_thread_index); + + if (thread_index != required_thread_index) + { + vnet_buffer (b0)->snat.required_thread_index = + required_thread_index; + next0 = SNAT_IN2OUT_NEXT_HAIRPINNING_HANDOFF; + } trace0: if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) @@ -1930,8 +1947,6 @@ VLIB_NODE_FN (snat_in2out_fast_node) (vlib_main_t * vm, return frame->n_vectors; } - -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (snat_in2out_fast_node) = { .name = "nat44-in2out-fast", .vector_size = sizeof (u32), @@ -1951,9 +1966,212 @@ VLIB_REGISTER_NODE (snat_in2out_fast_node) = { [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup", [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath", [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error", + [SNAT_IN2OUT_NEXT_HAIRPINNING_HANDOFF] = "nat44-in2out-hairpinning-handoff-ip4-lookup", + }, +}; + +VLIB_NODE_FN (nat44_in2out_hairpinning_handoff_ip4_lookup_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return nat44_hairpinning_handoff_fn_inline ( + vm, node, frame, + snat_main.nat44_in2out_hairpinning_finish_ip4_lookup_node_fq_index); +} + +VLIB_REGISTER_NODE (nat44_in2out_hairpinning_handoff_ip4_lookup_node) = { + .name = "nat44-in2out-hairpinning-handoff-ip4-lookup", + .vector_size = sizeof (u32), + .n_errors = ARRAY_LEN(nat44_hairpinning_handoff_error_strings), + .error_strings = nat44_hairpinning_handoff_error_strings, + .format_trace = format_nat44_hairpinning_handoff_trace, + + .n_next_nodes = 1, + + .next_nodes = { + [0] = "error-drop", + }, +}; + +VLIB_NODE_FN (nat44_in2out_hairpinning_handoff_interface_output_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return nat44_hairpinning_handoff_fn_inline ( + vm, node, frame, + snat_main.nat44_in2out_hairpinning_finish_interface_output_node_fq_index); +} + +VLIB_REGISTER_NODE (nat44_in2out_hairpinning_handoff_interface_output_node) = { + .name = "nat44-in2out-hairpinning-handoff-interface-output", + .vector_size = sizeof (u32), + .n_errors = ARRAY_LEN(nat44_hairpinning_handoff_error_strings), + .error_strings = nat44_hairpinning_handoff_error_strings, + .format_trace = format_nat44_hairpinning_handoff_trace, + + .n_next_nodes = 1, + + .next_nodes = { + [0] = "error-drop", + }, +}; + +static_always_inline int +nat44_in2out_hairpinning_finish_inline (vlib_main_t *vm, + vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + u32 n_left_from, *from, *to_next; + u32 thread_index = vm->thread_index; + snat_in2out_next_t next_index; + snat_main_t *sm = &snat_main; + int is_hairpinning = 0; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0; + u32 sw_if_index0; + ip4_header_t *ip0; + udp_header_t *udp0; + tcp_header_t *tcp0; + icmp46_header_t *icmp0; + u32 proto0; + u32 required_thread_index = thread_index; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + next0 = NAT44_IN2OUT_HAIRPINNING_FINISH_NEXT_LOOKUP; + + ip0 = vlib_buffer_get_current (b0); + udp0 = ip4_next_header (ip0); + tcp0 = (tcp_header_t *) udp0; + icmp0 = (icmp46_header_t *) udp0; + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + proto0 = ip_proto_to_nat_proto (ip0->protocol); + + switch (proto0) + { + case NAT_PROTOCOL_TCP: + // fallthrough + case NAT_PROTOCOL_UDP: + is_hairpinning = snat_hairpinning ( + vm, node, sm, thread_index, b0, ip0, udp0, tcp0, proto0, + 0 /* do_trace */, &required_thread_index); + break; + case NAT_PROTOCOL_ICMP: + is_hairpinning = + (0 == snat_icmp_hairpinning (sm, b0, thread_index, ip0, icmp0, + &required_thread_index)); + break; + case NAT_PROTOCOL_OTHER: + // this should never happen + next0 = NAT44_IN2OUT_HAIRPINNING_FINISH_NEXT_DROP; + break; + } + + if (thread_index != required_thread_index) + { + // but we already did a handoff ... + next0 = NAT44_IN2OUT_HAIRPINNING_FINISH_NEXT_DROP; + } + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) && + (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_in2out_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + t->is_hairpinning = is_hairpinning; + } + + if (next0 != NAT44_IN2OUT_HAIRPINNING_FINISH_NEXT_DROP) + { + vlib_increment_simple_counter ( + &sm->counters.fastpath.in2out.other, sw_if_index0, + vm->thread_index, 1); + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +VLIB_NODE_FN (nat44_in2out_hairpinning_finish_ip4_lookup_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return nat44_in2out_hairpinning_finish_inline (vm, node, frame); +} + +VLIB_REGISTER_NODE (nat44_in2out_hairpinning_finish_ip4_lookup_node) = { + .name = "nat44-in2out-hairpinning-finish-ip4-lookup", + .vector_size = sizeof (u32), + .format_trace = format_snat_in2out_fast_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(snat_in2out_error_strings), + .error_strings = snat_in2out_error_strings, + + .runtime_data_bytes = sizeof (snat_runtime_t), + + .n_next_nodes = NAT44_IN2OUT_HAIRPINNING_FINISH_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [NAT44_IN2OUT_HAIRPINNING_FINISH_NEXT_DROP] = "error-drop", + [NAT44_IN2OUT_HAIRPINNING_FINISH_NEXT_LOOKUP] = "ip4-lookup", + }, +}; + +VLIB_NODE_FN (nat44_in2out_hairpinning_finish_interface_output_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return nat44_in2out_hairpinning_finish_inline (vm, node, frame); +} + +VLIB_REGISTER_NODE (nat44_in2out_hairpinning_finish_interface_output_node) = { + .name = "nat44-in2out-hairpinning-finish-interface-output", + .vector_size = sizeof (u32), + .format_trace = format_snat_in2out_fast_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(snat_in2out_error_strings), + .error_strings = snat_in2out_error_strings, + + .runtime_data_bytes = sizeof (snat_runtime_t), + + .n_next_nodes = NAT44_IN2OUT_HAIRPINNING_FINISH_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [NAT44_IN2OUT_HAIRPINNING_FINISH_NEXT_DROP] = "error-drop", + [NAT44_IN2OUT_HAIRPINNING_FINISH_NEXT_LOOKUP] = "interface-output", }, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON -- cgit 1.2.3-korg