diff options
-rw-r--r-- | src/plugins/nat/in2out_ed.c | 1202 | ||||
-rw-r--r-- | src/plugins/nat/nat.c | 602 | ||||
-rw-r--r-- | src/plugins/nat/nat.h | 178 | ||||
-rw-r--r-- | src/plugins/nat/nat44-ei/nat44_ei_ha.h | 2 | ||||
-rw-r--r-- | src/plugins/nat/nat44-ei/nat44_ei_in2out.c | 8 | ||||
-rw-r--r-- | src/plugins/nat/nat44/ed_inlines.h | 116 | ||||
-rw-r--r-- | src/plugins/nat/nat44_classify.c | 31 | ||||
-rw-r--r-- | src/plugins/nat/nat44_cli.c | 4 | ||||
-rw-r--r-- | src/plugins/nat/nat44_hairpinning.c | 261 | ||||
-rw-r--r-- | src/plugins/nat/nat44_handoff.c | 13 | ||||
-rw-r--r-- | src/plugins/nat/nat_format.c | 15 | ||||
-rw-r--r-- | src/plugins/nat/nat_inlines.h | 125 | ||||
-rw-r--r-- | src/plugins/nat/out2in_ed.c | 1255 | ||||
-rw-r--r-- | src/plugins/nat/test/test_nat44_ed.py | 8 | ||||
-rw-r--r-- | src/vnet/buffer.h | 8 |
15 files changed, 1865 insertions, 1963 deletions
diff --git a/src/plugins/nat/in2out_ed.c b/src/plugins/nat/in2out_ed.c index ed9ad04bdb2..9dc68576fd4 100644 --- a/src/plugins/nat/in2out_ed.c +++ b/src/plugins/nat/in2out_ed.c @@ -48,7 +48,13 @@ typedef struct u32 sw_if_index; u32 next_index; u32 session_index; - u32 is_slow_path; + nat_translation_error_e translation_error; + nat_6t_flow_t i2of; + nat_6t_flow_t o2if; + clib_bihash_kv_16_8_t search_key; + u8 is_slow_path; + u8 translation_via_i2of; + u8 lookup_skipped; } nat_in2out_ed_trace_t; static u8 * @@ -65,144 +71,39 @@ format_nat_in2out_ed_trace (u8 * s, va_list * args) s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag, t->sw_if_index, t->next_index, t->session_index); - - return s; -} - -#ifndef CLIB_MARCH_VARIANT -int -nat44_i2o_ed_is_idle_session_cb (clib_bihash_kv_16_8_t * kv, void *arg) -{ - snat_main_t *sm = &snat_main; - nat44_is_idle_session_ctx_t *ctx = arg; - snat_session_t *s; - u64 sess_timeout_time; - u8 proto; - u16 r_port, l_port; - ip4_address_t *l_addr, *r_addr; - u32 fib_index; - clib_bihash_kv_16_8_t ed_kv; - int i; - snat_address_t *a; - snat_main_per_thread_data_t *tsm = vec_elt_at_index (sm->per_thread_data, - ctx->thread_index); - - ASSERT (ctx->thread_index == ed_value_get_thread_index (kv)); - s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (kv)); - sess_timeout_time = s->last_heard + (f64) nat44_session_get_timeout (sm, s); - if (ctx->now >= sess_timeout_time) + if (~0 != t->session_index) { - if (is_fwd_bypass_session (s)) - goto delete; - - l_addr = &s->out2in.addr; - r_addr = &s->ext_host_addr; - fib_index = s->out2in.fib_index; - if (snat_is_unk_proto_session (s)) + s = format (s, ", translation result '%U' via %s", + format_nat_ed_translation_error, t->translation_error, + t->translation_via_i2of ? "i2of" : "o2if"); + s = format (s, "\n i2of %U", format_nat_6t_flow, &t->i2of); + s = format (s, "\n o2if %U", format_nat_6t_flow, &t->o2if); + } + if (!t->is_slow_path) + { + if (t->lookup_skipped) { - proto = s->in2out.port; - r_port = 0; - l_port = 0; + s = format (s, "\n lookup skipped - cached session index used"); } else { - proto = nat_proto_to_ip_proto (s->nat_proto); - l_port = s->out2in.port; - r_port = s->ext_host_port; - } - init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto); - if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &ed_kv, 0)) - nat_elog_warn ("out2in_ed key del failed"); - - if (snat_is_unk_proto_session (s)) - goto delete; - - nat_ipfix_logging_nat44_ses_delete (ctx->thread_index, - s->in2out.addr.as_u32, - s->out2in.addr.as_u32, - s->nat_proto, - s->in2out.port, - s->out2in.port, - s->in2out.fib_index); - - nat_syslog_nat44_sdel (s->user_index, s->in2out.fib_index, - &s->in2out.addr, s->in2out.port, - &s->ext_host_nat_addr, s->ext_host_nat_port, - &s->out2in.addr, s->out2in.port, - &s->ext_host_addr, s->ext_host_port, - s->nat_proto, is_twice_nat_session (s)); - - if (is_twice_nat_session (s)) - { - for (i = 0; i < vec_len (sm->twice_nat_addresses); i++) - { - // TODO FIXME this is obviously broken - which address should be - // freed here?! - a = sm->twice_nat_addresses + i; - if (a->addr.as_u32 == s->ext_host_nat_addr.as_u32) - { - snat_free_outside_address_and_port (sm->twice_nat_addresses, - ctx->thread_index, - &s->ext_host_nat_addr, - s->ext_host_nat_port, - s->nat_proto); - break; - } - } + s = format (s, "\n search key %U", format_ed_session_kvp, + &t->search_key); } - - if (snat_is_session_static (s)) - goto delete; - - snat_free_outside_address_and_port (sm->addresses, ctx->thread_index, - &s->out2in.addr, s->out2in.port, - s->nat_proto); - delete: - nat_ed_session_delete (sm, s, ctx->thread_index, 1); - return 1; } - return 0; -} -#endif - -static inline u32 -icmp_in2out_ed_slow_path (snat_main_t * sm, vlib_buffer_t * b0, - ip4_header_t * ip0, icmp46_header_t * icmp0, - u32 sw_if_index0, u32 rx_fib_index0, - vlib_node_runtime_t * node, u32 next0, f64 now, - u32 thread_index, snat_session_t ** p_s0) -{ - vlib_main_t *vm = vlib_get_main (); - - next0 = icmp_in2out (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, - next0, thread_index, p_s0, 0); - snat_session_t *s0 = *p_s0; - if (PREDICT_TRUE (next0 != NAT_NEXT_DROP && s0)) - { - /* Accounting */ - nat44_session_update_counters (s0, now, - vlib_buffer_length_in_chain - (vm, b0), thread_index); - /* Per-user LRU list maintenance */ - nat44_session_update_lru (sm, s0, thread_index); - } - return next0; + return s; } static int -nat_ed_alloc_addr_and_port (snat_main_t * sm, u32 rx_fib_index, - u32 nat_proto, u32 thread_index, - ip4_address_t r_addr, u16 r_port, u8 proto, - u16 port_per_thread, u32 snat_thread_index, - snat_session_t * s, - ip4_address_t * outside_addr, - u16 * outside_port, - clib_bihash_kv_16_8_t * out2in_ed_kv) +nat_ed_alloc_addr_and_port (snat_main_t *sm, u32 rx_fib_index, u32 nat_proto, + u32 thread_index, ip4_address_t r_addr, u16 r_port, + u8 proto, u16 port_per_thread, + u32 snat_thread_index, snat_session_t *s, + ip4_address_t *outside_addr, u16 *outside_port) { int i; snat_address_t *a, *ga = 0; - snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; const u16 port_thread_offset = (port_per_thread * snat_thread_index) + 1024; @@ -211,48 +112,50 @@ nat_ed_alloc_addr_and_port (snat_main_t * sm, u32 rx_fib_index, a = sm->addresses + i; switch (nat_proto) { -#define _(N, j, n, unused) \ - case NAT_PROTOCOL_##N: \ - if (a->fib_index == rx_fib_index) \ - { \ - /* first try port suggested by caller */ \ - u16 port = clib_net_to_host_u16 (*outside_port); \ - u16 port_offset = port - port_thread_offset; \ - if (port <= port_thread_offset || \ - port > port_thread_offset + port_per_thread) \ - { \ - /* need to pick a different port, suggested port doesn't fit in \ - * this thread's port range */ \ - port_offset = snat_random_port (0, port_per_thread - 1); \ - port = port_thread_offset + port_offset; \ - } \ - u16 attempts = ED_PORT_ALLOC_ATTEMPTS; \ - do \ - { \ - init_ed_kv (out2in_ed_kv, a->addr, clib_host_to_net_u16 (port), \ - r_addr, r_port, s->out2in.fib_index, proto, \ - thread_index, s - tsm->sessions); \ - int rv = clib_bihash_add_del_16_8 (&sm->out2in_ed, out2in_ed_kv, \ - 2 /* is_add */); \ - if (0 == rv) \ - { \ - ++a->busy_##n##_port_refcounts[port]; \ - a->busy_##n##_ports_per_thread[thread_index]++; \ - a->busy_##n##_ports++; \ - *outside_addr = a->addr; \ - *outside_port = clib_host_to_net_u16 (port); \ - return 0; \ - } \ - port_offset = snat_random_port (0, port_per_thread - 1); \ - port = port_thread_offset + port_offset; \ - --attempts; \ - } \ - while (attempts > 0); \ - } \ - else if (a->fib_index == ~0) \ - { \ - ga = a; \ - } \ +#define _(N, j, n, unused) \ + case NAT_PROTOCOL_##N: \ + if (a->fib_index == rx_fib_index) \ + { \ + s->o2i.match.daddr = a->addr; \ + /* first try port suggested by caller */ \ + u16 port = clib_net_to_host_u16 (*outside_port); \ + u16 port_offset = port - port_thread_offset; \ + if (port <= port_thread_offset || \ + port > port_thread_offset + port_per_thread) \ + { \ + /* need to pick a different port, suggested port doesn't fit in \ + * this thread's port range */ \ + port_offset = snat_random_port (0, port_per_thread - 1); \ + port = port_thread_offset + port_offset; \ + } \ + u16 attempts = ED_PORT_ALLOC_ATTEMPTS; \ + do \ + { \ + if (NAT_PROTOCOL_ICMP == nat_proto) \ + { \ + s->o2i.match.sport = clib_host_to_net_u16 (port); \ + } \ + s->o2i.match.dport = clib_host_to_net_u16 (port); \ + if (0 == \ + nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2)) \ + { \ + ++a->busy_##n##_port_refcounts[port]; \ + a->busy_##n##_ports_per_thread[thread_index]++; \ + a->busy_##n##_ports++; \ + *outside_addr = a->addr; \ + *outside_port = clib_host_to_net_u16 (port); \ + return 0; \ + } \ + port_offset = snat_random_port (0, port_per_thread - 1); \ + port = port_thread_offset + port_offset; \ + --attempts; \ + } \ + while (attempts > 0); \ + } \ + else if (a->fib_index == ~0) \ + { \ + ga = a; \ + } \ break; foreach_nat_protocol; @@ -311,28 +214,51 @@ nat_outside_fib_index_lookup (snat_main_t * sm, ip4_address_t addr) return ~0; } +static_always_inline int +nat44_ed_external_sm_lookup (snat_main_t *sm, ip4_address_t match_addr, + u16 match_port, nat_protocol_t match_protocol, + u32 match_fib_index, ip4_address_t *daddr, + u16 *dport) +{ + clib_bihash_kv_8_8_t kv, value; + init_nat_k (&kv, match_addr, match_port, match_fib_index, match_protocol); + if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value)) + { + /* Try address only mapping */ + init_nat_k (&kv, match_addr, 0, 0, 0); + if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, + &value)) + return 0; + } + + snat_static_mapping_t *m = + pool_elt_at_index (sm->static_mappings, value.value); + *daddr = m->local_addr; + if (dport) + { + /* Address only mapping doesn't change port */ + *dport = is_addr_only_static_mapping (m) ? match_port : m->local_port; + } + return 1; +} + static u32 -slow_path_ed (snat_main_t * sm, - vlib_buffer_t * b, - ip4_address_t l_addr, - ip4_address_t r_addr, - u16 l_port, - u16 r_port, - u8 proto, - u32 rx_fib_index, - snat_session_t ** sessionp, - vlib_node_runtime_t * node, u32 next, u32 thread_index, f64 now) +slow_path_ed (snat_main_t *sm, vlib_buffer_t *b, ip4_address_t l_addr, + ip4_address_t r_addr, u16 l_port, u16 r_port, u8 proto, + u32 rx_fib_index, snat_session_t **sessionp, + vlib_node_runtime_t *node, u32 next, u32 thread_index, f64 now) { snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; - clib_bihash_kv_16_8_t out2in_ed_kv; - nat44_is_idle_session_ctx_t ctx; ip4_address_t outside_addr; u16 outside_port; - u8 identity_nat; + u32 outside_fib_index; + u8 is_identity_nat; u32 nat_proto = ip_proto_to_nat_proto (proto); snat_session_t *s = NULL; lb_nat_type_t lb = 0; + ip4_address_t daddr = r_addr; + u16 dport = r_port; if (PREDICT_TRUE (nat_proto == NAT_PROTOCOL_TCP)) { @@ -358,43 +284,81 @@ slow_path_ed (snat_main_t * sm, } } + outside_fib_index = sm->outside_fib_index; + + switch (vec_len (sm->outside_fibs)) + { + case 0: + outside_fib_index = sm->outside_fib_index; + break; + case 1: + outside_fib_index = sm->outside_fibs[0].fib_index; + break; + default: + outside_fib_index = nat_outside_fib_index_lookup (sm, r_addr); + break; + } + ip4_address_t sm_addr; u16 sm_port; u32 sm_fib_index; /* First try to match static mapping by local address and port */ - if (snat_static_mapping_match - (sm, l_addr, l_port, rx_fib_index, nat_proto, &sm_addr, &sm_port, - &sm_fib_index, 0, 0, 0, &lb, 0, &identity_nat, 0)) + int is_sm; + if (snat_static_mapping_match (sm, l_addr, l_port, rx_fib_index, nat_proto, + &sm_addr, &sm_port, &sm_fib_index, 0, 0, 0, + &lb, 0, &is_identity_nat, 0)) + { + is_sm = 0; + } + else + { + is_sm = 1; + } + + if (PREDICT_FALSE (is_sm && is_identity_nat)) + { + *sessionp = NULL; + return next; + } + + s = nat_ed_session_alloc (sm, thread_index, now, proto); + ASSERT (s); + + if (!is_sm) { - s = nat_ed_session_alloc (sm, thread_index, now, proto); - ASSERT (s); s->in2out.addr = l_addr; s->in2out.port = l_port; s->nat_proto = nat_proto; s->in2out.fib_index = rx_fib_index; - s->out2in.fib_index = sm->outside_fib_index; + s->out2in.fib_index = outside_fib_index; + + // suggest using local port to allocation function + outside_port = l_port; - switch (vec_len (sm->outside_fibs)) + // hairpinning? + int is_hairpinning = nat44_ed_external_sm_lookup ( + sm, r_addr, r_port, nat_proto, outside_fib_index, &daddr, &dport); + s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING; + + // destination addr/port updated with real values in + // nat_ed_alloc_addr_and_port + nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, daddr, 0, + s->out2in.fib_index, proto); + nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32); + if (NAT_PROTOCOL_ICMP == nat_proto) { - case 0: - s->out2in.fib_index = sm->outside_fib_index; - break; - case 1: - s->out2in.fib_index = sm->outside_fibs[0].fib_index; - break; - default: - s->out2in.fib_index = nat_outside_fib_index_lookup (sm, r_addr); - break; + nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port); } + else + { + nat_6t_flow_dport_rewrite_set (&s->o2i, l_port); + } + nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index); - /* Try to create dynamic translation */ - outside_port = l_port; // suggest using local port to allocation function - if (nat_ed_alloc_addr_and_port (sm, rx_fib_index, nat_proto, - thread_index, r_addr, r_port, proto, - sm->port_per_thread, - tsm->snat_thread_index, s, - &outside_addr, - &outside_port, &out2in_ed_kv)) + if (nat_ed_alloc_addr_and_port ( + sm, rx_fib_index, nat_proto, thread_index, daddr, dport, proto, + sm->port_per_thread, tsm->snat_thread_index, s, &outside_addr, + &outside_port)) { nat_elog_notice ("addresses exhausted"); b->error = node->errors[NAT_IN2OUT_ED_ERROR_OUT_OF_PORTS]; @@ -406,42 +370,40 @@ slow_path_ed (snat_main_t * sm, } else { - if (PREDICT_FALSE (identity_nat)) - { - *sessionp = NULL; - return next; - } - s = nat_ed_session_alloc (sm, thread_index, now, proto); - ASSERT (s); - s->out2in.addr = sm_addr; - s->out2in.port = sm_port; + // static mapping + s->out2in.addr = outside_addr = sm_addr; + s->out2in.port = outside_port = sm_port; s->in2out.addr = l_addr; s->in2out.port = l_port; s->nat_proto = nat_proto; s->in2out.fib_index = rx_fib_index; - s->out2in.fib_index = sm->outside_fib_index; - switch (vec_len (sm->outside_fibs)) - { - case 0: - s->out2in.fib_index = sm->outside_fib_index; - break; - case 1: - s->out2in.fib_index = sm->outside_fibs[0].fib_index; - break; - default: - s->out2in.fib_index = nat_outside_fib_index_lookup (sm, r_addr); - break; - } - + s->out2in.fib_index = outside_fib_index; s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING; - init_ed_kv (&out2in_ed_kv, sm_addr, sm_port, r_addr, r_port, - s->out2in.fib_index, proto, thread_index, - s - tsm->sessions); - if (clib_bihash_add_or_overwrite_stale_16_8 - (&sm->out2in_ed, &out2in_ed_kv, nat44_o2i_ed_is_idle_session_cb, - &ctx)) - nat_elog_notice ("out2in-ed key add failed"); + // hairpinning? + int is_hairpinning = nat44_ed_external_sm_lookup ( + sm, r_addr, r_port, nat_proto, outside_fib_index, &daddr, &dport); + s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING; + + if (NAT_PROTOCOL_ICMP == nat_proto) + { + nat_6t_o2i_flow_init (sm, thread_index, s, daddr, sm_port, sm_addr, + sm_port, s->out2in.fib_index, proto); + nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port); + } + else + { + nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, sm_addr, + sm_port, s->out2in.fib_index, proto); + nat_6t_flow_dport_rewrite_set (&s->o2i, l_port); + } + nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32); + nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index); + if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2)) + { + nat_elog_notice ("out2in key add failed"); + goto error; + } } if (lb) @@ -450,17 +412,26 @@ slow_path_ed (snat_main_t * sm, s->ext_host_addr = r_addr; s->ext_host_port = r_port; - clib_bihash_kv_16_8_t in2out_ed_kv; - init_ed_kv (&in2out_ed_kv, l_addr, l_port, r_addr, r_port, rx_fib_index, - proto, thread_index, s - tsm->sessions); - ctx.now = now; - ctx.thread_index = thread_index; - if (clib_bihash_add_or_overwrite_stale_16_8 (&tsm->in2out_ed, &in2out_ed_kv, - nat44_i2o_ed_is_idle_session_cb, - &ctx)) - nat_elog_notice ("in2out-ed key add failed"); + nat_6t_i2o_flow_init (sm, thread_index, s, l_addr, l_port, r_addr, r_port, + rx_fib_index, proto); + nat_6t_flow_saddr_rewrite_set (&s->i2o, outside_addr.as_u32); + nat_6t_flow_daddr_rewrite_set (&s->i2o, daddr.as_u32); + if (NAT_PROTOCOL_ICMP == nat_proto) + { + nat_6t_flow_icmp_id_rewrite_set (&s->i2o, outside_port); + } + else + { + nat_6t_flow_sport_rewrite_set (&s->i2o, outside_port); + nat_6t_flow_dport_rewrite_set (&s->i2o, dport); + } + nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index); - *sessionp = s; + if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1)) + { + nat_elog_notice ("in2out key add failed"); + goto error; + } /* log NAT event */ nat_ipfix_logging_nat44_ses_create (thread_index, @@ -479,7 +450,21 @@ slow_path_ed (snat_main_t * sm, per_vrf_sessions_register_session (s, thread_index); + *sessionp = s; return next; +error: + if (s) + { + if (!is_sm) + { + snat_free_outside_address_and_port (sm->addresses, thread_index, + &outside_addr, outside_port, + nat_proto); + } + nat_ed_session_delete (sm, s, thread_index, 1); + } + *sessionp = s = NULL; + return NAT_NEXT_DROP; } static_always_inline int @@ -494,7 +479,7 @@ nat44_ed_not_translate (snat_main_t * sm, vlib_node_runtime_t * node, udp->src_port, sm->outside_fib_index, ip->protocol); /* NAT packet aimed at external address if has active sessions */ - if (clib_bihash_search_16_8 (&sm->out2in_ed, &kv, &value)) + if (clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value)) { /* or is static mappings */ ip4_address_t placeholder_addr; @@ -530,8 +515,15 @@ nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip, if (ip->protocol == IP_PROTOCOL_ICMP) { - if (get_icmp_i2o_ed_key (b, ip, 0, ~0, ~0, 0, 0, 0, &kv)) + ip4_address_t lookup_saddr, lookup_daddr; + u16 lookup_sport, lookup_dport; + u8 lookup_protocol; + if (nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr, + &lookup_sport, &lookup_daddr, + &lookup_dport, &lookup_protocol)) return 0; + init_ed_k (&kv, lookup_saddr, lookup_sport, lookup_daddr, lookup_dport, + 0, lookup_protocol); } else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP) { @@ -545,12 +537,13 @@ nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip, ip->protocol); } - if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value)) + if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value)) { ASSERT (thread_index == ed_value_get_thread_index (&value)); s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value)); + if (is_fwd_bypass_session (s)) { if (ip->protocol == IP_PROTOCOL_TCP) @@ -588,7 +581,7 @@ nat44_ed_not_translate_output_feature (snat_main_t * sm, ip4_header_t * ip, /* src NAT check */ init_ed_k (&kv, ip->src_address, src_port, ip->dst_address, dst_port, tx_fib_index, ip->protocol); - if (!clib_bihash_search_16_8 (&sm->out2in_ed, &kv, &value)) + if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value)) { ASSERT (thread_index == ed_value_get_thread_index (&value)); s = @@ -606,12 +599,13 @@ nat44_ed_not_translate_output_feature (snat_main_t * sm, ip4_header_t * ip, /* dst NAT check */ init_ed_k (&kv, ip->dst_address, dst_port, ip->src_address, src_port, rx_fib_index, ip->protocol); - if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value)) + if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value)) { ASSERT (thread_index == ed_value_get_thread_index (&value)); s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value)); + if (is_fwd_bypass_session (s)) return 0; @@ -629,141 +623,115 @@ nat44_ed_not_translate_output_feature (snat_main_t * sm, ip4_header_t * ip, return 0; } -#ifndef CLIB_MARCH_VARIANT -u32 -icmp_match_in2out_ed (snat_main_t * sm, vlib_node_runtime_t * node, - u32 thread_index, vlib_buffer_t * b, - ip4_header_t * ip, ip4_address_t * addr, - u16 * port, u32 * fib_index, nat_protocol_t * proto, - void *d, void *e, u8 * dont_translate) +static inline u32 +icmp_in2out_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip, + icmp46_header_t *icmp, u32 sw_if_index, + u32 rx_fib_index, vlib_node_runtime_t *node, + u32 next, f64 now, u32 thread_index, + nat_protocol_t nat_proto, snat_session_t **s_p) { - u32 sw_if_index; - u32 rx_fib_index; - clib_bihash_kv_16_8_t kv, value; - u32 next = ~0; + vlib_main_t *vm = vlib_get_main (); + u16 checksum; int err; snat_session_t *s = NULL; - u16 l_port = 0, r_port = 0; // initialize to workaround gcc warning - vlib_main_t *vm = vlib_get_main (); - snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; - *dont_translate = 0; - f64 now = vlib_time_now (vm); - - sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX]; - rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index); + u8 lookup_protocol = ip->protocol; + u16 lookup_sport, lookup_dport; + ip4_address_t lookup_saddr, lookup_daddr; - err = - get_icmp_i2o_ed_key (b, ip, rx_fib_index, ~0, ~0, proto, &l_port, - &r_port, &kv); + err = nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr, + &lookup_sport, &lookup_daddr, + &lookup_dport, &lookup_protocol); if (err != 0) { b->error = node->errors[err]; - next = NAT_NEXT_DROP; - goto out; + return NAT_NEXT_DROP; } - if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value)) + if (vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0) { - if (vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0) + if (PREDICT_FALSE (nat44_ed_not_translate_output_feature ( + sm, ip, lookup_sport, lookup_dport, thread_index, sw_if_index, + vnet_buffer (b)->sw_if_index[VLIB_TX], now))) { - if (PREDICT_FALSE - (nat44_ed_not_translate_output_feature - (sm, ip, l_port, r_port, thread_index, - sw_if_index, vnet_buffer (b)->sw_if_index[VLIB_TX], now))) - { - *dont_translate = 1; - goto out; - } + return next; } - else + } + else + { + if (PREDICT_FALSE (nat44_ed_not_translate (sm, node, sw_if_index, ip, + NAT_PROTOCOL_ICMP, + rx_fib_index, thread_index))) { - if (PREDICT_FALSE (nat44_ed_not_translate (sm, node, sw_if_index, - ip, NAT_PROTOCOL_ICMP, - rx_fib_index, - thread_index))) - { - *dont_translate = 1; - goto out; - } + return next; } + } - if (PREDICT_FALSE - (icmp_type_is_error_message - (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))) - { - b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE]; - next = NAT_NEXT_DROP; - goto out; - } + if (PREDICT_FALSE (icmp_type_is_error_message ( + vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))) + { + b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE]; + return NAT_NEXT_DROP; + } - next = - slow_path_ed (sm, b, ip->src_address, ip->dst_address, l_port, r_port, - ip->protocol, rx_fib_index, &s, node, next, - thread_index, vlib_time_now (vm)); + next = slow_path_ed (sm, b, ip->src_address, ip->dst_address, lookup_sport, + lookup_dport, ip->protocol, rx_fib_index, &s, node, + next, thread_index, vlib_time_now (vm)); - if (PREDICT_FALSE (next == NAT_NEXT_DROP)) - goto out; + if (NAT_NEXT_DROP == next) + goto out; - if (!s) - { - *dont_translate = 1; - goto out; - } - } - else + if (PREDICT_TRUE (!ip4_is_fragment (ip))) { - if (PREDICT_FALSE - (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags != - ICMP4_echo_request - && vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags != - ICMP4_echo_reply - && !icmp_type_is_error_message (vnet_buffer (b)->ip. - reass.icmp_type_or_tcp_flags))) + ip_csum_t sum = ip_incremental_checksum_buffer ( + vm, b, (u8 *) icmp - (u8 *) vlib_buffer_get_current (b), + ntohs (ip->length) - ip4_header_bytes (ip), 0); + checksum = ~ip_csum_fold (sum); + if (PREDICT_FALSE (checksum != 0 && checksum != 0xffff)) { - b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE]; next = NAT_NEXT_DROP; goto out; } - - ASSERT (thread_index == ed_value_get_thread_index (&value)); - s = - pool_elt_at_index (tsm->sessions, - ed_value_get_session_index (&value)); } + out: - if (s) + if (PREDICT_TRUE (next != NAT_NEXT_DROP && s)) { - *addr = s->out2in.addr; - *port = s->out2in.port; - *fib_index = s->out2in.fib_index; - } - if (d) - { - *(snat_session_t **) d = s; + /* Accounting */ + nat44_session_update_counters ( + s, now, vlib_buffer_length_in_chain (vm, b), thread_index); + /* Per-user LRU list maintenance */ + nat44_session_update_lru (sm, s, thread_index); } + *s_p = s; return next; } -#endif static snat_session_t * -nat44_ed_in2out_unknown_proto (snat_main_t * sm, - vlib_buffer_t * b, - ip4_header_t * ip, - u32 rx_fib_index, - u32 thread_index, - f64 now, - vlib_main_t * vm, vlib_node_runtime_t * node) +nat44_ed_in2out_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b, + ip4_header_t *ip, u32 rx_fib_index, + u32 thread_index, f64 now, + vlib_main_t *vm, + vlib_node_runtime_t *node) { clib_bihash_kv_8_8_t kv, value; clib_bihash_kv_16_8_t s_kv, s_value; - snat_static_mapping_t *m; - u32 old_addr, new_addr = 0; - ip_csum_t sum; + snat_static_mapping_t *m = NULL; snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; - snat_session_t *s; + snat_session_t *s = NULL; u32 outside_fib_index = sm->outside_fib_index; int i; - u8 is_sm = 0; + ip4_address_t new_src_addr = { 0 }; + ip4_address_t new_dst_addr = ip->dst_address; + + if (PREDICT_FALSE ( + nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index))) + { + b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED]; + nat_ipfix_logging_max_sessions (thread_index, + sm->max_translations_per_thread); + nat_elog_notice ("maximum sessions exceeded"); + return 0; + } switch (vec_len (sm->outside_fibs)) { @@ -777,112 +745,105 @@ nat44_ed_in2out_unknown_proto (snat_main_t * sm, outside_fib_index = nat_outside_fib_index_lookup (sm, ip->dst_address); break; } - old_addr = ip->src_address.as_u32; - init_ed_k (&s_kv, ip->src_address, 0, ip->dst_address, 0, rx_fib_index, - ip->protocol); + init_nat_k (&kv, ip->src_address, 0, rx_fib_index, 0); - if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &s_kv, &s_value)) + /* Try to find static mapping first */ + if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value)) { - ASSERT (thread_index == ed_value_get_thread_index (&s_value)); - s = - pool_elt_at_index (tsm->sessions, - ed_value_get_session_index (&s_value)); - new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32; + m = pool_elt_at_index (sm->static_mappings, value.value); + new_src_addr = m->external_addr; } else { - if (PREDICT_FALSE - (nat44_ed_maximum_sessions_exceeded - (sm, rx_fib_index, thread_index))) + pool_foreach (s, tsm->sessions) { - b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED]; - nat_ipfix_logging_max_sessions (thread_index, - sm->max_translations_per_thread); - nat_elog_notice ("maximum sessions exceeded"); - return 0; + if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32) + { + init_ed_k (&s_kv, s->out2in.addr, 0, ip->dst_address, 0, + outside_fib_index, ip->protocol); + if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value)) + { + new_src_addr = s->out2in.addr; + } + break; + } } - init_nat_k (&kv, ip->src_address, 0, rx_fib_index, 0); - - /* Try to find static mapping first */ - if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value)) - { - m = pool_elt_at_index (sm->static_mappings, value.value); - new_addr = ip->src_address.as_u32 = m->external_addr.as_u32; - is_sm = 1; - goto create_ses; - } - else + if (!new_src_addr.as_u32) { - /* *INDENT-OFF* */ - pool_foreach (s, tsm->sessions) { - if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32) - { - new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32; - - init_ed_k(&s_kv, s->out2in.addr, 0, ip->dst_address, 0, outside_fib_index, ip->protocol); - if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value)) - goto create_ses; - - break; - } - } - /* *INDENT-ON* */ - for (i = 0; i < vec_len (sm->addresses); i++) { init_ed_k (&s_kv, sm->addresses[i].addr, 0, ip->dst_address, 0, outside_fib_index, ip->protocol); - if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value)) + if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value)) { - new_addr = ip->src_address.as_u32 = - sm->addresses[i].addr.as_u32; - goto create_ses; + new_src_addr = sm->addresses[i].addr; } } - return 0; } + } - create_ses: - s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol); - if (!s) - { - b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED]; - nat_elog_warn ("create NAT session failed"); - return 0; - } + if (!new_src_addr.as_u32) + { + // could not allocate address for translation ... + return 0; + } - s->ext_host_addr.as_u32 = ip->dst_address.as_u32; - s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO; - s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT; - s->out2in.addr.as_u32 = new_addr; - s->out2in.fib_index = outside_fib_index; - s->in2out.addr.as_u32 = old_addr; - s->in2out.fib_index = rx_fib_index; - s->in2out.port = s->out2in.port = ip->protocol; - if (is_sm) - s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING; - - /* Add to lookup tables */ - init_ed_kv (&s_kv, s->in2out.addr, 0, ip->dst_address, 0, rx_fib_index, - ip->protocol, thread_index, s - tsm->sessions); - if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &s_kv, 1)) - nat_elog_notice ("in2out key add failed"); - - init_ed_kv (&s_kv, s->out2in.addr, 0, ip->dst_address, 0, - outside_fib_index, ip->protocol, thread_index, - s - tsm->sessions); - if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1)) - nat_elog_notice ("out2in key add failed"); - - per_vrf_sessions_register_session (s, thread_index); + s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol); + if (!s) + { + b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED]; + nat_elog_warn ("create NAT session failed"); + return 0; } - /* Update IP checksum */ - sum = ip->checksum; - sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address); - ip->checksum = ip_csum_fold (sum); + nat_6t_i2o_flow_init (sm, thread_index, s, ip->src_address, 0, + ip->dst_address, 0, rx_fib_index, ip->protocol); + nat_6t_flow_saddr_rewrite_set (&s->i2o, new_src_addr.as_u32); + nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index); + + // hairpinning? + int is_hairpinning = + nat44_ed_external_sm_lookup (sm, ip->dst_address, 0, NAT_PROTOCOL_OTHER, + outside_fib_index, &new_dst_addr, NULL); + s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING; + + nat_6t_flow_daddr_rewrite_set (&s->i2o, new_dst_addr.as_u32); + nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index); + + nat_6t_o2i_flow_init (sm, thread_index, s, new_dst_addr, 0, new_src_addr, 0, + outside_fib_index, ip->protocol); + nat_6t_flow_saddr_rewrite_set (&s->o2i, ip->dst_address.as_u32); + nat_6t_flow_daddr_rewrite_set (&s->o2i, ip->src_address.as_u32); + nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index); + + s->ext_host_addr.as_u32 = ip->dst_address.as_u32; + s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO; + s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT; + s->out2in.addr.as_u32 = new_src_addr.as_u32; + s->out2in.fib_index = outside_fib_index; + s->in2out.addr.as_u32 = ip->src_address.as_u32; + s->in2out.fib_index = rx_fib_index; + s->in2out.port = s->out2in.port = ip->protocol; + if (m) + s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING; + + if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1)) + { + nat_elog_notice ("in2out flow hash add failed"); + nat_ed_session_delete (sm, s, thread_index, 1); + return NULL; + } + + if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 1)) + { + nat_elog_notice ("out2in flow hash add failed"); + nat_ed_session_delete (sm, s, thread_index, 1); + return NULL; + } + + per_vrf_sessions_register_session (s, thread_index); /* Accounting */ nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b), @@ -890,13 +851,6 @@ nat44_ed_in2out_unknown_proto (snat_main_t * sm, /* Per-user LRU list maintenance */ nat44_session_update_lru (sm, s, thread_index); - /* Hairpinning */ - if (vnet_buffer (b)->sw_if_index[VLIB_TX] == ~0) - nat44_ed_hairpinning_unknown_proto (sm, b, ip); - - if (vnet_buffer (b)->sw_if_index[VLIB_TX] == ~0) - vnet_buffer (b)->sw_if_index[VLIB_TX] = outside_fib_index; - return s; } @@ -924,15 +878,17 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm, while (n_left_from > 0) { vlib_buffer_t *b0; - u32 sw_if_index0, rx_fib_index0, proto0, new_addr0, old_addr0, - iph_offset0 = 0; - u16 old_port0, new_port0; + u32 sw_if_index0, rx_fib_index0, iph_offset0 = 0; + nat_protocol_t proto0; ip4_header_t *ip0; - udp_header_t *udp0; - tcp_header_t *tcp0; snat_session_t *s0 = 0; clib_bihash_kv_16_8_t kv0, value0; - ip_csum_t sum0; + nat_translation_error_e translation_error = NAT_ED_TRNSL_ERR_SUCCESS; + nat_6t_flow_t *f = 0; + ip4_address_t lookup_saddr, lookup_daddr; + u16 lookup_sport, lookup_dport; + u8 lookup_protocol; + int lookup_skipped = 0; b0 = *b; b++; @@ -973,16 +929,8 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm, goto trace0; } - udp0 = ip4_next_header (ip0); - tcp0 = (tcp_header_t *) udp0; proto0 = ip_proto_to_nat_proto (ip0->protocol); - if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER)) - { - next[0] = def_slow; - goto trace0; - } - if (is_output_feature) { if (PREDICT_FALSE @@ -993,27 +941,79 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm, if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP)) { - next[0] = def_slow; - goto trace0; + if (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_request && + vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_reply && + !icmp_type_is_error_message ( + vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags)) + { + b0->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE]; + next[0] = NAT_NEXT_DROP; + goto trace0; + } + int err = nat_get_icmp_session_lookup_values ( + b0, ip0, &lookup_saddr, &lookup_sport, &lookup_daddr, + &lookup_dport, &lookup_protocol); + if (err != 0) + { + b0->error = node->errors[err]; + next[0] = NAT_NEXT_DROP; + goto trace0; + } + } + else + { + lookup_protocol = ip0->protocol; + lookup_saddr = ip0->src_address; + lookup_daddr = ip0->dst_address; + lookup_sport = vnet_buffer (b0)->ip.reass.l4_src_port; + lookup_dport = vnet_buffer (b0)->ip.reass.l4_dst_port; } - init_ed_k (&kv0, ip0->src_address, - vnet_buffer (b0)->ip.reass.l4_src_port, ip0->dst_address, - vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0, - ip0->protocol); + /* there might be a stashed index in vnet_buffer2 from handoff or + * classify node, see if it can be used */ + if (!pool_is_free_index (tsm->sessions, + vnet_buffer2 (b0)->nat.cached_session_index)) + { + s0 = pool_elt_at_index (tsm->sessions, + vnet_buffer2 (b0)->nat.cached_session_index); + if (PREDICT_TRUE ( + nat_6t_flow_match (&s0->i2o, b0, lookup_saddr, lookup_sport, + lookup_daddr, lookup_dport, lookup_protocol, + rx_fib_index0) + // for some hairpinning cases there are two "i2i" flows instead + // of i2o and o2i as both hosts are on inside + || (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING && + nat_6t_flow_match ( + &s0->o2i, b0, lookup_saddr, lookup_sport, lookup_daddr, + lookup_dport, lookup_protocol, rx_fib_index0)))) + { + /* yes, this is the droid we're looking for */ + lookup_skipped = 1; + goto skip_lookup; + } + s0 = NULL; + } + + init_ed_k (&kv0, ip0->src_address, lookup_sport, ip0->dst_address, + lookup_dport, rx_fib_index0, lookup_protocol); - // lookup for session - if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv0, &value0)) + // lookup flow + if (clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0)) { - // session does not exist go slow path + // flow does not exist go slow path next[0] = def_slow; goto trace0; } + ASSERT (thread_index == ed_value_get_thread_index (&value0)); s0 = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value0)); + skip_lookup: + if (PREDICT_FALSE (per_vrf_sessions_is_expired (s0, thread_index))) { // session is closed, go slow path @@ -1054,96 +1054,58 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm, b0->flags |= VNET_BUFFER_F_IS_NATED; - if (!is_output_feature) - vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index; - - old_addr0 = ip0->src_address.as_u32; - new_addr0 = ip0->src_address.as_u32 = s0->out2in.addr.as_u32; - sum0 = ip0->checksum; - sum0 = - ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, - src_address); - if (PREDICT_FALSE (is_twice_nat_session (s0))) - sum0 = - ip_csum_update (sum0, ip0->dst_address.as_u32, - s0->ext_host_addr.as_u32, ip4_header_t, - dst_address); - ip0->checksum = ip_csum_fold (sum0); + if (nat_6t_flow_match (&s0->i2o, b0, lookup_saddr, lookup_sport, + lookup_daddr, lookup_dport, lookup_protocol, + rx_fib_index0)) + { + f = &s0->i2o; + } + else if (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING && + nat_6t_flow_match (&s0->o2i, b0, lookup_saddr, lookup_sport, + lookup_daddr, lookup_dport, lookup_protocol, + rx_fib_index0)) + { + f = &s0->o2i; + } + else + { + translation_error = NAT_ED_TRNSL_ERR_FLOW_MISMATCH; + nat_free_session_data (sm, s0, thread_index, 0); + nat_ed_session_delete (sm, s0, thread_index, 1); + next[0] = NAT_NEXT_DROP; + goto trace0; + } - old_port0 = vnet_buffer (b0)->ip.reass.l4_src_port; + if (NAT_ED_TRNSL_ERR_SUCCESS != + (translation_error = nat_6t_flow_buf_translate ( + sm, b0, ip0, f, proto0, is_output_feature))) + { + nat_free_session_data (sm, s0, thread_index, 0); + nat_ed_session_delete (sm, s0, thread_index, 1); + next[0] = NAT_NEXT_DROP; + goto trace0; + } - if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP)) + switch (proto0) { - if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) - { - new_port0 = udp0->src_port = s0->out2in.port; - sum0 = tcp0->checksum; - sum0 = - ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, - dst_address); - sum0 = - ip_csum_update (sum0, old_port0, new_port0, ip4_header_t, - length); - if (PREDICT_FALSE (is_twice_nat_session (s0))) - { - sum0 = - ip_csum_update (sum0, ip0->dst_address.as_u32, - s0->ext_host_addr.as_u32, ip4_header_t, - dst_address); - sum0 = - ip_csum_update (sum0, - vnet_buffer (b0)->ip.reass.l4_dst_port, - s0->ext_host_port, ip4_header_t, length); - tcp0->dst_port = s0->ext_host_port; - ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32; - } - mss_clamping (sm->mss_clamping, tcp0, &sum0); - tcp0->checksum = ip_csum_fold (sum0); - } + case NAT_PROTOCOL_TCP: vlib_increment_simple_counter (&sm->counters.fastpath.in2out_ed.tcp, thread_index, sw_if_index0, 1); nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index); - } - else if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment - && udp0->checksum) - { - new_port0 = udp0->src_port = s0->out2in.port; - sum0 = udp0->checksum; - sum0 = - ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, - dst_address); - sum0 = - ip_csum_update (sum0, old_port0, new_port0, ip4_header_t, length); - if (PREDICT_FALSE (is_twice_nat_session (s0))) - { - sum0 = - ip_csum_update (sum0, ip0->dst_address.as_u32, - s0->ext_host_addr.as_u32, ip4_header_t, - dst_address); - sum0 = - ip_csum_update (sum0, vnet_buffer (b0)->ip.reass.l4_dst_port, - s0->ext_host_port, ip4_header_t, length); - udp0->dst_port = s0->ext_host_port; - ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32; - } - udp0->checksum = ip_csum_fold (sum0); + break; + case NAT_PROTOCOL_UDP: vlib_increment_simple_counter (&sm->counters.fastpath.in2out_ed.udp, thread_index, sw_if_index0, 1); - } - else - { - if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) - { - new_port0 = udp0->src_port = s0->out2in.port; - if (PREDICT_FALSE (is_twice_nat_session (s0))) - { - udp0->dst_port = s0->ext_host_port; - ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32; - } - vlib_increment_simple_counter (&sm->counters.fastpath. - in2out_ed.udp, thread_index, - sw_if_index0, 1); - } + break; + case NAT_PROTOCOL_ICMP: + vlib_increment_simple_counter (&sm->counters.fastpath.in2out_ed.icmp, + thread_index, sw_if_index0, 1); + break; + case NAT_PROTOCOL_OTHER: + vlib_increment_simple_counter ( + &sm->counters.fastpath.in2out_ed.other, thread_index, sw_if_index0, + 1); + break; } /* Accounting */ @@ -1163,11 +1125,21 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm, t->sw_if_index = sw_if_index0; t->next_index = next[0]; t->is_slow_path = 0; + t->translation_error = translation_error; + t->lookup_skipped = lookup_skipped; + clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key)); if (s0) - t->session_index = s0 - tsm->sessions; + { + t->session_index = s0 - tsm->sessions; + clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of)); + clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if)); + t->translation_via_i2of = (&s0->i2o == f); + } else - t->session_index = ~0; + { + t->session_index = ~0; + } } if (next[0] == NAT_NEXT_DROP) @@ -1208,16 +1180,14 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t * vm, while (n_left_from > 0) { vlib_buffer_t *b0; - u32 sw_if_index0, rx_fib_index0, proto0, new_addr0, old_addr0, - iph_offset0 = 0; - u16 old_port0, new_port0; + u32 sw_if_index0, rx_fib_index0, iph_offset0 = 0; + nat_protocol_t proto0; ip4_header_t *ip0; udp_header_t *udp0; - tcp_header_t *tcp0; icmp46_header_t *icmp0; snat_session_t *s0 = 0; clib_bihash_kv_16_8_t kv0, value0; - ip_csum_t sum0; + int translation_error = NAT_ED_TRNSL_ERR_SUCCESS; b0 = *b; @@ -1244,18 +1214,23 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t * vm, } udp0 = ip4_next_header (ip0); - tcp0 = (tcp_header_t *) udp0; icmp0 = (icmp46_header_t *) udp0; proto0 = ip_proto_to_nat_proto (ip0->protocol); if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER)) { - s0 = nat44_ed_in2out_unknown_proto (sm, b0, ip0, - rx_fib_index0, - thread_index, now, vm, node); + s0 = nat44_ed_in2out_slowpath_unknown_proto ( + sm, b0, ip0, rx_fib_index0, thread_index, now, vm, node); if (!s0) next[0] = NAT_NEXT_DROP; + if (NAT_ED_TRNSL_ERR_SUCCESS != + (translation_error = nat_6t_flow_buf_translate ( + sm, b0, ip0, &s0->i2o, proto0, is_output_feature))) + { + goto trace0; + } + vlib_increment_simple_counter (&sm->counters.slowpath. in2out_ed.other, thread_index, sw_if_index0, 1); @@ -1264,10 +1239,17 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t * vm, if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP)) { - next[0] = - icmp_in2out_ed_slow_path (sm, b0, ip0, icmp0, sw_if_index0, - rx_fib_index0, node, next[0], now, - thread_index, &s0); + next[0] = icmp_in2out_ed_slow_path (sm, b0, ip0, icmp0, sw_if_index0, + rx_fib_index0, node, next[0], + now, thread_index, proto0, &s0); + if (NAT_NEXT_DROP != next[0] && s0 && + NAT_ED_TRNSL_ERR_SUCCESS != + (translation_error = nat_6t_flow_buf_translate ( + sm, b0, ip0, &s0->i2o, proto0, is_output_feature))) + { + goto trace0; + } + vlib_increment_simple_counter (&sm->counters.slowpath. in2out_ed.icmp, thread_index, sw_if_index0, 1); @@ -1278,7 +1260,7 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t * vm, vnet_buffer (b0)->ip.reass.l4_src_port, ip0->dst_address, vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0, ip0->protocol); - if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv0, &value0)) + if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0)) { ASSERT (thread_index == ed_value_get_thread_index (&value0)); s0 = @@ -1342,95 +1324,27 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t * vm, b0->flags |= VNET_BUFFER_F_IS_NATED; - if (!is_output_feature) - vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index; - - old_addr0 = ip0->src_address.as_u32; - new_addr0 = ip0->src_address.as_u32 = s0->out2in.addr.as_u32; - sum0 = ip0->checksum; - sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, - src_address); - if (PREDICT_FALSE (is_twice_nat_session (s0))) - sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32, - s0->ext_host_addr.as_u32, ip4_header_t, - dst_address); - ip0->checksum = ip_csum_fold (sum0); - - old_port0 = vnet_buffer (b0)->ip.reass.l4_src_port; + if (NAT_ED_TRNSL_ERR_SUCCESS != + (translation_error = nat_6t_flow_buf_translate ( + sm, b0, ip0, &s0->i2o, proto0, is_output_feature))) + { + nat_free_session_data (sm, s0, thread_index, 0); + nat_ed_session_delete (sm, s0, thread_index, 1); + s0 = NULL; + goto trace0; + } if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP)) { - if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) - { - new_port0 = udp0->src_port = s0->out2in.port; - sum0 = tcp0->checksum; - sum0 = - ip_csum_update (sum0, old_addr0, new_addr0, - ip4_header_t, dst_address); - sum0 = - ip_csum_update (sum0, old_port0, new_port0, - ip4_header_t, length); - if (PREDICT_FALSE (is_twice_nat_session (s0))) - { - sum0 = - ip_csum_update (sum0, ip0->dst_address.as_u32, - s0->ext_host_addr.as_u32, - ip4_header_t, dst_address); - sum0 = - ip_csum_update (sum0, - vnet_buffer (b0)->ip.reass.l4_dst_port, - s0->ext_host_port, ip4_header_t, length); - tcp0->dst_port = s0->ext_host_port; - ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32; - } - mss_clamping (sm->mss_clamping, tcp0, &sum0); - tcp0->checksum = ip_csum_fold (sum0); - } vlib_increment_simple_counter (&sm->counters.slowpath.in2out_ed.tcp, thread_index, sw_if_index0, 1); nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index); } - else if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment - && udp0->checksum) + else { - new_port0 = udp0->src_port = s0->out2in.port; - sum0 = udp0->checksum; - sum0 = - ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, - dst_address); - sum0 = - ip_csum_update (sum0, old_port0, new_port0, ip4_header_t, length); - if (PREDICT_FALSE (is_twice_nat_session (s0))) - { - sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32, - s0->ext_host_addr.as_u32, - ip4_header_t, dst_address); - sum0 = - ip_csum_update (sum0, - vnet_buffer (b0)->ip.reass.l4_dst_port, - s0->ext_host_port, ip4_header_t, length); - udp0->dst_port = s0->ext_host_port; - ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32; - } - udp0->checksum = ip_csum_fold (sum0); vlib_increment_simple_counter (&sm->counters.slowpath.in2out_ed.udp, thread_index, sw_if_index0, 1); } - else - { - if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) - { - new_port0 = udp0->src_port = s0->out2in.port; - if (PREDICT_FALSE (is_twice_nat_session (s0))) - { - udp0->dst_port = s0->ext_host_port; - ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32; - } - vlib_increment_simple_counter (&sm->counters.slowpath. - in2out_ed.udp, thread_index, - sw_if_index0, 1); - } - } /* Accounting */ nat44_session_update_counters (s0, now, @@ -1448,11 +1362,21 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t * vm, t->sw_if_index = sw_if_index0; t->next_index = next[0]; t->is_slow_path = 1; + t->translation_error = translation_error; + clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key)); if (s0) - t->session_index = s0 - tsm->sessions; + { + t->session_index = s0 - tsm->sessions; + clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of)); + clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if)); + t->translation_via_i2of = 1; + } + else - t->session_index = ~0; + { + t->session_index = ~0; + } } if (next[0] == NAT_NEXT_DROP) diff --git a/src/plugins/nat/nat.c b/src/plugins/nat/nat.c index 81af143ab2c..245689db45d 100644 --- a/src/plugins/nat/nat.c +++ b/src/plugins/nat/nat.c @@ -37,6 +37,7 @@ #include <nat/nat44-ei/nat44_ei.h> #include <vpp/app/version.h> +#include <nat/lib/nat_inlines.h> snat_main_t snat_main; @@ -152,11 +153,6 @@ VNET_FEATURE_INIT (ip4_snat_hairpin_dst, static) = { .node_name = "nat44-hairpin-dst", .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"), }; -VNET_FEATURE_INIT (ip4_nat44_ed_hairpin_dst, static) = { - .arc_name = "ip4-unicast", - .node_name = "nat44-ed-hairpin-dst", - .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"), -}; /* Hook up output features */ VNET_FEATURE_INIT (ip4_snat_in2out_output, static) = { @@ -186,12 +182,6 @@ VNET_FEATURE_INIT (ip4_nat44_ed_in2out_output, static) = { .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"), .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"), }; -VNET_FEATURE_INIT (ip4_nat44_ed_hairpin_src, static) = { - .arc_name = "ip4-output", - .node_name = "nat44-ed-hairpin-src", - .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"), - .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"), -}; /* Hook up ip4-local features */ VNET_FEATURE_INIT (ip4_nat_hairpinning, static) = @@ -200,13 +190,6 @@ VNET_FEATURE_INIT (ip4_nat_hairpinning, static) = .node_name = "nat44-hairpinning", .runs_before = VNET_FEATURES("ip4-local-end-of-arc"), }; -VNET_FEATURE_INIT (ip4_nat44_ed_hairpinning, static) = -{ - .arc_name = "ip4-local", - .node_name = "nat44-ed-hairpinning", - .runs_before = VNET_FEATURES("ip4-local-end-of-arc"), -}; - VLIB_PLUGIN_REGISTER () = { .version = VPP_BUILD_VER, @@ -275,13 +258,13 @@ format_ed_session_kvp (u8 * s, va_list * args) u32 fib_index; split_ed_kv (v, &l_addr, &r_addr, &proto, &fib_index, &l_port, &r_port); - s = - format (s, - "local %U:%d remote %U:%d proto %U fib %d thread-index %u session-index %u", - format_ip4_address, &l_addr, clib_net_to_host_u16 (l_port), - format_ip4_address, &r_addr, clib_net_to_host_u16 (r_port), - format_ip_protocol, proto, fib_index, - ed_value_get_session_index (v), ed_value_get_thread_index (v)); + s = format (s, + "local %U:%d remote %U:%d proto %U fib %d thread-index %u " + "session-index %u", + format_ip4_address, &l_addr, clib_net_to_host_u16 (l_port), + format_ip4_address, &r_addr, clib_net_to_host_u16 (r_port), + format_ip_protocol, proto, fib_index, + ed_value_get_thread_index (v), ed_value_get_session_index (v)); return s; } @@ -291,39 +274,22 @@ nat_free_session_data (snat_main_t * sm, snat_session_t * s, u32 thread_index, u8 is_ha) { clib_bihash_kv_8_8_t kv; - u8 proto; - u16 r_port, l_port; - ip4_address_t *l_addr, *r_addr; - u32 fib_index = 0; - clib_bihash_kv_16_8_t ed_kv; snat_main_per_thread_data_t *tsm = vec_elt_at_index (sm->per_thread_data, thread_index); if (is_ed_session (s)) { per_vrf_sessions_unregister_session (s, thread_index); + + if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 0)) + nat_elog_warn ("flow hash del failed"); + + if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 0)) + nat_elog_warn ("flow hash del failed"); } if (is_fwd_bypass_session (s)) { - if (snat_is_unk_proto_session (s)) - { - init_ed_k (&ed_kv, s->in2out.addr, 0, s->ext_host_addr, 0, 0, - s->in2out.port); - } - else - { - l_port = s->in2out.port; - r_port = s->ext_host_port; - l_addr = &s->in2out.addr; - r_addr = &s->ext_host_addr; - proto = nat_proto_to_ip_proto (s->nat_proto); - fib_index = s->in2out.fib_index; - init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, - proto); - } - if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &ed_kv, 0)) - nat_elog_warn ("in2out_ed key del failed"); return; } @@ -333,36 +299,6 @@ nat_free_session_data (snat_main_t * sm, snat_session_t * s, u32 thread_index, if (is_affinity_sessions (s)) nat_affinity_unlock (s->ext_host_addr, s->out2in.addr, s->nat_proto, s->out2in.port); - l_addr = &s->out2in.addr; - r_addr = &s->ext_host_addr; - fib_index = s->out2in.fib_index; - if (snat_is_unk_proto_session (s)) - { - proto = s->in2out.port; - r_port = 0; - l_port = 0; - } - else - { - proto = nat_proto_to_ip_proto (s->nat_proto); - l_port = s->out2in.port; - r_port = s->ext_host_port; - } - init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto); - if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &ed_kv, 0)) - nat_elog_warn ("out2in_ed key del failed"); - l_addr = &s->in2out.addr; - fib_index = s->in2out.fib_index; - if (!snat_is_unk_proto_session (s)) - l_port = s->in2out.port; - if (is_twice_nat_session (s)) - { - r_addr = &s->ext_host_nat_addr; - r_port = s->ext_host_nat_port; - } - init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto); - if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &ed_kv, 0)) - nat_elog_warn ("in2out_ed key del failed"); if (!is_ha) nat_syslog_nat44_sdel (s->user_index, s->in2out.fib_index, @@ -1985,18 +1921,14 @@ feature_set: sw_if_index, 1, 0, 0); if (!is_inside) { - if (sm->endpoint_dependent) - vnet_feature_enable_disable ("ip4-local", - "nat44-ed-hairpinning", - sw_if_index, 1, 0, 0); - else - vnet_feature_enable_disable ("ip4-local", - "nat44-hairpinning", - sw_if_index, 1, 0, 0); - } - } - else - { + if (!sm->endpoint_dependent) + vnet_feature_enable_disable ("ip4-local", + "nat44-hairpinning", + sw_if_index, 1, 0, 0); + } + } + else + { int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0); if (rv) return rv; @@ -2005,41 +1937,38 @@ feature_set: pool_put (sm->interfaces, i); if (is_inside) { - if (sm->endpoint_dependent) - vnet_feature_enable_disable ("ip4-local", - "nat44-ed-hairpinning", - sw_if_index, 0, 0, 0); - else - vnet_feature_enable_disable ("ip4-local", - "nat44-hairpinning", - sw_if_index, 0, 0, 0); - } - } - } - else - { - if ((nat_interface_is_inside(i) && is_inside) || - (nat_interface_is_outside(i) && !is_inside)) - return 0; + if (!sm->endpoint_dependent) + vnet_feature_enable_disable ("ip4-local", + "nat44-hairpinning", + sw_if_index, 0, 0, 0); + } + } + } + else + { + if ((nat_interface_is_inside (i) && is_inside) || + (nat_interface_is_outside (i) && !is_inside)) + return 0; - if (sm->num_workers > 1) - { - del_feature_name = !is_inside ? "nat44-in2out-worker-handoff" : - "nat44-out2in-worker-handoff"; - feature_name = "nat44-handoff-classify"; - } - else if (sm->endpoint_dependent) - { - del_feature_name = !is_inside ? "nat-pre-in2out" : - "nat-pre-out2in"; + if (sm->num_workers > 1) + { + del_feature_name = !is_inside ? "nat44-in2out-worker-handoff" : + "nat44-out2in-worker-handoff"; + feature_name = "nat44-handoff-classify"; + } + else if (sm->endpoint_dependent) + { + del_feature_name = + !is_inside ? "nat-pre-in2out" : "nat-pre-out2in"; - feature_name = "nat44-ed-classify"; - } - else - { - del_feature_name = !is_inside ? "nat44-in2out" : "nat44-out2in"; - feature_name = "nat44-classify"; - } + feature_name = "nat44-ed-classify"; + } + else + { + del_feature_name = + !is_inside ? "nat44-in2out" : "nat44-out2in"; + feature_name = "nat44-classify"; + } int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1); if (rv) @@ -2050,17 +1979,14 @@ feature_set: sw_if_index, 1, 0, 0); if (!is_inside) { - if (sm->endpoint_dependent) - vnet_feature_enable_disable ("ip4-local", "nat44-ed-hairpinning", - sw_if_index, 0, 0, 0); - else - vnet_feature_enable_disable ("ip4-local", "nat44-hairpinning", - sw_if_index, 0, 0, 0); - } - goto set_flags; - } + if (!sm->endpoint_dependent) + vnet_feature_enable_disable ( + "ip4-local", "nat44-hairpinning", sw_if_index, 0, 0, 0); + } + goto set_flags; + } - goto fib; + goto fib; } } /* *INDENT-ON* */ @@ -2085,10 +2011,7 @@ feature_set: if (is_inside && !sm->out2in_dpo) { - if (sm->endpoint_dependent) - vnet_feature_enable_disable ("ip4-local", "nat44-ed-hairpinning", - sw_if_index, 1, 0, 0); - else + if (!sm->endpoint_dependent) vnet_feature_enable_disable ("ip4-local", "nat44-hairpinning", sw_if_index, 1, 0, 0); } @@ -2199,10 +2122,6 @@ feature_set: !is_del); if (rv) return rv; - vnet_feature_enable_disable ("ip4-unicast", "nat44-ed-hairpin-dst", - sw_if_index, !is_del, 0, 0); - vnet_feature_enable_disable ("ip4-output", "nat44-ed-hairpin-src", - sw_if_index, !is_del, 0, 0); } else { @@ -2575,12 +2494,6 @@ nat44_set_node_indexes (snat_main_t * sm, vlib_main_t * vm) sm->hairpin_dst_node_index = node->index; node = vlib_get_node_by_name (vm, (u8 *) "nat44-hairpin-src"); sm->hairpin_src_node_index = node->index; - node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-hairpinning"); - sm->ed_hairpinning_node_index = node->index; - node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-hairpin-dst"); - sm->ed_hairpin_dst_node_index = node->index; - node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-hairpin-src"); - sm->ed_hairpin_src_node_index = node->index; } #define nat_init_simple_counter(c, n, sn) \ @@ -2778,8 +2691,8 @@ nat44_ed_plugin_enable (nat44_config_t c) if (sm->pat) { - sm->icmp_match_in2out_cb = icmp_match_in2out_ed; - sm->icmp_match_out2in_cb = icmp_match_out2in_ed; + sm->icmp_match_in2out_cb = NULL; + sm->icmp_match_out2in_cb = NULL; } else { @@ -2907,8 +2820,6 @@ nat44_ed_plugin_disable () vec_free (sm->max_translations_per_fib); - nat_affinity_disable (); - nat44_ed_db_free (); nat44_addresses_free (&sm->addresses); @@ -3258,8 +3169,8 @@ nat44_ed_get_worker_in2out_cb (ip4_header_t *ip, u32 rx_fib_index, init_ed_k (&kv16, ip->src_address, udp->src_port, ip->dst_address, udp->dst_port, fib_index, ip->protocol); - if (PREDICT_TRUE (!clib_bihash_search_16_8 (&sm->out2in_ed, - &kv16, &value16))) + if (PREDICT_TRUE ( + !clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16))) { tsm = vec_elt_at_index (sm->per_thread_data, @@ -3327,13 +3238,13 @@ nat44_ed_get_worker_out2in_cb (vlib_buffer_t * b, ip4_header_t * ip, init_ed_k (&kv16, ip->dst_address, udp->dst_port, ip->src_address, udp->src_port, rx_fib_index, ip->protocol); - if (PREDICT_TRUE (!clib_bihash_search_16_8 (&sm->out2in_ed, - &kv16, &value16))) + if (PREDICT_TRUE ( + !clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16))) { tsm = vec_elt_at_index (sm->per_thread_data, ed_value_get_thread_index (&value16)); - vnet_buffer2 (b)->nat.ed_out2in_nat_session_index = + vnet_buffer2 (b)->nat.cached_session_index = ed_value_get_session_index (&value16); next_worker_index = sm->first_worker_index + tsm->thread_index; nat_elog_debug_handoff ("HANDOFF OUT2IN (session)", @@ -3347,10 +3258,17 @@ nat44_ed_get_worker_out2in_cb (vlib_buffer_t * b, ip4_header_t * ip, } else if (proto == NAT_PROTOCOL_ICMP) { - if (!get_icmp_o2i_ed_key (b, ip, rx_fib_index, ~0, ~0, 0, 0, 0, &kv16)) + ip4_address_t lookup_saddr, lookup_daddr; + u16 lookup_sport, lookup_dport; + u8 lookup_protocol; + if (!nat_get_icmp_session_lookup_values ( + b, ip, &lookup_saddr, &lookup_sport, &lookup_daddr, &lookup_dport, + &lookup_protocol)) { - if (PREDICT_TRUE (!clib_bihash_search_16_8 (&sm->out2in_ed, - &kv16, &value16))) + init_ed_k (&kv16, lookup_saddr, lookup_sport, lookup_daddr, + lookup_dport, rx_fib_index, lookup_protocol); + if (PREDICT_TRUE ( + !clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16))) { tsm = vec_elt_at_index (sm->per_thread_data, @@ -3558,9 +3476,6 @@ nat44_ed_worker_db_init (snat_main_per_thread_data_t *tsm, u32 translations, tsm->unk_proto_lru_head_index = head - tsm->lru_pool; clib_dlist_init (tsm->lru_pool, tsm->unk_proto_lru_head_index); - clib_bihash_init_16_8 (&tsm->in2out_ed, "in2out-ed", translation_buckets, 0); - clib_bihash_set_kvp_format_fn_16_8 (&tsm->in2out_ed, format_ed_session_kvp); - // TODO: ED nat is not using these // before removal large refactor required pool_alloc (tsm->list_pool, translations); @@ -3569,6 +3484,17 @@ nat44_ed_worker_db_init (snat_main_per_thread_data_t *tsm, u32 translations, } static void +reinit_ed_flow_hash () +{ + snat_main_t *sm = &snat_main; + // we expect 2 flows per session, so multiply translation_buckets by 2 + clib_bihash_init_16_8 ( + &sm->flow_hash, "ed-flow-hash", + clib_max (1, sm->num_workers) * 2 * sm->translation_buckets, 0); + clib_bihash_set_kvp_format_fn_16_8 (&sm->flow_hash, format_ed_session_kvp); +} + +static void nat44_ed_db_init (u32 translations, u32 translation_buckets, u32 user_buckets) { snat_main_t *sm = &snat_main; @@ -3576,8 +3502,7 @@ nat44_ed_db_init (u32 translations, u32 translation_buckets, u32 user_buckets) u32 static_mapping_buckets = 1024; u32 static_mapping_memory_size = 64 << 20; - clib_bihash_init_16_8 (&sm->out2in_ed, "out2in-ed", translation_buckets, 0); - clib_bihash_set_kvp_format_fn_16_8 (&sm->out2in_ed, format_ed_session_kvp); + reinit_ed_flow_hash (); clib_bihash_init_8_8 (&sm->static_mapping_by_local, "static_mapping_by_local", static_mapping_buckets, @@ -3607,7 +3532,6 @@ nat44_ed_worker_db_free (snat_main_per_thread_data_t *tsm) pool_free (tsm->sessions); pool_free (tsm->lru_pool); - clib_bihash_free_16_8 (&tsm->in2out_ed); vec_free (tsm->per_vrf_sessions_vec); // TODO: resolve static mappings (put only to !ED) @@ -3623,7 +3547,7 @@ nat44_ed_db_free () snat_main_per_thread_data_t *tsm; pool_free (sm->static_mappings); - clib_bihash_free_16_8 (&sm->out2in_ed); + clib_bihash_free_16_8 (&sm->flow_hash); clib_bihash_free_8_8 (&sm->static_mapping_by_local); clib_bihash_free_8_8 (&sm->static_mapping_by_external); @@ -3642,11 +3566,7 @@ nat44_ed_sessions_clear () snat_main_t *sm = &snat_main; snat_main_per_thread_data_t *tsm; - clib_bihash_free_16_8 (&sm->out2in_ed); - clib_bihash_init_16_8 ( - &sm->out2in_ed, "out2in-ed", - clib_max (1, sm->num_workers) * sm->translation_buckets, 0); - clib_bihash_set_kvp_format_fn_16_8 (&sm->out2in_ed, format_ed_session_kvp); + reinit_ed_flow_hash (); if (sm->pat) { @@ -3896,7 +3816,6 @@ nat44_del_ed_session (snat_main_t * sm, ip4_address_t * addr, u16 port, u32 vrf_id, int is_in) { ip4_header_t ip; - clib_bihash_16_8_t *t; clib_bihash_kv_16_8_t kv, value; u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id); snat_session_t *s; @@ -3913,16 +3832,15 @@ nat44_del_ed_session (snat_main_t * sm, ip4_address_t * addr, u16 port, else tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers); - t = is_in ? &tsm->in2out_ed : &sm->out2in_ed; init_ed_k (&kv, *addr, port, *eh_addr, eh_port, fib_index, proto); - if (clib_bihash_search_16_8 (t, &kv, &value)) + if (clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value)) { return VNET_API_ERROR_NO_SUCH_ENTRY; } - if (pool_is_free_index (tsm->sessions, value.value)) + if (pool_is_free_index (tsm->sessions, ed_value_get_session_index (&value))) return VNET_API_ERROR_UNSPECIFIED; - s = pool_elt_at_index (tsm->sessions, value.value); + s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value)); nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0); nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1); return 0; @@ -3952,13 +3870,343 @@ VLIB_REGISTER_NODE (nat_default_node) = { [NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH] = "nat44-ed-in2out-output-slowpath", [NAT_NEXT_OUT2IN_ED_FAST_PATH] = "nat44-ed-out2in", [NAT_NEXT_OUT2IN_ED_SLOW_PATH] = "nat44-ed-out2in-slowpath", - [NAT_NEXT_OUT2IN_ED_HANDOFF] = "nat44-ed-out2in-handoff", [NAT_NEXT_IN2OUT_CLASSIFY] = "nat44-in2out-worker-handoff", [NAT_NEXT_OUT2IN_CLASSIFY] = "nat44-out2in-worker-handoff", }, }; /* *INDENT-ON* */ +void +nat_6t_l3_l4_csum_calc (nat_6t_flow_t *f) +{ + f->l3_csum_delta = 0; + f->l4_csum_delta = 0; + if (f->ops & NAT_FLOW_OP_SADDR_REWRITE && + f->rewrite.saddr.as_u32 != f->match.saddr.as_u32) + { + f->l3_csum_delta = + ip_csum_add_even (f->l3_csum_delta, f->rewrite.saddr.as_u32); + f->l3_csum_delta = + ip_csum_sub_even (f->l3_csum_delta, f->match.saddr.as_u32); + } + else + { + f->rewrite.saddr.as_u32 = f->match.saddr.as_u32; + } + if (f->ops & NAT_FLOW_OP_DADDR_REWRITE && + f->rewrite.daddr.as_u32 != f->match.daddr.as_u32) + { + f->l3_csum_delta = + ip_csum_add_even (f->l3_csum_delta, f->rewrite.daddr.as_u32); + f->l3_csum_delta = + ip_csum_sub_even (f->l3_csum_delta, f->match.daddr.as_u32); + } + else + { + f->rewrite.daddr.as_u32 = f->match.daddr.as_u32; + } + if (f->ops & NAT_FLOW_OP_SPORT_REWRITE && f->rewrite.sport != f->match.sport) + { + f->l4_csum_delta = ip_csum_add_even (f->l4_csum_delta, f->rewrite.sport); + f->l4_csum_delta = ip_csum_sub_even (f->l4_csum_delta, f->match.sport); + } + else + { + f->rewrite.sport = f->match.sport; + } + if (f->ops & NAT_FLOW_OP_DPORT_REWRITE && f->rewrite.dport != f->match.dport) + { + f->l4_csum_delta = ip_csum_add_even (f->l4_csum_delta, f->rewrite.dport); + f->l4_csum_delta = ip_csum_sub_even (f->l4_csum_delta, f->match.dport); + } + else + { + f->rewrite.dport = f->match.dport; + } + if (f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE && + f->rewrite.icmp_id != f->match.icmp_id) + { + f->l4_csum_delta = + ip_csum_add_even (f->l4_csum_delta, f->rewrite.icmp_id); + f->l4_csum_delta = ip_csum_sub_even (f->l4_csum_delta, f->match.icmp_id); + } + else + { + f->rewrite.icmp_id = f->match.icmp_id; + } + if (f->ops & NAT_FLOW_OP_TXFIB_REWRITE) + { + } + else + { + f->rewrite.fib_index = f->match.fib_index; + } +} + +static_always_inline int nat_6t_flow_icmp_translate (snat_main_t *sm, + vlib_buffer_t *b, + ip4_header_t *ip, + nat_6t_flow_t *f); + +static_always_inline void +nat_6t_flow_ip4_translate (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip, + nat_6t_flow_t *f, nat_protocol_t proto, + int is_icmp_inner_ip4) +{ + udp_header_t *udp = ip4_next_header (ip); + tcp_header_t *tcp = (tcp_header_t *) udp; + + if ((NAT_PROTOCOL_TCP == proto || NAT_PROTOCOL_UDP == proto) && + !vnet_buffer (b)->ip.reass.is_non_first_fragment) + { + if (!is_icmp_inner_ip4) + { // regular case + ip->src_address = f->rewrite.saddr; + ip->dst_address = f->rewrite.daddr; + udp->src_port = f->rewrite.sport; + udp->dst_port = f->rewrite.dport; + } + else + { // icmp inner ip4 - reversed saddr/daddr + ip->src_address = f->rewrite.daddr; + ip->dst_address = f->rewrite.saddr; + udp->src_port = f->rewrite.dport; + udp->dst_port = f->rewrite.sport; + } + + if (NAT_PROTOCOL_TCP == proto) + { + ip_csum_t tcp_sum = tcp->checksum; + tcp_sum = ip_csum_sub_even (tcp_sum, f->l3_csum_delta); + tcp_sum = ip_csum_sub_even (tcp_sum, f->l4_csum_delta); + mss_clamping (sm->mss_clamping, tcp, &tcp_sum); + tcp->checksum = ip_csum_fold (tcp_sum); + } + else if (proto == NAT_PROTOCOL_UDP && udp->checksum) + { + ip_csum_t udp_sum = udp->checksum; + udp_sum = ip_csum_sub_even (udp_sum, f->l3_csum_delta); + udp_sum = ip_csum_sub_even (udp_sum, f->l4_csum_delta); + udp->checksum = ip_csum_fold (udp_sum); + } + } + else + { + if (!is_icmp_inner_ip4) + { // regular case + ip->src_address = f->rewrite.saddr; + ip->dst_address = f->rewrite.daddr; + } + else + { // icmp inner ip4 - reversed saddr/daddr + ip->src_address = f->rewrite.daddr; + ip->dst_address = f->rewrite.saddr; + } + } + + ip_csum_t ip_sum = ip->checksum; + ip_sum = ip_csum_sub_even (ip_sum, f->l3_csum_delta); + ip->checksum = ip_csum_fold (ip_sum); + ASSERT (ip->checksum == ip4_header_checksum (ip)); +} + +static_always_inline int +nat_6t_flow_icmp_translate (snat_main_t *sm, vlib_buffer_t *b, + ip4_header_t *ip, nat_6t_flow_t *f) +{ + if (IP_PROTOCOL_ICMP != ip->protocol) + return NAT_ED_TRNSL_ERR_TRANSLATION_FAILED; + + icmp46_header_t *icmp = ip4_next_header (ip); + icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1); + + if ((!vnet_buffer (b)->ip.reass.is_non_first_fragment)) + { + if (icmp->checksum == 0) + icmp->checksum = 0xffff; + + if (!icmp_type_is_error_message (icmp->type)) + { + if ((f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE) && + (f->rewrite.icmp_id != echo->identifier)) + { + ip_csum_t sum = icmp->checksum; + sum = ip_csum_update (sum, echo->identifier, f->rewrite.icmp_id, + icmp_echo_header_t, + identifier /* changed member */); + echo->identifier = f->rewrite.icmp_id; + icmp->checksum = ip_csum_fold (sum); + } + } + else + { + // errors are not fragmented + ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1); + + if (!ip4_header_checksum_is_valid (inner_ip)) + { + return NAT_ED_TRNSL_ERR_TRANSLATION_FAILED; + } + + nat_protocol_t inner_proto = + ip_proto_to_nat_proto (inner_ip->protocol); + + ip_csum_t icmp_sum = icmp->checksum; + + switch (inner_proto) + { + case NAT_PROTOCOL_UDP: + case NAT_PROTOCOL_TCP: + nat_6t_flow_ip4_translate (sm, b, inner_ip, f, inner_proto, + 1 /* is_icmp_inner_ip4 */); + icmp_sum = ip_csum_sub_even (icmp_sum, f->l3_csum_delta); + icmp->checksum = ip_csum_fold (icmp_sum); + break; + case NAT_PROTOCOL_ICMP: + if (f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE) + { + icmp46_header_t *inner_icmp = ip4_next_header (inner_ip); + icmp_echo_header_t *inner_echo = + (icmp_echo_header_t *) (inner_icmp + 1); + if (f->rewrite.icmp_id != inner_echo->identifier) + { + ip_csum_t sum = icmp->checksum; + sum = ip_csum_update ( + sum, inner_echo->identifier, f->rewrite.icmp_id, + icmp_echo_header_t, identifier /* changed member */); + icmp->checksum = ip_csum_fold (sum); + ip_csum_t inner_sum = inner_icmp->checksum; + inner_sum = ip_csum_update ( + sum, inner_echo->identifier, f->rewrite.icmp_id, + icmp_echo_header_t, identifier /* changed member */); + inner_icmp->checksum = ip_csum_fold (inner_sum); + inner_echo->identifier = f->rewrite.icmp_id; + } + } + break; + default: + clib_warning ("unexpected NAT protocol value `%d'", inner_proto); + return NAT_ED_TRNSL_ERR_TRANSLATION_FAILED; + } + } + } + return NAT_ED_TRNSL_ERR_SUCCESS; +} + +nat_translation_error_e +nat_6t_flow_buf_translate (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip, + nat_6t_flow_t *f, nat_protocol_t proto, + int is_output_feature) +{ + if (!is_output_feature && f->ops & NAT_FLOW_OP_TXFIB_REWRITE) + { + vnet_buffer (b)->sw_if_index[VLIB_TX] = f->rewrite.fib_index; + } + + nat_6t_flow_ip4_translate (sm, b, ip, f, proto, 0 /* is_icmp_inner_ip4 */); + + if (NAT_PROTOCOL_ICMP == proto) + { + return nat_6t_flow_icmp_translate (sm, b, ip, f); + } + + return NAT_ED_TRNSL_ERR_SUCCESS; +} + +u8 * +format_nat_6t (u8 *s, va_list *args) +{ + nat_6t_t *t = va_arg (*args, nat_6t_t *); + + s = format (s, "saddr %U sport %u daddr %U dport %u proto %U fib_idx %u", + format_ip4_address, t->saddr.as_u8, + clib_net_to_host_u16 (t->sport), format_ip4_address, + t->daddr.as_u8, clib_net_to_host_u16 (t->dport), + format_ip_protocol, t->proto, t->fib_index); + return s; +} + +u8 * +format_nat_ed_translation_error (u8 *s, va_list *args) +{ + nat_translation_error_e e = va_arg (*args, nat_translation_error_e); + + switch (e) + { + case NAT_ED_TRNSL_ERR_SUCCESS: + s = format (s, "success"); + break; + case NAT_ED_TRNSL_ERR_TRANSLATION_FAILED: + s = format (s, "translation-failed"); + break; + case NAT_ED_TRNSL_ERR_FLOW_MISMATCH: + s = format (s, "flow-mismatch"); + break; + } + return s; +} + +u8 * +format_nat_6t_flow (u8 *s, va_list *args) +{ + nat_6t_flow_t *f = va_arg (*args, nat_6t_flow_t *); + + s = format (s, "match: %U ", format_nat_6t, &f->match); + int r = 0; + if (f->ops & NAT_FLOW_OP_SADDR_REWRITE) + { + s = format (s, "rewrite: saddr %U ", format_ip4_address, + f->rewrite.saddr.as_u8); + r = 1; + } + if (f->ops & NAT_FLOW_OP_SPORT_REWRITE) + { + if (!r) + { + s = format (s, "rewrite: "); + r = 1; + } + s = format (s, "sport %u ", clib_net_to_host_u16 (f->rewrite.sport)); + } + if (f->ops & NAT_FLOW_OP_DADDR_REWRITE) + { + if (!r) + { + s = format (s, "rewrite: "); + r = 1; + } + s = format (s, "daddr %U ", format_ip4_address, f->rewrite.daddr.as_u8); + } + if (f->ops & NAT_FLOW_OP_DPORT_REWRITE) + { + if (!r) + { + s = format (s, "rewrite: "); + r = 1; + } + s = format (s, "dport %u ", clib_net_to_host_u16 (f->rewrite.dport)); + } + if (f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE) + { + if (!r) + { + s = format (s, "rewrite: "); + r = 1; + } + s = format (s, "icmp-id %u ", clib_net_to_host_u16 (f->rewrite.icmp_id)); + } + if (f->ops & NAT_FLOW_OP_TXFIB_REWRITE) + { + if (!r) + { + s = format (s, "rewrite: "); + r = 1; + } + s = format (s, "txfib %u ", f->rewrite.fib_index); + } + return s; +} + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/plugins/nat/nat.h b/src/plugins/nat/nat.h index 58883d491aa..7fa1ef79c3d 100644 --- a/src/plugins/nat/nat.h +++ b/src/plugins/nat/nat.h @@ -88,7 +88,6 @@ typedef enum NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH, NAT_NEXT_OUT2IN_ED_FAST_PATH, NAT_NEXT_OUT2IN_ED_SLOW_PATH, - NAT_NEXT_OUT2IN_ED_HANDOFF, NAT_NEXT_IN2OUT_CLASSIFY, NAT_NEXT_OUT2IN_CLASSIFY, NAT_N_NEXT, @@ -163,29 +162,17 @@ typedef enum NAT_IN2OUT_ED_N_ERROR, } nat_in2out_ed_error_t; -#define foreach_nat44_handoff_error \ -_(CONGESTION_DROP, "congestion drop") \ -_(SAME_WORKER, "same worker") \ -_(DO_HANDOFF, "do handoff") - -typedef enum -{ -#define _(sym,str) NAT44_HANDOFF_ERROR_##sym, - foreach_nat44_handoff_error -#undef _ - NAT44_HANDOFF_N_ERROR, -} nat44_handoff_error_t; - -#define foreach_nat_out2in_ed_error \ -_(UNSUPPORTED_PROTOCOL, "unsupported protocol") \ -_(OUT_OF_PORTS, "out of ports") \ -_(BAD_ICMP_TYPE, "unsupported ICMP type") \ -_(NO_TRANSLATION, "no translation") \ -_(MAX_SESSIONS_EXCEEDED, "maximum sessions exceeded") \ -_(MAX_USER_SESS_EXCEEDED, "max user sessions exceeded") \ -_(CANNOT_CREATE_USER, "cannot create NAT user") \ -_(NON_SYN, "non-SYN packet try to create session") \ -_(TCP_CLOSED, "drops due to TCP in transitory timeout") +#define foreach_nat_out2in_ed_error \ + _ (UNSUPPORTED_PROTOCOL, "unsupported protocol") \ + _ (OUT_OF_PORTS, "out of ports") \ + _ (BAD_ICMP_TYPE, "unsupported ICMP type") \ + _ (NO_TRANSLATION, "no translation") \ + _ (MAX_SESSIONS_EXCEEDED, "maximum sessions exceeded") \ + _ (MAX_USER_SESS_EXCEEDED, "max user sessions exceeded") \ + _ (CANNOT_CREATE_USER, "cannot create NAT user") \ + _ (NON_SYN, "non-SYN packet try to create session") \ + _ (TCP_CLOSED, "drops due to TCP in transitory timeout") \ + _ (HASH_ADD_FAILED, "hash table add failed") typedef enum { @@ -206,14 +193,15 @@ typedef enum #define NAT44_SES_RST 64 /* Session flags */ -#define SNAT_SESSION_FLAG_STATIC_MAPPING 1 -#define SNAT_SESSION_FLAG_UNKNOWN_PROTO 2 -#define SNAT_SESSION_FLAG_LOAD_BALANCING 4 -#define SNAT_SESSION_FLAG_TWICE_NAT 8 -#define SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT 16 -#define SNAT_SESSION_FLAG_FWD_BYPASS 32 -#define SNAT_SESSION_FLAG_AFFINITY 64 -#define SNAT_SESSION_FLAG_EXACT_ADDRESS 128 +#define SNAT_SESSION_FLAG_STATIC_MAPPING (1 << 0) +#define SNAT_SESSION_FLAG_UNKNOWN_PROTO (1 << 1) +#define SNAT_SESSION_FLAG_LOAD_BALANCING (1 << 2) +#define SNAT_SESSION_FLAG_TWICE_NAT (1 << 3) +#define SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT (1 << 4) +#define SNAT_SESSION_FLAG_FWD_BYPASS (1 << 5) +#define SNAT_SESSION_FLAG_AFFINITY (1 << 6) +#define SNAT_SESSION_FLAG_EXACT_ADDRESS (1 << 7) +#define SNAT_SESSION_FLAG_HAIRPINNING (1 << 8) /* NAT interface flags */ #define NAT_INTERFACE_FLAG_IS_INSIDE 1 @@ -240,6 +228,72 @@ typedef CLIB_PACKED(struct }) per_vrf_sessions_t; /* *INDENT-ON* */ +typedef struct +{ + ip4_address_t saddr, daddr; + u32 fib_index; + u16 sport, dport; + u16 icmp_id; + u8 proto; +} nat_6t_t; + +typedef struct +{ +#define NAT_FLOW_OP_SADDR_REWRITE (1 << 1) +#define NAT_FLOW_OP_SPORT_REWRITE (1 << 2) +#define NAT_FLOW_OP_DADDR_REWRITE (1 << 3) +#define NAT_FLOW_OP_DPORT_REWRITE (1 << 4) +#define NAT_FLOW_OP_ICMP_ID_REWRITE (1 << 5) +#define NAT_FLOW_OP_TXFIB_REWRITE (1 << 6) + int ops; + nat_6t_t match; + nat_6t_t rewrite; + uword l3_csum_delta; + uword l4_csum_delta; +} nat_6t_flow_t; + +always_inline void +nat_6t_flow_saddr_rewrite_set (nat_6t_flow_t *f, u32 saddr) +{ + f->ops |= NAT_FLOW_OP_SADDR_REWRITE; + f->rewrite.saddr.as_u32 = saddr; +} + +always_inline void +nat_6t_flow_daddr_rewrite_set (nat_6t_flow_t *f, u32 daddr) +{ + f->ops |= NAT_FLOW_OP_DADDR_REWRITE; + f->rewrite.daddr.as_u32 = daddr; +} + +always_inline void +nat_6t_flow_sport_rewrite_set (nat_6t_flow_t *f, u32 sport) +{ + f->ops |= NAT_FLOW_OP_SPORT_REWRITE; + f->rewrite.sport = sport; +} + +always_inline void +nat_6t_flow_dport_rewrite_set (nat_6t_flow_t *f, u32 dport) +{ + f->ops |= NAT_FLOW_OP_DPORT_REWRITE; + f->rewrite.dport = dport; +} + +always_inline void +nat_6t_flow_txfib_rewrite_set (nat_6t_flow_t *f, u32 tx_fib_index) +{ + f->ops |= NAT_FLOW_OP_TXFIB_REWRITE; + f->rewrite.fib_index = tx_fib_index; +} + +always_inline void +nat_6t_flow_icmp_id_rewrite_set (nat_6t_flow_t *f, u16 id) +{ + f->ops |= NAT_FLOW_OP_ICMP_ID_REWRITE; + f->rewrite.icmp_id = id; +} + /* *INDENT-OFF* */ typedef CLIB_PACKED(struct { @@ -261,6 +315,9 @@ typedef CLIB_PACKED(struct nat_protocol_t nat_proto; + nat_6t_flow_t i2o; + nat_6t_flow_t o2i; + /* Flags */ u32 flags; @@ -439,9 +496,6 @@ typedef struct clib_bihash_8_8_t out2in; clib_bihash_8_8_t in2out; - /* Endpoint dependent sessions lookup tables */ - clib_bihash_16_8_t in2out_ed; - /* Find-a-user => src address lookup */ clib_bihash_8_8_t user_hash; @@ -536,8 +590,8 @@ typedef struct snat_main_s /* Static mapping pool */ snat_static_mapping_t *static_mappings; - /* Endpoint-dependent out2in mappings */ - clib_bihash_16_8_t out2in_ed; + /* Endpoint dependent lookup table */ + clib_bihash_16_8_t flow_hash; /* Interface pool */ snat_interface_t *interfaces; @@ -616,9 +670,6 @@ typedef struct snat_main_s u32 hairpinning_node_index; u32 hairpin_dst_node_index; u32 hairpin_src_node_index; - u32 ed_hairpinning_node_index; - u32 ed_hairpin_dst_node_index; - u32 ed_hairpin_src_node_index; nat44_config_t rconfig; //nat44_config_t cconfig; @@ -1103,18 +1154,6 @@ u32 icmp_match_out2in_slow (snat_main_t * sm, vlib_node_runtime_t * node, nat_protocol_t * proto, void *d, void *e, u8 * dont_translate); -/* ICMP endpoint-dependent session match functions */ -u32 icmp_match_out2in_ed (snat_main_t * sm, vlib_node_runtime_t * node, - u32 thread_index, vlib_buffer_t * b0, - ip4_header_t * ip0, ip4_address_t * addr, - u16 * port, u32 * fib_index, nat_protocol_t * proto, - void *d, void *e, u8 * dont_translate); -u32 icmp_match_in2out_ed (snat_main_t * sm, vlib_node_runtime_t * node, - u32 thread_index, vlib_buffer_t * b0, - ip4_header_t * ip0, ip4_address_t * addr, - u16 * port, u32 * fib_index, nat_protocol_t * proto, - void *d, void *e, u8 * dont_translate); - u32 icmp_in2out (snat_main_t * sm, vlib_buffer_t * b0, ip4_header_t * ip0, icmp46_header_t * icmp0, u32 sw_if_index0, u32 rx_fib_index0, vlib_node_runtime_t * node, u32 next0, u32 thread_index, @@ -1126,22 +1165,17 @@ u32 icmp_out2in (snat_main_t * sm, vlib_buffer_t * b0, ip4_header_t * ip0, void *d, void *e); /* hairpinning functions */ -u32 snat_icmp_hairpinning (snat_main_t * sm, vlib_buffer_t * b0, - ip4_header_t * ip0, icmp46_header_t * icmp0, - int is_ed); +u32 snat_icmp_hairpinning (snat_main_t *sm, vlib_buffer_t *b0, + ip4_header_t *ip0, icmp46_header_t *icmp0); + void nat_hairpinning_sm_unknown_proto (snat_main_t * sm, vlib_buffer_t * b, ip4_header_t * ip); -void nat44_ed_hairpinning_unknown_proto (snat_main_t * sm, vlib_buffer_t * b, - ip4_header_t * ip); -int snat_hairpinning (vlib_main_t * vm, vlib_node_runtime_t * node, - snat_main_t * sm, vlib_buffer_t * b0, - ip4_header_t * ip0, udp_header_t * udp0, - tcp_header_t * tcp0, u32 proto0, int is_ed, +int snat_hairpinning (vlib_main_t *vm, vlib_node_runtime_t *node, + snat_main_t *sm, vlib_buffer_t *b0, ip4_header_t *ip0, + udp_header_t *udp0, tcp_header_t *tcp0, u32 proto0, int do_trace); /* Call back functions for clib_bihash_add_or_overwrite_stale */ -int nat44_i2o_ed_is_idle_session_cb (clib_bihash_kv_16_8_t * kv, void *arg); -int nat44_o2i_ed_is_idle_session_cb (clib_bihash_kv_16_8_t * kv, void *arg); int nat44_i2o_is_idle_session_cb (clib_bihash_kv_8_8_t * kv, void *arg); int nat44_o2i_is_idle_session_cb (clib_bihash_kv_8_8_t * kv, void *arg); @@ -1496,6 +1530,24 @@ u32 nat_calc_bihash_buckets (u32 n_elts); void nat44_addresses_free (snat_address_t **addresses); +typedef enum +{ + NAT_ED_TRNSL_ERR_SUCCESS = 0, + NAT_ED_TRNSL_ERR_TRANSLATION_FAILED = 1, + NAT_ED_TRNSL_ERR_FLOW_MISMATCH = 2, +} nat_translation_error_e; + +nat_translation_error_e +nat_6t_flow_buf_translate (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip, + nat_6t_flow_t *f, nat_protocol_t proto, + int is_output_feature); + +void nat_6t_l3_l4_csum_calc (nat_6t_flow_t *f); + +format_function_t format_nat_ed_translation_error; +format_function_t format_nat_6t_flow; +format_function_t format_ed_session_kvp; + #endif /* __included_nat_h__ */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/plugins/nat/nat44-ei/nat44_ei_ha.h b/src/plugins/nat/nat44-ei/nat44_ei_ha.h index c466d4c9288..5639c8d0239 100644 --- a/src/plugins/nat/nat44-ei/nat44_ei_ha.h +++ b/src/plugins/nat/nat44-ei/nat44_ei_ha.h @@ -22,6 +22,7 @@ #include <vnet/vnet.h> #include <vnet/ip/ip.h> +#include <nat/nat.h> /* Call back functions for received HA events on passive/failover */ typedef void (*nat_ha_sadd_cb_t) (ip4_address_t * in_addr, u16 in_port, @@ -30,6 +31,7 @@ typedef void (*nat_ha_sadd_cb_t) (ip4_address_t * in_addr, u16 in_port, ip4_address_t * ehn_addr, u16 ehn_port, u8 proto, u32 fib_index, u16 flags, u32 thread_index); + typedef void (*nat_ha_sdel_cb_t) (ip4_address_t * out_addr, u16 out_port, ip4_address_t * eh_addr, u16 eh_port, u8 proto, u32 fib_index, u32 thread_index); diff --git a/src/plugins/nat/nat44-ei/nat44_ei_in2out.c b/src/plugins/nat/nat44-ei/nat44_ei_in2out.c index 892518fff97..303c588d34e 100644 --- a/src/plugins/nat/nat44-ei/nat44_ei_in2out.c +++ b/src/plugins/nat/nat44-ei/nat44_ei_in2out.c @@ -785,8 +785,7 @@ icmp_in2out (snat_main_t * sm, if (vnet_buffer (b0)->sw_if_index[VLIB_TX] == ~0) { - if (0 != snat_icmp_hairpinning (sm, b0, ip0, icmp0, - sm->endpoint_dependent)) + if (0 != snat_icmp_hairpinning (sm, b0, ip0, icmp0)) vnet_buffer (b0)->sw_if_index[VLIB_TX] = fib_index; } @@ -1904,9 +1903,8 @@ VLIB_NODE_FN (snat_in2out_fast_node) (vlib_main_t * vm, } /* Hairpinning */ - is_hairpinning = - snat_hairpinning (vm, node, sm, b0, ip0, udp0, tcp0, proto0, 0, - 0 /* do_trace */ ); + is_hairpinning = snat_hairpinning (vm, node, sm, b0, ip0, udp0, tcp0, + proto0, 0 /* do_trace */); trace0: if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) diff --git a/src/plugins/nat/nat44/ed_inlines.h b/src/plugins/nat/nat44/ed_inlines.h index 1b4df4d02fd..87de25e990b 100644 --- a/src/plugins/nat/nat44/ed_inlines.h +++ b/src/plugins/nat/nat44/ed_inlines.h @@ -51,6 +51,60 @@ nat_ed_lru_insert (snat_main_per_thread_data_t * tsm, return 1; } +static_always_inline void +nat_6t_flow_to_ed_k (clib_bihash_kv_16_8_t *kv, nat_6t_flow_t *f) +{ + init_ed_k (kv, f->match.saddr, f->match.sport, f->match.daddr, + f->match.dport, f->match.fib_index, f->match.proto); +} + +static_always_inline void +nat_6t_flow_to_ed_kv (clib_bihash_kv_16_8_t *kv, nat_6t_flow_t *f, + u32 thread_idx, u32 session_idx) +{ + init_ed_kv (kv, f->match.saddr, f->match.sport, f->match.daddr, + f->match.dport, f->match.fib_index, f->match.proto, thread_idx, + session_idx); +} + +static_always_inline int +nat_ed_ses_i2o_flow_hash_add_del (snat_main_t *sm, u32 thread_idx, + snat_session_t *s, int is_add) +{ + snat_main_per_thread_data_t *tsm = + vec_elt_at_index (sm->per_thread_data, thread_idx); + clib_bihash_kv_16_8_t kv; + if (0 == is_add) + { + nat_6t_flow_to_ed_k (&kv, &s->i2o); + } + else + { + nat_6t_flow_to_ed_kv (&kv, &s->i2o, thread_idx, s - tsm->sessions); + nat_6t_l3_l4_csum_calc (&s->i2o); + } + return clib_bihash_add_del_16_8 (&sm->flow_hash, &kv, is_add); +} + +static_always_inline int +nat_ed_ses_o2i_flow_hash_add_del (snat_main_t *sm, u32 thread_idx, + snat_session_t *s, int is_add) +{ + snat_main_per_thread_data_t *tsm = + vec_elt_at_index (sm->per_thread_data, thread_idx); + clib_bihash_kv_16_8_t kv; + if (0 == is_add) + { + nat_6t_flow_to_ed_k (&kv, &s->o2i); + } + else + { + nat_6t_flow_to_ed_kv (&kv, &s->o2i, thread_idx, s - tsm->sessions); + nat_6t_l3_l4_csum_calc (&s->o2i); + } + return clib_bihash_add_del_16_8 (&sm->flow_hash, &kv, is_add); +} + always_inline void nat_ed_session_delete (snat_main_t * sm, snat_session_t * ses, u32 thread_index, int lru_delete @@ -64,6 +118,10 @@ nat_ed_session_delete (snat_main_t * sm, snat_session_t * ses, clib_dlist_remove (tsm->lru_pool, ses->lru_index); } pool_put_index (tsm->lru_pool, ses->lru_index); + if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, ses, 0)) + nat_elog_warn ("flow hash del failed"); + if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, ses, 0)) + nat_elog_warn ("flow hash del failed"); pool_put (tsm->sessions, ses); vlib_set_simple_counter (&sm->total_sessions, thread_index, 0, pool_elts (tsm->sessions)); @@ -225,10 +283,10 @@ per_vrf_sessions_unregister_session (snat_session_t * s, u32 thread_index) per_vrf_sessions_t *per_vrf_sessions; ASSERT (s->per_vrf_sessions_index != ~0); - + tsm = vec_elt_at_index (sm->per_thread_data, thread_index); - per_vrf_sessions = vec_elt_at_index (tsm->per_vrf_sessions_vec, - s->per_vrf_sessions_index); + per_vrf_sessions = + vec_elt_at_index (tsm->per_vrf_sessions_vec, s->per_vrf_sessions_index); ASSERT (per_vrf_sessions->ses_count != 0); @@ -247,9 +305,57 @@ per_vrf_sessions_is_expired (snat_session_t * s, u32 thread_index) ASSERT (s->per_vrf_sessions_index != ~0); tsm = vec_elt_at_index (sm->per_thread_data, thread_index); - per_vrf_sessions = vec_elt_at_index (tsm->per_vrf_sessions_vec, - s->per_vrf_sessions_index); + per_vrf_sessions = + vec_elt_at_index (tsm->per_vrf_sessions_vec, s->per_vrf_sessions_index); return per_vrf_sessions->expired; } +static_always_inline void +nat_6t_flow_init (nat_6t_flow_t *f, u32 thread_idx, ip4_address_t saddr, + u16 sport, ip4_address_t daddr, u16 dport, u32 fib_index, + u8 proto, u32 session_idx) +{ + clib_memset (f, 0, sizeof (*f)); + f->match.saddr = saddr; + f->match.sport = sport; + f->match.daddr = daddr; + f->match.dport = dport; + f->match.proto = proto; + f->match.fib_index = fib_index; +} + +static_always_inline void +nat_6t_i2o_flow_init (snat_main_t *sm, u32 thread_idx, snat_session_t *s, + ip4_address_t saddr, u16 sport, ip4_address_t daddr, + u16 dport, u32 fib_index, u8 proto) +{ + snat_main_per_thread_data_t *tsm = + vec_elt_at_index (sm->per_thread_data, thread_idx); + nat_6t_flow_init (&s->i2o, thread_idx, saddr, sport, daddr, dport, fib_index, + proto, s - tsm->sessions); +} + +static_always_inline void +nat_6t_o2i_flow_init (snat_main_t *sm, u32 thread_idx, snat_session_t *s, + ip4_address_t saddr, u16 sport, ip4_address_t daddr, + u16 dport, u32 fib_index, u8 proto) +{ + snat_main_per_thread_data_t *tsm = + vec_elt_at_index (sm->per_thread_data, thread_idx); + nat_6t_flow_init (&s->o2i, thread_idx, saddr, sport, daddr, dport, fib_index, + proto, s - tsm->sessions); +} + +static_always_inline int +nat_6t_flow_match (nat_6t_flow_t *f, vlib_buffer_t *b, ip4_address_t saddr, + u16 sport, ip4_address_t daddr, u16 dport, u8 protocol, + u32 fib_index) +{ + return f->match.daddr.as_u32 == daddr.as_u32 && + f->match.dport == vnet_buffer (b)->ip.reass.l4_dst_port && + f->match.proto == protocol && f->match.fib_index == fib_index && + f->match.saddr.as_u32 == saddr.as_u32 && + f->match.sport == vnet_buffer (b)->ip.reass.l4_src_port; +} + #endif diff --git a/src/plugins/nat/nat44_classify.c b/src/plugins/nat/nat44_classify.c index 6cdb57721aa..85f8c64afd5 100644 --- a/src/plugins/nat/nat44_classify.c +++ b/src/plugins/nat/nat44_classify.c @@ -22,6 +22,7 @@ #include <vnet/fib/ip4_fib.h> #include <nat/nat.h> #include <nat/nat_inlines.h> +#include <nat/nat44/ed_inlines.h> #define foreach_nat44_classify_error \ _(NEXT_IN2OUT, "next in2out") \ @@ -294,8 +295,6 @@ nat44_ed_classify_node_fn_inline (vlib_main_t * vm, nat44_classify_next_t next_index; snat_main_t *sm = &snat_main; snat_static_mapping_t *m; - u32 thread_index = vm->thread_index; - snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; u32 next_in2out = 0, next_out2in = 0; from = vlib_frame_vector_args (frame); @@ -347,9 +346,31 @@ nat44_ed_classify_node_fn_inline (vlib_main_t * vm, vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0, ip0->protocol); /* process whole packet */ - if (!clib_bihash_search_16_8 - (&tsm->in2out_ed, &ed_kv0, &ed_value0)) - goto enqueue0; + if (!clib_bihash_search_16_8 (&sm->flow_hash, &ed_kv0, + &ed_value0)) + { + ASSERT (vm->thread_index == + ed_value_get_thread_index (&ed_value0)); + snat_main_per_thread_data_t *tsm = + &sm->per_thread_data[vm->thread_index]; + snat_session_t *s = pool_elt_at_index ( + tsm->sessions, ed_value_get_session_index (&ed_value0)); + clib_bihash_kv_16_8_t i2o_kv; + nat_6t_flow_to_ed_k (&i2o_kv, &s->i2o); + vnet_buffer2 (b0)->nat.cached_session_index = + ed_value_get_session_index (&ed_value0); + if (i2o_kv.key[0] == ed_kv0.key[0] && + i2o_kv.key[1] == ed_kv0.key[1]) + { + next0 = NAT_NEXT_IN2OUT_ED_FAST_PATH; + } + else + { + next0 = NAT_NEXT_OUT2IN_ED_FAST_PATH; + } + + goto enqueue0; + } /* session doesn't exist so continue in code */ } diff --git a/src/plugins/nat/nat44_cli.c b/src/plugins/nat/nat44_cli.c index adcf324850d..d1a08718ed7 100644 --- a/src/plugins/nat/nat44_cli.c +++ b/src/plugins/nat/nat44_cli.c @@ -300,7 +300,7 @@ nat44_show_hash_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->static_mapping_by_external, verbose); - vlib_cli_output (vm, "%U", format_bihash_16_8, &sm->out2in_ed, verbose); + vlib_cli_output (vm, "%U", format_bihash_16_8, &sm->flow_hash, verbose); vec_foreach_index (i, sm->per_thread_data) { tsm = vec_elt_at_index (sm->per_thread_data, i); @@ -308,7 +308,7 @@ nat44_show_hash_command_fn (vlib_main_t * vm, unformat_input_t * input, i, vlib_worker_threads[i].name); if (sm->endpoint_dependent) { - vlib_cli_output (vm, "%U", format_bihash_16_8, &tsm->in2out_ed, + vlib_cli_output (vm, "%U", format_bihash_16_8, &sm->flow_hash, verbose); } else diff --git a/src/plugins/nat/nat44_hairpinning.c b/src/plugins/nat/nat44_hairpinning.c index 9432f554246..37dfd7827f6 100644 --- a/src/plugins/nat/nat44_hairpinning.c +++ b/src/plugins/nat/nat44_hairpinning.c @@ -94,10 +94,9 @@ is_hairpinning (snat_main_t * sm, ip4_address_t * dst_addr) #ifndef CLIB_MARCH_VARIANT int -snat_hairpinning (vlib_main_t * vm, vlib_node_runtime_t * node, - snat_main_t * sm, vlib_buffer_t * b0, ip4_header_t * ip0, - udp_header_t * udp0, tcp_header_t * tcp0, u32 proto0, - int is_ed, int do_trace) +snat_hairpinning (vlib_main_t *vm, vlib_node_runtime_t *node, snat_main_t *sm, + vlib_buffer_t *b0, ip4_header_t *ip0, udp_header_t *udp0, + tcp_header_t *tcp0, u32 proto0, int do_trace) { snat_session_t *s0 = NULL; clib_bihash_kv_8_8_t kv0, value0; @@ -128,32 +127,17 @@ snat_hairpinning (vlib_main_t * vm, vlib_node_runtime_t * node, else ti = sm->num_workers; - if (is_ed) - { - clib_bihash_kv_16_8_t ed_kv, ed_value; - init_ed_k (&ed_kv, ip0->dst_address, udp0->dst_port, - ip0->src_address, udp0->src_port, sm->outside_fib_index, - ip0->protocol); - rv = clib_bihash_search_16_8 (&sm->out2in_ed, &ed_kv, &ed_value); - ASSERT (ti == ed_value_get_thread_index (&ed_value)); - si = ed_value_get_session_index (&ed_value); - } - else - { - - init_nat_k (&kv0, ip0->dst_address, udp0->dst_port, - sm->outside_fib_index, proto0); - rv = - clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0, - &value0); - si = value0.value; - } + init_nat_k (&kv0, ip0->dst_address, udp0->dst_port, + sm->outside_fib_index, proto0); + rv = clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0, + &value0); if (rv) { rv = 0; goto trace; } + si = value0.value; s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si); new_dst_addr0 = s0->in2out.addr.as_u32; new_dst_port0 = s0->in2out.port; @@ -237,9 +221,8 @@ trace: #ifndef CLIB_MARCH_VARIANT u32 -snat_icmp_hairpinning (snat_main_t * sm, - vlib_buffer_t * b0, - ip4_header_t * ip0, icmp46_header_t * icmp0, int is_ed) +snat_icmp_hairpinning (snat_main_t *sm, vlib_buffer_t *b0, ip4_header_t *ip0, + icmp46_header_t *icmp0) { clib_bihash_kv_8_8_t kv0, value0; u32 old_dst_addr0, new_dst_addr0; @@ -264,26 +247,12 @@ snat_icmp_hairpinning (snat_main_t * sm, if (protocol != NAT_PROTOCOL_TCP && protocol != NAT_PROTOCOL_UDP) return 1; - if (is_ed) - { - clib_bihash_kv_16_8_t ed_kv, ed_value; - init_ed_k (&ed_kv, ip0->dst_address, l4_header->src_port, - ip0->src_address, l4_header->dst_port, - sm->outside_fib_index, inner_ip0->protocol); - if (clib_bihash_search_16_8 (&sm->out2in_ed, &ed_kv, &ed_value)) - return 1; - ASSERT (ti == ed_value_get_thread_index (&ed_value)); - si = ed_value_get_session_index (&ed_value); - } - else - { - init_nat_k (&kv0, ip0->dst_address, l4_header->src_port, - sm->outside_fib_index, protocol); - if (clib_bihash_search_8_8 - (&sm->per_thread_data[ti].out2in, &kv0, &value0)) - return 1; - si = value0.value; - } + init_nat_k (&kv0, ip0->dst_address, l4_header->src_port, + sm->outside_fib_index, protocol); + if (clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0, + &value0)) + return 1; + si = value0.value; s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si); new_dst_addr0 = s0->in2out.addr.as_u32; vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index; @@ -324,36 +293,29 @@ snat_icmp_hairpinning (snat_main_t * sm, if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv0, &value0)) { - if (!is_ed) + icmp_echo_header_t *echo0 = (icmp_echo_header_t *) (icmp0 + 1); + u16 icmp_id0 = echo0->identifier; + init_nat_k (&kv0, ip0->dst_address, icmp_id0, sm->outside_fib_index, + NAT_PROTOCOL_ICMP); + if (sm->num_workers > 1) + ti = + (clib_net_to_host_u16 (icmp_id0) - 1024) / sm->port_per_thread; + else + ti = sm->num_workers; + int rv = clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, + &kv0, &value0); + if (!rv) { - icmp_echo_header_t *echo0 = (icmp_echo_header_t *) (icmp0 + 1); - u16 icmp_id0 = echo0->identifier; - init_nat_k (&kv0, ip0->dst_address, icmp_id0, - sm->outside_fib_index, NAT_PROTOCOL_ICMP); - if (sm->num_workers > 1) - ti = - (clib_net_to_host_u16 (icmp_id0) - - 1024) / sm->port_per_thread; - else - ti = sm->num_workers; - int rv = - clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0, - &value0); - if (!rv) - { - si = value0.value; - s0 = - pool_elt_at_index (sm->per_thread_data[ti].sessions, si); - new_dst_addr0 = s0->in2out.addr.as_u32; - vnet_buffer (b0)->sw_if_index[VLIB_TX] = - s0->in2out.fib_index; - echo0->identifier = s0->in2out.port; - sum0 = icmp0->checksum; - sum0 = ip_csum_update (sum0, icmp_id0, s0->in2out.port, - icmp_echo_header_t, identifier); - icmp0->checksum = ip_csum_fold (sum0); - goto change_addr; - } + si = value0.value; + s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si); + new_dst_addr0 = s0->in2out.addr.as_u32; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index; + echo0->identifier = s0->in2out.port; + sum0 = icmp0->checksum; + sum0 = ip_csum_update (sum0, icmp_id0, s0->in2out.port, + icmp_echo_header_t, identifier); + icmp0->checksum = ip_csum_fold (sum0); + goto change_addr; } return 1; @@ -407,58 +369,9 @@ nat_hairpinning_sm_unknown_proto (snat_main_t * sm, } #endif -#ifndef CLIB_MARCH_VARIANT -void -nat44_ed_hairpinning_unknown_proto (snat_main_t * sm, - vlib_buffer_t * b, ip4_header_t * ip) -{ - u32 old_addr, new_addr = 0, ti = 0; - clib_bihash_kv_8_8_t kv, value; - clib_bihash_kv_16_8_t s_kv, s_value; - snat_static_mapping_t *m; - ip_csum_t sum; - snat_session_t *s; - - if (sm->num_workers > 1) - ti = sm->worker_out2in_cb (b, ip, sm->outside_fib_index, 0); - else - ti = sm->num_workers; - - old_addr = ip->dst_address.as_u32; - init_ed_k (&s_kv, ip->dst_address, 0, ip->src_address, 0, - sm->outside_fib_index, ip->protocol); - if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value)) - { - init_nat_k (&kv, ip->dst_address, 0, 0, 0); - if (clib_bihash_search_8_8 - (&sm->static_mapping_by_external, &kv, &value)) - return; - - m = pool_elt_at_index (sm->static_mappings, value.value); - if (vnet_buffer (b)->sw_if_index[VLIB_TX] == ~0) - vnet_buffer (b)->sw_if_index[VLIB_TX] = m->fib_index; - new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32; - } - else - { - ASSERT (ti == ed_value_get_thread_index (&s_value)); - s = - pool_elt_at_index (sm->per_thread_data[ti].sessions, - ed_value_get_session_index (&s_value)); - if (vnet_buffer (b)->sw_if_index[VLIB_TX] == ~0) - vnet_buffer (b)->sw_if_index[VLIB_TX] = s->in2out.fib_index; - new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32; - } - sum = ip->checksum; - sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address); - ip->checksum = ip_csum_fold (sum); -} -#endif - static inline uword -nat44_hairpinning_fn_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame, int is_ed) +nat44_hairpinning_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame) { u32 n_left_from, *from, *to_next; nat_hairpin_next_t next_index; @@ -507,9 +420,8 @@ nat44_hairpinning_fn_inline (vlib_main_t * vm, vnet_get_config_data (&cm->config_main, &b0->current_config_index, &next0, 0); - if (snat_hairpinning - (vm, node, sm, b0, ip0, udp0, tcp0, proto0, is_ed, - 1 /* do_trace */ )) + if (snat_hairpinning (vm, node, sm, b0, ip0, udp0, tcp0, proto0, + 1 /* do_trace */)) next0 = NAT_HAIRPIN_NEXT_LOOKUP; if (next0 != NAT_HAIRPIN_NEXT_DROP) @@ -535,7 +447,7 @@ VLIB_NODE_FN (nat44_hairpinning_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return nat44_hairpinning_fn_inline (vm, node, frame, 0); + return nat44_hairpinning_fn_inline (vm, node, frame); } /* *INDENT-OFF* */ @@ -552,31 +464,9 @@ VLIB_REGISTER_NODE (nat44_hairpinning_node) = { }; /* *INDENT-ON* */ -VLIB_NODE_FN (nat44_ed_hairpinning_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) -{ - return nat44_hairpinning_fn_inline (vm, node, frame, 1); -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (nat44_ed_hairpinning_node) = { - .name = "nat44-ed-hairpinning", - .vector_size = sizeof (u32), - .type = VLIB_NODE_TYPE_INTERNAL, - .format_trace = format_nat_hairpin_trace, - .n_next_nodes = NAT_HAIRPIN_N_NEXT, - .next_nodes = { - [NAT_HAIRPIN_NEXT_DROP] = "error-drop", - [NAT_HAIRPIN_NEXT_LOOKUP] = "ip4-lookup", - }, -}; -/* *INDENT-ON* */ - static inline uword -snat_hairpin_dst_fn_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame, int is_ed) +snat_hairpin_dst_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame) { u32 n_left_from, *from, *to_next; nat_hairpin_next_t next_index; @@ -625,20 +515,17 @@ snat_hairpin_dst_fn_inline (vlib_main_t * vm, tcp_header_t *tcp0 = (tcp_header_t *) udp0; snat_hairpinning (vm, node, sm, b0, ip0, udp0, tcp0, proto0, - is_ed, 1 /* do_trace */ ); + 1 /* do_trace */); } else if (proto0 == NAT_PROTOCOL_ICMP) { icmp46_header_t *icmp0 = ip4_next_header (ip0); - snat_icmp_hairpinning (sm, b0, ip0, icmp0, is_ed); + snat_icmp_hairpinning (sm, b0, ip0, icmp0); } else { - if (is_ed) - nat44_ed_hairpinning_unknown_proto (sm, b0, ip0); - else - nat_hairpinning_sm_unknown_proto (sm, b0, ip0); + nat_hairpinning_sm_unknown_proto (sm, b0, ip0); } vnet_buffer (b0)->snat.flags = SNAT_FLAG_HAIRPINNING; @@ -668,7 +555,7 @@ VLIB_NODE_FN (snat_hairpin_dst_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return snat_hairpin_dst_fn_inline (vm, node, frame, 0); + return snat_hairpin_dst_fn_inline (vm, node, frame); } /* *INDENT-OFF* */ @@ -685,31 +572,9 @@ VLIB_REGISTER_NODE (snat_hairpin_dst_node) = { }; /* *INDENT-ON* */ -VLIB_NODE_FN (nat44_ed_hairpin_dst_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) -{ - return snat_hairpin_dst_fn_inline (vm, node, frame, 1); -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (nat44_ed_hairpin_dst_node) = { - .name = "nat44-ed-hairpin-dst", - .vector_size = sizeof (u32), - .type = VLIB_NODE_TYPE_INTERNAL, - .format_trace = format_nat_hairpin_trace, - .n_next_nodes = NAT_HAIRPIN_N_NEXT, - .next_nodes = { - [NAT_HAIRPIN_NEXT_DROP] = "error-drop", - [NAT_HAIRPIN_NEXT_LOOKUP] = "ip4-lookup", - }, -}; -/* *INDENT-ON* */ - static inline uword -snat_hairpin_src_fn_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame, int is_ed) +snat_hairpin_src_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame) { u32 n_left_from, *from, *to_next; snat_hairpin_src_next_t next_index; @@ -787,7 +652,7 @@ VLIB_NODE_FN (snat_hairpin_src_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return snat_hairpin_src_fn_inline (vm, node, frame, 0); + return snat_hairpin_src_fn_inline (vm, node, frame); } /* *INDENT-OFF* */ @@ -805,28 +670,6 @@ VLIB_REGISTER_NODE (snat_hairpin_src_node) = { }; /* *INDENT-ON* */ -VLIB_NODE_FN (nat44_ed_hairpin_src_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) -{ - return snat_hairpin_src_fn_inline (vm, node, frame, 1); -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (nat44_ed_hairpin_src_node) = { - .name = "nat44-ed-hairpin-src", - .vector_size = sizeof (u32), - .type = VLIB_NODE_TYPE_INTERNAL, - .n_next_nodes = SNAT_HAIRPIN_SRC_N_NEXT, - .next_nodes = { - [SNAT_HAIRPIN_SRC_NEXT_DROP] = "error-drop", - [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT] = "nat44-ed-in2out-output", - [SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT] = "interface-output", - [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH] = "nat44-in2out-output-worker-handoff", - }, -}; -/* *INDENT-ON* */ - /* * fd.io coding-style-patch-verification: ON * diff --git a/src/plugins/nat/nat44_handoff.c b/src/plugins/nat/nat44_handoff.c index 651c8d1f4ac..8c1b967c020 100644 --- a/src/plugins/nat/nat44_handoff.c +++ b/src/plugins/nat/nat44_handoff.c @@ -33,6 +33,19 @@ typedef struct u8 output; } nat44_handoff_trace_t; +#define foreach_nat44_handoff_error \ + _ (CONGESTION_DROP, "congestion drop") \ + _ (SAME_WORKER, "same worker") \ + _ (DO_HANDOFF, "do handoff") + +typedef enum +{ +#define _(sym, str) NAT44_HANDOFF_ERROR_##sym, + foreach_nat44_handoff_error +#undef _ + NAT44_HANDOFF_N_ERROR, +} nat44_handoff_error_t; + static char *nat44_handoff_error_strings[] = { #define _(sym,string) string, foreach_nat44_handoff_error diff --git a/src/plugins/nat/nat_format.c b/src/plugins/nat/nat_format.c index 90faeb96e9b..2fbd7498f49 100644 --- a/src/plugins/nat/nat_format.c +++ b/src/plugins/nat/nat_format.c @@ -121,9 +121,9 @@ format_snat_session (u8 * s, va_list * args) s = format (s, " i2o %U proto %u fib %u\n", format_ip4_address, &sess->in2out.addr, sess->in2out.port, sess->in2out.fib_index); - s = format (s, " o2i %U proto %u fib %u\n", - format_ip4_address, &sess->out2in.addr, - sess->out2in.port, sess->out2in.fib_index); + s = + format (s, " o2i %U proto %u fib %u\n", format_ip4_address, + &sess->out2in.addr, sess->out2in.port, sess->out2in.fib_index); } else { @@ -132,10 +132,9 @@ format_snat_session (u8 * s, va_list * args) format_nat_protocol, sess->nat_proto, clib_net_to_host_u16 (sess->in2out.port), sess->in2out.fib_index); - s = format (s, " o2i %U proto %U port %d fib %d\n", - format_ip4_address, &sess->out2in.addr, - format_nat_protocol, sess->nat_proto, - clib_net_to_host_u16 (sess->out2in.port), + s = format (s, " o2i %U proto %U port %d fib %d\n", + format_ip4_address, &sess->out2in.addr, format_nat_protocol, + sess->nat_proto, clib_net_to_host_u16 (sess->out2in.port), sess->out2in.fib_index); } if (is_ed_session (sess) || is_fwd_bypass_session (sess)) @@ -155,6 +154,8 @@ format_snat_session (u8 * s, va_list * args) format_ip4_address, &sess->ext_host_addr, clib_net_to_host_u16 (sess->ext_host_port)); } + s = format (s, " i2o flow: %U\n", format_nat_6t_flow, &sess->i2o); + s = format (s, " o2i flow: %U\n", format_nat_6t_flow, &sess->o2i); } s = format (s, " index %llu\n", sess - tsm->sessions); s = format (s, " last heard %.2f\n", sess->last_heard); diff --git a/src/plugins/nat/nat_inlines.h b/src/plugins/nat/nat_inlines.h index 401f1e5747f..3408e533f69 100644 --- a/src/plugins/nat/nat_inlines.h +++ b/src/plugins/nat/nat_inlines.h @@ -29,7 +29,7 @@ calc_nat_key (ip4_address_t addr, u16 port, u32 fib_index, u8 proto) ASSERT (fib_index <= (1 << 14) - 1); ASSERT (proto <= (1 << 3) - 1); return (u64) addr.as_u32 << 32 | (u64) port << 16 | fib_index << 3 | - (proto & 0x7); + (proto & 0x7); } always_inline void @@ -518,15 +518,12 @@ split_ed_kv (clib_bihash_kv_16_8_t * kv, } static_always_inline int -get_icmp_i2o_ed_key (vlib_buffer_t * b, ip4_header_t * ip0, u32 rx_fib_index, - u32 thread_index, u32 session_index, - nat_protocol_t * nat_proto, u16 * l_port, u16 * r_port, - clib_bihash_kv_16_8_t * kv) +nat_get_icmp_session_lookup_values (vlib_buffer_t *b, ip4_header_t *ip0, + ip4_address_t *lookup_saddr, + u16 *lookup_sport, + ip4_address_t *lookup_daddr, + u16 *lookup_dport, u8 *lookup_protocol) { - u8 proto; - u16 _l_port, _r_port; - ip4_address_t *l_addr, *r_addr; - icmp46_header_t *icmp0; icmp_echo_header_t *echo0, *inner_echo0 = 0; ip4_header_t *inner_ip0 = 0; @@ -536,121 +533,43 @@ get_icmp_i2o_ed_key (vlib_buffer_t * b, ip4_header_t * ip0, u32 rx_fib_index, icmp0 = (icmp46_header_t *) ip4_next_header (ip0); echo0 = (icmp_echo_header_t *) (icmp0 + 1); - if (!icmp_type_is_error_message - (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)) - { - proto = IP_PROTOCOL_ICMP; - l_addr = &ip0->src_address; - r_addr = &ip0->dst_address; - _l_port = vnet_buffer (b)->ip.reass.l4_src_port; - _r_port = 0; - } - else - { - inner_ip0 = (ip4_header_t *) (echo0 + 1); - l4_header = ip4_next_header (inner_ip0); - proto = inner_ip0->protocol; - r_addr = &inner_ip0->src_address; - l_addr = &inner_ip0->dst_address; - switch (ip_proto_to_nat_proto (inner_ip0->protocol)) - { - case NAT_PROTOCOL_ICMP: - inner_icmp0 = (icmp46_header_t *) l4_header; - inner_echo0 = (icmp_echo_header_t *) (inner_icmp0 + 1); - _r_port = 0; - _l_port = inner_echo0->identifier; - break; - case NAT_PROTOCOL_UDP: - case NAT_PROTOCOL_TCP: - _l_port = ((tcp_udp_header_t *) l4_header)->dst_port; - _r_port = ((tcp_udp_header_t *) l4_header)->src_port; - break; - default: - return NAT_IN2OUT_ED_ERROR_UNSUPPORTED_PROTOCOL; - } - } - init_ed_kv (kv, *l_addr, _l_port, *r_addr, _r_port, rx_fib_index, proto, - thread_index, session_index); - if (nat_proto) - { - *nat_proto = ip_proto_to_nat_proto (proto); - } - if (l_port) - { - *l_port = _l_port; - } - if (r_port) - { - *r_port = _r_port; - } - return 0; -} - -static_always_inline int -get_icmp_o2i_ed_key (vlib_buffer_t * b, ip4_header_t * ip0, u32 rx_fib_index, - u32 thread_index, u32 session_index, - nat_protocol_t * nat_proto, u16 * l_port, u16 * r_port, - clib_bihash_kv_16_8_t * kv) -{ - icmp46_header_t *icmp0; - u8 proto; - ip4_address_t *l_addr, *r_addr; - u16 _l_port, _r_port; - icmp_echo_header_t *echo0, *inner_echo0 = 0; - ip4_header_t *inner_ip0; - void *l4_header = 0; - icmp46_header_t *inner_icmp0; - - icmp0 = (icmp46_header_t *) ip4_next_header (ip0); - echo0 = (icmp_echo_header_t *) (icmp0 + 1); + // avoid warning about unused variables in caller by setting to bogus values + *lookup_sport = 0; + *lookup_dport = 0; if (!icmp_type_is_error_message (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)) { - proto = IP_PROTOCOL_ICMP; - l_addr = &ip0->dst_address; - r_addr = &ip0->src_address; - _l_port = vnet_buffer (b)->ip.reass.l4_src_port; - _r_port = 0; + *lookup_protocol = IP_PROTOCOL_ICMP; + lookup_saddr->as_u32 = ip0->src_address.as_u32; + *lookup_sport = vnet_buffer (b)->ip.reass.l4_src_port; + lookup_daddr->as_u32 = ip0->dst_address.as_u32; + *lookup_dport = vnet_buffer (b)->ip.reass.l4_dst_port; } else { inner_ip0 = (ip4_header_t *) (echo0 + 1); l4_header = ip4_next_header (inner_ip0); - proto = inner_ip0->protocol; - l_addr = &inner_ip0->src_address; - r_addr = &inner_ip0->dst_address; + *lookup_protocol = inner_ip0->protocol; + lookup_saddr->as_u32 = inner_ip0->dst_address.as_u32; + lookup_daddr->as_u32 = inner_ip0->src_address.as_u32; switch (ip_proto_to_nat_proto (inner_ip0->protocol)) { case NAT_PROTOCOL_ICMP: inner_icmp0 = (icmp46_header_t *) l4_header; inner_echo0 = (icmp_echo_header_t *) (inner_icmp0 + 1); - _l_port = inner_echo0->identifier; - _r_port = 0; + *lookup_sport = inner_echo0->identifier; + *lookup_dport = inner_echo0->identifier; break; case NAT_PROTOCOL_UDP: case NAT_PROTOCOL_TCP: - _l_port = ((tcp_udp_header_t *) l4_header)->src_port; - _r_port = ((tcp_udp_header_t *) l4_header)->dst_port; + *lookup_sport = ((tcp_udp_header_t *) l4_header)->dst_port; + *lookup_dport = ((tcp_udp_header_t *) l4_header)->src_port; break; default: - return -1; + return NAT_IN2OUT_ED_ERROR_UNSUPPORTED_PROTOCOL; } } - init_ed_kv (kv, *l_addr, _l_port, *r_addr, _r_port, rx_fib_index, proto, - thread_index, session_index); - if (nat_proto) - { - *nat_proto = ip_proto_to_nat_proto (proto); - } - if (l_port) - { - *l_port = _l_port; - } - if (r_port) - { - *r_port = _r_port; - } return 0; } diff --git a/src/plugins/nat/out2in_ed.c b/src/plugins/nat/out2in_ed.c index beb259eee33..d6beadc61bc 100644 --- a/src/plugins/nat/out2in_ed.c +++ b/src/plugins/nat/out2in_ed.c @@ -42,14 +42,15 @@ typedef struct u32 sw_if_index; u32 next_index; u32 session_index; - u32 is_slow_path; + nat_translation_error_e translation_error; + nat_6t_flow_t i2of; + nat_6t_flow_t o2if; + clib_bihash_kv_16_8_t search_key; + u8 is_slow_path; + u8 translation_via_i2of; + u8 lookup_skipped; } nat44_ed_out2in_trace_t; -typedef struct -{ - u16 thread_next; -} nat44_ed_out2in_handoff_trace_t; - static u8 * format_nat44_ed_out2in_trace (u8 * s, va_list * args) { @@ -64,130 +65,174 @@ format_nat44_ed_out2in_trace (u8 * s, va_list * args) s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag, t->sw_if_index, t->next_index, t->session_index); + if (~0 != t->session_index) + { + s = format (s, ", translation result '%U' via %s", + format_nat_ed_translation_error, t->translation_error, + t->translation_via_i2of ? "i2of" : "o2if"); + s = format (s, "\n i2of %U", format_nat_6t_flow, &t->i2of); + s = format (s, "\n o2if %U", format_nat_6t_flow, &t->o2if); + } + if (!t->is_slow_path) + { + if (t->lookup_skipped) + { + s = format (s, "\n lookup skipped - cached session index used"); + } + else + { + s = format (s, "\n search key %U", format_ed_session_kvp, + &t->search_key); + } + } return s; } +static int +next_src_nat (snat_main_t *sm, ip4_header_t *ip, u16 src_port, u16 dst_port, + u32 thread_index, u32 rx_fib_index) +{ + clib_bihash_kv_16_8_t kv, value; + + init_ed_k (&kv, ip->src_address, src_port, ip->dst_address, dst_port, + rx_fib_index, ip->protocol); + if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value)) + return 1; + + return 0; +} + +static void create_bypass_for_fwd (snat_main_t *sm, vlib_buffer_t *b, + snat_session_t *s, ip4_header_t *ip, + u32 rx_fib_index, u32 thread_index); + +static snat_session_t *create_session_for_static_mapping_ed ( + snat_main_t *sm, vlib_buffer_t *b, ip4_address_t i2o_addr, u16 i2o_port, + u32 i2o_fib_index, ip4_address_t o2i_addr, u16 o2i_port, u32 o2i_fib_index, + nat_protocol_t nat_proto, vlib_node_runtime_t *node, u32 rx_fib_index, + u32 thread_index, twice_nat_type_t twice_nat, lb_nat_type_t lb_nat, f64 now, + snat_static_mapping_t *mapping); + static inline u32 -icmp_out2in_ed_slow_path (snat_main_t * sm, vlib_buffer_t * b0, - ip4_header_t * ip0, icmp46_header_t * icmp0, - u32 sw_if_index0, u32 rx_fib_index0, - vlib_node_runtime_t * node, u32 next0, f64 now, - u32 thread_index, snat_session_t ** p_s0) +icmp_out2in_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip, + icmp46_header_t *icmp, u32 sw_if_index, + u32 rx_fib_index, vlib_node_runtime_t *node, + u32 next, f64 now, u32 thread_index, + snat_session_t **s_p) { vlib_main_t *vm = vlib_get_main (); - next0 = icmp_out2in (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, - next0, thread_index, p_s0, 0); - snat_session_t *s0 = *p_s0; - if (PREDICT_TRUE (next0 != NAT_NEXT_DROP && s0)) + ip_csum_t sum; + u16 checksum; + + snat_session_t *s = 0; + u8 is_addr_only, identity_nat; + ip4_address_t sm_addr; + u16 sm_port; + u32 sm_fib_index; + snat_static_mapping_t *m; + u8 lookup_protocol; + ip4_address_t lookup_saddr, lookup_daddr; + u16 lookup_sport, lookup_dport; + + sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX]; + rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index); + + if (nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr, &lookup_sport, + &lookup_daddr, &lookup_dport, + &lookup_protocol)) { - /* Accounting */ - nat44_session_update_counters (s0, now, - vlib_buffer_length_in_chain - (vm, b0), thread_index); - /* Per-user LRU list maintenance */ - nat44_session_update_lru (sm, s0, thread_index); + b->error = node->errors[NAT_OUT2IN_ED_ERROR_UNSUPPORTED_PROTOCOL]; + next = NAT_NEXT_DROP; + goto out; } - return next0; -} - -#ifndef CLIB_MARCH_VARIANT -int -nat44_o2i_ed_is_idle_session_cb (clib_bihash_kv_16_8_t * kv, void *arg) -{ - snat_main_t *sm = &snat_main; - nat44_is_idle_session_ctx_t *ctx = arg; - snat_session_t *s; - u64 sess_timeout_time; - u8 proto; - u16 r_port, l_port; - ip4_address_t *l_addr, *r_addr; - u32 fib_index; - clib_bihash_kv_16_8_t ed_kv; - int i; - //snat_address_t *a; - snat_main_per_thread_data_t *tsm = vec_elt_at_index (sm->per_thread_data, - ctx->thread_index); - s = pool_elt_at_index (tsm->sessions, kv->value); - sess_timeout_time = s->last_heard + (f64) nat44_session_get_timeout (sm, s); - if (ctx->now >= sess_timeout_time) + if (snat_static_mapping_match ( + sm, ip->dst_address, lookup_sport, rx_fib_index, + ip_proto_to_nat_proto (ip->protocol), &sm_addr, &sm_port, + &sm_fib_index, 1, &is_addr_only, 0, 0, 0, &identity_nat, &m)) { - l_addr = &s->in2out.addr; - r_addr = &s->ext_host_addr; - fib_index = s->in2out.fib_index; - if (snat_is_unk_proto_session (s)) + // static mapping not matched + if (!sm->forwarding_enabled) { - proto = s->in2out.port; - r_port = 0; - l_port = 0; + /* Don't NAT packet aimed at the intfc address */ + if (!is_interface_addr (sm, node, sw_if_index, + ip->dst_address.as_u32)) + { + b->error = node->errors[NAT_OUT2IN_ED_ERROR_NO_TRANSLATION]; + next = NAT_NEXT_DROP; + } } else { - proto = nat_proto_to_ip_proto (s->nat_proto); - l_port = s->in2out.port; - r_port = s->ext_host_port; - } - if (is_twice_nat_session (s)) - { - r_addr = &s->ext_host_nat_addr; - r_port = s->ext_host_nat_port; - } - init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto); - if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &ed_kv, 0)) - nat_elog_warn ("in2out_ed key del failed"); - - if (snat_is_unk_proto_session (s)) - goto delete; - - nat_ipfix_logging_nat44_ses_delete (ctx->thread_index, - s->in2out.addr.as_u32, - s->out2in.addr.as_u32, - s->nat_proto, - s->in2out.port, - s->out2in.port, - s->in2out.fib_index); - - nat_syslog_nat44_sdel (s->user_index, s->in2out.fib_index, - &s->in2out.addr, s->in2out.port, - &s->ext_host_nat_addr, s->ext_host_nat_port, - &s->out2in.addr, s->out2in.port, - &s->ext_host_addr, s->ext_host_port, - s->nat_proto, is_twice_nat_session (s)); - - if (is_twice_nat_session (s)) - { - for (i = 0; i < vec_len (sm->twice_nat_addresses); i++) + if (next_src_nat (sm, ip, lookup_sport, lookup_dport, thread_index, + rx_fib_index)) { - // FIXME TODO this is obviously wrong code ... needs fix! - // key.protocol = s->nat_proto; - // key.port = s->ext_host_nat_port; - // a = sm->twice_nat_addresses + i; - // if (a->addr.as_u32 == s->ext_host_nat_addr.as_u32) - // { - // snat_free_outside_address_and_port (sm->twice_nat_addresses, - // ctx->thread_index, - // &key); - // break; - // } + next = NAT_NEXT_IN2OUT_ED_FAST_PATH; + } + else + { + create_bypass_for_fwd (sm, b, s, ip, rx_fib_index, thread_index); } } + goto out; + } - if (snat_is_session_static (s)) - goto delete; + if (PREDICT_FALSE (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_reply && + (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_request || + !is_addr_only))) + { + b->error = node->errors[NAT_OUT2IN_ED_ERROR_BAD_ICMP_TYPE]; + next = NAT_NEXT_DROP; + goto out; + } - snat_free_outside_address_and_port (sm->addresses, ctx->thread_index, - &s->out2in.addr, s->out2in.port, - s->nat_proto); - delete: - nat_ed_session_delete (sm, s, ctx->thread_index, 1); - return 1; + if (PREDICT_FALSE (identity_nat)) + { + goto out; } - return 0; + /* Create session initiated by host from external network */ + s = create_session_for_static_mapping_ed ( + sm, b, sm_addr, sm_port, sm_fib_index, ip->dst_address, lookup_sport, + rx_fib_index, ip_proto_to_nat_proto (lookup_protocol), node, rx_fib_index, + thread_index, 0, 0, vlib_time_now (vm), m); + if (!s) + next = NAT_NEXT_DROP; + + if (PREDICT_TRUE (!ip4_is_fragment (ip))) + { + sum = ip_incremental_checksum_buffer ( + vm, b, (u8 *) icmp - (u8 *) vlib_buffer_get_current (b), + ntohs (ip->length) - ip4_header_bytes (ip), 0); + checksum = ~ip_csum_fold (sum); + if (checksum != 0 && checksum != 0xffff) + { + next = NAT_NEXT_DROP; + goto out; + } + } + + if (PREDICT_TRUE (next != NAT_NEXT_DROP && s)) + { + /* Accounting */ + nat44_session_update_counters ( + s, now, vlib_buffer_length_in_chain (vm, b), thread_index); + /* Per-user LRU list maintenance */ + nat44_session_update_lru (sm, s, thread_index); + } +out: + if (NAT_NEXT_DROP == next && s) + { + nat_ed_session_delete (sm, s, thread_index, 1); + s = 0; + } + *s_p = s; + return next; } -#endif // allocate exact address based on preference static_always_inline int @@ -317,28 +362,17 @@ nat44_ed_alloc_outside_addr_and_port (snat_address_t *addresses, u32 fib_index, } static snat_session_t * -create_session_for_static_mapping_ed (snat_main_t * sm, - vlib_buffer_t * b, - ip4_address_t i2o_addr, - u16 i2o_port, - u32 i2o_fib_index, - ip4_address_t o2i_addr, - u16 o2i_port, - u32 o2i_fib_index, - nat_protocol_t nat_proto, - vlib_node_runtime_t * node, - u32 rx_fib_index, - u32 thread_index, - twice_nat_type_t twice_nat, - lb_nat_type_t lb_nat, f64 now, - snat_static_mapping_t * mapping) +create_session_for_static_mapping_ed ( + snat_main_t *sm, vlib_buffer_t *b, ip4_address_t i2o_addr, u16 i2o_port, + u32 i2o_fib_index, ip4_address_t o2i_addr, u16 o2i_port, u32 o2i_fib_index, + nat_protocol_t nat_proto, vlib_node_runtime_t *node, u32 rx_fib_index, + u32 thread_index, twice_nat_type_t twice_nat, lb_nat_type_t lb_nat, f64 now, + snat_static_mapping_t *mapping) { snat_session_t *s; ip4_header_t *ip; udp_header_t *udp; snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; - clib_bihash_kv_16_8_t kv; - nat44_is_idle_session_ctx_t ctx; if (PREDICT_FALSE (nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index))) @@ -375,15 +409,29 @@ create_session_for_static_mapping_ed (snat_main_t * sm, s->in2out.fib_index = i2o_fib_index; s->nat_proto = nat_proto; - /* Add to lookup tables */ - init_ed_kv (&kv, o2i_addr, o2i_port, s->ext_host_addr, s->ext_host_port, - o2i_fib_index, ip->protocol, thread_index, s - tsm->sessions); - ctx.now = now; - ctx.thread_index = thread_index; - if (clib_bihash_add_or_overwrite_stale_16_8 (&sm->out2in_ed, &kv, - nat44_o2i_ed_is_idle_session_cb, - &ctx)) - nat_elog_notice ("out2in-ed key add failed"); + if (NAT_PROTOCOL_ICMP == nat_proto) + { + nat_6t_o2i_flow_init (sm, thread_index, s, s->ext_host_addr, o2i_port, + o2i_addr, o2i_port, o2i_fib_index, ip->protocol); + nat_6t_flow_icmp_id_rewrite_set (&s->o2i, i2o_port); + } + else + { + nat_6t_o2i_flow_init (sm, thread_index, s, s->ext_host_addr, + s->ext_host_port, o2i_addr, o2i_port, + o2i_fib_index, ip->protocol); + nat_6t_flow_dport_rewrite_set (&s->o2i, i2o_port); + } + nat_6t_flow_daddr_rewrite_set (&s->o2i, i2o_addr.as_u32); + nat_6t_flow_txfib_rewrite_set (&s->o2i, i2o_fib_index); + + if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 1)) + { + b->error = node->errors[NAT_OUT2IN_ED_ERROR_HASH_ADD_FAILED]; + nat_ed_session_delete (sm, s, thread_index, 1); + nat_elog_warn ("out2in flow hash add failed"); + return 0; + } if (twice_nat == TWICE_NAT || (twice_nat == TWICE_NAT_SELF && ip->src_address.as_u32 == i2o_addr.as_u32)) @@ -427,27 +475,80 @@ create_session_for_static_mapping_ed (snat_main_t * sm, if (rc) { b->error = node->errors[NAT_OUT2IN_ED_ERROR_OUT_OF_PORTS]; + if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 0)) + { + nat_elog_warn ("out2in flow hash del failed"); + } + snat_free_outside_address_and_port ( + sm->twice_nat_addresses, thread_index, &s->ext_host_nat_addr, + s->ext_host_nat_port, s->nat_proto); nat_ed_session_delete (sm, s, thread_index, 1); - if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &kv, 0)) - nat_elog_notice ("out2in-ed key del failed"); return 0; } s->flags |= SNAT_SESSION_FLAG_TWICE_NAT; - init_ed_kv (&kv, i2o_addr, i2o_port, s->ext_host_nat_addr, - s->ext_host_nat_port, i2o_fib_index, ip->protocol, - thread_index, s - tsm->sessions); + + nat_6t_flow_saddr_rewrite_set (&s->o2i, s->ext_host_nat_addr.as_u32); + if (NAT_PROTOCOL_ICMP == nat_proto) + { + nat_6t_flow_icmp_id_rewrite_set (&s->o2i, s->ext_host_nat_port); + } + else + { + nat_6t_flow_sport_rewrite_set (&s->o2i, s->ext_host_nat_port); + } + + nat_6t_l3_l4_csum_calc (&s->o2i); + + nat_6t_i2o_flow_init (sm, thread_index, s, i2o_addr, i2o_port, + s->ext_host_nat_addr, s->ext_host_nat_port, + i2o_fib_index, ip->protocol); + nat_6t_flow_daddr_rewrite_set (&s->i2o, s->ext_host_addr.as_u32); + if (NAT_PROTOCOL_ICMP == nat_proto) + { + nat_6t_flow_icmp_id_rewrite_set (&s->i2o, s->ext_host_port); + } + else + { + nat_6t_flow_dport_rewrite_set (&s->i2o, s->ext_host_port); + } + } + else + { + if (NAT_PROTOCOL_ICMP == nat_proto) + { + nat_6t_i2o_flow_init (sm, thread_index, s, i2o_addr, i2o_port, + s->ext_host_addr, i2o_port, i2o_fib_index, + ip->protocol); + } + else + { + nat_6t_i2o_flow_init (sm, thread_index, s, i2o_addr, i2o_port, + s->ext_host_addr, s->ext_host_port, + i2o_fib_index, ip->protocol); + } + } + + nat_6t_flow_saddr_rewrite_set (&s->i2o, o2i_addr.as_u32); + if (NAT_PROTOCOL_ICMP == nat_proto) + { + nat_6t_flow_icmp_id_rewrite_set (&s->i2o, o2i_port); } else { - init_ed_kv (&kv, i2o_addr, i2o_port, s->ext_host_addr, - s->ext_host_port, i2o_fib_index, ip->protocol, - thread_index, s - tsm->sessions); + nat_6t_flow_sport_rewrite_set (&s->i2o, o2i_port); + } + + if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1)) + { + nat_elog_notice ("in2out flow hash add failed"); + if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 0)) + { + nat_elog_warn ("out2in flow hash del failed"); + } + nat_ed_session_delete (sm, s, thread_index, 1); + return 0; } - if (clib_bihash_add_or_overwrite_stale_16_8 (&tsm->in2out_ed, &kv, - nat44_i2o_ed_is_idle_session_cb, - &ctx)) - nat_elog_notice ("in2out-ed key add failed"); nat_ipfix_logging_nat44_ses_create (thread_index, s->in2out.addr.as_u32, @@ -468,37 +569,24 @@ create_session_for_static_mapping_ed (snat_main_t * sm, return s; } -static int -next_src_nat (snat_main_t * sm, ip4_header_t * ip, u16 src_port, - u16 dst_port, u32 thread_index, u32 rx_fib_index) -{ - clib_bihash_kv_16_8_t kv, value; - snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; - - init_ed_k (&kv, ip->src_address, src_port, ip->dst_address, dst_port, - rx_fib_index, ip->protocol); - if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value)) - return 1; - - return 0; -} - static void -create_bypass_for_fwd (snat_main_t * sm, vlib_buffer_t * b, ip4_header_t * ip, - u32 rx_fib_index, u32 thread_index) +create_bypass_for_fwd (snat_main_t *sm, vlib_buffer_t *b, snat_session_t *s, + ip4_header_t *ip, u32 rx_fib_index, u32 thread_index) { clib_bihash_kv_16_8_t kv, value; udp_header_t *udp; - snat_session_t *s = 0; snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; vlib_main_t *vm = vlib_get_main (); f64 now = vlib_time_now (vm); - u16 l_port, r_port; + u16 lookup_sport, lookup_dport; + u8 lookup_protocol; + ip4_address_t lookup_saddr, lookup_daddr; if (ip->protocol == IP_PROTOCOL_ICMP) { - if (get_icmp_o2i_ed_key - (b, ip, rx_fib_index, ~0, ~0, 0, &l_port, &r_port, &kv)) + if (nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr, + &lookup_sport, &lookup_daddr, + &lookup_dport, &lookup_protocol)) return; } else @@ -506,19 +594,23 @@ create_bypass_for_fwd (snat_main_t * sm, vlib_buffer_t * b, ip4_header_t * ip, if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP) { udp = ip4_next_header (ip); - l_port = udp->dst_port; - r_port = udp->src_port; + lookup_sport = udp->dst_port; + lookup_dport = udp->src_port; } else { - l_port = 0; - r_port = 0; + lookup_sport = 0; + lookup_dport = 0; } - init_ed_k (&kv, ip->dst_address, l_port, ip->src_address, r_port, - rx_fib_index, ip->protocol); + lookup_saddr.as_u32 = ip->dst_address.as_u32; + lookup_daddr.as_u32 = ip->src_address.as_u32; + lookup_protocol = ip->protocol; } - if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value)) + init_ed_k (&kv, lookup_saddr, lookup_sport, lookup_daddr, lookup_dport, + rx_fib_index, lookup_protocol); + + if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value)) { ASSERT (thread_index == ed_value_get_thread_index (&value)); s = @@ -550,10 +642,10 @@ create_bypass_for_fwd (snat_main_t * sm, vlib_buffer_t * b, ip4_header_t * ip, proto = ip_proto_to_nat_proto (ip->protocol); s->ext_host_addr = ip->src_address; - s->ext_host_port = r_port; + s->ext_host_port = lookup_dport; s->flags |= SNAT_SESSION_FLAG_FWD_BYPASS; s->out2in.addr = ip->dst_address; - s->out2in.port = l_port; + s->out2in.port = lookup_sport; s->nat_proto = proto; if (proto == NAT_PROTOCOL_OTHER) { @@ -565,9 +657,16 @@ create_bypass_for_fwd (snat_main_t * sm, vlib_buffer_t * b, ip4_header_t * ip, s->in2out.port = s->out2in.port; s->in2out.fib_index = s->out2in.fib_index; - kv.value = s - tsm->sessions; - if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &kv, 1)) - nat_elog_notice ("in2out_ed key add failed"); + nat_6t_i2o_flow_init (sm, thread_index, s, ip->dst_address, lookup_sport, + ip->src_address, lookup_dport, rx_fib_index, + ip->protocol); + nat_6t_flow_txfib_rewrite_set (&s->i2o, rx_fib_index); + if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1)) + { + nat_elog_notice ("in2out flow add failed"); + nat_ed_session_delete (sm, s, thread_index, 1); + return; + } per_vrf_sessions_register_session (s, thread_index); } @@ -586,261 +685,75 @@ create_bypass_for_fwd (snat_main_t * sm, vlib_buffer_t * b, ip4_header_t * ip, nat44_session_update_lru (sm, s, thread_index); } -static_always_inline int -create_bypass_for_fwd_worker (snat_main_t * sm, - vlib_buffer_t * b, ip4_header_t * ip, - u32 rx_fib_index, u32 thread_index) -{ - ip4_header_t tmp = { - .src_address = ip->dst_address, - }; - u32 index = sm->worker_in2out_cb (&tmp, rx_fib_index, 0); - - if (index != thread_index) - { - vnet_buffer2 (b)->nat.thread_next = index; - return 1; - } - - create_bypass_for_fwd (sm, b, ip, rx_fib_index, thread_index); - return 0; -} - -#ifndef CLIB_MARCH_VARIANT -u32 -icmp_match_out2in_ed (snat_main_t * sm, vlib_node_runtime_t * node, - u32 thread_index, vlib_buffer_t * b, - ip4_header_t * ip, ip4_address_t * addr, - u16 * port, u32 * fib_index, nat_protocol_t * proto, - void *d, void *e, u8 * dont_translate) +static snat_session_t * +nat44_ed_out2in_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b, + ip4_header_t *ip, u32 rx_fib_index, + u32 thread_index, f64 now, + vlib_main_t *vm, + vlib_node_runtime_t *node) { - u32 next = ~0, sw_if_index, rx_fib_index; - clib_bihash_kv_16_8_t kv, value; - snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; - snat_session_t *s = 0; - u8 is_addr_only, identity_nat; - u16 l_port, r_port; - vlib_main_t *vm = vlib_get_main (); - ip4_address_t sm_addr; - u16 sm_port; - u32 sm_fib_index; - *dont_translate = 0; + clib_bihash_kv_8_8_t kv, value; snat_static_mapping_t *m; + snat_session_t *s; - sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX]; - rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index); - - if (get_icmp_o2i_ed_key - (b, ip, rx_fib_index, ~0, ~0, proto, &l_port, &r_port, &kv)) + if (PREDICT_FALSE ( + nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index))) { - b->error = node->errors[NAT_OUT2IN_ED_ERROR_UNSUPPORTED_PROTOCOL]; - next = NAT_NEXT_DROP; - goto out; + b->error = node->errors[NAT_OUT2IN_ED_ERROR_MAX_SESSIONS_EXCEEDED]; + nat_elog_notice ("maximum sessions exceeded"); + return 0; } - if (clib_bihash_search_16_8 (&sm->out2in_ed, &kv, &value)) + init_nat_k (&kv, ip->dst_address, 0, 0, 0); + if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value)) { - if (snat_static_mapping_match - (sm, ip->dst_address, l_port, rx_fib_index, - ip_proto_to_nat_proto (ip->protocol), &sm_addr, &sm_port, - &sm_fib_index, 1, &is_addr_only, 0, 0, 0, &identity_nat, &m)) - { - // static mapping not matched - if (!sm->forwarding_enabled) - { - /* Don't NAT packet aimed at the intfc address */ - if (PREDICT_FALSE (is_interface_addr (sm, node, sw_if_index, - ip->dst_address.as_u32))) - { - *dont_translate = 1; - } - else - { - b->error = node->errors[NAT_OUT2IN_ED_ERROR_NO_TRANSLATION]; - next = NAT_NEXT_DROP; - } - } - else - { - *dont_translate = 1; - if (next_src_nat (sm, ip, l_port, r_port, - thread_index, rx_fib_index)) - { - next = NAT_NEXT_IN2OUT_ED_FAST_PATH; - } - else - { - if (sm->num_workers > 1) - { - if (create_bypass_for_fwd_worker (sm, b, ip, - rx_fib_index, - thread_index)) - { - next = NAT_NEXT_OUT2IN_ED_HANDOFF; - } - } - else - { - create_bypass_for_fwd (sm, b, ip, rx_fib_index, - thread_index); - } - } - } - goto out; - } - - if (PREDICT_FALSE - (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags != - ICMP4_echo_reply - && (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags != - ICMP4_echo_request || !is_addr_only))) - { - b->error = node->errors[NAT_OUT2IN_ED_ERROR_BAD_ICMP_TYPE]; - next = NAT_NEXT_DROP; - goto out; - } - - if (PREDICT_FALSE (identity_nat)) - { - *dont_translate = 1; - goto out; - } - - /* Create session initiated by host from external network */ - s = - create_session_for_static_mapping_ed (sm, b, sm_addr, sm_port, - sm_fib_index, ip->dst_address, - l_port, rx_fib_index, *proto, - node, rx_fib_index, - thread_index, 0, 0, - vlib_time_now (vm), m); - if (!s) - next = NAT_NEXT_DROP; + b->error = node->errors[NAT_OUT2IN_ED_ERROR_NO_TRANSLATION]; + return 0; } - else - { - if (PREDICT_FALSE - (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags != - ICMP4_echo_reply - && vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags != - ICMP4_echo_request - && !icmp_type_is_error_message (vnet_buffer (b)->ip. - reass.icmp_type_or_tcp_flags))) - { - b->error = node->errors[NAT_OUT2IN_ED_ERROR_BAD_ICMP_TYPE]; - next = NAT_NEXT_DROP; - goto out; - } - ASSERT (thread_index == ed_value_get_thread_index (&value)); - s = - pool_elt_at_index (tsm->sessions, - ed_value_get_session_index (&value)); - } -out: - if (s) + m = pool_elt_at_index (sm->static_mappings, value.value); + + /* Create a new session */ + s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol); + if (!s) { - *addr = s->in2out.addr; - *port = s->in2out.port; - *fib_index = s->in2out.fib_index; + b->error = node->errors[NAT_OUT2IN_ED_ERROR_MAX_USER_SESS_EXCEEDED]; + nat_elog_warn ("create NAT session failed"); + return 0; } - if (d) - *(snat_session_t **) d = s; - return next; -} -#endif - -static snat_session_t * -nat44_ed_out2in_unknown_proto (snat_main_t * sm, - vlib_buffer_t * b, - ip4_header_t * ip, - u32 rx_fib_index, - u32 thread_index, - f64 now, - vlib_main_t * vm, vlib_node_runtime_t * node) -{ - clib_bihash_kv_8_8_t kv, value; - clib_bihash_kv_16_8_t s_kv, s_value; - snat_static_mapping_t *m; - u32 old_addr, new_addr; - ip_csum_t sum; - snat_session_t *s; - snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; - old_addr = ip->dst_address.as_u32; - - init_ed_k (&s_kv, ip->dst_address, 0, ip->src_address, 0, rx_fib_index, - ip->protocol); - - if (!clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value)) + s->ext_host_addr.as_u32 = ip->src_address.as_u32; + s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO; + s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING; + s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT; + s->out2in.addr.as_u32 = ip->dst_address.as_u32; + s->out2in.fib_index = rx_fib_index; + s->in2out.addr.as_u32 = m->local_addr.as_u32; + s->in2out.fib_index = m->fib_index; + s->in2out.port = s->out2in.port = ip->protocol; + + nat_6t_o2i_flow_init (sm, thread_index, s, ip->dst_address, 0, + ip->src_address, 0, m->fib_index, ip->protocol); + nat_6t_flow_saddr_rewrite_set (&s->i2o, ip->dst_address.as_u32); + if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1)) { - ASSERT (thread_index == ed_value_get_thread_index (&s_value)); - s = - pool_elt_at_index (tsm->sessions, - ed_value_get_session_index (&s_value)); - new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32; + nat_elog_notice ("in2out key add failed"); + nat_ed_session_delete (sm, s, thread_index, 1); + return NULL; } - else - { - if (PREDICT_FALSE - (nat44_ed_maximum_sessions_exceeded - (sm, rx_fib_index, thread_index))) - { - b->error = node->errors[NAT_OUT2IN_ED_ERROR_MAX_SESSIONS_EXCEEDED]; - nat_elog_notice ("maximum sessions exceeded"); - return 0; - } - - init_nat_k (&kv, ip->dst_address, 0, 0, 0); - if (clib_bihash_search_8_8 - (&sm->static_mapping_by_external, &kv, &value)) - { - b->error = node->errors[NAT_OUT2IN_ED_ERROR_NO_TRANSLATION]; - return 0; - } - - m = pool_elt_at_index (sm->static_mappings, value.value); - - new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32; - /* Create a new session */ - s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol); - if (!s) - { - b->error = node->errors[NAT_OUT2IN_ED_ERROR_MAX_USER_SESS_EXCEEDED]; - nat_elog_warn ("create NAT session failed"); - return 0; - } - - s->ext_host_addr.as_u32 = ip->src_address.as_u32; - s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO; - s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING; - s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT; - s->out2in.addr.as_u32 = old_addr; - s->out2in.fib_index = rx_fib_index; - s->in2out.addr.as_u32 = new_addr; - s->in2out.fib_index = m->fib_index; - s->in2out.port = s->out2in.port = ip->protocol; - - /* Add to lookup tables */ - s_kv.value = s - tsm->sessions; - if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1)) - nat_elog_notice ("out2in key add failed"); - - init_ed_kv (&s_kv, ip->dst_address, 0, ip->src_address, 0, m->fib_index, - ip->protocol, thread_index, s - tsm->sessions); - if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &s_kv, 1)) - nat_elog_notice ("in2out key add failed"); - - per_vrf_sessions_register_session (s, thread_index); + nat_6t_o2i_flow_init (sm, thread_index, s, ip->src_address, 0, + ip->dst_address, 0, rx_fib_index, ip->protocol); + nat_6t_flow_daddr_rewrite_set (&s->o2i, m->local_addr.as_u32); + nat_6t_flow_txfib_rewrite_set (&s->o2i, m->fib_index); + if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 1)) + { + nat_elog_notice ("out2in flow hash add failed"); + nat_ed_session_delete (sm, s, thread_index, 1); + return NULL; } - /* Update IP checksum */ - sum = ip->checksum; - sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address); - ip->checksum = ip_csum_fold (sum); - - vnet_buffer (b)->sw_if_index[VLIB_TX] = s->in2out.fib_index; + per_vrf_sessions_register_session (s, thread_index); /* Accounting */ nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b), @@ -873,18 +786,24 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm, while (n_left_from > 0) { vlib_buffer_t *b0; - u32 sw_if_index0, rx_fib_index0, proto0, old_addr0, new_addr0; - u16 old_port0, new_port0; + u32 sw_if_index0, rx_fib_index0; + nat_protocol_t proto0; ip4_header_t *ip0; - udp_header_t *udp0; - tcp_header_t *tcp0; snat_session_t *s0 = 0; clib_bihash_kv_16_8_t kv0, value0; - ip_csum_t sum0; + nat_translation_error_e translation_error = NAT_ED_TRNSL_ERR_SUCCESS; + nat_6t_flow_t *f = 0; + ip4_address_t lookup_saddr, lookup_daddr; + u16 lookup_sport, lookup_dport; + u8 lookup_protocol; + int lookup_skipped = 0; b0 = *b; b++; + lookup_sport = vnet_buffer (b0)->ip.reass.l4_src_port; + lookup_dport = vnet_buffer (b0)->ip.reass.l4_dst_port; + /* Prefetch next iteration. */ if (PREDICT_TRUE (n_left_from >= 2)) { @@ -916,56 +835,68 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm, goto trace0; } - udp0 = ip4_next_header (ip0); - tcp0 = (tcp_header_t *) udp0; proto0 = ip_proto_to_nat_proto (ip0->protocol); - if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER)) + if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP)) { - next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH; - goto trace0; + if (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_request && + vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_reply && + !icmp_type_is_error_message ( + vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags)) + { + b0->error = node->errors[NAT_OUT2IN_ED_ERROR_BAD_ICMP_TYPE]; + next[0] = NAT_NEXT_DROP; + goto trace0; + } + int err = nat_get_icmp_session_lookup_values ( + b0, ip0, &lookup_saddr, &lookup_sport, &lookup_daddr, + &lookup_dport, &lookup_protocol); + if (err != 0) + { + b0->error = node->errors[err]; + next[0] = NAT_NEXT_DROP; + goto trace0; + } } - - if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP)) + else { - next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH; - goto trace0; + lookup_saddr.as_u32 = ip0->src_address.as_u32; + lookup_daddr.as_u32 = ip0->dst_address.as_u32; + lookup_protocol = ip0->protocol; } - init_ed_k (&kv0, ip0->dst_address, - vnet_buffer (b0)->ip.reass.l4_dst_port, ip0->src_address, - vnet_buffer (b0)->ip.reass.l4_src_port, rx_fib_index0, - ip0->protocol); - - /* there is a stashed index in vnet_buffer2 from handoff node, - * see if we can use it */ - if (is_multi_worker - && - PREDICT_TRUE (!pool_is_free_index - (tsm->sessions, - vnet_buffer2 (b0)->nat.ed_out2in_nat_session_index))) + /* there might be a stashed index in vnet_buffer2 from handoff or + * classify node, see if it can be used */ + if (!pool_is_free_index (tsm->sessions, + vnet_buffer2 (b0)->nat.cached_session_index)) { s0 = pool_elt_at_index (tsm->sessions, - vnet_buffer2 (b0)-> - nat.ed_out2in_nat_session_index); - if (PREDICT_TRUE - (s0->out2in.addr.as_u32 == ip0->dst_address.as_u32 - && s0->out2in.port == vnet_buffer (b0)->ip.reass.l4_dst_port - && s0->nat_proto == ip_proto_to_nat_proto (ip0->protocol) - && s0->out2in.fib_index == rx_fib_index0 - && s0->ext_host_addr.as_u32 == ip0->src_address.as_u32 - && s0->ext_host_port == - vnet_buffer (b0)->ip.reass.l4_src_port)) + vnet_buffer2 (b0)->nat.cached_session_index); + if (PREDICT_TRUE ( + nat_6t_flow_match (&s0->o2i, b0, lookup_saddr, lookup_sport, + lookup_daddr, lookup_dport, lookup_protocol, + rx_fib_index0) || + (s0->flags & SNAT_SESSION_FLAG_TWICE_NAT && + nat_6t_flow_match (&s0->i2o, b0, lookup_saddr, lookup_sport, + lookup_daddr, lookup_dport, + lookup_protocol, rx_fib_index0)))) { /* yes, this is the droid we're looking for */ + lookup_skipped = 1; goto skip_lookup; } + s0 = NULL; } - // lookup for session - if (clib_bihash_search_16_8 (&sm->out2in_ed, &kv0, &value0)) + init_ed_k (&kv0, lookup_saddr, lookup_sport, lookup_daddr, lookup_dport, + rx_fib_index0, lookup_protocol); + + // lookup flow + if (clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0)) { - // session does not exist go slow path + // flow does not exist go slow path next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH; goto trace0; } @@ -973,7 +904,6 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm, s0 = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value0)); - skip_lookup: if (PREDICT_FALSE (per_vrf_sessions_is_expired (s0, thread_index))) @@ -1014,48 +944,72 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm, goto trace0; } - old_addr0 = ip0->dst_address.as_u32; - new_addr0 = ip0->dst_address.as_u32 = s0->in2out.addr.as_u32; - vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index; - - sum0 = ip0->checksum; - sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, - dst_address); - if (PREDICT_FALSE (is_twice_nat_session (s0))) - sum0 = ip_csum_update (sum0, ip0->src_address.as_u32, - s0->ext_host_nat_addr.as_u32, ip4_header_t, - src_address); - ip0->checksum = ip_csum_fold (sum0); - - old_port0 = vnet_buffer (b0)->ip.reass.l4_dst_port; - - if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP)) + if (nat_6t_flow_match (&s0->o2i, b0, lookup_saddr, lookup_sport, + lookup_daddr, lookup_dport, lookup_protocol, + rx_fib_index0)) + { + f = &s0->o2i; + } + else if (s0->flags & SNAT_SESSION_FLAG_TWICE_NAT && + nat_6t_flow_match (&s0->i2o, b0, lookup_saddr, lookup_sport, + lookup_daddr, lookup_dport, lookup_protocol, + rx_fib_index0)) { - if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) + f = &s0->i2o; + } + else + { + /* + * Send DHCP packets to the ipv4 stack, or we won't + * be able to use dhcp client on the outside interface + */ + if (PREDICT_FALSE ( + proto0 == NAT_PROTOCOL_UDP && + (vnet_buffer (b0)->ip.reass.l4_dst_port == + clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_client)))) + { + goto trace0; + } + + if (!sm->forwarding_enabled) { - new_port0 = udp0->dst_port = s0->in2out.port; - sum0 = tcp0->checksum; - sum0 = - ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, - dst_address); - sum0 = - ip_csum_update (sum0, old_port0, new_port0, ip4_header_t, - length); - if (is_twice_nat_session (s0)) + b0->error = node->errors[NAT_OUT2IN_ED_ERROR_NO_TRANSLATION]; + next[0] = NAT_NEXT_DROP; + goto trace0; + } + else + { + if (nat_6t_flow_match (&s0->i2o, b0, lookup_saddr, lookup_sport, + lookup_daddr, lookup_dport, + lookup_protocol, rx_fib_index0)) { - sum0 = ip_csum_update (sum0, ip0->src_address.as_u32, - s0->ext_host_nat_addr.as_u32, - ip4_header_t, dst_address); - sum0 = - ip_csum_update (sum0, - vnet_buffer (b0)->ip.reass.l4_src_port, - s0->ext_host_nat_port, ip4_header_t, - length); - tcp0->src_port = s0->ext_host_nat_port; - ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32; + f = &s0->i2o; + } + else + { + // FIXME TODO bypass ??? + // create_bypass_for_fwd (sm, b0, s0, ip0, rx_fib_index0, + // thread_index); + translation_error = NAT_ED_TRNSL_ERR_FLOW_MISMATCH; + nat_free_session_data (sm, s0, thread_index, 0); + nat_ed_session_delete (sm, s0, thread_index, 1); + next[0] = NAT_NEXT_DROP; + goto trace0; } - tcp0->checksum = ip_csum_fold (sum0); } + } + + if (NAT_ED_TRNSL_ERR_SUCCESS != + (translation_error = nat_6t_flow_buf_translate ( + sm, b0, ip0, f, proto0, 0 /* is_output_feature */))) + { + next[0] = NAT_NEXT_DROP; + goto trace0; + } + + switch (proto0) + { + case NAT_PROTOCOL_TCP: vlib_increment_simple_counter (&sm->counters.fastpath.out2in_ed.tcp, thread_index, sw_if_index0, 1); nat44_set_tcp_session_state_o2i (sm, now, s0, @@ -1066,46 +1020,20 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm, vnet_buffer (b0)->ip. reass.tcp_seq_number, thread_index); - } - else if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment - && udp0->checksum) - { - new_port0 = udp0->dst_port = s0->in2out.port; - sum0 = udp0->checksum; - sum0 = - ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, - dst_address); - sum0 = - ip_csum_update (sum0, old_port0, new_port0, ip4_header_t, length); - if (PREDICT_FALSE (is_twice_nat_session (s0))) - { - sum0 = - ip_csum_update (sum0, ip0->src_address.as_u32, - s0->ext_host_nat_addr.as_u32, ip4_header_t, - dst_address); - sum0 = - ip_csum_update (sum0, vnet_buffer (b0)->ip.reass.l4_src_port, - s0->ext_host_nat_port, ip4_header_t, length); - udp0->src_port = s0->ext_host_nat_port; - ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32; - } - udp0->checksum = ip_csum_fold (sum0); + break; + case NAT_PROTOCOL_UDP: vlib_increment_simple_counter (&sm->counters.fastpath.out2in_ed.udp, thread_index, sw_if_index0, 1); - } - else - { - if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) - { - new_port0 = udp0->dst_port = s0->in2out.port; - if (PREDICT_FALSE (is_twice_nat_session (s0))) - { - udp0->src_port = s0->ext_host_nat_port; - ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32; - } - } - vlib_increment_simple_counter (&sm->counters.fastpath.out2in_ed.udp, + break; + case NAT_PROTOCOL_ICMP: + vlib_increment_simple_counter (&sm->counters.fastpath.out2in_ed.icmp, thread_index, sw_if_index0, 1); + break; + case NAT_PROTOCOL_OTHER: + vlib_increment_simple_counter ( + &sm->counters.fastpath.out2in_ed.other, thread_index, sw_if_index0, + 1); + break; } /* Accounting */ @@ -1124,11 +1052,21 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm, t->sw_if_index = sw_if_index0; t->next_index = next[0]; t->is_slow_path = 0; + t->translation_error = translation_error; + clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key)); + t->lookup_skipped = lookup_skipped; if (s0) - t->session_index = s0 - tsm->sessions; + { + t->session_index = s0 - tsm->sessions; + clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of)); + clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if)); + t->translation_via_i2of = (&s0->i2o == f); + } else - t->session_index = ~0; + { + t->session_index = ~0; + } } if (next[0] == NAT_NEXT_DROP) @@ -1169,21 +1107,20 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm, while (n_left_from > 0) { vlib_buffer_t *b0; - u32 sw_if_index0, rx_fib_index0, proto0, old_addr0, new_addr0; - u16 old_port0, new_port0; + u32 sw_if_index0, rx_fib_index0; + nat_protocol_t proto0; ip4_header_t *ip0; udp_header_t *udp0; - tcp_header_t *tcp0; icmp46_header_t *icmp0; snat_session_t *s0 = 0; clib_bihash_kv_16_8_t kv0, value0; - ip_csum_t sum0; lb_nat_type_t lb_nat0; twice_nat_type_t twice_nat0; u8 identity_nat0; ip4_address_t sm_addr; u16 sm_port; u32 sm_fib_index; + nat_translation_error_e translation_error = NAT_ED_TRNSL_ERR_SUCCESS; b0 = *b; next[0] = vnet_buffer2 (b0)->nat.arc_next; @@ -1206,20 +1143,26 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm, } udp0 = ip4_next_header (ip0); - tcp0 = (tcp_header_t *) udp0; icmp0 = (icmp46_header_t *) udp0; proto0 = ip_proto_to_nat_proto (ip0->protocol); if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER)) { - s0 = - nat44_ed_out2in_unknown_proto (sm, b0, ip0, rx_fib_index0, - thread_index, now, vm, node); + s0 = nat44_ed_out2in_slowpath_unknown_proto ( + sm, b0, ip0, rx_fib_index0, thread_index, now, vm, node); if (!sm->forwarding_enabled) { if (!s0) next[0] = NAT_NEXT_DROP; } + if (NAT_NEXT_DROP != next[0] && + NAT_ED_TRNSL_ERR_SUCCESS != + (translation_error = nat_6t_flow_buf_translate ( + sm, b0, ip0, &s0->o2i, proto0, 0 /* is_output_feature */))) + { + goto trace0; + } + vlib_increment_simple_counter (&sm->counters.slowpath. out2in_ed.other, thread_index, sw_if_index0, 1); @@ -1231,19 +1174,28 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm, next[0] = icmp_out2in_ed_slow_path (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, next[0], now, thread_index, &s0); + + if (NAT_NEXT_DROP != next[0] && s0 && + NAT_ED_TRNSL_ERR_SUCCESS != + (translation_error = nat_6t_flow_buf_translate ( + sm, b0, ip0, &s0->o2i, proto0, 0 /* is_output_feature */))) + { + goto trace0; + } + vlib_increment_simple_counter (&sm->counters.slowpath. out2in_ed.icmp, thread_index, sw_if_index0, 1); goto trace0; } - init_ed_k (&kv0, ip0->dst_address, - vnet_buffer (b0)->ip.reass.l4_dst_port, ip0->src_address, - vnet_buffer (b0)->ip.reass.l4_src_port, rx_fib_index0, + init_ed_k (&kv0, ip0->src_address, + vnet_buffer (b0)->ip.reass.l4_src_port, ip0->dst_address, + vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0, ip0->protocol); s0 = NULL; - if (!clib_bihash_search_16_8 (&sm->out2in_ed, &kv0, &value0)) + if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0)) { ASSERT (thread_index == ed_value_get_thread_index (&value0)); s0 = @@ -1298,18 +1250,8 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm, } else { - if ((sm->num_workers > 1) - && create_bypass_for_fwd_worker (sm, b0, ip0, - rx_fib_index0, - thread_index)) - { - next[0] = NAT_NEXT_OUT2IN_ED_HANDOFF; - } - else - { - create_bypass_for_fwd (sm, b0, ip0, rx_fib_index0, - thread_index); - } + create_bypass_for_fwd (sm, b0, s0, ip0, rx_fib_index0, + thread_index); } } goto trace0; @@ -1345,48 +1287,16 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm, } } - old_addr0 = ip0->dst_address.as_u32; - new_addr0 = ip0->dst_address.as_u32 = s0->in2out.addr.as_u32; - vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index; - - sum0 = ip0->checksum; - sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, - dst_address); - if (PREDICT_FALSE (is_twice_nat_session (s0))) - sum0 = ip_csum_update (sum0, ip0->src_address.as_u32, - s0->ext_host_nat_addr.as_u32, ip4_header_t, - src_address); - ip0->checksum = ip_csum_fold (sum0); - - old_port0 = vnet_buffer (b0)->ip.reass.l4_dst_port; + if (NAT_ED_TRNSL_ERR_SUCCESS != + (translation_error = nat_6t_flow_buf_translate ( + sm, b0, ip0, &s0->o2i, proto0, 0 /* is_output_feature */))) + { + next[0] = NAT_NEXT_DROP; + goto trace0; + } if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP)) { - if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) - { - new_port0 = udp0->dst_port = s0->in2out.port; - sum0 = tcp0->checksum; - sum0 = - ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, - dst_address); - sum0 = - ip_csum_update (sum0, old_port0, new_port0, ip4_header_t, - length); - if (is_twice_nat_session (s0)) - { - sum0 = ip_csum_update (sum0, ip0->src_address.as_u32, - s0->ext_host_nat_addr.as_u32, - ip4_header_t, dst_address); - sum0 = - ip_csum_update (sum0, - vnet_buffer (b0)->ip.reass.l4_src_port, - s0->ext_host_nat_port, ip4_header_t, - length); - tcp0->src_port = s0->ext_host_nat_port; - ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32; - } - tcp0->checksum = ip_csum_fold (sum0); - } vlib_increment_simple_counter (&sm->counters.slowpath.out2in_ed.tcp, thread_index, sw_if_index0, 1); nat44_set_tcp_session_state_o2i (sm, now, s0, @@ -1398,42 +1308,8 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm, reass.tcp_seq_number, thread_index); } - else if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment - && udp0->checksum) - { - new_port0 = udp0->dst_port = s0->in2out.port; - sum0 = udp0->checksum; - sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, - dst_address); - sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t, - length); - if (PREDICT_FALSE (is_twice_nat_session (s0))) - { - sum0 = ip_csum_update (sum0, ip0->src_address.as_u32, - s0->ext_host_nat_addr.as_u32, - ip4_header_t, dst_address); - sum0 = - ip_csum_update (sum0, - vnet_buffer (b0)->ip.reass.l4_src_port, - s0->ext_host_nat_port, ip4_header_t, length); - udp0->src_port = s0->ext_host_nat_port; - ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32; - } - udp0->checksum = ip_csum_fold (sum0); - vlib_increment_simple_counter (&sm->counters.slowpath.out2in_ed.udp, - thread_index, sw_if_index0, 1); - } else { - if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) - { - new_port0 = udp0->dst_port = s0->in2out.port; - if (PREDICT_FALSE (is_twice_nat_session (s0))) - { - udp0->src_port = s0->ext_host_nat_port; - ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32; - } - } vlib_increment_simple_counter (&sm->counters.slowpath.out2in_ed.udp, thread_index, sw_if_index0, 1); } @@ -1454,11 +1330,19 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm, t->sw_if_index = sw_if_index0; t->next_index = next[0]; t->is_slow_path = 1; + t->translation_error = translation_error; + clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key)); if (s0) - t->session_index = s0 - tsm->sessions; + { + t->session_index = s0 - tsm->sessions; + clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of)); + clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if)); + } else - t->session_index = ~0; + { + t->session_index = ~0; + } } if (next[0] == NAT_NEXT_DROP) @@ -1479,88 +1363,6 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm, return frame->n_vectors; } -static inline uword -nat_handoff_node_fn_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame, u32 fq_index) -{ - u32 n_enq, n_left_from, *from; - - u16 thread_indices[VLIB_FRAME_SIZE], *ti = thread_indices; - vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs; - - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - - vlib_get_buffers (vm, from, b, n_left_from); - - while (n_left_from >= 4) - { - if (PREDICT_TRUE (n_left_from >= 8)) - { - vlib_prefetch_buffer_header (b[4], LOAD); - vlib_prefetch_buffer_header (b[5], LOAD); - vlib_prefetch_buffer_header (b[6], LOAD); - vlib_prefetch_buffer_header (b[7], LOAD); - CLIB_PREFETCH (&b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD); - CLIB_PREFETCH (&b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD); - CLIB_PREFETCH (&b[6]->data, CLIB_CACHE_LINE_BYTES, LOAD); - CLIB_PREFETCH (&b[7]->data, CLIB_CACHE_LINE_BYTES, LOAD); - } - - ti[0] = vnet_buffer2 (b[0])->nat.thread_next; - ti[1] = vnet_buffer2 (b[1])->nat.thread_next; - ti[2] = vnet_buffer2 (b[2])->nat.thread_next; - ti[3] = vnet_buffer2 (b[3])->nat.thread_next; - - b += 4; - ti += 4; - n_left_from -= 4; - } - - while (n_left_from > 0) - { - ti[0] = vnet_buffer2 (b[0])->nat.thread_next; - - b += 1; - ti += 1; - n_left_from -= 1; - } - - if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) - { - u32 i; - b = bufs; - ti = thread_indices; - - for (i = 0; i < frame->n_vectors; i++) - { - if (b[0]->flags & VLIB_BUFFER_IS_TRACED) - { - nat44_ed_out2in_handoff_trace_t *t = - vlib_add_trace (vm, node, b[0], sizeof (*t)); - t->thread_next = ti[0]; - b += 1; - ti += 1; - } - else - break; - } - } - - n_enq = vlib_buffer_enqueue_to_thread (vm, fq_index, from, thread_indices, - frame->n_vectors, 1); - - if (n_enq < frame->n_vectors) - { - vlib_node_increment_counter (vm, node->node_index, - NAT44_HANDOFF_ERROR_CONGESTION_DROP, - frame->n_vectors - n_enq); - } - - return frame->n_vectors; -} - VLIB_NODE_FN (nat44_ed_out2in_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) @@ -1609,35 +1411,6 @@ VLIB_REGISTER_NODE (nat44_ed_out2in_slowpath_node) = { /* *INDENT-ON* */ static u8 * -format_nat44_ed_out2in_handoff_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - nat44_ed_out2in_handoff_trace_t *t = - va_arg (*args, nat44_ed_out2in_handoff_trace_t *); - return format (s, "out2in ed handoff thread_next index %d", t->thread_next); -} - -VLIB_NODE_FN (nat44_ed_out2in_handoff_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) -{ - return nat_handoff_node_fn_inline (vm, node, frame, - snat_main.ed_out2in_node_index); -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (nat44_ed_out2in_handoff_node) = { - .name = "nat44-ed-out2in-handoff", - .vector_size = sizeof (u32), - .sibling_of = "nat-default", - .format_trace = format_nat44_ed_out2in_handoff_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - .n_errors = 0, -}; -/* *INDENT-ON* */ - -static u8 * format_nat_pre_trace (u8 * s, va_list * args) { CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); diff --git a/src/plugins/nat/test/test_nat44_ed.py b/src/plugins/nat/test/test_nat44_ed.py index 0f4a7371048..3d8921790ee 100644 --- a/src/plugins/nat/test/test_nat44_ed.py +++ b/src/plugins/nat/test/test_nat44_ed.py @@ -2004,7 +2004,7 @@ class TestNAT44EDMW(TestNAT44ED): # out2in tc1 = self.get_stats_counter('/nat44/ed/out2in/fastpath/tcp') uc1 = self.get_stats_counter('/nat44/ed/out2in/fastpath/udp') - ic1 = self.get_stats_counter('/nat44/ed/out2in/slowpath/icmp') + ic1 = self.get_stats_counter('/nat44/ed/out2in/fastpath/icmp') dc1 = self.get_stats_counter('/nat44/ed/out2in/fastpath/drops') pkts = self.create_stream_out(self.pg1) @@ -2017,7 +2017,7 @@ class TestNAT44EDMW(TestNAT44ED): if_idx = self.pg1.sw_if_index tc2 = self.get_stats_counter('/nat44/ed/out2in/fastpath/tcp') uc2 = self.get_stats_counter('/nat44/ed/out2in/fastpath/udp') - ic2 = self.get_stats_counter('/nat44/ed/out2in/slowpath/icmp') + ic2 = self.get_stats_counter('/nat44/ed/out2in/fastpath/icmp') dc2 = self.get_stats_counter('/nat44/ed/out2in/fastpath/drops') self.assertEqual(tc2[if_idx] - tc1[if_idx], 2) @@ -3364,7 +3364,7 @@ class TestNAT44EDMW(TestNAT44ED): udpn = self.get_stats_counter( '/nat44/ed/out2in/fastpath/udp') icmpn = self.get_stats_counter( - '/nat44/ed/out2in/slowpath/icmp') + '/nat44/ed/out2in/fastpath/icmp') drops = self.get_stats_counter( '/nat44/ed/out2in/fastpath/drops') @@ -3383,7 +3383,7 @@ class TestNAT44EDMW(TestNAT44ED): '/nat44/ed/out2in/fastpath/udp') self.assertEqual(cnt[if_idx] - udpn[if_idx], 1) cnt = self.get_stats_counter( - '/nat44/ed/out2in/slowpath/icmp') + '/nat44/ed/out2in/fastpath/icmp') self.assertEqual(cnt[if_idx] - icmpn[if_idx], 1) cnt = self.get_stats_counter( '/nat44/ed/out2in/fastpath/drops') diff --git a/src/vnet/buffer.h b/src/vnet/buffer.h index 9e997b81c52..aae999620ac 100644 --- a/src/vnet/buffer.h +++ b/src/vnet/buffer.h @@ -452,12 +452,14 @@ typedef struct /* size of L4 prototol header */ u16 gso_l4_hdr_sz; + /* The union below has a u64 alignment, so this space is unused */ + u32 __unused2[1]; + struct { - u16 unused; - u16 thread_next; u32 arc_next; - u32 ed_out2in_nat_session_index; + /* cached session index from previous node */ + u32 cached_session_index; } nat; union |