From 51e759fd0655b6089360e1ccf2f5341704549fd4 Mon Sep 17 00:00:00 2001 From: Matus Fabian Date: Thu, 7 Dec 2017 23:22:51 -0800 Subject: NAT64: multi-thread support (VPP-891) Change-Id: Iebf859b6d86482e4465423bad598eecf87e53ec4 Signed-off-by: Matus Fabian --- src/plugins/nat/nat64_in2out.c | 485 +++++++++++++++++++++++++++++------------ 1 file changed, 341 insertions(+), 144 deletions(-) (limited to 'src/plugins/nat/nat64_in2out.c') diff --git a/src/plugins/nat/nat64_in2out.c b/src/plugins/nat/nat64_in2out.c index 4f94575ebcc..9f77ca33fa4 100644 --- a/src/plugins/nat/nat64_in2out.c +++ b/src/plugins/nat/nat64_in2out.c @@ -72,6 +72,7 @@ format_nat64_in2out_reass_trace (u8 * s, va_list * args) vlib_node_registration_t nat64_in2out_node; vlib_node_registration_t nat64_in2out_slowpath_node; vlib_node_registration_t nat64_in2out_reass_node; +vlib_node_registration_t nat64_in2out_handoff_node; #define foreach_nat64_in2out_error \ _(UNSUPPORTED_PROTOCOL, "unsupported protocol") \ @@ -111,6 +112,7 @@ typedef struct nat64_in2out_set_ctx_t_ { vlib_buffer_t *b; vlib_main_t *vm; + u32 thread_index; } nat64_in2out_set_ctx_t; /** @@ -152,6 +154,7 @@ nat64_in2out_tcp_udp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, u8 proto = ip6->protocol; u16 sport = udp->src_port; u16 dport = udp->dst_port; + nat64_db_t *db = &nm->db[ctx->thread_index]; sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX]; fib_index = @@ -163,19 +166,18 @@ nat64_in2out_tcp_udp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, daddr.as_u64[1] = ip6->dst_address.as_u64[1]; ste = - nat64_db_st_entry_find (&nm->db, &saddr, &daddr, sport, dport, proto, + nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto, fib_index, 1); if (ste) { - bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index); + bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index); if (!bibe) return -1; } else { - bibe = - nat64_db_bib_entry_find (&nm->db, &saddr, sport, proto, fib_index, 1); + bibe = nat64_db_bib_entry_find (db, &saddr, sport, proto, fib_index, 1); if (!bibe) { @@ -183,11 +185,11 @@ nat64_in2out_tcp_udp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, ip4_address_t out_addr; if (nat64_alloc_out_addr_and_port (fib_index, ip_proto_to_snat_proto (proto), &out_addr, - &out_port)) + &out_port, ctx->thread_index)) return -1; bibe = - nat64_db_bib_entry_create (&nm->db, &ip6->src_address, &out_addr, + nat64_db_bib_entry_create (db, &ip6->src_address, &out_addr, sport, clib_host_to_net_u16 (out_port), fib_index, proto, 0); if (!bibe) @@ -196,7 +198,7 @@ nat64_in2out_tcp_udp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index); ste = - nat64_db_st_entry_create (&nm->db, bibe, &ip6->dst_address, + nat64_db_st_entry_create (db, bibe, &ip6->dst_address, &daddr.ip4, dport); if (!ste) return -1; @@ -234,6 +236,7 @@ nat64_in2out_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg) ip46_address_t saddr, daddr; u32 sw_if_index, fib_index; icmp46_header_t *icmp = ip6_next_header (ip6); + nat64_db_t *db = &nm->db[ctx->thread_index]; sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX]; fib_index = @@ -248,13 +251,13 @@ nat64_in2out_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg) { u16 in_id = ((u16 *) (icmp))[2]; ste = - nat64_db_st_entry_find (&nm->db, &saddr, &daddr, in_id, 0, + nat64_db_st_entry_find (db, &saddr, &daddr, in_id, 0, IP_PROTOCOL_ICMP, fib_index, 1); if (ste) { bibe = - nat64_db_bib_entry_by_index (&nm->db, IP_PROTOCOL_ICMP, + nat64_db_bib_entry_by_index (db, IP_PROTOCOL_ICMP, ste->bibe_index); if (!bibe) return -1; @@ -262,7 +265,7 @@ nat64_in2out_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg) else { bibe = - nat64_db_bib_entry_find (&nm->db, &saddr, in_id, + nat64_db_bib_entry_find (db, &saddr, in_id, IP_PROTOCOL_ICMP, fib_index, 1); if (!bibe) @@ -270,11 +273,12 @@ nat64_in2out_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg) u16 out_id; ip4_address_t out_addr; if (nat64_alloc_out_addr_and_port - (fib_index, SNAT_PROTOCOL_ICMP, &out_addr, &out_id)) + (fib_index, SNAT_PROTOCOL_ICMP, &out_addr, &out_id, + ctx->thread_index)) return -1; bibe = - nat64_db_bib_entry_create (&nm->db, &ip6->src_address, + nat64_db_bib_entry_create (db, &ip6->src_address, &out_addr, in_id, clib_host_to_net_u16 (out_id), fib_index, IP_PROTOCOL_ICMP, 0); @@ -284,7 +288,7 @@ nat64_in2out_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg) nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index); ste = - nat64_db_st_entry_create (&nm->db, bibe, &ip6->dst_address, + nat64_db_st_entry_create (db, bibe, &ip6->dst_address, &daddr.ip4, 0); if (!ste) return -1; @@ -320,6 +324,7 @@ nat64_in2out_inner_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, ip46_address_t saddr, daddr; u32 sw_if_index, fib_index; u8 proto = ip6->protocol; + nat64_db_t *db = &nm->db[ctx->thread_index]; sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX]; fib_index = @@ -342,12 +347,12 @@ nat64_in2out_inner_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, return -1; ste = - nat64_db_st_entry_find (&nm->db, &daddr, &saddr, in_id, 0, proto, + nat64_db_st_entry_find (db, &daddr, &saddr, in_id, 0, proto, fib_index, 1); if (!ste) return -1; - bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index); + bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index); if (!bibe) return -1; @@ -366,12 +371,12 @@ nat64_in2out_inner_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, u16 dport = udp->dst_port; ste = - nat64_db_st_entry_find (&nm->db, &daddr, &saddr, dport, sport, proto, + nat64_db_st_entry_find (db, &daddr, &saddr, dport, sport, proto, fib_index, 1); if (!ste) return -1; - bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index); + bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index); if (!bibe) return -1; @@ -397,6 +402,7 @@ typedef struct unk_proto_st_walk_ctx_t_ ip6_address_t dst_addr; ip4_address_t out_addr; u32 fib_index; + u32 thread_index; u8 proto; } unk_proto_st_walk_ctx_t; @@ -407,11 +413,11 @@ unk_proto_st_walk (nat64_db_st_entry_t * ste, void *arg) unk_proto_st_walk_ctx_t *ctx = arg; nat64_db_bib_entry_t *bibe; ip46_address_t saddr, daddr; + nat64_db_t *db = &nm->db[ctx->thread_index]; if (ip46_address_is_equal (&ste->in_r_addr, &ctx->dst_addr)) { - bibe = - nat64_db_bib_entry_by_index (&nm->db, ste->proto, ste->bibe_index); + bibe = nat64_db_bib_entry_by_index (db, ste->proto, ste->bibe_index); if (!bibe) return -1; @@ -424,7 +430,7 @@ unk_proto_st_walk (nat64_db_st_entry_t * ste, void *arg) nat64_extract_ip4 (&ctx->dst_addr, &daddr.ip4, ctx->fib_index); if (nat64_db_st_entry_find - (&nm->db, &daddr, &saddr, 0, 0, ctx->proto, ctx->fib_index, 0)) + (db, &daddr, &saddr, 0, 0, ctx->proto, ctx->fib_index, 0)) return -1; ctx->out_addr.as_u32 = bibe->out_addr.as_u32; @@ -440,15 +446,16 @@ nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg) { nat64_main_t *nm = &nat64_main; - nat64_in2out_set_ctx_t *ctx = arg; + nat64_in2out_set_ctx_t *s_ctx = arg; nat64_db_bib_entry_t *bibe; nat64_db_st_entry_t *ste; ip46_address_t saddr, daddr, addr; u32 sw_if_index, fib_index; u8 proto = ip6->protocol; int i; + nat64_db_t *db = &nm->db[s_ctx->thread_index]; - sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX]; + sw_if_index = vnet_buffer (s_ctx->b)->sw_if_index[VLIB_RX]; fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index); @@ -458,19 +465,17 @@ nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, daddr.as_u64[1] = ip6->dst_address.as_u64[1]; ste = - nat64_db_st_entry_find (&nm->db, &saddr, &daddr, 0, 0, proto, fib_index, - 1); + nat64_db_st_entry_find (db, &saddr, &daddr, 0, 0, proto, fib_index, 1); if (ste) { - bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index); + bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index); if (!bibe) return -1; } else { - bibe = - nat64_db_bib_entry_find (&nm->db, &saddr, 0, proto, fib_index, 1); + bibe = nat64_db_bib_entry_find (db, &saddr, 0, proto, fib_index, 1); if (!bibe) { @@ -483,19 +488,18 @@ nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, .out_addr.as_u32 = 0, .fib_index = fib_index, .proto = proto, + .thread_index = s_ctx->thread_index, }; - nat64_db_st_walk (&nm->db, IP_PROTOCOL_TCP, unk_proto_st_walk, - &ctx); + nat64_db_st_walk (db, IP_PROTOCOL_TCP, unk_proto_st_walk, &ctx); if (!ctx.out_addr.as_u32) - nat64_db_st_walk (&nm->db, IP_PROTOCOL_UDP, unk_proto_st_walk, - &ctx); + nat64_db_st_walk (db, IP_PROTOCOL_UDP, unk_proto_st_walk, &ctx); /* Verify if out address is not already in use for protocol */ memset (&addr, 0, sizeof (addr)); addr.ip4.as_u32 = ctx.out_addr.as_u32; - if (nat64_db_bib_entry_find (&nm->db, &addr, 0, proto, 0, 0)) + if (nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0)) ctx.out_addr.as_u32 = 0; if (!ctx.out_addr.as_u32) @@ -503,8 +507,7 @@ nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, for (i = 0; i < vec_len (nm->addr_pool); i++) { addr.ip4.as_u32 = nm->addr_pool[i].addr.as_u32; - if (!nat64_db_bib_entry_find - (&nm->db, &addr, 0, proto, 0, 0)) + if (!nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0)) break; } } @@ -513,7 +516,7 @@ nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, return -1; bibe = - nat64_db_bib_entry_create (&nm->db, &ip6->src_address, + nat64_db_bib_entry_create (db, &ip6->src_address, &ctx.out_addr, 0, 0, fib_index, proto, 0); if (!bibe) @@ -522,13 +525,12 @@ nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index); ste = - nat64_db_st_entry_create (&nm->db, bibe, &ip6->dst_address, - &daddr.ip4, 0); + nat64_db_st_entry_create (db, bibe, &ip6->dst_address, &daddr.ip4, 0); if (!ste) return -1; } - nat64_session_reset_timeout (ste, ctx->vm); + nat64_session_reset_timeout (ste, s_ctx->vm); ip4->src_address.as_u32 = bibe->out_addr.as_u32; ip4->dst_address.as_u32 = ste->out_r_addr.as_u32; @@ -540,7 +542,7 @@ nat64_in2out_unk_proto_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, static int nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, - ip6_header_t * ip6) + ip6_header_t * ip6, u32 thread_index) { nat64_main_t *nm = &nat64_main; nat64_db_bib_entry_t *bibe; @@ -554,6 +556,7 @@ nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, u16 dport = udp->dst_port; u16 *checksum; ip_csum_t csum; + nat64_db_t *db = &nm->db[thread_index]; sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX]; fib_index = @@ -577,19 +580,18 @@ nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, csum = ip_csum_sub_even (csum, dport); ste = - nat64_db_st_entry_find (&nm->db, &saddr, &daddr, sport, dport, proto, + nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto, fib_index, 1); if (ste) { - bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index); + bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index); if (!bibe) return -1; } else { - bibe = - nat64_db_bib_entry_find (&nm->db, &saddr, sport, proto, fib_index, 1); + bibe = nat64_db_bib_entry_find (db, &saddr, sport, proto, fib_index, 1); if (!bibe) { @@ -597,11 +599,11 @@ nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, ip4_address_t out_addr; if (nat64_alloc_out_addr_and_port (fib_index, ip_proto_to_snat_proto (proto), &out_addr, - &out_port)) + &out_port, thread_index)) return -1; bibe = - nat64_db_bib_entry_create (&nm->db, &ip6->src_address, &out_addr, + nat64_db_bib_entry_create (db, &ip6->src_address, &out_addr, sport, clib_host_to_net_u16 (out_port), fib_index, proto, 0); if (!bibe) @@ -610,7 +612,7 @@ nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index); ste = - nat64_db_st_entry_create (&nm->db, bibe, &ip6->dst_address, + nat64_db_st_entry_create (db, bibe, &ip6->dst_address, &daddr.ip4, dport); if (!ste) return -1; @@ -621,32 +623,22 @@ nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, sport = udp->src_port = bibe->out_port; nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, fib_index); - memset (&saddr, 0, sizeof (saddr)); memset (&daddr, 0, sizeof (daddr)); - saddr.ip4.as_u32 = bibe->out_addr.as_u32; daddr.ip4.as_u32 = ste->out_r_addr.as_u32; - ste = - nat64_db_st_entry_find (&nm->db, &daddr, &saddr, dport, sport, proto, 0, - 0); - - if (ste) + bibe = 0; + /* *INDENT-OFF* */ + vec_foreach (db, nm->db) { - bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index); - if (!bibe) - return -1; - } - else - { - bibe = nat64_db_bib_entry_find (&nm->db, &daddr, dport, proto, 0, 0); - - if (!bibe) - return -1; + bibe = nat64_db_bib_entry_find (db, &daddr, dport, proto, 0, 0); - ste = - nat64_db_st_entry_create (&nm->db, bibe, &ip6->src_address, - &saddr.ip4, sport); + if (bibe) + break; } + /* *INDENT-ON* */ + + if (!bibe) + return -1; ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0]; ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1]; @@ -665,7 +657,7 @@ nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, static int nat64_in2out_icmp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, - ip6_header_t * ip6) + ip6_header_t * ip6, u32 thread_index) { nat64_main_t *nm = &nat64_main; nat64_db_bib_entry_t *bibe; @@ -679,6 +671,7 @@ nat64_in2out_icmp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, tcp_header_t *tcp; u16 *checksum, sport, dport; ip_csum_t csum; + nat64_db_t *db = &nm->db[thread_index]; if (icmp->type == ICMP6_echo_request || icmp->type == ICMP6_echo_reply) return -1; @@ -718,12 +711,12 @@ nat64_in2out_icmp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, csum = ip_csum_sub_even (csum, dport); ste = - nat64_db_st_entry_find (&nm->db, &daddr, &saddr, dport, sport, proto, + nat64_db_st_entry_find (db, &daddr, &saddr, dport, sport, proto, fib_index, 1); if (!ste) return -1; - bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index); + bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index); if (!bibe) return -1; @@ -735,13 +728,22 @@ nat64_in2out_icmp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, saddr.ip4.as_u32 = ste->out_r_addr.as_u32; daddr.ip4.as_u32 = bibe->out_addr.as_u32; - ste = - nat64_db_st_entry_find (&nm->db, &saddr, &daddr, sport, dport, proto, 0, - 0); + ste = 0; + /* *INDENT-OFF* */ + vec_foreach (db, nm->db) + { + ste = nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto, + 0, 0); + + if (ste) + break; + } + /* *INDENT-ON* */ + if (!ste) return -1; - bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index); + bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index); if (!bibe) return -1; @@ -781,7 +783,7 @@ nat64_in2out_icmp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, static int nat64_in2out_unk_proto_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, - ip6_header_t * ip6) + ip6_header_t * ip6, u32 thread_index) { nat64_main_t *nm = &nat64_main; nat64_db_bib_entry_t *bibe; @@ -790,6 +792,7 @@ nat64_in2out_unk_proto_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, u32 sw_if_index, fib_index; u8 proto = ip6->protocol; int i; + nat64_db_t *db = &nm->db[thread_index]; sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX]; fib_index = @@ -801,19 +804,17 @@ nat64_in2out_unk_proto_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, daddr.as_u64[1] = ip6->dst_address.as_u64[1]; ste = - nat64_db_st_entry_find (&nm->db, &saddr, &daddr, 0, 0, proto, fib_index, - 1); + nat64_db_st_entry_find (db, &saddr, &daddr, 0, 0, proto, fib_index, 1); if (ste) { - bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index); + bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index); if (!bibe) return -1; } else { - bibe = - nat64_db_bib_entry_find (&nm->db, &saddr, 0, proto, fib_index, 1); + bibe = nat64_db_bib_entry_find (db, &saddr, 0, proto, fib_index, 1); if (!bibe) { @@ -826,19 +827,18 @@ nat64_in2out_unk_proto_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, .out_addr.as_u32 = 0, .fib_index = fib_index, .proto = proto, + .thread_index = thread_index, }; - nat64_db_st_walk (&nm->db, IP_PROTOCOL_TCP, unk_proto_st_walk, - &ctx); + nat64_db_st_walk (db, IP_PROTOCOL_TCP, unk_proto_st_walk, &ctx); if (!ctx.out_addr.as_u32) - nat64_db_st_walk (&nm->db, IP_PROTOCOL_UDP, unk_proto_st_walk, - &ctx); + nat64_db_st_walk (db, IP_PROTOCOL_UDP, unk_proto_st_walk, &ctx); /* Verify if out address is not already in use for protocol */ memset (&addr, 0, sizeof (addr)); addr.ip4.as_u32 = ctx.out_addr.as_u32; - if (nat64_db_bib_entry_find (&nm->db, &addr, 0, proto, 0, 0)) + if (nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0)) ctx.out_addr.as_u32 = 0; if (!ctx.out_addr.as_u32) @@ -846,8 +846,7 @@ nat64_in2out_unk_proto_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, for (i = 0; i < vec_len (nm->addr_pool); i++) { addr.ip4.as_u32 = nm->addr_pool[i].addr.as_u32; - if (!nat64_db_bib_entry_find - (&nm->db, &addr, 0, proto, 0, 0)) + if (!nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0)) break; } } @@ -856,7 +855,7 @@ nat64_in2out_unk_proto_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, return -1; bibe = - nat64_db_bib_entry_create (&nm->db, &ip6->src_address, + nat64_db_bib_entry_create (db, &ip6->src_address, &ctx.out_addr, 0, 0, fib_index, proto, 0); if (!bibe) @@ -865,8 +864,7 @@ nat64_in2out_unk_proto_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index); ste = - nat64_db_st_entry_create (&nm->db, bibe, &ip6->dst_address, - &daddr.ip4, 0); + nat64_db_st_entry_create (db, bibe, &ip6->dst_address, &daddr.ip4, 0); if (!ste) return -1; } @@ -875,30 +873,22 @@ nat64_in2out_unk_proto_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, fib_index); - memset (&saddr, 0, sizeof (saddr)); memset (&daddr, 0, sizeof (daddr)); - saddr.ip4.as_u32 = bibe->out_addr.as_u32; daddr.ip4.as_u32 = ste->out_r_addr.as_u32; - ste = nat64_db_st_entry_find (&nm->db, &daddr, &saddr, 0, 0, proto, 0, 0); - - if (ste) + bibe = 0; + /* *INDENT-OFF* */ + vec_foreach (db, nm->db) { - bibe = nat64_db_bib_entry_by_index (&nm->db, proto, ste->bibe_index); - if (!bibe) - return -1; - } - else - { - bibe = nat64_db_bib_entry_find (&nm->db, &daddr, 0, proto, 0, 0); + bibe = nat64_db_bib_entry_find (db, &daddr, 0, proto, 0, 0); - if (!bibe) - return -1; - - ste = - nat64_db_st_entry_create (&nm->db, bibe, &ip6->src_address, - &saddr.ip4, 0); + if (bibe) + break; } + /* *INDENT-ON* */ + + if (!bibe) + return -1; ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0]; ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1]; @@ -914,6 +904,7 @@ nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node, nat64_in2out_next_t next_index; u32 pkts_processed = 0; u32 stats_node_index; + u32 thread_index = vlib_get_thread_index (); stats_node_index = is_slow_path ? nat64_in2out_slowpath_node.index : nat64_in2out_node.index; @@ -952,6 +943,7 @@ nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node, ctx0.b = b0; ctx0.vm = vm; + ctx0.thread_index = thread_index; next0 = NAT64_IN2OUT_NEXT_IP4_LOOKUP; @@ -974,7 +966,8 @@ nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node, if (is_hairpinning (&ip60->dst_address)) { next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP; - if (nat64_in2out_unk_proto_hairpinning (vm, b0, ip60)) + if (nat64_in2out_unk_proto_hairpinning + (vm, b0, ip60, thread_index)) { next0 = NAT64_IN2OUT_NEXT_DROP; b0->error = @@ -1014,7 +1007,8 @@ nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node, if (is_hairpinning (&ip60->dst_address)) { next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP; - if (nat64_in2out_icmp_hairpinning (vm, b0, ip60)) + if (nat64_in2out_icmp_hairpinning + (vm, b0, ip60, thread_index)) { next0 = NAT64_IN2OUT_NEXT_DROP; b0->error = @@ -1037,7 +1031,8 @@ nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node, if (is_hairpinning (&ip60->dst_address)) { next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP; - if (nat64_in2out_tcp_udp_hairpinning (vm, b0, ip60)) + if (nat64_in2out_tcp_udp_hairpinning + (vm, b0, ip60, thread_index)) { next0 = NAT64_IN2OUT_NEXT_DROP; b0->error = @@ -1145,6 +1140,7 @@ typedef struct nat64_in2out_frag_set_ctx_t_ { vlib_main_t *vm; u32 sess_index; + u32 thread_index; u16 l4_offset; u8 proto; u8 first_frag; @@ -1158,12 +1154,13 @@ nat64_in2out_frag_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg) nat64_db_st_entry_t *ste; nat64_db_bib_entry_t *bibe; udp_header_t *udp; + nat64_db_t *db = &nm->db[ctx->thread_index]; - ste = nat64_db_st_entry_by_index (&nm->db, ctx->proto, ctx->sess_index); + ste = nat64_db_st_entry_by_index (db, ctx->proto, ctx->sess_index); if (!ste) return -1; - bibe = nat64_db_bib_entry_by_index (&nm->db, ctx->proto, ste->bibe_index); + bibe = nat64_db_bib_entry_by_index (db, ctx->proto, ste->bibe_index); if (!bibe) return -1; @@ -1213,7 +1210,8 @@ nat64_in2out_frag_hairpinning (vlib_buffer_t * b, ip6_header_t * ip6, u16 dport = udp->dst_port; u16 *checksum; ip_csum_t csum; - ip46_address_t saddr, daddr; + ip46_address_t daddr; + nat64_db_t *db = &nm->db[ctx->thread_index]; if (ctx->first_frag) { @@ -1230,11 +1228,11 @@ nat64_in2out_frag_hairpinning (vlib_buffer_t * b, ip6_header_t * ip6, csum = ip_csum_sub_even (csum, dport); } - ste = nat64_db_st_entry_by_index (&nm->db, ctx->proto, ctx->sess_index); + ste = nat64_db_st_entry_by_index (db, ctx->proto, ctx->sess_index); if (!ste) return -1; - bibe = nat64_db_bib_entry_by_index (&nm->db, ctx->proto, ste->bibe_index); + bibe = nat64_db_bib_entry_by_index (db, ctx->proto, ste->bibe_index); if (!bibe) return -1; @@ -1245,34 +1243,22 @@ nat64_in2out_frag_hairpinning (vlib_buffer_t * b, ip6_header_t * ip6, nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, bibe->fib_index); - memset (&saddr, 0, sizeof (saddr)); memset (&daddr, 0, sizeof (daddr)); - saddr.ip4.as_u32 = bibe->out_addr.as_u32; daddr.ip4.as_u32 = ste->out_r_addr.as_u32; - ste = - nat64_db_st_entry_find (&nm->db, &daddr, &saddr, dport, sport, ctx->proto, - 0, 0); - - if (ste) - { - bibe = - nat64_db_bib_entry_by_index (&nm->db, ctx->proto, ste->bibe_index); - if (!bibe) - return -1; - } - else + bibe = 0; + /* *INDENT-OFF* */ + vec_foreach (db, nm->db) { - bibe = - nat64_db_bib_entry_find (&nm->db, &daddr, dport, ctx->proto, 0, 0); + bibe = nat64_db_bib_entry_find (db, &daddr, dport, ctx->proto, 0, 0); - if (!bibe) - return -1; - - ste = - nat64_db_st_entry_create (&nm->db, bibe, &ip6->src_address, - &saddr.ip4, sport); + if (bibe) + break; } + /* *INDENT-ON* */ + + if (!bibe) + return -1; ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0]; ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1]; @@ -1303,6 +1289,7 @@ nat64_in2out_reass_node_fn (vlib_main_t * vm, u32 *fragments_to_drop = 0; u32 *fragments_to_loopback = 0; nat64_main_t *nm = &nat64_main; + u32 thread_index = vlib_get_thread_index (); from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -1332,6 +1319,7 @@ nat64_in2out_reass_node_fn (vlib_main_t * vm, u32 sw_if_index0, fib_index0; ip46_address_t saddr0, daddr0; nat64_in2out_frag_set_ctx_t ctx0; + nat64_db_t *db = &nm->db[thread_index]; /* speculatively enqueue b0 to the current next frame */ bi0 = from[0]; @@ -1349,6 +1337,8 @@ nat64_in2out_reass_node_fn (vlib_main_t * vm, fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index0); + ctx0.thread_index = thread_index; + if (PREDICT_FALSE (nat_reass_is_drop_frag (1))) { next0 = NAT64_IN2OUT_NEXT_DROP; @@ -1419,20 +1409,21 @@ nat64_in2out_reass_node_fn (vlib_main_t * vm, daddr0.as_u64[1] = ip60->dst_address.as_u64[1]; ste0 = - nat64_db_st_entry_find (&nm->db, &saddr0, &daddr0, + nat64_db_st_entry_find (db, &saddr0, &daddr0, udp0->src_port, udp0->dst_port, l4_protocol0, fib_index0, 1); if (!ste0) { bibe0 = - nat64_db_bib_entry_find (&nm->db, &saddr0, udp0->src_port, + nat64_db_bib_entry_find (db, &saddr0, udp0->src_port, l4_protocol0, fib_index0, 1); if (!bibe0) { u16 out_port0; ip4_address_t out_addr0; if (nat64_alloc_out_addr_and_port - (fib_index0, proto0, &out_addr0, &out_port0)) + (fib_index0, proto0, &out_addr0, &out_port0, + thread_index)) { next0 = NAT64_IN2OUT_NEXT_DROP; b0->error = @@ -1441,7 +1432,7 @@ nat64_in2out_reass_node_fn (vlib_main_t * vm, } bibe0 = - nat64_db_bib_entry_create (&nm->db, + nat64_db_bib_entry_create (db, &ip60->src_address, &out_addr0, udp0->src_port, clib_host_to_net_u16 @@ -1458,7 +1449,7 @@ nat64_in2out_reass_node_fn (vlib_main_t * vm, nat64_extract_ip4 (&ip60->dst_address, &daddr0.ip4, fib_index0); ste0 = - nat64_db_st_entry_create (&nm->db, bibe0, + nat64_db_st_entry_create (db, bibe0, &ip60->dst_address, &daddr0.ip4, udp0->dst_port); if (!ste0) @@ -1469,8 +1460,7 @@ nat64_in2out_reass_node_fn (vlib_main_t * vm, goto trace0; } } - reass0->sess_index = - nat64_db_st_entry_get_index (&nm->db, ste0); + reass0->sess_index = nat64_db_st_entry_get_index (db, ste0); nat_ip6_reass_get_frags (reass0, &fragments_to_loopback); } @@ -1590,6 +1580,213 @@ VLIB_REGISTER_NODE (nat64_in2out_reass_node) = { VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_reass_node, nat64_in2out_reass_node_fn); +typedef struct +{ + u32 next_worker_index; + u8 do_handoff; +} nat64_in2out_handoff_trace_t; + +static u8 * +format_nat64_in2out_handoff_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + nat64_in2out_handoff_trace_t *t = + va_arg (*args, nat64_in2out_handoff_trace_t *); + char *m; + + m = t->do_handoff ? "next worker" : "same worker"; + s = format (s, "NAT64-IN2OUT-HANDOFF: %s %d", m, t->next_worker_index); + + return s; +} + +static inline uword +nat64_in2out_handoff_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + nat64_main_t *nm = &nat64_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + u32 n_left_from, *from, *to_next = 0, *to_next_drop = 0; + static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index; + static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index + = 0; + vlib_frame_queue_elt_t *hf = 0; + vlib_frame_queue_t *fq; + vlib_frame_t *f = 0, *d = 0; + int i; + u32 n_left_to_next_worker = 0, *to_next_worker = 0; + u32 next_worker_index = 0; + u32 current_worker_index = ~0; + u32 thread_index = vlib_get_thread_index (); + u32 fq_index; + u32 to_node_index; + + fq_index = nm->fq_in2out_index; + to_node_index = nat64_in2out_node.index; + + if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0)) + { + vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1); + + vec_validate_init_empty (congested_handoff_queue_by_worker_index, + tm->n_vlib_mains - 1, + (vlib_frame_queue_t *) (~0)); + } + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + while (n_left_from > 0) + { + u32 bi0; + vlib_buffer_t *b0; + ip6_header_t *ip0; + u8 do_handoff; + + bi0 = from[0]; + from += 1; + n_left_from -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + ip0 = vlib_buffer_get_current (b0); + + next_worker_index = nat64_get_worker_in2out (&ip0->src_address); + + if (PREDICT_FALSE (next_worker_index != thread_index)) + { + do_handoff = 1; + + if (next_worker_index != current_worker_index) + { + fq = + is_vlib_frame_queue_congested (fq_index, next_worker_index, + 30, + congested_handoff_queue_by_worker_index); + + if (fq) + { + /* if this is 1st frame */ + if (!d) + { + d = vlib_get_frame_to_node (vm, nm->error_node_index); + to_next_drop = vlib_frame_vector_args (d); + } + + to_next_drop[0] = bi0; + to_next_drop += 1; + d->n_vectors++; + goto trace0; + } + + if (hf) + hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker; + + hf = + vlib_get_worker_handoff_queue_elt (fq_index, + next_worker_index, + handoff_queue_elt_by_worker_index); + n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors; + to_next_worker = &hf->buffer_index[hf->n_vectors]; + current_worker_index = next_worker_index; + } + + /* enqueue to correct worker thread */ + to_next_worker[0] = bi0; + to_next_worker++; + n_left_to_next_worker--; + + if (n_left_to_next_worker == 0) + { + hf->n_vectors = VLIB_FRAME_SIZE; + vlib_put_frame_queue_elt (hf); + current_worker_index = ~0; + handoff_queue_elt_by_worker_index[next_worker_index] = 0; + hf = 0; + } + } + else + { + do_handoff = 0; + /* if this is 1st frame */ + if (!f) + { + f = vlib_get_frame_to_node (vm, to_node_index); + to_next = vlib_frame_vector_args (f); + } + + to_next[0] = bi0; + to_next += 1; + f->n_vectors++; + } + + trace0: + if (PREDICT_FALSE + ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + nat64_in2out_handoff_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->next_worker_index = next_worker_index; + t->do_handoff = do_handoff; + } + } + + if (f) + vlib_put_frame_to_node (vm, to_node_index, f); + + if (d) + vlib_put_frame_to_node (vm, nm->error_node_index, d); + + if (hf) + hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker; + + /* Ship frames to the worker nodes */ + for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++) + { + if (handoff_queue_elt_by_worker_index[i]) + { + hf = handoff_queue_elt_by_worker_index[i]; + /* + * It works better to let the handoff node + * rate-adapt, always ship the handoff queue element. + */ + if (1 || hf->n_vectors == hf->last_n_vectors) + { + vlib_put_frame_queue_elt (hf); + handoff_queue_elt_by_worker_index[i] = 0; + } + else + hf->last_n_vectors = hf->n_vectors; + } + congested_handoff_queue_by_worker_index[i] = + (vlib_frame_queue_t *) (~0); + } + hf = 0; + current_worker_index = ~0; + return frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (nat64_in2out_handoff_node) = { + .function = nat64_in2out_handoff_node_fn, + .name = "nat64-in2out-handoff", + .vector_size = sizeof (u32), + .format_trace = format_nat64_in2out_handoff_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_next_nodes = 1, + + .next_nodes = { + [0] = "error-drop", + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (nat64_in2out_handoff_node, + nat64_in2out_handoff_node_fn); + /* * fd.io coding-style-patch-verification: ON * -- cgit 1.2.3-korg