From 0eaf4e6784efb2d058fe2f031578251b6bcc0aa8 Mon Sep 17 00:00:00 2001 From: Filip Varga Date: Wed, 17 Feb 2021 14:34:54 +0100 Subject: nat: Final NAT44 EI/ED split patch This patch achieves complete separation of endpoint-dependent and endpoint-independent IPv4 NAT features. Some common stuff is also moved to NAT library. Type: refactor Change-Id: I52468b7e2b5ac28958a2baf8e2ea01787322e801 Signed-off-by: Filip Varga --- src/plugins/nat/nat44-ed/nat44_ed.api | 1214 +++++++++ src/plugins/nat/nat44-ed/nat44_ed.c | 3610 ++++++++++++++++++++++++++ src/plugins/nat/nat44-ed/nat44_ed.h | 1183 +++++++++ src/plugins/nat/nat44-ed/nat44_ed_affinity.c | 288 ++ src/plugins/nat/nat44-ed/nat44_ed_affinity.h | 152 ++ src/plugins/nat/nat44-ed/nat44_ed_api.c | 1541 +++++++++++ src/plugins/nat/nat44-ed/nat44_ed_classify.c | 362 +++ src/plugins/nat/nat44-ed/nat44_ed_cli.c | 2029 +++++++++++++++ src/plugins/nat/nat44-ed/nat44_ed_format.c | 277 ++ src/plugins/nat/nat44-ed/nat44_ed_handoff.c | 344 +++ src/plugins/nat/nat44-ed/nat44_ed_in2out.c | 1579 +++++++++++ src/plugins/nat/nat44-ed/nat44_ed_inlines.h | 869 +++++++ src/plugins/nat/nat44-ed/nat44_ed_out2in.c | 1443 ++++++++++ 13 files changed, 14891 insertions(+) create mode 100644 src/plugins/nat/nat44-ed/nat44_ed.api create mode 100644 src/plugins/nat/nat44-ed/nat44_ed.c create mode 100644 src/plugins/nat/nat44-ed/nat44_ed.h create mode 100644 src/plugins/nat/nat44-ed/nat44_ed_affinity.c create mode 100644 src/plugins/nat/nat44-ed/nat44_ed_affinity.h create mode 100644 src/plugins/nat/nat44-ed/nat44_ed_api.c create mode 100644 src/plugins/nat/nat44-ed/nat44_ed_classify.c create mode 100644 src/plugins/nat/nat44-ed/nat44_ed_cli.c create mode 100644 src/plugins/nat/nat44-ed/nat44_ed_format.c create mode 100644 src/plugins/nat/nat44-ed/nat44_ed_handoff.c create mode 100644 src/plugins/nat/nat44-ed/nat44_ed_in2out.c create mode 100644 src/plugins/nat/nat44-ed/nat44_ed_inlines.h create mode 100644 src/plugins/nat/nat44-ed/nat44_ed_out2in.c (limited to 'src/plugins/nat/nat44-ed') diff --git a/src/plugins/nat/nat44-ed/nat44_ed.api b/src/plugins/nat/nat44-ed/nat44_ed.api new file mode 100644 index 00000000000..6a2d44a6b9c --- /dev/null +++ b/src/plugins/nat/nat44-ed/nat44_ed.api @@ -0,0 +1,1214 @@ +/* + * Copyright (c) 2020 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +option version = "5.2.0"; +import "vnet/ip/ip_types.api"; +import "vnet/interface_types.api"; +import "plugins/nat/lib/nat_types.api"; + +/** + * @file nat44.api + * @brief VPP control-plane API messages. + * + * This file defines VPP control-plane API messages which are generally + * called through a shared memory interface. 
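+ *
+ * A note on nat44_config_flags (defined below): with the exception of
+ * NAT44_IS_ENDPOINT_INDEPENDENT, which is 0x00 and therefore simply the
+ * absence of the endpoint-dependent bit, the values are single-bit flags
+ * that can be combined. For example (illustrative only), requesting
+ * static-mapping-only operation with connection tracking means passing
+ * NAT44_IS_STATIC_MAPPING_ONLY | NAT44_IS_CONNECTION_TRACKING (0x02 | 0x04)
+ * in the flags field of nat44_plugin_enable_disable.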
+ */ + +enum nat44_config_flags : u8 +{ + NAT44_IS_ENDPOINT_INDEPENDENT = 0x00, + NAT44_IS_ENDPOINT_DEPENDENT = 0x01, + NAT44_IS_STATIC_MAPPING_ONLY = 0x02, + NAT44_IS_CONNECTION_TRACKING = 0x04, + NAT44_IS_OUT2IN_DPO = 0x08, +}; + +/** \brief Enable/disable NAT44 plugin + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param inside_vrf - inside vrf id + @param outside_vrf - outside vrf id + @param users - maximum number of users per thread + (NAT44_IS_ENDPOINT_INDEPENDENT) + @param user_memory - overwrite hash allocation parameter + (NAT44_IS_ENDPOINT_INDEPENDENT) + @param sessions - maximum number of sessions per thread + @param session_memory - overwrite hash allocation parameter + @param user_sessions - maximum number of sessions per user + (NAT44_IS_ENDPOINT_INDEPENDENT) + @param enable - true if enable, false if disable + @param flags - flag NAT44_IS_ENDPOINT_INDEPENDENT, + NAT44_IS_ENDPOINT_DEPENDENT, + NAT44_IS_STATIC_MAPPING_ONLY, + NAT44_IS_CONNECTION_TRACKING, + NAT44_IS_OUT2IN_DPO +*/ +autoreply define nat44_plugin_enable_disable { + u32 client_index; + u32 context; + u32 inside_vrf; + u32 outside_vrf; + u32 users; + u32 user_memory; + u32 sessions; + u32 session_memory; + u32 user_sessions; + bool enable; + vl_api_nat44_config_flags_t flags; +}; + +/** \brief Control ping from client to api server request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define nat_control_ping +{ + option deprecated; + u32 client_index; + u32 context; +}; + +/** \brief Control ping from the client to the server response + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param vpe_pid - the pid of the vpe, returned by the server +*/ +define nat_control_ping_reply +{ + option deprecated; + u32 context; + i32 retval; + u32 client_index; + u32 vpe_pid; +}; + +/** \brief Show NAT plugin startup config + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define nat_show_config +{ + option deprecated; + u32 client_index; + u32 context; +}; + +/** \brief DEPRECATED: Show NAT plugin startup config reply + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param static_mapping_only - if true dynamic translations disabled + @param static_mapping_connection_tracking - if true create session data + @param deterministic - if true deterministic mapping + @param endpoint_dependent - if true endpoint-dependent mode + @param out2in_dpo - if true out2in dpo mode + @param dslite_ce - if true DS-Lite is CE/B4 element, if false AFTR elemet + @param translation_buckets - number of translation hash buckets + @param translation_memory_size - translation hash memory size + @param user_buckets - number of user hash buckets + @param user_memory_size - user hash memory size + @param max_translations_per_user - maximum number of translations per user + @param outside_vrf_id - outside VRF id + @param inside_vrf_id - default inside VRF id + @param nat64_bib_buckets - number of NAT64 BIB hash buckets + @param nat64_bib_memory_size - memory size of NAT64 BIB hash + @param nat64_st_buckets - number of NAT64 session table hash buckets + @param nat64_st_memory_size - memory size of NAT64 session table hash +*/ +define 
nat_show_config_reply +{ + option deprecated; + u32 context; + i32 retval; + bool static_mapping_only; + bool static_mapping_connection_tracking; + bool deterministic; + bool endpoint_dependent; + bool out2in_dpo; + bool dslite_ce; + u32 translation_buckets; + u32 translation_memory_size; + u32 user_buckets; + u64 user_memory_size; + u32 max_translations_per_user; + u32 outside_vrf_id; + u32 inside_vrf_id; + u32 nat64_bib_buckets; + u64 nat64_bib_memory_size; + u32 nat64_st_buckets; + u64 nat64_st_memory_size; +}; + +/** \brief Show NAT plugin startup config + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define nat_show_config_2 +{ + option deprecated; + u32 client_index; + u32 context; +}; + +/** \brief Show NAT plugin startup config reply + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param static_mapping_only - if true dynamic translations disabled + @param static_mapping_connection_tracking - if true create session data + @param deterministic - if true deterministic mapping + @param endpoint_dependent - if true endpoint-dependent mode + @param out2in_dpo - if true out2in dpo mode + @param dslite_ce - if true DS-Lite is CE/B4 element, if false AFTR elemet + @param translation_buckets - number of translation hash buckets + @param translation_memory_size - translation hash memory size + @param user_buckets - number of user hash buckets + @param user_memory_size - user hash memory size + @param max_translations_per_user - maximum number of translations per user + @param outside_vrf_id - outside VRF id + @param inside_vrf_id - default inside VRF id + @param nat64_bib_buckets - number of NAT64 BIB hash buckets + @param nat64_bib_memory_size - memory size of NAT64 BIB hash + @param nat64_st_buckets - number of NAT64 session table hash buckets + @param nat64_st_memory_size - memory size of NAT64 session table hash + @param max_translations_per_thread - max translations per worker thread + @param max_users_per_thread - max users per worker thread +*/ +define nat_show_config_2_reply +{ + option deprecated; + u32 context; + i32 retval; + bool static_mapping_only; + bool static_mapping_connection_tracking; + bool deterministic; + bool endpoint_dependent; + bool out2in_dpo; + bool dslite_ce; + u32 translation_buckets; + u64 translation_memory_size; + u32 user_buckets; + u64 user_memory_size; + u32 max_translations_per_user; + u32 outside_vrf_id; + u32 inside_vrf_id; + u32 nat64_bib_buckets; + u64 nat64_bib_memory_size; + u32 nat64_st_buckets; + u64 nat64_st_memory_size; + u32 max_translations_per_thread; + u32 max_users_per_thread; +}; + +/** \brief Show NAT44 plugin running config + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define nat44_show_running_config +{ + option in_progress; + u32 client_index; + u32 context; +}; + +/** \brief Show NAT44 plugin running config reply + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param inside_vrf - default inside VRF id + @param outside_vrf - outside VRF id + @param users - maximum number of users per worker thread + (NAT44_IS_ENDPOINT_INDEPENDENT) + @param sessions - maximum number of sessions per worker thread + @param user_sessions - maximum number of sessions per user + (NAT44_IS_ENDPOINT_INDEPENDENT) + @param user_buckets - number of user hash buckets + 
(NAT44_IS_ENDPOINT_INDEPENDENT) + @param translation_buckets - number of translation hash buckets + @param flags - flag NAT44_IS_ENDPOINT_INDEPENDENT, + NAT44_IS_ENDPOINT_DEPENDENT, + NAT44_IS_STATIC_MAPPING_ONLY, + NAT44_IS_CONNECTION_TRACKING, + NAT44_IS_OUT2IN_DPO +*/ +define nat44_show_running_config_reply +{ + option in_progress; + u32 context; + i32 retval; + u32 inside_vrf; + u32 outside_vrf; + u32 users; + u32 sessions; + u32 user_sessions; + u32 user_buckets; + u32 translation_buckets; + bool forwarding_enabled; + bool ipfix_logging_enabled; + vl_api_nat_timeouts_t timeouts; + vl_api_nat_log_level_t log_level; + vl_api_nat44_config_flags_t flags; +}; + +/** \brief Run nat44 garbage collection + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +autoreply define nat44_session_cleanup { + option deprecated; + u32 client_index; + u32 context; +}; + +/** \brief NAT44 set session limit + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param session_limit - session limit + @param vrf_id - vrf id +*/ +autoreply define nat44_set_session_limit { + u32 client_index; + u32 context; + u32 session_limit; + u32 vrf_id; +}; + +/** \brief Set NAT logging level + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param log_level - logging level +*/ +autoreply define nat_set_log_level { + option deprecated; + u32 client_index; + u32 context; + vl_api_nat_log_level_t log_level; +}; + +/** \brief Set NAT workers + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param worker_mask - NAT workers mask +*/ +autoreply define nat_set_workers { + u32 client_index; + u32 context; + u64 worker_mask; +}; + +/** \brief Dump NAT workers + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define nat_worker_dump { + u32 client_index; + u32 context; +}; + +/** \brief NAT workers details response + @param context - sender context, to match reply w/ request + @param worker_index - worker index + @param lcore_id - lcore ID + @param name - worker name +*/ +define nat_worker_details { + u32 context; + u32 worker_index; + u32 lcore_id; + string name[64]; +}; + +/** \brief Enable/disable NAT IPFIX logging + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param domain_id - observation domain ID + @param src_port - source port number + @param enable - true if enable, false if disable +*/ +autoreply define nat_ipfix_enable_disable { + option deprecated; + u32 client_index; + u32 context; + u32 domain_id; + u16 src_port; + bool enable; +}; + +/** \brief Set values of timeouts for NAT sessions (seconds) + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param udp - UDP timeout (default 300sec) + @param tcp_established - TCP established timeout (default 7440sec) + @param tcp_transitory - TCP transitory timeout (default 240sec) + @param icmp - ICMP timeout (default 60sec) +*/ +autoreply define nat_set_timeouts { + option deprecated; + u32 client_index; + u32 context; + u32 udp; + u32 tcp_established; + u32 tcp_transitory; + u32 icmp; +}; + +/** \brief Get values of timeouts for NAT sessions (seconds) + @param 
client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define nat_get_timeouts { + option deprecated; + u32 client_index; + u32 context; +}; + +/** \brief Get values of timeouts for NAT sessions reply + @param context - sender context, to match reply w/ request + @param retval - return code + @param udp - UDP timeout + @param tcp_established - TCP established timeout + @param tcp_transitory - TCP transitory timeout + @param icmp - ICMP timeout +*/ +define nat_get_timeouts_reply { + option deprecated; + u32 context; + i32 retval; + u32 udp; + u32 tcp_established; + u32 tcp_transitory; + u32 icmp; +}; + +/** \brief Set address and port assignment algorithm + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param alg - address and port assignment algorithm: + 0 - default, 1 - MAP-E, 2 - port range + (see nat_addr_and_port_alloc_alg_t in nat.h) + @param psid_offset - number of offset bits (valid only for MAP-E alg) + @param psid_length - length of PSID (valid only for MAP-E alg) + @param psid - Port Set Identifier (PSID) value (valid only for MAP-E alg) + @param start_port - beginning of the port range + @param end_port - end of the port range +*/ +autoreply define nat_set_addr_and_port_alloc_alg { + u32 client_index; + u32 context; + u8 alg; + u8 psid_offset; + u8 psid_length; + u16 psid; + u16 start_port; + u16 end_port; +}; + +/** \brief Get address and port assignment algorithm + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define nat_get_addr_and_port_alloc_alg { + u32 client_index; + u32 context; +}; + +/** \brief Get address and port assignment algorithm reply + @param context - sender context, to match reply w/ request + @param retval - return code + @param alg - address and port assignment algorithm: + 0 - default, 1 - MAP-E, 2 - port range + (see nat_addr_and_port_alloc_alg_t in nat.h) + @param psid_offset - number of offset bits (valid only for MAP-E alg) + @param psid_length - length of PSID (valid only for MAP-E alg) + @param psid - Port Set Identifier (PSID) value (valid only for MAP-E alg) + @param start_port - beginning of the port range + @param end_port - end of the port range +*/ +define nat_get_addr_and_port_alloc_alg_reply { + u32 context; + i32 retval; + u8 alg; + u8 psid_offset; + u8 psid_length; + u16 psid; + u16 start_port; + u16 end_port; +}; + +/** \brief Set TCP MSS rewriting configuration + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param mss_value - MSS value to be used for MSS rewriting + @param enable - if true enable MSS rewriting feature else disable +*/ +autoreply define nat_set_mss_clamping { + u32 client_index; + u32 context; + u16 mss_value; + bool enable; +}; + +/** \brief Get TCP MSS rewriting configuration + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define nat_get_mss_clamping { + u32 client_index; + u32 context; +}; + +/** \brief Get TCP MSS rewriting configuration reply + @param context - sender context, to match reply w/ request + @param retval - return code + @param mss_value - MSS value to be used for MSS rewriting + @param enable - if true enable MSS rewriting feature else disable +*/ +define nat_get_mss_clamping_reply { + u32 context; + i32 retval; + u16 mss_value; + bool 
enable; +}; + +/** \brief Set HA listener (local settings) + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param ip_address - local IP4 address + @param port - local UDP port number + @param path_mtu - path MTU between local and failover +*/ +autoreply define nat_ha_set_listener { + u32 client_index; + u32 context; + vl_api_ip4_address_t ip_address; + u16 port; + u32 path_mtu; +}; + +/** \brief Set HA failover (remote settings) + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param ip_address - failover IP4 address + @param port - failvoer UDP port number + @param session_refresh_interval - number of seconds after which to send + session counters refresh +*/ +autoreply define nat_ha_set_failover { + u32 client_index; + u32 context; + vl_api_ip4_address_t ip_address; + u16 port; + u32 session_refresh_interval; +}; + +/** \brief Get HA listener/local configuration + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define nat_ha_get_listener { + u32 client_index; + u32 context; +}; + +/** \brief Get HA listener/local configuration reply + @param context - sender context, to match reply w/ request + @param retval - return code + @param ip_address - local IP4 address + @param port - local UDP port number + @param path_mtu - Path MTU between local and failover +*/ +define nat_ha_get_listener_reply { + u32 context; + i32 retval; + vl_api_ip4_address_t ip_address; + u16 port; + u32 path_mtu; +}; + +/** \brief Get HA failover/remote settings + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define nat_ha_get_failover { + u32 client_index; + u32 context; +}; + +/** \brief Get HA failover/remote settings reply + @param context - sender context, to match reply w/ request + @param retval - return code + @param ip_address - failover IP4 address + @param port - failvoer UDP port number + @param session_refresh_interval - number of seconds after which to send + session counters refresh +*/ +define nat_ha_get_failover_reply { + u32 context; + i32 retval; + vl_api_ip4_address_t ip_address; + u16 port; + u32 session_refresh_interval; +}; + +/** \brief Flush the current HA data (for testing) + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +autoreply define nat_ha_flush { + u32 client_index; + u32 context; +}; + +/** \brief Resync HA (resend existing sessions to new failover) + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param want_resync_event - resync completed event sent to the sender via + nat_ha_resync_completed_event API message if + non-zero + @param pid - sender's pid +*/ +autoreply define nat_ha_resync +{ + u32 client_index; + u32 context; + u8 want_resync_event; + u32 pid; +}; + +/** \brief Tell client about a HA resync completion event + @param client_index - opaque cookie to identify the sender + @param pid - client pid registered to receive notification + @param missed_count - number of missed (not ACKed) messages +*/ +define nat_ha_resync_completed_event +{ + u32 client_index; + u32 pid; + u32 missed_count; +}; + +service { + rpc nat_ha_resync returns nat_ha_resync_reply events nat_ha_resync_completed_event; +}; + +/** \brief Del NAT44 user + 
@param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param ip_address - IPv4 address + @param fib_index - FIB index +*/ +autoreply define nat44_del_user { + u32 client_index; + u32 context; + vl_api_ip4_address_t ip_address; + u32 fib_index; +}; + +/** \brief Add/del NAT44 address range + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param first_ip_address - first IPv4 address + @param last_ip_address - last IPv4 address + @param vrf_id - VRF id of tenant, ~0 means independent of VRF + @param is_add - true if add, false if delete + @param flags - flag NAT_IS_TWICE_NAT if NAT address range for external hosts + +*/ +autoreply define nat44_add_del_address_range { + u32 client_index; + u32 context; + vl_api_ip4_address_t first_ip_address; + vl_api_ip4_address_t last_ip_address; + u32 vrf_id; + bool is_add; + vl_api_nat_config_flags_t flags; +}; + +/** \brief Dump NAT44 addresses + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define nat44_address_dump { + u32 client_index; + u32 context; +}; + +/** \brief NAT44 address details response + @param context - sender context, to match reply w/ request + @param ip_address - IPv4 address + @param flags - flag NAT_IS_TWICE_NAT if NAT address range for external hosts + @param vrf_id - VRF id of tenant, ~0 means independent of VRF +*/ +define nat44_address_details { + u32 context; + vl_api_ip4_address_t ip_address; + vl_api_nat_config_flags_t flags; + u32 vrf_id; +}; + +/** \brief Enable/disable NAT44 feature on the interface + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_add - true if add, false if delete + @param flags - flag NAT_IS_INSIDE if interface is inside else + interface is outside + @param sw_if_index - software index of the interface +*/ +autoreply define nat44_interface_add_del_feature { + u32 client_index; + u32 context; + bool is_add; + vl_api_nat_config_flags_t flags; + vl_api_interface_index_t sw_if_index; +}; + +/** \brief Dump interfaces with NAT44 feature + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define nat44_interface_dump { + u32 client_index; + u32 context; +}; + +/** \brief NAT44 interface details response + @param context - sender context, to match reply w/ request + @param sw_if_index - software index of the interface + @param flags - flag NAT_IS_INSIDE if interface is inside, + flag NAT_IS_OUTSIDE if interface is outside + and if both flags are set the interface is + both inside and outside +*/ +define nat44_interface_details { + u32 context; + vl_api_nat_config_flags_t flags; + vl_api_interface_index_t sw_if_index; +}; + +/** \brief Enable/disbale NAT44 as an interface output feature (postrouting + in2out translation) + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_add - true if add, false if delete + @param flags - flag NAT_IS_INSIDE if interface is inside else + interface is outside + @param sw_if_index - software index of the interface +*/ +autoreply define nat44_interface_add_del_output_feature { + u32 client_index; + u32 context; + bool is_add; + vl_api_nat_config_flags_t flags; + vl_api_interface_index_t sw_if_index; +}; + +/** \brief Dump 
interfaces with NAT44 output feature + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define nat44_interface_output_feature_dump { + u32 client_index; + u32 context; +}; + +/** \brief NAT44 interface with output feature details response + @param context - sender context, to match reply w/ request + @param flags - flag NAT_IS_INSIDE if interface is inside else + interface is outside + @param sw_if_index - software index of the interface +*/ +define nat44_interface_output_feature_details { + u32 context; + vl_api_nat_config_flags_t flags; + vl_api_interface_index_t sw_if_index; +}; + +/** \brief Add/delete NAT44 static mapping + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_add - true if add, false if delete + @param flags - flag NAT_IS_ADDR_ONLY if address only mapping, + flag nat_is_twice_nat if nat address range for external hosts, + flag NAT_IS_SELF_TWICE_NAT if translate external host address + and port whenever external host address equals local + address of internal host, + flag NAT_IS_OUT2IN_ONLY if rule match only out2in direction + @param local_ip_address - local IPv4 address + @param external_ip_address - external IPv4 address + @param protocol - IP protocol, used only if addr_only=0 + @param local_port - local port number, used only if addr_only=0 + @param external_port - external port number, used only if addr_only=0 + @param external_sw_if_index - external interface (if set + external_ip_address is ignored, ~0 means not + used) + @param vfr_id - VRF ID + @param tag - opaque string tag +*/ +autoreply define nat44_add_del_static_mapping { + u32 client_index; + u32 context; + bool is_add; + vl_api_nat_config_flags_t flags; + vl_api_ip4_address_t local_ip_address; + vl_api_ip4_address_t external_ip_address; + u8 protocol; + u16 local_port; + u16 external_port; + vl_api_interface_index_t external_sw_if_index; + u32 vrf_id; + string tag[64]; +}; + +/** \brief Add/delete NAT44 static mapping + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_add - true if add, false if delete + @param match_pool - true if use specific pool_ip_address + @param flags - flag NAT_IS_ADDR_ONLY if address only mapping, + flag nat_is_twice_nat if nat address range for external hosts, + flag NAT_IS_SELF_TWICE_NAT if translate external host address + and port whenever external host address equals local + address of internal host, + flag NAT_IS_OUT2IN_ONLY if rule match only out2in direction + @param pool_ip_address - pool IPv4 address to match with pool + @param local_ip_address - local IPv4 address + @param external_ip_address - external IPv4 address + @param protocol - IP protocol, used only if addr_only=0 + @param local_port - local port number, used only if addr_only=0 + @param external_port - external port number, used only if addr_only=0 + @param external_sw_if_index - external interface (if set + external_ip_address is ignored, ~0 means not + used) + @param vfr_id - VRF ID + @param tag - opaque string tag +*/ +autoreply define nat44_add_del_static_mapping_v2 { + option in_progress; + u32 client_index; + u32 context; + bool is_add; + bool match_pool; + vl_api_nat_config_flags_t flags; + vl_api_ip4_address_t pool_ip_address; + vl_api_ip4_address_t local_ip_address; + vl_api_ip4_address_t external_ip_address; + u8 protocol; + u16 local_port; + u16 external_port; + 
vl_api_interface_index_t external_sw_if_index; + u32 vrf_id; + string tag[64]; +}; + +/** \brief Dump NAT44 static mappings + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define nat44_static_mapping_dump { + u32 client_index; + u32 context; +}; + +/** \brief NAT44 static mapping details response + @param context - sender context, to match reply w/ request + @param flags - flag NAT_ADDR_ONLY if address only mapping, + flag NAT_TWICE_NAT if NAT address range for external hosts, + flag NAT_SELF_TWICE_NAT if translate external host address + and port whenever external host address equals local + address of internal host, + flag NAT_OUT2IN_ONLY if rule match only out2in direction + @param local_ip_address - local IPv4 address + @param external_ip_address - external IPv4 address + @param protocol - IP protocol, valid only if no NAT_ADDR_ONLY flag + @param local_port - local port number, valid only if no NAT_ADDR_ONLY flag + @param external_port - external port number, valid only if no NAT_ADDR_ONLY flag + @param external_sw_if_index - external interface + @param vfr_id - VRF ID + @param tag - opaque string tag +*/ +define nat44_static_mapping_details { + u32 context; + vl_api_nat_config_flags_t flags; + vl_api_ip4_address_t local_ip_address; + vl_api_ip4_address_t external_ip_address; + u8 protocol; + u16 local_port; + u16 external_port; + vl_api_interface_index_t external_sw_if_index; + u32 vrf_id; + string tag[64]; +}; + +/** \brief Add/delete NAT44 identity mapping + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_add - true if add, false if delete + @param flags - flag NAT_ADDR_ONLY if address only mapping + @param ip_address - IPv4 address + @param protocol - IP protocol + @param port - port number + @param sw_if_index - interface (if set ip_address is ignored, ~0 means not + used) + @param vfr_id - VRF ID (if ~0 use default VRF) + @param tag - opaque string tag +*/ +autoreply define nat44_add_del_identity_mapping { + u32 client_index; + u32 context; + bool is_add; + vl_api_nat_config_flags_t flags; + vl_api_ip4_address_t ip_address; + u8 protocol; + u16 port; + vl_api_interface_index_t sw_if_index; + u32 vrf_id; + string tag[64]; +}; + +/** \brief Dump NAT44 identity mappings + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define nat44_identity_mapping_dump { + u32 client_index; + u32 context; +}; + +/** \brief NAT44 identity mapping details response + @param context - sender context, to match reply w/ request + @param flags - flag NAT_ADDR_ONLY if address only mapping + @param ip_address - IPv4 address + @param protocol - IP protocol + @param port - port number + @param sw_if_index - interface + @param vfr_id - VRF ID + @param tag - opaque string tag +*/ +define nat44_identity_mapping_details { + u32 context; + vl_api_nat_config_flags_t flags; + vl_api_ip4_address_t ip_address; + u8 protocol; + u16 port; + vl_api_interface_index_t sw_if_index; + u32 vrf_id; + string tag[64]; +}; + +/** \brief Add/delete NAT44 pool address from specific interfce + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_add - true if add, false if delete + @param sw_if_index - software index of the interface + @param flags - flag NAT_TWICE_NAT if NAT address range for external hosts +*/ 
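+/* Worked example for nat44_add_del_static_mapping (defined above), with all
+ * values illustrative: a TCP port-forward that exposes local
+ * 192.168.1.10:8080 as external 10.0.0.1:80 sets protocol to 6 (TCP),
+ * local_port to 8080, external_port to 80 and leaves NAT_IS_ADDR_ONLY unset
+ * so that the port fields are used; adding NAT_IS_OUT2IN_ONLY restricts the
+ * rule to the out2in direction, and NAT_IS_TWICE_NAT additionally rewrites
+ * the external host using the twice-NAT address pool, as documented in the
+ * per-message parameter descriptions.
+ */
+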
+autoreply define nat44_add_del_interface_addr { + u32 client_index; + u32 context; + bool is_add; + vl_api_interface_index_t sw_if_index; + vl_api_nat_config_flags_t flags; +}; + +/** \brief Dump NAT44 pool addresses interfaces + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define nat44_interface_addr_dump { + u32 client_index; + u32 context; +}; + +/** \brief NAT44 pool addresses interfaces details response + @param context - sender context, to match reply w/ request + @param sw_if_index - software index of the interface + @param flags - flag NAT_TWICE_NAT if NAT address range for external hosts + +*/ +define nat44_interface_addr_details { + u32 context; + vl_api_interface_index_t sw_if_index; + vl_api_nat_config_flags_t flags; +}; + +/** \brief Dump NAT44 users + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define nat44_user_dump { + u32 client_index; + u32 context; +}; + +/** \brief NAT44 users response + @param context - sender context, to match reply w/ request + @vrf_id - VRF ID + @param ip_address - IPv4 address + @param nsessions - number of dynamic sessions + @param nstaticsessions - number of static sessions +*/ +define nat44_user_details { + u32 context; + u32 vrf_id; + vl_api_ip4_address_t ip_address; + u32 nsessions; + u32 nstaticsessions; +}; + +/** \brief NAT44 user's sessions + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param ip_address - IPv4 address of the user to dump + @param vrf_id - VRF_ID +*/ +define nat44_user_session_dump { + u32 client_index; + u32 context; + vl_api_ip4_address_t ip_address; + u32 vrf_id; +}; + +/** \brief NAT44 user's sessions response + @param context - sender context, to match reply w/ request + @param outside_ip_address - outside IPv4 address + @param outside_port - outside port + @param inside_ip_address - inside IPv4 address + @param inside_port - inside port + @param protocol - protocol + @param flags - flag NAT_IS_STATIC if session is static, + flag NAT_IS_TWICE_NAT if session is twice-nat, + flag NAT_IS_EXT_HOST_VALID if external host address + and port are valid + @param last_heard - last heard timer + @param total_bytes - count of bytes sent through session + @param total_pkts - count of pakets sent through session + @param ext_host_address - external host IPv4 address + @param ext_host_port - external host port + @param ext_host_nat_address - post-NAT external host IPv4 address (valid + only if twice-nat session) + @param ext_host_nat_port - post-NAT external host port (valid only if + twice-nat session) +*/ +define nat44_user_session_details { + u32 context; + vl_api_ip4_address_t outside_ip_address; + u16 outside_port; + vl_api_ip4_address_t inside_ip_address; + u16 inside_port; + u16 protocol; + vl_api_nat_config_flags_t flags; + u64 last_heard; + u64 total_bytes; + u32 total_pkts; + vl_api_ip4_address_t ext_host_address; + u16 ext_host_port; + vl_api_ip4_address_t ext_host_nat_address; + u16 ext_host_nat_port; +}; + +/** \brief NAT44 load-balancing address and port pair + @param addr - IPv4 address of the internal node + @param port - L4 port number of the internal node + @param probability - probability of the internal node to be randomly matched + @param vrf_id - VRF id +*/ +typedef nat44_lb_addr_port { + vl_api_ip4_address_t addr; + u16 port; + u8 probability; + u32 vrf_id; +}; + +/** \brief 
Add/delete NAT44 load-balancing static mapping rule + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_add - true if add, false if delete + @param flags - flag NAT_TWICE_NAT if NAT address range for external hosts, + flag NAT_SELF_TWICE_NAT if translate external host address + and port whenever external host address equals local + address of internal host, + flag NAT_OUT2IN_ONLY if rule match only out2in direction + @param external_addr - external IPv4 address of the service + @param external_port - external L4 port number of the service + @param protocol - IP protocol number of the service + @param affinity - if 0 disabled, otherwise client IP affinity sticky time + in seconds + @param local_num - number of local network nodes + @param locals - local network nodes + @param tag - opaque string tag +*/ +autoreply define nat44_add_del_lb_static_mapping { + u32 client_index; + u32 context; + bool is_add; + vl_api_nat_config_flags_t flags; + vl_api_ip4_address_t external_addr; + u16 external_port; + u8 protocol; + u32 affinity; + string tag[64]; + u32 local_num; + vl_api_nat44_lb_addr_port_t locals[local_num]; +}; + +/** \brief Add/delete NAT44 load-balancing static mapping rule backend + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_add - true if add, false if delete + @param external_addr - external IPv4 address of the service + @param external_port - external L4 port number of the service + @param protocol - IP protocol number of the service + @param local - local network node +*/ +autoreply define nat44_lb_static_mapping_add_del_local { + u32 client_index; + u32 context; + bool is_add; + vl_api_ip4_address_t external_addr; + u16 external_port; + u8 protocol; + vl_api_nat44_lb_addr_port_t local; +}; + +/** \brief Dump NAT44 load-balancing static mapping rules + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define nat44_lb_static_mapping_dump { + u32 client_index; + u32 context; +}; + +/** \brief NAT44 load-balancing static mapping rule details response + @param context - sender context, to match reply w/ request + @param external_addr - external IPv4 address of the service + @param external_port - external L4 port number of the service + @param protocol - IP protocol number of the service + @param flags - flag NAT_TWICE_NAT if NAT address range for external hosts, + flag NAT_SELF_TWICE_NAT if translate external host address + and port whenever external host address equals local + address of internal host, + flag NAT_OUT2IN_ONLY if rule match only out2in direction + @param affinity - if 0 disabled, otherwise client IP affinity sticky time + in seconds + @param local_num - number of local network nodes + @param locals - local network nodes + @param tag - opaque string tag +*/ +define nat44_lb_static_mapping_details { + u32 context; + vl_api_ip4_address_t external_addr; + u16 external_port; + u8 protocol; + vl_api_nat_config_flags_t flags; + u32 affinity; + string tag[64]; + u32 local_num; + vl_api_nat44_lb_addr_port_t locals[local_num]; +}; + +/** \brief Delete NAT44 session + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param ip_address - IPv4 address + @param protocol - IP protocol + @param port - port number + @param vfr_id - VRF ID + @param flags - flag NAT_IS_INSIDE 
if interface is inside or + interface is outside, + flag NAT_IS_EXT_HOST_VALID if external host address and + port are valid + @param ext_host_address - external host IPv4 address + @param ext_host_port - external host port +*/ +autoreply define nat44_del_session { + u32 client_index; + u32 context; + vl_api_ip4_address_t address; + u8 protocol; + u16 port; + u32 vrf_id; + vl_api_nat_config_flags_t flags; + vl_api_ip4_address_t ext_host_address; + u16 ext_host_port; +}; + +/** \brief Enable/disable forwarding for NAT44 + Forward packets which don't match existing translation + or static mapping instead of dropping them. + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param enable - true for enable, false for disable +*/ +autoreply define nat44_forwarding_enable_disable { + option deprecated; + u32 client_index; + u32 context; + bool enable; +}; + +/** \brief Check if forwarding is enabled or disabled + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define nat44_forwarding_is_enabled { + option deprecated; + u32 client_index; + u32 context; +}; + +/** \brief Response to check if forwarding is enabled or disabled + @param context - sender context, to match reply w/ request + @param enabled - true if enabled, false if disabled +*/ +define nat44_forwarding_is_enabled_reply { + option deprecated; + u32 context; + bool enabled; +}; + +/** \brief Set NAT handoff frame queue options + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param frame_queue_nelts - number of worker handoff frame queue elements +*/ +autoreply define nat44_ed_set_fq_options { + option in_progress; + u32 client_index; + u32 context; + u32 frame_queue_nelts; +}; + +/** \brief Show NAT handoff frame queue options + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define nat44_ed_show_fq_options +{ + option in_progress; + u32 client_index; + u32 context; +}; + +/** \brief Show NAT handoff frame queue options reply + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param frame_queue_nelts - number of worker handoff frame queue elements +*/ +define nat44_ed_show_fq_options_reply +{ + option in_progress; + u32 context; + i32 retval; + u32 frame_queue_nelts; +}; diff --git a/src/plugins/nat/nat44-ed/nat44_ed.c b/src/plugins/nat/nat44-ed/nat44_ed.c new file mode 100644 index 00000000000..d9d35fc55bc --- /dev/null +++ b/src/plugins/nat/nat44-ed/nat44_ed.c @@ -0,0 +1,3610 @@ +/* + * snat.c - simple nat plugin + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +snat_main_t snat_main; + +static_always_inline void nat_validate_interface_counters (snat_main_t *sm, + u32 sw_if_index); + +#define skip_if_disabled() \ + do \ + { \ + snat_main_t *sm = &snat_main; \ + if (PREDICT_FALSE (!sm->enabled)) \ + return; \ + } \ + while (0) + +#define fail_if_enabled() \ + do \ + { \ + snat_main_t *sm = &snat_main; \ + if (PREDICT_FALSE (sm->enabled)) \ + { \ + nat_log_err ("plugin enabled"); \ + return 1; \ + } \ + } \ + while (0) + +#define fail_if_disabled() \ + do \ + { \ + snat_main_t *sm = &snat_main; \ + if (PREDICT_FALSE (!sm->enabled)) \ + { \ + nat_log_err ("plugin disabled"); \ + return 1; \ + } \ + } \ + while (0) + +/* *INDENT-OFF* */ +/* Hook up input features */ +VNET_FEATURE_INIT (nat_pre_in2out, static) = { + .arc_name = "ip4-unicast", + .node_name = "nat-pre-in2out", + .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa", + "ip4-sv-reassembly-feature"), +}; +VNET_FEATURE_INIT (nat_pre_out2in, static) = { + .arc_name = "ip4-unicast", + .node_name = "nat-pre-out2in", + .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa", + "ip4-dhcp-client-detect", + "ip4-sv-reassembly-feature"), +}; +VNET_FEATURE_INIT (snat_in2out_worker_handoff, static) = { + .arc_name = "ip4-unicast", + .node_name = "nat44-in2out-worker-handoff", + .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"), +}; +VNET_FEATURE_INIT (snat_out2in_worker_handoff, static) = { + .arc_name = "ip4-unicast", + .node_name = "nat44-out2in-worker-handoff", + .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa", + "ip4-dhcp-client-detect"), +}; +VNET_FEATURE_INIT (ip4_snat_in2out, static) = { + .arc_name = "ip4-unicast", + .node_name = "nat44-in2out", + .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"), +}; +VNET_FEATURE_INIT (ip4_snat_out2in, static) = { + .arc_name = "ip4-unicast", + .node_name = "nat44-out2in", + .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature", + "ip4-dhcp-client-detect"), +}; +VNET_FEATURE_INIT (ip4_nat44_ed_in2out, static) = { + .arc_name = "ip4-unicast", + .node_name = "nat44-ed-in2out", + .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"), +}; +VNET_FEATURE_INIT (ip4_nat44_ed_out2in, static) = { + .arc_name = "ip4-unicast", + .node_name = "nat44-ed-out2in", + .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature", + "ip4-dhcp-client-detect"), +}; +VNET_FEATURE_INIT (ip4_nat44_ed_classify, static) = { + .arc_name = "ip4-unicast", + .node_name = "nat44-ed-classify", + .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"), +}; +VNET_FEATURE_INIT (ip4_nat_handoff_classify, static) = { + .arc_name = "ip4-unicast", + .node_name = "nat44-handoff-classify", + .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"), +}; +VNET_FEATURE_INIT (ip4_snat_in2out_fast, static) = { + .arc_name = "ip4-unicast", + .node_name = "nat44-in2out-fast", + .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"), +}; +VNET_FEATURE_INIT (ip4_snat_out2in_fast, static) = { + .arc_name = "ip4-unicast", + .node_name = "nat44-out2in-fast", + .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature", + "ip4-dhcp-client-detect"), +}; + +/* Hook up output features */ +VNET_FEATURE_INIT (ip4_snat_in2out_output, 
static) = { + .arc_name = "ip4-output", + .node_name = "nat44-in2out-output", + .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa","ip4-sv-reassembly-output-feature"), +}; +VNET_FEATURE_INIT (ip4_snat_in2out_output_worker_handoff, static) = { + .arc_name = "ip4-output", + .node_name = "nat44-in2out-output-worker-handoff", + .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa","ip4-sv-reassembly-output-feature"), +}; +VNET_FEATURE_INIT (nat_pre_in2out_output, static) = { + .arc_name = "ip4-output", + .node_name = "nat-pre-in2out-output", + .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"), + .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"), +}; +VNET_FEATURE_INIT (ip4_nat44_ed_in2out_output, static) = { + .arc_name = "ip4-output", + .node_name = "nat44-ed-in2out-output", + .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"), + .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"), +}; + +VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, + .description = "Network Address Translation (NAT)", +}; +/* *INDENT-ON* */ + +static void nat44_ed_db_init (u32 translations, u32 translation_buckets); + +static void nat44_ed_db_free (); + +static u32 +nat44_ed_get_worker_out2in_cb (vlib_buffer_t * b, ip4_header_t * ip, + u32 rx_fib_index, u8 is_output); + +static u32 +nat44_ed_get_worker_in2out_cb (ip4_header_t * ip, u32 rx_fib_index, + u8 is_output); + +u32 nat_calc_bihash_buckets (u32 n_elts); + +u8 * +format_session_kvp (u8 * s, va_list * args) +{ + clib_bihash_kv_8_8_t *v = va_arg (*args, clib_bihash_kv_8_8_t *); + + s = format (s, "%U thread-index %llu session-index %llu", format_snat_key, + v->key, nat_value_get_thread_index (v), + nat_value_get_session_index (v)); + + return s; +} + +u8 * +format_static_mapping_kvp (u8 * s, va_list * args) +{ + clib_bihash_kv_8_8_t *v = va_arg (*args, clib_bihash_kv_8_8_t *); + + s = format (s, "%U static-mapping-index %llu", + format_snat_key, v->key, v->value); + + return s; +} + +u8 * +format_ed_session_kvp (u8 * s, va_list * args) +{ + clib_bihash_kv_16_8_t *v = va_arg (*args, clib_bihash_kv_16_8_t *); + + u8 proto; + u16 r_port, l_port; + ip4_address_t l_addr, r_addr; + u32 fib_index; + + split_ed_kv (v, &l_addr, &r_addr, &proto, &fib_index, &l_port, &r_port); + s = format (s, + "local %U:%d remote %U:%d proto %U fib %d thread-index %u " + "session-index %u", + format_ip4_address, &l_addr, clib_net_to_host_u16 (l_port), + format_ip4_address, &r_addr, clib_net_to_host_u16 (r_port), + format_ip_protocol, proto, fib_index, + ed_value_get_thread_index (v), ed_value_get_session_index (v)); + + return s; +} + +void +nat_free_session_data (snat_main_t * sm, snat_session_t * s, u32 thread_index, + u8 is_ha) +{ + per_vrf_sessions_unregister_session (s, thread_index); + + if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 0)) + nat_elog_warn (sm, "flow hash del failed"); + + if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 0)) + nat_elog_warn (sm, "flow hash del failed"); + + if (is_fwd_bypass_session (s)) + { + return; + } + + if (is_affinity_sessions (s)) + nat_affinity_unlock (s->ext_host_addr, s->out2in.addr, + s->nat_proto, s->out2in.port); + + if (!is_ha) + nat_syslog_nat44_sdel ( + 0, s->in2out.fib_index, &s->in2out.addr, s->in2out.port, + &s->ext_host_nat_addr, s->ext_host_nat_port, &s->out2in.addr, + s->out2in.port, &s->ext_host_addr, s->ext_host_port, s->nat_proto, + is_twice_nat_session (s)); + + if (snat_is_unk_proto_session (s)) + return; + + if (!is_ha) + { + /* log NAT event */ + 
nat_ipfix_logging_nat44_ses_delete (thread_index, + s->in2out.addr.as_u32, + s->out2in.addr.as_u32, + s->nat_proto, + s->in2out.port, + s->out2in.port, + s->in2out.fib_index); + } + + /* Twice NAT address and port for external host */ + if (is_twice_nat_session (s)) + { + snat_free_outside_address_and_port (sm->twice_nat_addresses, + thread_index, + &s->ext_host_nat_addr, + s->ext_host_nat_port, s->nat_proto); + } + + if (snat_is_session_static (s)) + return; + + snat_free_outside_address_and_port (sm->addresses, thread_index, + &s->out2in.addr, s->out2in.port, + s->nat_proto); +} + +void +snat_add_del_addr_to_fib (ip4_address_t * addr, u8 p_len, u32 sw_if_index, + int is_add) +{ + snat_main_t *sm = &snat_main; + fib_prefix_t prefix = { + .fp_len = p_len, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = addr->as_u32, + }, + }; + u32 fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index); + + if (is_add) + fib_table_entry_update_one_path (fib_index, + &prefix, + sm->fib_src_low, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_LOCAL | + FIB_ENTRY_FLAG_EXCLUSIVE), + DPO_PROTO_IP4, + NULL, + sw_if_index, + ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE); + else + fib_table_entry_delete (fib_index, &prefix, sm->fib_src_low); +} + +int +snat_add_address (snat_main_t * sm, ip4_address_t * addr, u32 vrf_id, + u8 twice_nat) +{ + snat_address_t *ap; + snat_interface_t *i; + vlib_thread_main_t *tm = vlib_get_thread_main (); + + /* Check if address already exists */ + /* *INDENT-OFF* */ + vec_foreach (ap, twice_nat ? sm->twice_nat_addresses : sm->addresses) + { + if (ap->addr.as_u32 == addr->as_u32) + { + nat_log_err ("address exist"); + return VNET_API_ERROR_VALUE_EXIST; + } + } + /* *INDENT-ON* */ + + if (twice_nat) + vec_add2 (sm->twice_nat_addresses, ap, 1); + else + vec_add2 (sm->addresses, ap, 1); + + ap->addr = *addr; + if (vrf_id != ~0) + ap->fib_index = + fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id, + sm->fib_src_low); + else + ap->fib_index = ~0; + + /* *INDENT-OFF* */ + #define _(N, i, n, s) \ + clib_memset(ap->busy_##n##_port_refcounts, 0, sizeof(ap->busy_##n##_port_refcounts));\ + ap->busy_##n##_ports = 0; \ + ap->busy_##n##_ports_per_thread = 0;\ + vec_validate_init_empty (ap->busy_##n##_ports_per_thread, tm->n_vlib_mains - 1, 0); + foreach_nat_protocol + #undef _ + /* *INDENT-ON* */ + + if (twice_nat) + return 0; + + /* Add external address to FIB */ + /* *INDENT-OFF* */ + pool_foreach (i, sm->interfaces) + { + if (nat_interface_is_inside (i)) + continue; + + snat_add_del_addr_to_fib (addr, 32, i->sw_if_index, 1); + break; + } + pool_foreach (i, sm->output_feature_interfaces) + { + if (nat_interface_is_inside (i)) + continue; + + snat_add_del_addr_to_fib (addr, 32, i->sw_if_index, 1); + break; + } + /* *INDENT-ON* */ + + return 0; +} + +static int +is_snat_address_used_in_static_mapping (snat_main_t * sm, ip4_address_t addr) +{ + snat_static_mapping_t *m; + /* *INDENT-OFF* */ + pool_foreach (m, sm->static_mappings) + { + if (is_addr_only_static_mapping (m) || + is_out2in_only_static_mapping (m) || + is_identity_static_mapping (m)) + continue; + if (m->external_addr.as_u32 == addr.as_u32) + return 1; + } + /* *INDENT-ON* */ + + return 0; +} + +static void +snat_add_static_mapping_when_resolved (snat_main_t *sm, ip4_address_t l_addr, + u16 l_port, u32 sw_if_index, u16 e_port, + u32 vrf_id, nat_protocol_t proto, + int addr_only, u8 *tag, int twice_nat, + int out2in_only, int identity_nat, + ip4_address_t pool_addr, int exact) +{ + snat_static_map_resolve_t 
*rp; + + vec_add2 (sm->to_resolve, rp, 1); + rp->l_addr.as_u32 = l_addr.as_u32; + rp->l_port = l_port; + rp->sw_if_index = sw_if_index; + rp->e_port = e_port; + rp->vrf_id = vrf_id; + rp->proto = proto; + rp->addr_only = addr_only; + rp->twice_nat = twice_nat; + rp->out2in_only = out2in_only; + rp->identity_nat = identity_nat; + rp->tag = vec_dup (tag); + rp->pool_addr = pool_addr; + rp->exact = exact; +} + +u32 +get_thread_idx_by_port (u16 e_port) +{ + snat_main_t *sm = &snat_main; + u32 thread_idx = sm->num_workers; + if (sm->num_workers > 1) + { + thread_idx = + sm->first_worker_index + + sm->workers[(e_port - 1024) / sm->port_per_thread]; + } + return thread_idx; +} + +void +nat_ed_static_mapping_del_sessions (snat_main_t * sm, + snat_main_per_thread_data_t * tsm, + ip4_address_t l_addr, + u16 l_port, + u8 protocol, + u32 fib_index, int addr_only, + ip4_address_t e_addr, u16 e_port) +{ + snat_session_t *s; + u32 *indexes_to_free = NULL; + /* *INDENT-OFF* */ + pool_foreach (s, tsm->sessions) { + if (s->in2out.fib_index != fib_index || + s->in2out.addr.as_u32 != l_addr.as_u32) + { + continue; + } + if (!addr_only) + { + if ((s->out2in.addr.as_u32 != e_addr.as_u32) || + s->out2in.port != e_port || + s->in2out.port != l_port || + s->nat_proto != protocol) + continue; + } + + if (is_lb_session (s)) + continue; + if (!snat_is_session_static (s)) + continue; + nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0); + vec_add1 (indexes_to_free, s - tsm->sessions); + if (!addr_only) + break; + } + /* *INDENT-ON* */ + u32 *ses_index; + vec_foreach (ses_index, indexes_to_free) + { + s = pool_elt_at_index (tsm->sessions, *ses_index); + nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1); + } + vec_free (indexes_to_free); +} + +int +snat_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr, + u16 l_port, u16 e_port, u32 vrf_id, int addr_only, + u32 sw_if_index, nat_protocol_t proto, int is_add, + twice_nat_type_t twice_nat, u8 out2in_only, u8 *tag, + u8 identity_nat, ip4_address_t pool_addr, int exact) +{ + snat_main_t *sm = &snat_main; + snat_static_mapping_t *m; + clib_bihash_kv_8_8_t kv, value; + snat_address_t *a = 0; + u32 fib_index = ~0; + snat_interface_t *interface; + snat_main_per_thread_data_t *tsm; + snat_static_map_resolve_t *rp, *rp_match = 0; + nat44_lb_addr_port_t *local; + u32 find = ~0; + int i; + + /* If the external address is a specific interface address */ + if (sw_if_index != ~0) + { + ip4_address_t *first_int_addr; + + for (i = 0; i < vec_len (sm->to_resolve); i++) + { + rp = sm->to_resolve + i; + if (rp->sw_if_index != sw_if_index || + rp->l_addr.as_u32 != l_addr.as_u32 || + rp->vrf_id != vrf_id || rp->addr_only != addr_only) + continue; + + if (!addr_only) + { + if ((rp->l_port != l_port && rp->e_port != e_port) + || rp->proto != proto) + continue; + } + + rp_match = rp; + break; + } + + /* Might be already set... */ + first_int_addr = ip4_interface_first_address + (sm->ip4_main, sw_if_index, 0 /* just want the address */ ); + + if (is_add) + { + if (rp_match) + return VNET_API_ERROR_VALUE_EXIST; + + snat_add_static_mapping_when_resolved ( + sm, l_addr, l_port, sw_if_index, e_port, vrf_id, proto, addr_only, + tag, twice_nat, out2in_only, identity_nat, pool_addr, exact); + + /* DHCP resolution required? */ + if (first_int_addr == 0) + { + return 0; + } + else + { + e_addr.as_u32 = first_int_addr->as_u32; + /* Identity mapping? 
*/ + if (l_addr.as_u32 == 0) + l_addr.as_u32 = e_addr.as_u32; + } + } + else + { + if (!rp_match) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + vec_del1 (sm->to_resolve, i); + + if (first_int_addr) + { + e_addr.as_u32 = first_int_addr->as_u32; + /* Identity mapping? */ + if (l_addr.as_u32 == 0) + l_addr.as_u32 = e_addr.as_u32; + } + else + return 0; + } + } + + init_nat_k (&kv, e_addr, addr_only ? 0 : e_port, 0, addr_only ? 0 : proto); + if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value)) + m = 0; + else + m = pool_elt_at_index (sm->static_mappings, value.value); + + if (is_add) + { + if (m) + { + if (is_identity_static_mapping (m)) + { + /* *INDENT-OFF* */ + pool_foreach (local, m->locals) + { + if (local->vrf_id == vrf_id) + return VNET_API_ERROR_VALUE_EXIST; + } + /* *INDENT-ON* */ + pool_get (m->locals, local); + local->vrf_id = vrf_id; + local->fib_index = + fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id, + sm->fib_src_low); + init_nat_kv (&kv, m->local_addr, m->local_port, local->fib_index, + m->proto, 0, m - sm->static_mappings); + clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1); + return 0; + } + else + return VNET_API_ERROR_VALUE_EXIST; + } + + if (twice_nat && addr_only) + return VNET_API_ERROR_UNSUPPORTED; + + /* Convert VRF id to FIB index */ + if (vrf_id != ~0) + fib_index = + fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id, + sm->fib_src_low); + /* If not specified use inside VRF id from SNAT plugin startup config */ + else + { + fib_index = sm->inside_fib_index; + vrf_id = sm->inside_vrf_id; + fib_table_lock (fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low); + } + + if (!(out2in_only || identity_nat)) + { + init_nat_k (&kv, l_addr, addr_only ? 0 : l_port, fib_index, + addr_only ? 
0 : proto); + if (!clib_bihash_search_8_8 + (&sm->static_mapping_by_local, &kv, &value)) + return VNET_API_ERROR_VALUE_EXIST; + } + + /* Find external address in allocated addresses and reserve port for + address and port pair mapping when dynamic translations enabled */ + if (!(addr_only || sm->static_mapping_only || out2in_only)) + { + for (i = 0; i < vec_len (sm->addresses); i++) + { + if (sm->addresses[i].addr.as_u32 == e_addr.as_u32) + { + a = sm->addresses + i; + /* External port must be unused */ + switch (proto) + { +#define _(N, j, n, s) \ + case NAT_PROTOCOL_##N: \ + if (a->busy_##n##_port_refcounts[e_port]) \ + return VNET_API_ERROR_INVALID_VALUE; \ + ++a->busy_##n##_port_refcounts[e_port]; \ + if (e_port > 1024) \ + { \ + a->busy_##n##_ports++; \ + a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]++; \ + } \ + break; + foreach_nat_protocol +#undef _ + default : nat_elog_info (sm, "unknown protocol"); + return VNET_API_ERROR_INVALID_VALUE_2; + } + break; + } + } + /* External address must be allocated */ + if (!a && (l_addr.as_u32 != e_addr.as_u32)) + { + if (sw_if_index != ~0) + { + for (i = 0; i < vec_len (sm->to_resolve); i++) + { + rp = sm->to_resolve + i; + if (rp->addr_only) + continue; + if (rp->sw_if_index != sw_if_index && + rp->l_addr.as_u32 != l_addr.as_u32 && + rp->vrf_id != vrf_id && rp->l_port != l_port && + rp->e_port != e_port && rp->proto != proto) + continue; + + vec_del1 (sm->to_resolve, i); + break; + } + } + return VNET_API_ERROR_NO_SUCH_ENTRY; + } + } + + pool_get (sm->static_mappings, m); + clib_memset (m, 0, sizeof (*m)); + m->tag = vec_dup (tag); + m->local_addr = l_addr; + m->external_addr = e_addr; + m->twice_nat = twice_nat; + + if (twice_nat == TWICE_NAT && exact) + { + m->flags |= NAT_STATIC_MAPPING_FLAG_EXACT_ADDRESS; + m->pool_addr = pool_addr; + } + + if (out2in_only) + m->flags |= NAT_STATIC_MAPPING_FLAG_OUT2IN_ONLY; + if (addr_only) + m->flags |= NAT_STATIC_MAPPING_FLAG_ADDR_ONLY; + if (identity_nat) + { + m->flags |= NAT_STATIC_MAPPING_FLAG_IDENTITY_NAT; + pool_get (m->locals, local); + local->vrf_id = vrf_id; + local->fib_index = fib_index; + } + else + { + m->vrf_id = vrf_id; + m->fib_index = fib_index; + } + if (!addr_only) + { + m->local_port = l_port; + m->external_port = e_port; + m->proto = proto; + } + + if (sm->num_workers > 1) + { + ip4_header_t ip = { + .src_address = m->local_addr, + }; + vec_add1 (m->workers, sm->worker_in2out_cb (&ip, m->fib_index, 0)); + tsm = vec_elt_at_index (sm->per_thread_data, m->workers[0]); + } + else + tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers); + + if (!out2in_only) + { + init_nat_kv (&kv, m->local_addr, m->local_port, fib_index, m->proto, + 0, m - sm->static_mappings); + clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1); + } + + init_nat_kv (&kv, m->external_addr, m->external_port, 0, m->proto, 0, + m - sm->static_mappings); + clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 1); + } + else + { + if (!m) + { + if (sw_if_index != ~0) + return 0; + else + return VNET_API_ERROR_NO_SUCH_ENTRY; + } + + if (identity_nat) + { + if (vrf_id == ~0) + vrf_id = sm->inside_vrf_id; + + /* *INDENT-OFF* */ + pool_foreach (local, m->locals) + { + if (local->vrf_id == vrf_id) + find = local - m->locals; + } + /* *INDENT-ON* */ + if (find == ~0) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + local = pool_elt_at_index (m->locals, find); + fib_index = local->fib_index; + pool_put (m->locals, local); + } + else + fib_index = m->fib_index; + + /* Free external address 
port */ + if (!(addr_only || sm->static_mapping_only || out2in_only)) + { + for (i = 0; i < vec_len (sm->addresses); i++) + { + if (sm->addresses[i].addr.as_u32 == e_addr.as_u32) + { + a = sm->addresses + i; + switch (proto) + { +#define _(N, j, n, s) \ + case NAT_PROTOCOL_##N: \ + --a->busy_##n##_port_refcounts[e_port]; \ + if (e_port > 1024) \ + { \ + a->busy_##n##_ports--; \ + a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]--; \ + } \ + break; + foreach_nat_protocol +#undef _ + default : nat_elog_info (sm, "unknown protocol"); + return VNET_API_ERROR_INVALID_VALUE_2; + } + break; + } + } + } + + if (sm->num_workers > 1) + tsm = vec_elt_at_index (sm->per_thread_data, m->workers[0]); + else + tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers); + + init_nat_k (&kv, m->local_addr, m->local_port, fib_index, m->proto); + if (!out2in_only) + clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 0); + + /* Delete session(s) for static mapping if exist */ + if (!(sm->static_mapping_only) || + (sm->static_mapping_only && sm->static_mapping_connection_tracking)) + { + nat_ed_static_mapping_del_sessions ( + sm, tsm, m->local_addr, m->local_port, m->proto, fib_index, + addr_only, e_addr, e_port); + } + + fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low); + if (pool_elts (m->locals)) + return 0; + + init_nat_k (&kv, m->external_addr, m->external_port, 0, m->proto); + clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 0); + + vec_free (m->tag); + vec_free (m->workers); + /* Delete static mapping from pool */ + pool_put (sm->static_mappings, m); + } + + if (!addr_only || (l_addr.as_u32 == e_addr.as_u32)) + return 0; + + /* Add/delete external address to FIB */ + /* *INDENT-OFF* */ + pool_foreach (interface, sm->interfaces) + { + if (nat_interface_is_inside (interface)) + continue; + + snat_add_del_addr_to_fib (&e_addr, 32, interface->sw_if_index, is_add); + break; + } + pool_foreach (interface, sm->output_feature_interfaces) + { + if (nat_interface_is_inside (interface)) + continue; + + snat_add_del_addr_to_fib (&e_addr, 32, interface->sw_if_index, is_add); + break; + } + /* *INDENT-ON* */ + + return 0; +} + +int +nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, + nat_protocol_t proto, + nat44_lb_addr_port_t * locals, u8 is_add, + twice_nat_type_t twice_nat, u8 out2in_only, + u8 * tag, u32 affinity) +{ + snat_main_t *sm = &snat_main; + snat_static_mapping_t *m; + clib_bihash_kv_8_8_t kv, value; + snat_address_t *a = 0; + int i; + nat44_lb_addr_port_t *local; + snat_main_per_thread_data_t *tsm; + snat_session_t *s; + uword *bitmap = 0; + + init_nat_k (&kv, e_addr, e_port, 0, proto); + if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value)) + m = 0; + else + m = pool_elt_at_index (sm->static_mappings, value.value); + + if (is_add) + { + if (m) + return VNET_API_ERROR_VALUE_EXIST; + + if (vec_len (locals) < 2) + return VNET_API_ERROR_INVALID_VALUE; + + /* Find external address in allocated addresses and reserve port for + address and port pair mapping when dynamic translations enabled */ + if (!(sm->static_mapping_only || out2in_only)) + { + for (i = 0; i < vec_len (sm->addresses); i++) + { + if (sm->addresses[i].addr.as_u32 == e_addr.as_u32) + { + a = sm->addresses + i; + /* External port must be unused */ + switch (proto) + { +#define _(N, j, n, s) \ + case NAT_PROTOCOL_##N: \ + if (a->busy_##n##_port_refcounts[e_port]) \ + return VNET_API_ERROR_INVALID_VALUE; \ + ++a->busy_##n##_port_refcounts[e_port]; \ + if 
(e_port > 1024) \ + { \ + a->busy_##n##_ports++; \ + a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]++; \ + } \ + break; + foreach_nat_protocol +#undef _ + default : nat_elog_info (sm, "unknown protocol"); + return VNET_API_ERROR_INVALID_VALUE_2; + } + break; + } + } + /* External address must be allocated */ + if (!a) + return VNET_API_ERROR_NO_SUCH_ENTRY; + } + + pool_get (sm->static_mappings, m); + clib_memset (m, 0, sizeof (*m)); + m->tag = vec_dup (tag); + m->external_addr = e_addr; + m->external_port = e_port; + m->proto = proto; + m->twice_nat = twice_nat; + m->flags |= NAT_STATIC_MAPPING_FLAG_LB; + if (out2in_only) + m->flags |= NAT_STATIC_MAPPING_FLAG_OUT2IN_ONLY; + m->affinity = affinity; + + if (affinity) + m->affinity_per_service_list_head_index = + nat_affinity_get_per_service_list_head_index (); + else + m->affinity_per_service_list_head_index = ~0; + + init_nat_kv (&kv, m->external_addr, m->external_port, 0, m->proto, 0, + m - sm->static_mappings); + if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 1)) + { + nat_elog_err (sm, "static_mapping_by_external key add failed"); + return VNET_API_ERROR_UNSPECIFIED; + } + + for (i = 0; i < vec_len (locals); i++) + { + locals[i].fib_index = + fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, + locals[i].vrf_id, + sm->fib_src_low); + if (!out2in_only) + { + init_nat_kv (&kv, locals[i].addr, locals[i].port, + locals[i].fib_index, m->proto, 0, + m - sm->static_mappings); + clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1); + } + locals[i].prefix = (i == 0) ? locals[i].probability : + (locals[i - 1].prefix + locals[i].probability); + pool_get (m->locals, local); + *local = locals[i]; + if (sm->num_workers > 1) + { + ip4_header_t ip = { + .src_address = locals[i].addr, + }; + bitmap = + clib_bitmap_set (bitmap, + sm->worker_in2out_cb (&ip, m->fib_index, 0), + 1); + } + } + + /* Assign workers */ + if (sm->num_workers > 1) + { + /* *INDENT-OFF* */ + clib_bitmap_foreach (i, bitmap) + { + vec_add1(m->workers, i); + } + /* *INDENT-ON* */ + } + } + else + { + if (!m) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + if (!is_lb_static_mapping (m)) + return VNET_API_ERROR_INVALID_VALUE; + + /* Free external address port */ + if (!(sm->static_mapping_only || out2in_only)) + { + for (i = 0; i < vec_len (sm->addresses); i++) + { + if (sm->addresses[i].addr.as_u32 == e_addr.as_u32) + { + a = sm->addresses + i; + switch (proto) + { +#define _(N, j, n, s) \ + case NAT_PROTOCOL_##N: \ + --a->busy_##n##_port_refcounts[e_port]; \ + if (e_port > 1024) \ + { \ + a->busy_##n##_ports--; \ + a->busy_##n##_ports_per_thread[get_thread_idx_by_port(e_port)]--; \ + } \ + break; + foreach_nat_protocol +#undef _ + default : nat_elog_info (sm, "unknown protocol"); + return VNET_API_ERROR_INVALID_VALUE_2; + } + break; + } + } + } + + init_nat_k (&kv, m->external_addr, m->external_port, 0, m->proto); + if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 0)) + { + nat_elog_err (sm, "static_mapping_by_external key del failed"); + return VNET_API_ERROR_UNSPECIFIED; + } + + /* *INDENT-OFF* */ + pool_foreach (local, m->locals) + { + fib_table_unlock (local->fib_index, FIB_PROTOCOL_IP4, + sm->fib_src_low); + if (!out2in_only) + { +init_nat_k(& kv, local->addr, local->port, local->fib_index, m->proto); + if (clib_bihash_add_del_8_8(&sm->static_mapping_by_local, &kv, 0)) + { + nat_elog_err (sm, "static_mapping_by_local key del failed"); + return VNET_API_ERROR_UNSPECIFIED; + } + } + + if (sm->num_workers > 1) + { + 
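+	  /* Sessions for this backend live on the worker thread that
+	   * in2out traffic from local->addr hashes to; look up that
+	   * worker's per-thread data so its sessions can be walked and
+	   * deleted below. */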
ip4_header_t ip = { + .src_address = local->addr, + }; + tsm = + vec_elt_at_index (sm->per_thread_data, + sm->worker_in2out_cb (&ip, m->fib_index, 0)); + } + else + tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers); + + /* Delete sessions */ + pool_foreach (s, tsm->sessions) + { + if (!(is_lb_session (s))) + continue; + + if ((s->in2out.addr.as_u32 != local->addr.as_u32) || + s->in2out.port != local->port) + continue; + + nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0); + nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1); + } + } + /* *INDENT-ON* */ + if (m->affinity) + nat_affinity_flush_service (m->affinity_per_service_list_head_index); + pool_free (m->locals); + vec_free (m->tag); + vec_free (m->workers); + + pool_put (sm->static_mappings, m); + } + + return 0; +} + +int +nat44_lb_static_mapping_add_del_local (ip4_address_t e_addr, u16 e_port, + ip4_address_t l_addr, u16 l_port, + nat_protocol_t proto, u32 vrf_id, + u8 probability, u8 is_add) +{ + snat_main_t *sm = &snat_main; + snat_static_mapping_t *m = 0; + clib_bihash_kv_8_8_t kv, value; + nat44_lb_addr_port_t *local, *prev_local, *match_local = 0; + snat_main_per_thread_data_t *tsm; + snat_session_t *s; + u32 *locals = 0; + uword *bitmap = 0; + int i; + + init_nat_k (&kv, e_addr, e_port, 0, proto); + if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value)) + m = pool_elt_at_index (sm->static_mappings, value.value); + + if (!m) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + if (!is_lb_static_mapping (m)) + return VNET_API_ERROR_INVALID_VALUE; + + /* *INDENT-OFF* */ + pool_foreach (local, m->locals) + { + if ((local->addr.as_u32 == l_addr.as_u32) && (local->port == l_port) && + (local->vrf_id == vrf_id)) + { + match_local = local; + break; + } + } + /* *INDENT-ON* */ + + if (is_add) + { + if (match_local) + return VNET_API_ERROR_VALUE_EXIST; + + pool_get (m->locals, local); + clib_memset (local, 0, sizeof (*local)); + local->addr.as_u32 = l_addr.as_u32; + local->port = l_port; + local->probability = probability; + local->vrf_id = vrf_id; + local->fib_index = + fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id, + sm->fib_src_low); + + if (!is_out2in_only_static_mapping (m)) + { + init_nat_kv (&kv, l_addr, l_port, local->fib_index, proto, 0, + m - sm->static_mappings); + if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1)) + nat_elog_err (sm, "static_mapping_by_local key add failed"); + } + } + else + { + if (!match_local) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + if (pool_elts (m->locals) < 3) + return VNET_API_ERROR_UNSPECIFIED; + + fib_table_unlock (match_local->fib_index, FIB_PROTOCOL_IP4, + sm->fib_src_low); + + if (!is_out2in_only_static_mapping (m)) + { + init_nat_k (&kv, l_addr, l_port, match_local->fib_index, proto); + if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 0)) + nat_elog_err (sm, "static_mapping_by_local key del failed"); + } + + if (sm->num_workers > 1) + { + ip4_header_t ip = { + .src_address = local->addr, + }; + tsm = vec_elt_at_index (sm->per_thread_data, + sm->worker_in2out_cb (&ip, m->fib_index, + 0)); + } + else + tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers); + + /* Delete sessions */ + /* *INDENT-OFF* */ + pool_foreach (s, tsm->sessions) { + if (!(is_lb_session (s))) + continue; + + if ((s->in2out.addr.as_u32 != match_local->addr.as_u32) || + s->in2out.port != match_local->port) + continue; + + nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0); + nat_ed_session_delete (sm, s, tsm - 
sm->per_thread_data, 1); + } + /* *INDENT-ON* */ + + pool_put (m->locals, match_local); + } + + vec_free (m->workers); + + /* *INDENT-OFF* */ + pool_foreach (local, m->locals) + { + vec_add1 (locals, local - m->locals); + if (sm->num_workers > 1) + { + ip4_header_t ip; + ip.src_address.as_u32 = local->addr.as_u32, + bitmap = clib_bitmap_set (bitmap, + sm->worker_in2out_cb (&ip, local->fib_index, 0), + 1); + } + } + /* *INDENT-ON* */ + + ASSERT (vec_len (locals) > 1); + + local = pool_elt_at_index (m->locals, locals[0]); + local->prefix = local->probability; + for (i = 1; i < vec_len (locals); i++) + { + local = pool_elt_at_index (m->locals, locals[i]); + prev_local = pool_elt_at_index (m->locals, locals[i - 1]); + local->prefix = local->probability + prev_local->prefix; + } + + /* Assign workers */ + if (sm->num_workers > 1) + { + /* *INDENT-OFF* */ + clib_bitmap_foreach (i, bitmap) { vec_add1(m->workers, i); } + /* *INDENT-ON* */ + } + + return 0; +} + +int +snat_del_address (snat_main_t * sm, ip4_address_t addr, u8 delete_sm, + u8 twice_nat) +{ + snat_address_t *a = 0; + snat_session_t *ses; + u32 *ses_to_be_removed = 0, *ses_index; + snat_main_per_thread_data_t *tsm; + snat_static_mapping_t *m; + snat_interface_t *interface; + int i; + snat_address_t *addresses = + twice_nat ? sm->twice_nat_addresses : sm->addresses; + + /* Find SNAT address */ + for (i = 0; i < vec_len (addresses); i++) + { + if (addresses[i].addr.as_u32 == addr.as_u32) + { + a = addresses + i; + break; + } + } + if (!a) + { + nat_log_err ("no such address"); + return VNET_API_ERROR_NO_SUCH_ENTRY; + } + + if (delete_sm) + { + ip4_address_t pool_addr = { 0 }; + /* *INDENT-OFF* */ + pool_foreach (m, sm->static_mappings) + { + if (m->external_addr.as_u32 == addr.as_u32) + (void) snat_add_static_mapping (m->local_addr, m->external_addr, + m->local_port, m->external_port, + m->vrf_id, + is_addr_only_static_mapping(m), ~0, + m->proto, 0 /* is_add */, + m->twice_nat, + is_out2in_only_static_mapping(m), + m->tag, + is_identity_static_mapping(m), + pool_addr, 0); + } + /* *INDENT-ON* */ + } + else + { + /* Check if address is used in some static mapping */ + if (is_snat_address_used_in_static_mapping (sm, addr)) + { + nat_log_err ("address used in static mapping"); + return VNET_API_ERROR_UNSPECIFIED; + } + } + + if (a->fib_index != ~0) + fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low); + + /* Delete sessions using address */ + if (a->busy_tcp_ports || a->busy_udp_ports || a->busy_icmp_ports) + { + vec_foreach (tsm, sm->per_thread_data) + { + pool_foreach (ses, tsm->sessions) { + if (ses->out2in.addr.as_u32 == addr.as_u32) + { + nat_free_session_data (sm, ses, tsm - sm->per_thread_data, 0); + vec_add1 (ses_to_be_removed, ses - tsm->sessions); + } + } + + vec_foreach (ses_index, ses_to_be_removed) + { + ses = pool_elt_at_index (tsm->sessions, ses_index[0]); + nat_ed_session_delete (sm, ses, tsm - sm->per_thread_data, 1); + } + + vec_free (ses_to_be_removed); + } + } + +#define _(N, i, n, s) \ + vec_free (a->busy_##n##_ports_per_thread); + foreach_nat_protocol +#undef _ + + if (twice_nat) + { + vec_del1 (sm->twice_nat_addresses, i); + return 0; + } + else vec_del1 (sm->addresses, i); + + /* Delete external address from FIB */ + pool_foreach (interface, sm->interfaces) + { + if (nat_interface_is_inside (interface)) + continue; + + snat_add_del_addr_to_fib (&addr, 32, interface->sw_if_index, 0); + break; + } + pool_foreach (interface, sm->output_feature_interfaces) + { + if (nat_interface_is_inside (interface)) + 
continue; + + snat_add_del_addr_to_fib (&addr, 32, interface->sw_if_index, 0); + break; + } + + return 0; +} + +void +expire_per_vrf_sessions (u32 fib_index) +{ + per_vrf_sessions_t *per_vrf_sessions; + snat_main_per_thread_data_t *tsm; + snat_main_t *sm = &snat_main; + + /* *INDENT-OFF* */ + vec_foreach (tsm, sm->per_thread_data) + { + vec_foreach (per_vrf_sessions, tsm->per_vrf_sessions_vec) + { + if ((per_vrf_sessions->rx_fib_index == fib_index) || + (per_vrf_sessions->tx_fib_index == fib_index)) + { + per_vrf_sessions->expired = 1; + } + } + } + /* *INDENT-ON* */ +} + +void +update_per_vrf_sessions_vec (u32 fib_index, int is_del) +{ + snat_main_t *sm = &snat_main; + nat_fib_t *fib; + + // we don't care if it is outside/inside fib + // we just care about their ref_count + // if it reaches 0 sessions should expire + // because the fib isn't valid for NAT anymore + + vec_foreach (fib, sm->fibs) + { + if (fib->fib_index == fib_index) + { + if (is_del) + { + fib->ref_count--; + if (!fib->ref_count) + { + vec_del1 (sm->fibs, fib - sm->fibs); + expire_per_vrf_sessions (fib_index); + } + return; + } + else + fib->ref_count++; + } + } + if (!is_del) + { + vec_add2 (sm->fibs, fib, 1); + fib->ref_count = 1; + fib->fib_index = fib_index; + } +} + +int +snat_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del) +{ + snat_main_t *sm = &snat_main; + snat_interface_t *i; + const char *feature_name, *del_feature_name; + snat_address_t *ap; + snat_static_mapping_t *m; + nat_outside_fib_t *outside_fib; + u32 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, + sw_if_index); + + if (!sm->enabled) + { + nat_log_err ("nat44 is disabled"); + return VNET_API_ERROR_UNSUPPORTED; + } + + /* *INDENT-OFF* */ + pool_foreach (i, sm->output_feature_interfaces) + { + if (i->sw_if_index == sw_if_index) + { + nat_log_err ("error interface already configured"); + return VNET_API_ERROR_VALUE_EXIST; + } + } + /* *INDENT-ON* */ + + if (sm->static_mapping_only && !(sm->static_mapping_connection_tracking)) + feature_name = is_inside ? "nat44-in2out-fast" : "nat44-out2in-fast"; + else + { + if (sm->num_workers > 1) + feature_name = + is_inside ? "nat44-in2out-worker-handoff" : + "nat44-out2in-worker-handoff"; + else + feature_name = is_inside ? 
"nat-pre-in2out" : "nat-pre-out2in"; + } + + ASSERT (sm->frame_queue_nelts > 0); + + if (sm->fq_in2out_index == ~0 && sm->num_workers > 1) + sm->fq_in2out_index = vlib_frame_queue_main_init (sm->in2out_node_index, + sm->frame_queue_nelts); + + if (sm->fq_out2in_index == ~0 && sm->num_workers > 1) + sm->fq_out2in_index = vlib_frame_queue_main_init (sm->out2in_node_index, + sm->frame_queue_nelts); + + update_per_vrf_sessions_vec (fib_index, is_del); + + if (!is_inside) + { + /* *INDENT-OFF* */ + vec_foreach (outside_fib, sm->outside_fibs) + { + if (outside_fib->fib_index == fib_index) + { + if (is_del) + { + outside_fib->refcount--; + if (!outside_fib->refcount) + vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs); + } + else + outside_fib->refcount++; + goto feature_set; + } + } + /* *INDENT-ON* */ + if (!is_del) + { + vec_add2 (sm->outside_fibs, outside_fib, 1); + outside_fib->refcount = 1; + outside_fib->fib_index = fib_index; + } + } + +feature_set: + /* *INDENT-OFF* */ + pool_foreach (i, sm->interfaces) + { + if (i->sw_if_index == sw_if_index) + { + if (is_del) + { + if (nat_interface_is_inside(i) && nat_interface_is_outside(i)) + { + if (is_inside) + i->flags &= ~NAT_INTERFACE_FLAG_IS_INSIDE; + else + i->flags &= ~NAT_INTERFACE_FLAG_IS_OUTSIDE; + + if (sm->num_workers > 1) + { + del_feature_name = "nat44-handoff-classify"; + feature_name = !is_inside ? "nat44-in2out-worker-handoff" : + "nat44-out2in-worker-handoff"; + } + else + { + del_feature_name = "nat44-ed-classify"; + feature_name = + !is_inside ? "nat-pre-in2out" : "nat-pre-out2in"; + } + + int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0); + if (rv) + return rv; + vnet_feature_enable_disable ("ip4-unicast", del_feature_name, + sw_if_index, 0, 0, 0); + vnet_feature_enable_disable ("ip4-unicast", feature_name, + sw_if_index, 1, 0, 0); + } + else + { + int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0); + if (rv) + return rv; + vnet_feature_enable_disable ("ip4-unicast", feature_name, + sw_if_index, 0, 0, 0); + pool_put (sm->interfaces, i); + } + } + else + { + if ((nat_interface_is_inside (i) && is_inside) || + (nat_interface_is_outside (i) && !is_inside)) + return 0; + + if (sm->num_workers > 1) + { + del_feature_name = !is_inside ? "nat44-in2out-worker-handoff" : + "nat44-out2in-worker-handoff"; + feature_name = "nat44-handoff-classify"; + } + else + { + del_feature_name = + !is_inside ? 
"nat-pre-in2out" : "nat-pre-out2in"; + + feature_name = "nat44-ed-classify"; + } + + int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1); + if (rv) + return rv; + vnet_feature_enable_disable ("ip4-unicast", del_feature_name, + sw_if_index, 0, 0, 0); + vnet_feature_enable_disable ("ip4-unicast", feature_name, + sw_if_index, 1, 0, 0); + goto set_flags; + } + + goto fib; + } + } + /* *INDENT-ON* */ + + if (is_del) + { + nat_log_err ("error interface couldn't be found"); + return VNET_API_ERROR_NO_SUCH_ENTRY; + } + + pool_get (sm->interfaces, i); + i->sw_if_index = sw_if_index; + i->flags = 0; + nat_validate_interface_counters (sm, sw_if_index); + + vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index, 1, 0, + 0); + + int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1); + if (rv) + return rv; + +set_flags: + if (is_inside) + { + i->flags |= NAT_INTERFACE_FLAG_IS_INSIDE; + return 0; + } + else + i->flags |= NAT_INTERFACE_FLAG_IS_OUTSIDE; + + /* Add/delete external addresses to FIB */ +fib: + /* *INDENT-OFF* */ + vec_foreach (ap, sm->addresses) + snat_add_del_addr_to_fib(&ap->addr, 32, sw_if_index, !is_del); + + pool_foreach (m, sm->static_mappings) + { + if (!(is_addr_only_static_mapping(m)) || (m->local_addr.as_u32 == m->external_addr.as_u32)) + continue; + + snat_add_del_addr_to_fib(&m->external_addr, 32, sw_if_index, !is_del); + } + /* *INDENT-ON* */ + + return 0; +} + +int +snat_interface_add_del_output_feature (u32 sw_if_index, + u8 is_inside, int is_del) +{ + snat_main_t *sm = &snat_main; + snat_interface_t *i; + snat_address_t *ap; + snat_static_mapping_t *m; + nat_outside_fib_t *outside_fib; + u32 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, + sw_if_index); + + if (!sm->enabled) + { + nat_log_err ("nat44 is disabled"); + return VNET_API_ERROR_UNSUPPORTED; + } + + if (sm->static_mapping_only && !(sm->static_mapping_connection_tracking)) + { + nat_log_err ("error unsupported"); + return VNET_API_ERROR_UNSUPPORTED; + } + + /* *INDENT-OFF* */ + pool_foreach (i, sm->interfaces) + { + if (i->sw_if_index == sw_if_index) + { + nat_log_err ("error interface already configured"); + return VNET_API_ERROR_VALUE_EXIST; + } + } + /* *INDENT-ON* */ + + update_per_vrf_sessions_vec (fib_index, is_del); + + if (!is_inside) + { + /* *INDENT-OFF* */ + vec_foreach (outside_fib, sm->outside_fibs) + { + if (outside_fib->fib_index == fib_index) + { + if (is_del) + { + outside_fib->refcount--; + if (!outside_fib->refcount) + vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs); + } + else + outside_fib->refcount++; + goto feature_set; + } + } + /* *INDENT-ON* */ + if (!is_del) + { + vec_add2 (sm->outside_fibs, outside_fib, 1); + outside_fib->refcount = 1; + outside_fib->fib_index = fib_index; + } + } + +feature_set: + if (is_inside) + { + int rv = + ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del); + if (rv) + return rv; + rv = + ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, + !is_del); + if (rv) + return rv; + goto fq; + } + + if (sm->num_workers > 1) + { + int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del); + if (rv) + return rv; + rv = + ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, !is_del); + if (rv) + return rv; + vnet_feature_enable_disable ("ip4-unicast", + "nat44-out2in-worker-handoff", + sw_if_index, !is_del, 0, 0); + vnet_feature_enable_disable ("ip4-output", + "nat44-in2out-output-worker-handoff", + sw_if_index, !is_del, 0, 0); + } + else + { + int rv = + 
ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del); + if (rv) + return rv; + rv = + ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, + !is_del); + if (rv) + return rv; + vnet_feature_enable_disable ("ip4-unicast", "nat-pre-out2in", + sw_if_index, !is_del, 0, 0); + vnet_feature_enable_disable ("ip4-output", "nat-pre-in2out-output", + sw_if_index, !is_del, 0, 0); + } + +fq: + if (sm->fq_in2out_output_index == ~0 && sm->num_workers > 1) + sm->fq_in2out_output_index = + vlib_frame_queue_main_init (sm->in2out_output_node_index, 0); + + if (sm->fq_out2in_index == ~0 && sm->num_workers > 1) + sm->fq_out2in_index = + vlib_frame_queue_main_init (sm->out2in_node_index, 0); + + /* *INDENT-OFF* */ + pool_foreach (i, sm->output_feature_interfaces) + { + if (i->sw_if_index == sw_if_index) + { + if (is_del) + pool_put (sm->output_feature_interfaces, i); + else + return VNET_API_ERROR_VALUE_EXIST; + + goto fib; + } + } + /* *INDENT-ON* */ + + if (is_del) + { + nat_log_err ("error interface couldn't be found"); + return VNET_API_ERROR_NO_SUCH_ENTRY; + } + + pool_get (sm->output_feature_interfaces, i); + i->sw_if_index = sw_if_index; + i->flags = 0; + nat_validate_interface_counters (sm, sw_if_index); + if (is_inside) + i->flags |= NAT_INTERFACE_FLAG_IS_INSIDE; + else + i->flags |= NAT_INTERFACE_FLAG_IS_OUTSIDE; + + /* Add/delete external addresses to FIB */ +fib: + if (is_inside) + return 0; + + /* *INDENT-OFF* */ + vec_foreach (ap, sm->addresses) + snat_add_del_addr_to_fib(&ap->addr, 32, sw_if_index, !is_del); + + pool_foreach (m, sm->static_mappings) + { + if (!((is_addr_only_static_mapping(m))) || (m->local_addr.as_u32 == m->external_addr.as_u32)) + continue; + + snat_add_del_addr_to_fib(&m->external_addr, 32, sw_if_index, !is_del); + } + /* *INDENT-ON* */ + + return 0; +} + +int +snat_set_workers (uword * bitmap) +{ + snat_main_t *sm = &snat_main; + int i, j = 0; + + if (sm->num_workers < 2) + return VNET_API_ERROR_FEATURE_DISABLED; + + if (clib_bitmap_last_set (bitmap) >= sm->num_workers) + return VNET_API_ERROR_INVALID_WORKER; + + vec_free (sm->workers); + /* *INDENT-OFF* */ + clib_bitmap_foreach (i, bitmap) + { + vec_add1(sm->workers, i); + sm->per_thread_data[sm->first_worker_index + i].snat_thread_index = j; + sm->per_thread_data[sm->first_worker_index + i].thread_index = i; + j++; + } + /* *INDENT-ON* */ + + sm->port_per_thread = (0xffff - 1024) / _vec_len (sm->workers); + + return 0; +} + +int +nat44_ed_set_frame_queue_nelts (u32 frame_queue_nelts) +{ + fail_if_enabled (); + snat_main_t *sm = &snat_main; + sm->frame_queue_nelts = frame_queue_nelts; + return 0; +} + +static void +snat_update_outside_fib (ip4_main_t * im, uword opaque, + u32 sw_if_index, u32 new_fib_index, + u32 old_fib_index) +{ + snat_main_t *sm = &snat_main; + nat_outside_fib_t *outside_fib; + snat_interface_t *i; + u8 is_add = 1; + u8 match = 0; + + if (!sm->enabled || (new_fib_index == old_fib_index) + || (!vec_len (sm->outside_fibs))) + { + return; + } + + /* *INDENT-OFF* */ + pool_foreach (i, sm->interfaces) + { + if (i->sw_if_index == sw_if_index) + { + if (!(nat_interface_is_outside (i))) + return; + match = 1; + } + } + + pool_foreach (i, sm->output_feature_interfaces) + { + if (i->sw_if_index == sw_if_index) + { + if (!(nat_interface_is_outside (i))) + return; + match = 1; + } + } + /* *INDENT-ON* */ + + if (!match) + return; + + vec_foreach (outside_fib, sm->outside_fibs) + { + if (outside_fib->fib_index == old_fib_index) + { + outside_fib->refcount--; + if (!outside_fib->refcount) + vec_del1 
(sm->outside_fibs, outside_fib - sm->outside_fibs); + break; + } + } + + vec_foreach (outside_fib, sm->outside_fibs) + { + if (outside_fib->fib_index == new_fib_index) + { + outside_fib->refcount++; + is_add = 0; + break; + } + } + + if (is_add) + { + vec_add2 (sm->outside_fibs, outside_fib, 1); + outside_fib->refcount = 1; + outside_fib->fib_index = new_fib_index; + } +} + +static void +snat_update_outside_fib (ip4_main_t * im, uword opaque, + u32 sw_if_index, u32 new_fib_index, + u32 old_fib_index); + +static void +snat_ip4_add_del_interface_address_cb (ip4_main_t * im, + uword opaque, + u32 sw_if_index, + ip4_address_t * address, + u32 address_length, + u32 if_address_index, u32 is_delete); + +static void +nat_ip4_add_del_addr_only_sm_cb (ip4_main_t * im, + uword opaque, + u32 sw_if_index, + ip4_address_t * address, + u32 address_length, + u32 if_address_index, u32 is_delete); + +void +test_key_calc_split () +{ + ip4_address_t l_addr; + l_addr.as_u8[0] = 1; + l_addr.as_u8[1] = 1; + l_addr.as_u8[2] = 1; + l_addr.as_u8[3] = 1; + ip4_address_t r_addr; + r_addr.as_u8[0] = 2; + r_addr.as_u8[1] = 2; + r_addr.as_u8[2] = 2; + r_addr.as_u8[3] = 2; + u16 l_port = 40001; + u16 r_port = 40301; + u8 proto = 9; + u32 fib_index = 9000001; + u32 thread_index = 3000000001; + u32 session_index = 3000000221; + clib_bihash_kv_16_8_t kv; + init_ed_kv (&kv, l_addr, l_port, r_addr, r_port, fib_index, proto, + thread_index, session_index); + ip4_address_t l_addr2; + ip4_address_t r_addr2; + clib_memset (&l_addr2, 0, sizeof (l_addr2)); + clib_memset (&r_addr2, 0, sizeof (r_addr2)); + u16 l_port2 = 0; + u16 r_port2 = 0; + u8 proto2 = 0; + u32 fib_index2 = 0; + split_ed_kv (&kv, &l_addr2, &r_addr2, &proto2, &fib_index2, &l_port2, + &r_port2); + ASSERT (l_addr.as_u32 == l_addr2.as_u32); + ASSERT (r_addr.as_u32 == r_addr2.as_u32); + ASSERT (l_port == l_port2); + ASSERT (r_port == r_port2); + ASSERT (proto == proto2); + ASSERT (fib_index == fib_index2); + ASSERT (thread_index == ed_value_get_thread_index (&kv)); + ASSERT (session_index == ed_value_get_session_index (&kv)); + + fib_index = 7001; + proto = 5; + nat_protocol_t proto3 = ~0; + u64 key = calc_nat_key (l_addr, l_port, fib_index, proto); + split_nat_key (key, &l_addr2, &l_port2, &fib_index2, &proto3); + ASSERT (l_addr.as_u32 == l_addr2.as_u32); + ASSERT (l_port == l_port2); + ASSERT (proto == proto3); + ASSERT (fib_index == fib_index2); +} + +static clib_error_t * +nat_ip_table_add_del (vnet_main_t * vnm, u32 table_id, u32 is_add) +{ + u32 fib_index; + + // TODO: consider removing all NAT interfaces + if (!is_add) + { + fib_index = ip4_fib_index_from_table_id (table_id); + if (fib_index != ~0) + expire_per_vrf_sessions (fib_index); + } + return 0; +} + +VNET_IP_TABLE_ADD_DEL_FUNCTION (nat_ip_table_add_del); + +void +nat44_set_node_indexes (snat_main_t * sm, vlib_main_t * vm) +{ + vlib_node_t *node; + + node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-out2in"); + sm->in2out_node_index = node->index; + + node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out"); + sm->out2in_node_index = node->index; + + node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out-output"); + sm->in2out_output_node_index = node->index; +} + +#define nat_validate_simple_counter(c, i) \ + do \ + { \ + vlib_validate_simple_counter (&c, i); \ + vlib_zero_simple_counter (&c, i); \ + } \ + while (0); + +#define nat_init_simple_counter(c, n, sn) \ + do \ + { \ + c.name = n; \ + c.stat_segment_name = sn; \ + nat_validate_simple_counter (c, 0); \ + } \ + while (0); + 
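+/* Usage sketch (illustrative): nat_init_simple_counter names a counter,
+ * exports it under the given stats segment path and validates/zeroes
+ * index 0, while nat_validate_simple_counter only makes sure slot i
+ * exists and is zeroed.  For example:
+ *
+ *   nat_init_simple_counter (sm->counters.hairpinning, "hairpinning",
+ *                            "/nat44-ed/hairpinning");
+ *   nat_validate_simple_counter (sm->counters.hairpinning, sw_if_index);
+ *
+ * nat_init () below registers all counters once at plugin load, and
+ * nat_validate_interface_counters () re-validates them for every
+ * sw_if_index on which NAT is enabled. */
+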
+static_always_inline void +nat_validate_interface_counters (snat_main_t *sm, u32 sw_if_index) +{ +#define _(x) \ + nat_validate_simple_counter (sm->counters.fastpath.in2out.x, sw_if_index); \ + nat_validate_simple_counter (sm->counters.fastpath.out2in.x, sw_if_index); \ + nat_validate_simple_counter (sm->counters.slowpath.in2out.x, sw_if_index); \ + nat_validate_simple_counter (sm->counters.slowpath.out2in.x, sw_if_index); + foreach_nat_counter; +#undef _ + nat_validate_simple_counter (sm->counters.hairpinning, sw_if_index); +} + +static clib_error_t * +nat_init (vlib_main_t * vm) +{ + snat_main_t *sm = &snat_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + vlib_thread_registration_t *tr; + ip4_add_del_interface_address_callback_t cbi = { 0 }; + ip4_table_bind_callback_t cbt = { 0 }; + u32 i, num_threads = 0; + uword *p, *bitmap = 0; + + clib_memset (sm, 0, sizeof (*sm)); + + // required + sm->vnet_main = vnet_get_main (); + // convenience + sm->ip4_main = &ip4_main; + sm->api_main = vlibapi_get_main (); + sm->ip4_lookup_main = &ip4_main.lookup_main; + + // frame queue indices used for handoff + sm->fq_out2in_index = ~0; + sm->fq_in2out_index = ~0; + sm->fq_in2out_output_index = ~0; + + sm->log_level = NAT_LOG_ERROR; + + nat44_set_node_indexes (sm, vm); + sm->log_class = vlib_log_register_class ("nat", 0); + nat_ipfix_logging_init (vm); + + nat_init_simple_counter (sm->total_sessions, "total-sessions", + "/nat44-ed/total-sessions"); + +#define _(x) \ + nat_init_simple_counter (sm->counters.fastpath.in2out.x, #x, \ + "/nat44-ed/in2out/fastpath/" #x); \ + nat_init_simple_counter (sm->counters.fastpath.out2in.x, #x, \ + "/nat44-ed/out2in/fastpath/" #x); \ + nat_init_simple_counter (sm->counters.slowpath.in2out.x, #x, \ + "/nat44-ed/in2out/slowpath/" #x); \ + nat_init_simple_counter (sm->counters.slowpath.out2in.x, #x, \ + "/nat44-ed/out2in/slowpath/" #x); + foreach_nat_counter; +#undef _ + nat_init_simple_counter (sm->counters.hairpinning, "hairpinning", + "/nat44-ed/hairpinning"); + + p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + if (p) + { + tr = (vlib_thread_registration_t *) p[0]; + if (tr) + { + sm->num_workers = tr->count; + sm->first_worker_index = tr->first_index; + } + } + num_threads = tm->n_vlib_mains - 1; + sm->port_per_thread = 0xffff - 1024; + vec_validate (sm->per_thread_data, num_threads); + + /* Use all available workers by default */ + if (sm->num_workers > 1) + { + + for (i = 0; i < sm->num_workers; i++) + bitmap = clib_bitmap_set (bitmap, i, 1); + snat_set_workers (bitmap); + clib_bitmap_free (bitmap); + } + else + sm->per_thread_data[0].snat_thread_index = 0; + + /* callbacks to call when interface address changes. 
*/ + cbi.function = snat_ip4_add_del_interface_address_cb; + vec_add1 (sm->ip4_main->add_del_interface_address_callbacks, cbi); + cbi.function = nat_ip4_add_del_addr_only_sm_cb; + vec_add1 (sm->ip4_main->add_del_interface_address_callbacks, cbi); + + /* callbacks to call when interface to table biding changes */ + cbt.function = snat_update_outside_fib; + vec_add1 (sm->ip4_main->table_bind_callbacks, cbt); + + sm->fib_src_low = + fib_source_allocate ("nat-low", FIB_SOURCE_PRIORITY_LOW, + FIB_SOURCE_BH_SIMPLE); + sm->fib_src_hi = + fib_source_allocate ("nat-hi", FIB_SOURCE_PRIORITY_HI, + FIB_SOURCE_BH_SIMPLE); + + nat_affinity_init (vm); + test_key_calc_split (); + + return nat44_api_hookup (vm); +} + +VLIB_INIT_FUNCTION (nat_init); + +int +nat44_plugin_enable (nat44_config_t c) +{ + snat_main_t *sm = &snat_main; + + fail_if_enabled (); + + // UPDATE based on these appropriate API/CLI + // c.static_mapping_only + c.connection_tracking + // - supported in NAT EI & NAT ED + // c.out2in_dpo, c.static_mapping_only + // - supported in NAT EI + + if (c.static_mapping_only && !c.connection_tracking) + { + nat_log_err ("unsupported combination of configuration"); + return 1; + } + + // nat44 feature configuration + sm->static_mapping_only = c.static_mapping_only; + sm->static_mapping_connection_tracking = c.connection_tracking; + + sm->forwarding_enabled = 0; + sm->mss_clamping = 0; + sm->pat = (!c.static_mapping_only || + (c.static_mapping_only && c.connection_tracking)); + + if (!c.sessions) + c.sessions = 63 * 1024; + + sm->max_translations_per_thread = c.sessions; + sm->translation_buckets = nat_calc_bihash_buckets (c.sessions); + + // ED only feature + vec_add1 (sm->max_translations_per_fib, sm->max_translations_per_thread); + + sm->inside_vrf_id = c.inside_vrf; + sm->inside_fib_index = + fib_table_find_or_create_and_lock + (FIB_PROTOCOL_IP4, c.inside_vrf, sm->fib_src_hi); + + sm->outside_vrf_id = c.outside_vrf; + sm->outside_fib_index = fib_table_find_or_create_and_lock ( + FIB_PROTOCOL_IP4, c.outside_vrf, sm->fib_src_hi); + + sm->worker_in2out_cb = nat44_ed_get_worker_in2out_cb; + sm->worker_out2in_cb = nat44_ed_get_worker_out2in_cb; + + nat44_ed_db_init (sm->max_translations_per_thread, sm->translation_buckets); + + nat_affinity_enable (); + + nat_reset_timeouts (&sm->timeouts); + + vlib_zero_simple_counter (&sm->total_sessions, 0); + + if (!sm->frame_queue_nelts) + sm->frame_queue_nelts = NAT_FQ_NELTS_DEFAULT; + + sm->enabled = 1; + sm->rconfig = c; + + return 0; +} + +void +nat44_addresses_free (snat_address_t ** addresses) +{ + snat_address_t *ap; + vec_foreach (ap, *addresses) + { + #define _(N, i, n, s) \ + vec_free (ap->busy_##n##_ports_per_thread); + foreach_nat_protocol + #undef _ + } + vec_free (*addresses); + *addresses = 0; +} + +int +nat44_plugin_disable () +{ + snat_main_t *sm = &snat_main; + snat_interface_t *i, *vec; + int error = 0; + + fail_if_disabled (); + + // first unregister all nodes from interfaces + vec = vec_dup (sm->interfaces); + vec_foreach (i, vec) + { + if (nat_interface_is_inside(i)) + error = snat_interface_add_del (i->sw_if_index, 1, 1); + if (nat_interface_is_outside(i)) + error = snat_interface_add_del (i->sw_if_index, 0, 1); + + if (error) + { + nat_log_err ("error occurred while removing interface %u", + i->sw_if_index); + } + } + vec_free (vec); + sm->interfaces = 0; + + vec = vec_dup (sm->output_feature_interfaces); + vec_foreach (i, vec) + { + if (nat_interface_is_inside(i)) + error = snat_interface_add_del_output_feature (i->sw_if_index, 1, 1); + if 
(nat_interface_is_outside(i)) + error = snat_interface_add_del_output_feature (i->sw_if_index, 0, 1); + + if (error) + { + nat_log_err ("error occurred while removing interface %u", + i->sw_if_index); + } + } + vec_free (vec); + sm->output_feature_interfaces = 0; + + vec_free (sm->max_translations_per_fib); + + nat44_ed_db_free (); + + nat44_addresses_free (&sm->addresses); + nat44_addresses_free (&sm->twice_nat_addresses); + + vec_free (sm->to_resolve); + vec_free (sm->auto_add_sw_if_indices); + vec_free (sm->auto_add_sw_if_indices_twice_nat); + + sm->to_resolve = 0; + sm->auto_add_sw_if_indices = 0; + sm->auto_add_sw_if_indices_twice_nat = 0; + + sm->forwarding_enabled = 0; + + sm->enabled = 0; + clib_memset (&sm->rconfig, 0, sizeof (sm->rconfig)); + + return 0; +} + +void +nat44_ed_forwarding_enable_disable (u8 is_enable) +{ + snat_main_per_thread_data_t *tsm; + snat_main_t *sm = &snat_main; + snat_session_t *s; + + u32 *ses_to_be_removed = 0, *ses_index; + + sm->forwarding_enabled = is_enable != 0; + + if (is_enable) + return; + + vec_foreach (tsm, sm->per_thread_data) + { + pool_foreach (s, tsm->sessions) + { + if (is_fwd_bypass_session (s)) + { + vec_add1 (ses_to_be_removed, s - tsm->sessions); + } + } + vec_foreach (ses_index, ses_to_be_removed) + { + s = pool_elt_at_index (tsm->sessions, ses_index[0]); + nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0); + nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1); + } + + vec_free (ses_to_be_removed); + } +} + +void +snat_free_outside_address_and_port (snat_address_t *addresses, + u32 thread_index, ip4_address_t *addr, + u16 port, nat_protocol_t protocol) +{ + snat_main_t *sm = &snat_main; + snat_address_t *a; + u32 address_index; + u16 port_host_byte_order = clib_net_to_host_u16 (port); + + for (address_index = 0; address_index < vec_len (addresses); + address_index++) + { + if (addresses[address_index].addr.as_u32 == addr->as_u32) + break; + } + + ASSERT (address_index < vec_len (addresses)); + + a = addresses + address_index; + + switch (protocol) + { +#define _(N, i, n, s) \ + case NAT_PROTOCOL_##N: \ + ASSERT (a->busy_##n##_port_refcounts[port_host_byte_order] >= 1); \ + --a->busy_##n##_port_refcounts[port_host_byte_order]; \ + a->busy_##n##_ports--; \ + a->busy_##n##_ports_per_thread[thread_index]--; \ + break; + foreach_nat_protocol +#undef _ + default : nat_elog_info (sm, "unknown protocol"); + return; + } +} + +int +nat_set_outside_address_and_port (snat_address_t *addresses, u32 thread_index, + ip4_address_t addr, u16 port, + nat_protocol_t protocol) +{ + snat_main_t *sm = &snat_main; + snat_address_t *a = 0; + u32 address_index; + u16 port_host_byte_order = clib_net_to_host_u16 (port); + + for (address_index = 0; address_index < vec_len (addresses); + address_index++) + { + if (addresses[address_index].addr.as_u32 != addr.as_u32) + continue; + + a = addresses + address_index; + switch (protocol) + { +#define _(N, j, n, s) \ + case NAT_PROTOCOL_##N: \ + if (a->busy_##n##_port_refcounts[port_host_byte_order]) \ + return VNET_API_ERROR_INSTANCE_IN_USE; \ + ++a->busy_##n##_port_refcounts[port_host_byte_order]; \ + a->busy_##n##_ports_per_thread[thread_index]++; \ + a->busy_##n##_ports++; \ + return 0; + foreach_nat_protocol +#undef _ + default : nat_elog_info (sm, "unknown protocol"); + return 1; + } + } + + return VNET_API_ERROR_NO_SUCH_ENTRY; +} + +int +snat_static_mapping_match (snat_main_t * sm, + ip4_address_t match_addr, + u16 match_port, + u32 match_fib_index, + nat_protocol_t match_protocol, + 
ip4_address_t * mapping_addr, + u16 * mapping_port, + u32 * mapping_fib_index, + u8 by_external, + u8 * is_addr_only, + twice_nat_type_t * twice_nat, + lb_nat_type_t * lb, ip4_address_t * ext_host_addr, + u8 * is_identity_nat, snat_static_mapping_t ** out) +{ + clib_bihash_kv_8_8_t kv, value; + clib_bihash_8_8_t *mapping_hash; + snat_static_mapping_t *m; + u32 rand, lo = 0, hi, mid, *tmp = 0, i; + nat44_lb_addr_port_t *local; + u8 backend_index; + + if (!by_external) + { + mapping_hash = &sm->static_mapping_by_local; + init_nat_k (&kv, match_addr, match_port, match_fib_index, + match_protocol); + if (clib_bihash_search_8_8 (mapping_hash, &kv, &value)) + { + /* Try address only mapping */ + init_nat_k (&kv, match_addr, 0, match_fib_index, 0); + if (clib_bihash_search_8_8 (mapping_hash, &kv, &value)) + return 1; + } + } + else + { + mapping_hash = &sm->static_mapping_by_external; + init_nat_k (&kv, match_addr, match_port, 0, match_protocol); + if (clib_bihash_search_8_8 (mapping_hash, &kv, &value)) + { + /* Try address only mapping */ + init_nat_k (&kv, match_addr, 0, 0, 0); + if (clib_bihash_search_8_8 (mapping_hash, &kv, &value)) + return 1; + } + } + + m = pool_elt_at_index (sm->static_mappings, value.value); + + if (by_external) + { + if (is_lb_static_mapping (m)) + { + if (PREDICT_FALSE (lb != 0)) + *lb = m->affinity ? AFFINITY_LB_NAT : LB_NAT; + if (m->affinity && !nat_affinity_find_and_lock (ext_host_addr[0], + match_addr, + match_protocol, + match_port, + &backend_index)) + { + local = pool_elt_at_index (m->locals, backend_index); + *mapping_addr = local->addr; + *mapping_port = local->port; + *mapping_fib_index = local->fib_index; + goto end; + } + // pick locals matching this worker + if (PREDICT_FALSE (sm->num_workers > 1)) + { + u32 thread_index = vlib_get_thread_index (); + pool_foreach_index (i, m->locals) + { + local = pool_elt_at_index (m->locals, i); + + ip4_header_t ip = { + .src_address = local->addr, + }; + + if (sm->worker_in2out_cb (&ip, m->fib_index, 0) == + thread_index) + { + vec_add1 (tmp, i); + } + } + ASSERT (vec_len (tmp) != 0); + } + else + { + pool_foreach_index (i, m->locals) + { + vec_add1 (tmp, i); + } + } + hi = vec_len (tmp) - 1; + local = pool_elt_at_index (m->locals, tmp[hi]); + rand = 1 + (random_u32 (&sm->random_seed) % local->prefix); + while (lo < hi) + { + mid = ((hi - lo) >> 1) + lo; + local = pool_elt_at_index (m->locals, tmp[mid]); + (rand > local->prefix) ? (lo = mid + 1) : (hi = mid); + } + local = pool_elt_at_index (m->locals, tmp[lo]); + if (!(local->prefix >= rand)) + return 1; + *mapping_addr = local->addr; + *mapping_port = local->port; + *mapping_fib_index = local->fib_index; + if (m->affinity) + { + if (nat_affinity_create_and_lock (ext_host_addr[0], match_addr, + match_protocol, match_port, + tmp[lo], m->affinity, + m->affinity_per_service_list_head_index)) + nat_elog_info (sm, "create affinity record failed"); + } + vec_free (tmp); + } + else + { + if (PREDICT_FALSE (lb != 0)) + *lb = NO_LB_NAT; + *mapping_fib_index = m->fib_index; + *mapping_addr = m->local_addr; + /* Address only mapping doesn't change port */ + *mapping_port = is_addr_only_static_mapping (m) ? match_port + : m->local_port; + } + } + else + { + *mapping_addr = m->external_addr; + /* Address only mapping doesn't change port */ + *mapping_port = is_addr_only_static_mapping (m) ? 
match_port + : m->external_port; + *mapping_fib_index = sm->outside_fib_index; + } + +end: + if (PREDICT_FALSE (is_addr_only != 0)) + *is_addr_only = is_addr_only_static_mapping (m); + + if (PREDICT_FALSE (twice_nat != 0)) + *twice_nat = m->twice_nat; + + if (PREDICT_FALSE (is_identity_nat != 0)) + *is_identity_nat = is_identity_static_mapping (m); + + if (out != 0) + *out = m; + + return 0; +} + +static u32 +nat44_ed_get_worker_in2out_cb (ip4_header_t *ip, u32 rx_fib_index, + u8 is_output) +{ + snat_main_t *sm = &snat_main; + u32 next_worker_index = sm->first_worker_index; + u32 hash; + + clib_bihash_kv_16_8_t kv16, value16; + snat_main_per_thread_data_t *tsm; + udp_header_t *udp; + + if (PREDICT_FALSE (is_output)) + { + u32 fib_index = sm->outside_fib_index; + nat_outside_fib_t *outside_fib; + fib_node_index_t fei = FIB_NODE_INDEX_INVALID; + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP4, + .fp_len = 32, + .fp_addr = { + .ip4.as_u32 = ip->dst_address.as_u32, + } + , + }; + + udp = ip4_next_header (ip); + + switch (vec_len (sm->outside_fibs)) + { + case 0: + fib_index = sm->outside_fib_index; + break; + case 1: + fib_index = sm->outside_fibs[0].fib_index; + break; + default: + /* *INDENT-OFF* */ + vec_foreach (outside_fib, sm->outside_fibs) + { + fei = fib_table_lookup (outside_fib->fib_index, &pfx); + if (FIB_NODE_INDEX_INVALID != fei) + { + if (fib_entry_get_resolving_interface (fei) != ~0) + { + fib_index = outside_fib->fib_index; + break; + } + } + } + /* *INDENT-ON* */ + break; + } + + init_ed_k (&kv16, ip->src_address, udp->src_port, ip->dst_address, + udp->dst_port, fib_index, ip->protocol); + + if (PREDICT_TRUE ( + !clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16))) + { + tsm = + vec_elt_at_index (sm->per_thread_data, + ed_value_get_thread_index (&value16)); + next_worker_index += tsm->thread_index; + + nat_elog_debug_handoff ( + sm, "HANDOFF IN2OUT-OUTPUT-FEATURE (session)", next_worker_index, + fib_index, clib_net_to_host_u32 (ip->src_address.as_u32), + clib_net_to_host_u32 (ip->dst_address.as_u32)); + + return next_worker_index; + } + } + + hash = ip->src_address.as_u32 + (ip->src_address.as_u32 >> 8) + + (ip->src_address.as_u32 >> 16) + (ip->src_address.as_u32 >> 24); + + if (PREDICT_TRUE (is_pow2 (_vec_len (sm->workers)))) + next_worker_index += sm->workers[hash & (_vec_len (sm->workers) - 1)]; + else + next_worker_index += sm->workers[hash % _vec_len (sm->workers)]; + + if (PREDICT_TRUE (!is_output)) + { + nat_elog_debug_handoff (sm, "HANDOFF IN2OUT", next_worker_index, + rx_fib_index, + clib_net_to_host_u32 (ip->src_address.as_u32), + clib_net_to_host_u32 (ip->dst_address.as_u32)); + } + else + { + nat_elog_debug_handoff (sm, "HANDOFF IN2OUT-OUTPUT-FEATURE", + next_worker_index, rx_fib_index, + clib_net_to_host_u32 (ip->src_address.as_u32), + clib_net_to_host_u32 (ip->dst_address.as_u32)); + } + + return next_worker_index; +} + +static u32 +nat44_ed_get_worker_out2in_cb (vlib_buffer_t * b, ip4_header_t * ip, + u32 rx_fib_index, u8 is_output) +{ + snat_main_t *sm = &snat_main; + clib_bihash_kv_8_8_t kv, value; + clib_bihash_kv_16_8_t kv16, value16; + snat_main_per_thread_data_t *tsm; + + u32 proto, next_worker_index = 0; + udp_header_t *udp; + u16 port; + snat_static_mapping_t *m; + u32 hash; + + proto = ip_proto_to_nat_proto (ip->protocol); + + if (PREDICT_TRUE (proto == NAT_PROTOCOL_UDP || proto == NAT_PROTOCOL_TCP)) + { + udp = ip4_next_header (ip); + + init_ed_k (&kv16, ip->dst_address, udp->dst_port, ip->src_address, + udp->src_port, rx_fib_index, 
ip->protocol); + + if (PREDICT_TRUE ( + !clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16))) + { + tsm = + vec_elt_at_index (sm->per_thread_data, + ed_value_get_thread_index (&value16)); + vnet_buffer2 (b)->nat.cached_session_index = + ed_value_get_session_index (&value16); + next_worker_index = sm->first_worker_index + tsm->thread_index; + nat_elog_debug_handoff ( + sm, "HANDOFF OUT2IN (session)", next_worker_index, rx_fib_index, + clib_net_to_host_u32 (ip->src_address.as_u32), + clib_net_to_host_u32 (ip->dst_address.as_u32)); + return next_worker_index; + } + } + else if (proto == NAT_PROTOCOL_ICMP) + { + ip4_address_t lookup_saddr, lookup_daddr; + u16 lookup_sport, lookup_dport; + u8 lookup_protocol; + if (!nat_get_icmp_session_lookup_values ( + b, ip, &lookup_saddr, &lookup_sport, &lookup_daddr, &lookup_dport, + &lookup_protocol)) + { + init_ed_k (&kv16, lookup_saddr, lookup_sport, lookup_daddr, + lookup_dport, rx_fib_index, lookup_protocol); + if (PREDICT_TRUE ( + !clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16))) + { + tsm = + vec_elt_at_index (sm->per_thread_data, + ed_value_get_thread_index (&value16)); + next_worker_index = sm->first_worker_index + tsm->thread_index; + nat_elog_debug_handoff ( + sm, "HANDOFF OUT2IN (session)", next_worker_index, + rx_fib_index, clib_net_to_host_u32 (ip->src_address.as_u32), + clib_net_to_host_u32 (ip->dst_address.as_u32)); + return next_worker_index; + } + } + } + + /* first try static mappings without port */ + if (PREDICT_FALSE (pool_elts (sm->static_mappings))) + { + init_nat_k (&kv, ip->dst_address, 0, 0, 0); + if (!clib_bihash_search_8_8 + (&sm->static_mapping_by_external, &kv, &value)) + { + m = pool_elt_at_index (sm->static_mappings, value.value); + next_worker_index = m->workers[0]; + goto done; + } + } + + /* unknown protocol */ + if (PREDICT_FALSE (proto == NAT_PROTOCOL_OTHER)) + { + /* use current thread */ + next_worker_index = vlib_get_thread_index (); + goto done; + } + + udp = ip4_next_header (ip); + port = udp->dst_port; + + if (PREDICT_FALSE (ip->protocol == IP_PROTOCOL_ICMP)) + { + icmp46_header_t *icmp = (icmp46_header_t *) udp; + icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1); + if (!icmp_type_is_error_message + (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)) + port = vnet_buffer (b)->ip.reass.l4_src_port; + else + { + /* if error message, then it's not fragmented and we can access it */ + ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1); + proto = ip_proto_to_nat_proto (inner_ip->protocol); + void *l4_header = ip4_next_header (inner_ip); + switch (proto) + { + case NAT_PROTOCOL_ICMP: + icmp = (icmp46_header_t *) l4_header; + echo = (icmp_echo_header_t *) (icmp + 1); + port = echo->identifier; + break; + case NAT_PROTOCOL_UDP: + case NAT_PROTOCOL_TCP: + port = ((tcp_udp_header_t *) l4_header)->src_port; + break; + default: + next_worker_index = vlib_get_thread_index (); + goto done; + } + } + } + + /* try static mappings with port */ + if (PREDICT_FALSE (pool_elts (sm->static_mappings))) + { + init_nat_k (&kv, ip->dst_address, port, 0, proto); + if (!clib_bihash_search_8_8 + (&sm->static_mapping_by_external, &kv, &value)) + { + m = pool_elt_at_index (sm->static_mappings, value.value); + if (!is_lb_static_mapping (m)) + { + next_worker_index = m->workers[0]; + goto done; + } + + hash = ip->src_address.as_u32 + (ip->src_address.as_u32 >> 8) + + (ip->src_address.as_u32 >> 16) + (ip->src_address.as_u32 >> 24); + + if (PREDICT_TRUE (is_pow2 (_vec_len (m->workers)))) + next_worker_index = + 
m->workers[hash & (_vec_len (m->workers) - 1)]; + else + next_worker_index = m->workers[hash % _vec_len (m->workers)]; + goto done; + } + } + + /* worker by outside port */ + next_worker_index = sm->first_worker_index; + next_worker_index += + sm->workers[(clib_net_to_host_u16 (port) - 1024) / sm->port_per_thread]; + +done: + nat_elog_debug_handoff (sm, "HANDOFF OUT2IN", next_worker_index, + rx_fib_index, + clib_net_to_host_u32 (ip->src_address.as_u32), + clib_net_to_host_u32 (ip->dst_address.as_u32)); + return next_worker_index; +} + +u32 +nat44_get_max_session_limit () +{ + snat_main_t *sm = &snat_main; + u32 max_limit = 0, len = 0; + + for (; len < vec_len (sm->max_translations_per_fib); len++) + { + if (max_limit < sm->max_translations_per_fib[len]) + max_limit = sm->max_translations_per_fib[len]; + } + return max_limit; +} + +int +nat44_set_session_limit (u32 session_limit, u32 vrf_id) +{ + snat_main_t *sm = &snat_main; + u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id); + u32 len = vec_len (sm->max_translations_per_fib); + + if (len <= fib_index) + { + vec_validate (sm->max_translations_per_fib, fib_index + 1); + + for (; len < vec_len (sm->max_translations_per_fib); len++) + sm->max_translations_per_fib[len] = sm->max_translations_per_thread; + } + + sm->max_translations_per_fib[fib_index] = session_limit; + return 0; +} + +int +nat44_update_session_limit (u32 session_limit, u32 vrf_id) +{ + snat_main_t *sm = &snat_main; + + if (nat44_set_session_limit (session_limit, vrf_id)) + return 1; + sm->max_translations_per_thread = nat44_get_max_session_limit (); + + sm->translation_buckets = + nat_calc_bihash_buckets (sm->max_translations_per_thread); + + nat44_ed_sessions_clear (); + return 0; +} + +static void +nat44_ed_worker_db_init (snat_main_per_thread_data_t *tsm, u32 translations, + u32 translation_buckets) +{ + dlist_elt_t *head; + + pool_alloc (tsm->sessions, translations); + pool_alloc (tsm->lru_pool, translations); + + pool_get (tsm->lru_pool, head); + tsm->tcp_trans_lru_head_index = head - tsm->lru_pool; + clib_dlist_init (tsm->lru_pool, tsm->tcp_trans_lru_head_index); + + pool_get (tsm->lru_pool, head); + tsm->tcp_estab_lru_head_index = head - tsm->lru_pool; + clib_dlist_init (tsm->lru_pool, tsm->tcp_estab_lru_head_index); + + pool_get (tsm->lru_pool, head); + tsm->udp_lru_head_index = head - tsm->lru_pool; + clib_dlist_init (tsm->lru_pool, tsm->udp_lru_head_index); + + pool_get (tsm->lru_pool, head); + tsm->icmp_lru_head_index = head - tsm->lru_pool; + clib_dlist_init (tsm->lru_pool, tsm->icmp_lru_head_index); + + pool_get (tsm->lru_pool, head); + tsm->unk_proto_lru_head_index = head - tsm->lru_pool; + clib_dlist_init (tsm->lru_pool, tsm->unk_proto_lru_head_index); +} + +static void +reinit_ed_flow_hash () +{ + snat_main_t *sm = &snat_main; + // we expect 2 flows per session, so multiply translation_buckets by 2 + clib_bihash_init_16_8 ( + &sm->flow_hash, "ed-flow-hash", + clib_max (1, sm->num_workers) * 2 * sm->translation_buckets, 0); + clib_bihash_set_kvp_format_fn_16_8 (&sm->flow_hash, format_ed_session_kvp); +} + +static void +nat44_ed_db_init (u32 translations, u32 translation_buckets) +{ + snat_main_t *sm = &snat_main; + snat_main_per_thread_data_t *tsm; + u32 static_mapping_buckets = 1024; + u32 static_mapping_memory_size = 64 << 20; + + reinit_ed_flow_hash (); + + clib_bihash_init_8_8 (&sm->static_mapping_by_local, + "static_mapping_by_local", static_mapping_buckets, + static_mapping_memory_size); + clib_bihash_set_kvp_format_fn_8_8 
(&sm->static_mapping_by_local, + format_static_mapping_kvp); + + clib_bihash_init_8_8 (&sm->static_mapping_by_external, + "static_mapping_by_external", static_mapping_buckets, + static_mapping_memory_size); + clib_bihash_set_kvp_format_fn_8_8 (&sm->static_mapping_by_external, + format_static_mapping_kvp); + + if (sm->pat) + { + vec_foreach (tsm, sm->per_thread_data) + { + nat44_ed_worker_db_init (tsm, sm->max_translations_per_thread, + sm->translation_buckets); + } + } +} + +static void +nat44_ed_worker_db_free (snat_main_per_thread_data_t *tsm) +{ + pool_free (tsm->lru_pool); + pool_free (tsm->sessions); + vec_free (tsm->per_vrf_sessions_vec); +} + +static void +nat44_ed_db_free () +{ + snat_main_t *sm = &snat_main; + snat_main_per_thread_data_t *tsm; + + pool_free (sm->static_mappings); + clib_bihash_free_16_8 (&sm->flow_hash); + clib_bihash_free_8_8 (&sm->static_mapping_by_local); + clib_bihash_free_8_8 (&sm->static_mapping_by_external); + + if (sm->pat) + { + vec_foreach (tsm, sm->per_thread_data) + { + nat44_ed_worker_db_free (tsm); + } + } +} + +void +nat44_ed_sessions_clear () +{ + snat_main_t *sm = &snat_main; + snat_main_per_thread_data_t *tsm; + + reinit_ed_flow_hash (); + + if (sm->pat) + { + vec_foreach (tsm, sm->per_thread_data) + { + + nat44_ed_worker_db_free (tsm); + nat44_ed_worker_db_init (tsm, sm->max_translations_per_thread, + sm->translation_buckets); + } + } + vlib_zero_simple_counter (&sm->total_sessions, 0); +} + +static void +nat_ip4_add_del_addr_only_sm_cb (ip4_main_t * im, + uword opaque, + u32 sw_if_index, + ip4_address_t * address, + u32 address_length, + u32 if_address_index, u32 is_delete) +{ + snat_main_t *sm = &snat_main; + snat_static_map_resolve_t *rp; + snat_static_mapping_t *m; + clib_bihash_kv_8_8_t kv, value; + int i, rv; + ip4_address_t l_addr; + + if (!sm->enabled) + return; + + for (i = 0; i < vec_len (sm->to_resolve); i++) + { + rp = sm->to_resolve + i; + if (rp->addr_only == 0) + continue; + if (rp->sw_if_index == sw_if_index) + goto match; + } + + return; + +match: + init_nat_k (&kv, *address, rp->addr_only ? 0 : rp->e_port, + sm->outside_fib_index, rp->addr_only ? 0 : rp->proto); + if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value)) + m = 0; + else + m = pool_elt_at_index (sm->static_mappings, value.value); + + if (!is_delete) + { + /* Don't trip over lease renewal, static config */ + if (m) + return; + } + else + { + if (!m) + return; + } + + /* Indetity mapping? 
*/ + if (rp->l_addr.as_u32 == 0) + l_addr.as_u32 = address[0].as_u32; + else + l_addr.as_u32 = rp->l_addr.as_u32; + /* Add the static mapping */ + rv = snat_add_static_mapping (l_addr, + address[0], + rp->l_port, + rp->e_port, + rp->vrf_id, + rp->addr_only, ~0 /* sw_if_index */ , + rp->proto, !is_delete, rp->twice_nat, + rp->out2in_only, rp->tag, rp->identity_nat, + rp->pool_addr, rp->exact); + if (rv) + nat_elog_notice_X1 (sm, "snat_add_static_mapping returned %d", "i4", rv); +} + +static void +snat_ip4_add_del_interface_address_cb (ip4_main_t * im, + uword opaque, + u32 sw_if_index, + ip4_address_t * address, + u32 address_length, + u32 if_address_index, u32 is_delete) +{ + snat_main_t *sm = &snat_main; + snat_static_map_resolve_t *rp; + ip4_address_t l_addr; + int i, j; + int rv; + u8 twice_nat = 0; + snat_address_t *addresses = sm->addresses; + + if (!sm->enabled) + return; + + for (i = 0; i < vec_len (sm->auto_add_sw_if_indices); i++) + { + if (sw_if_index == sm->auto_add_sw_if_indices[i]) + goto match; + } + + for (i = 0; i < vec_len (sm->auto_add_sw_if_indices_twice_nat); i++) + { + twice_nat = 1; + addresses = sm->twice_nat_addresses; + if (sw_if_index == sm->auto_add_sw_if_indices_twice_nat[i]) + goto match; + } + + return; + +match: + if (!is_delete) + { + /* Don't trip over lease renewal, static config */ + for (j = 0; j < vec_len (addresses); j++) + if (addresses[j].addr.as_u32 == address->as_u32) + return; + + (void) snat_add_address (sm, address, ~0, twice_nat); + /* Scan static map resolution vector */ + for (j = 0; j < vec_len (sm->to_resolve); j++) + { + rp = sm->to_resolve + j; + if (rp->addr_only) + continue; + /* On this interface? */ + if (rp->sw_if_index == sw_if_index) + { + /* Indetity mapping? */ + if (rp->l_addr.as_u32 == 0) + l_addr.as_u32 = address[0].as_u32; + else + l_addr.as_u32 = rp->l_addr.as_u32; + /* Add the static mapping */ + rv = snat_add_static_mapping ( + l_addr, address[0], rp->l_port, rp->e_port, rp->vrf_id, + rp->addr_only, ~0 /* sw_if_index */, rp->proto, 1, + rp->twice_nat, rp->out2in_only, rp->tag, rp->identity_nat, + rp->pool_addr, rp->exact); + if (rv) + nat_elog_notice_X1 (sm, "snat_add_static_mapping returned %d", + "i4", rv); + } + } + return; + } + else + { + (void) snat_del_address (sm, address[0], 1, twice_nat); + return; + } +} + +int +snat_add_interface_address (snat_main_t * sm, u32 sw_if_index, int is_del, + u8 twice_nat) +{ + ip4_main_t *ip4_main = sm->ip4_main; + ip4_address_t *first_int_addr; + snat_static_map_resolve_t *rp; + u32 *indices_to_delete = 0; + int i, j; + u32 *auto_add_sw_if_indices = + twice_nat ? 
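/* A minimal illustrative sketch, not part of the original patch: typical
 * use of the interface-address auto-add machinery in this file.
 * Registering a sw_if_index is what the "nat44 add interface address" CLI
 * does; if the interface has no address yet the request is parked in
 * to_resolve, and the address callback above completes it once an address
 * (for example a DHCP lease) appears. The wrapper name and flag values
 * are illustrative only. */
static int
example_auto_add_outside_if (u32 sw_if_index)
{
  snat_main_t *sm = &snat_main;

  /* is_del = 0: add registration; twice_nat = 0: regular outside pool */
  return snat_add_interface_address (sm, sw_if_index, 0 /* is_del */,
				     0 /* twice_nat */);
}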
sm-> + auto_add_sw_if_indices_twice_nat : sm->auto_add_sw_if_indices; + + first_int_addr = ip4_interface_first_address (ip4_main, sw_if_index, 0 /* just want the address */ + ); + + for (i = 0; i < vec_len (auto_add_sw_if_indices); i++) + { + if (auto_add_sw_if_indices[i] == sw_if_index) + { + if (is_del) + { + /* if have address remove it */ + if (first_int_addr) + (void) snat_del_address (sm, first_int_addr[0], 1, twice_nat); + else + { + for (j = 0; j < vec_len (sm->to_resolve); j++) + { + rp = sm->to_resolve + j; + if (rp->sw_if_index == sw_if_index) + vec_add1 (indices_to_delete, j); + } + if (vec_len (indices_to_delete)) + { + for (j = vec_len (indices_to_delete) - 1; j >= 0; j--) + vec_del1 (sm->to_resolve, j); + vec_free (indices_to_delete); + } + } + if (twice_nat) + vec_del1 (sm->auto_add_sw_if_indices_twice_nat, i); + else + vec_del1 (sm->auto_add_sw_if_indices, i); + } + else + return VNET_API_ERROR_VALUE_EXIST; + + return 0; + } + } + + if (is_del) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + /* add to the auto-address list */ + if (twice_nat) + vec_add1 (sm->auto_add_sw_if_indices_twice_nat, sw_if_index); + else + vec_add1 (sm->auto_add_sw_if_indices, sw_if_index); + + /* If the address is already bound - or static - add it now */ + if (first_int_addr) + (void) snat_add_address (sm, first_int_addr, ~0, twice_nat); + + return 0; +} + +int +nat44_del_ed_session (snat_main_t * sm, ip4_address_t * addr, u16 port, + ip4_address_t * eh_addr, u16 eh_port, u8 proto, + u32 vrf_id, int is_in) +{ + ip4_header_t ip; + clib_bihash_kv_16_8_t kv, value; + u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id); + snat_session_t *s; + snat_main_per_thread_data_t *tsm; + + ip.dst_address.as_u32 = ip.src_address.as_u32 = addr->as_u32; + if (sm->num_workers > 1) + tsm = + vec_elt_at_index (sm->per_thread_data, + sm->worker_in2out_cb (&ip, fib_index, 0)); + else + tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers); + + init_ed_k (&kv, *addr, port, *eh_addr, eh_port, fib_index, proto); + if (clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value)) + { + return VNET_API_ERROR_NO_SUCH_ENTRY; + } + + if (pool_is_free_index (tsm->sessions, ed_value_get_session_index (&value))) + return VNET_API_ERROR_UNSPECIFIED; + s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value)); + nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0); + nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1); + return 0; +} + +VLIB_NODE_FN (nat_default_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return 0; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (nat_default_node) = { + .name = "nat-default", + .vector_size = sizeof (u32), + .format_trace = 0, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = 0, + .n_next_nodes = NAT_N_NEXT, + .next_nodes = { + [NAT_NEXT_DROP] = "error-drop", + [NAT_NEXT_ICMP_ERROR] = "ip4-icmp-error", + [NAT_NEXT_IN2OUT_ED_FAST_PATH] = "nat44-ed-in2out", + [NAT_NEXT_IN2OUT_ED_SLOW_PATH] = "nat44-ed-in2out-slowpath", + [NAT_NEXT_IN2OUT_ED_OUTPUT_FAST_PATH] = "nat44-ed-in2out-output", + [NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH] = "nat44-ed-in2out-output-slowpath", + [NAT_NEXT_OUT2IN_ED_FAST_PATH] = "nat44-ed-out2in", + [NAT_NEXT_OUT2IN_ED_SLOW_PATH] = "nat44-ed-out2in-slowpath", + [NAT_NEXT_IN2OUT_CLASSIFY] = "nat44-in2out-worker-handoff", + [NAT_NEXT_OUT2IN_CLASSIFY] = "nat44-out2in-worker-handoff", + }, +}; +/* *INDENT-ON* */ + +void +nat_6t_l3_l4_csum_calc (nat_6t_flow_t *f) +{ + f->l3_csum_delta = 0; + f->l4_csum_delta = 0; + if 
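/* A minimal illustrative sketch, not part of the original patch: deleting
 * one endpoint-dependent session by its tuple through the
 * nat44_del_ed_session() helper defined earlier in this file. Addresses,
 * ports and VRF below are made-up examples; proto is the IP protocol
 * number, and is_in selects whether the pair describes the inside or the
 * outside view of the session. */
static int
example_del_tcp_session (void)
{
  snat_main_t *sm = &snat_main;
  ip4_address_t addr = { .as_u32 = clib_host_to_net_u32 (0x0a000001) };    /* 10.0.0.1 */
  ip4_address_t eh_addr = { .as_u32 = clib_host_to_net_u32 (0x08080808) }; /* 8.8.8.8 */

  return nat44_del_ed_session (sm, &addr, clib_host_to_net_u16 (12345),
			       &eh_addr, clib_host_to_net_u16 (443),
			       IP_PROTOCOL_TCP, 0 /* vrf_id */,
			       1 /* is_in */);
}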
(f->ops & NAT_FLOW_OP_SADDR_REWRITE && + f->rewrite.saddr.as_u32 != f->match.saddr.as_u32) + { + f->l3_csum_delta = + ip_csum_add_even (f->l3_csum_delta, f->rewrite.saddr.as_u32); + f->l3_csum_delta = + ip_csum_sub_even (f->l3_csum_delta, f->match.saddr.as_u32); + } + else + { + f->rewrite.saddr.as_u32 = f->match.saddr.as_u32; + } + if (f->ops & NAT_FLOW_OP_DADDR_REWRITE && + f->rewrite.daddr.as_u32 != f->match.daddr.as_u32) + { + f->l3_csum_delta = + ip_csum_add_even (f->l3_csum_delta, f->rewrite.daddr.as_u32); + f->l3_csum_delta = + ip_csum_sub_even (f->l3_csum_delta, f->match.daddr.as_u32); + } + else + { + f->rewrite.daddr.as_u32 = f->match.daddr.as_u32; + } + if (f->ops & NAT_FLOW_OP_SPORT_REWRITE && f->rewrite.sport != f->match.sport) + { + f->l4_csum_delta = ip_csum_add_even (f->l4_csum_delta, f->rewrite.sport); + f->l4_csum_delta = ip_csum_sub_even (f->l4_csum_delta, f->match.sport); + } + else + { + f->rewrite.sport = f->match.sport; + } + if (f->ops & NAT_FLOW_OP_DPORT_REWRITE && f->rewrite.dport != f->match.dport) + { + f->l4_csum_delta = ip_csum_add_even (f->l4_csum_delta, f->rewrite.dport); + f->l4_csum_delta = ip_csum_sub_even (f->l4_csum_delta, f->match.dport); + } + else + { + f->rewrite.dport = f->match.dport; + } + if (f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE && + f->rewrite.icmp_id != f->match.icmp_id) + { + f->l4_csum_delta = + ip_csum_add_even (f->l4_csum_delta, f->rewrite.icmp_id); + f->l4_csum_delta = ip_csum_sub_even (f->l4_csum_delta, f->match.icmp_id); + } + else + { + f->rewrite.icmp_id = f->match.icmp_id; + } + if (f->ops & NAT_FLOW_OP_TXFIB_REWRITE) + { + } + else + { + f->rewrite.fib_index = f->match.fib_index; + } +} + +static_always_inline int nat_6t_flow_icmp_translate (snat_main_t *sm, + vlib_buffer_t *b, + ip4_header_t *ip, + nat_6t_flow_t *f); + +static_always_inline void +nat_6t_flow_ip4_translate (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip, + nat_6t_flow_t *f, nat_protocol_t proto, + int is_icmp_inner_ip4) +{ + udp_header_t *udp = ip4_next_header (ip); + tcp_header_t *tcp = (tcp_header_t *) udp; + + if ((NAT_PROTOCOL_TCP == proto || NAT_PROTOCOL_UDP == proto) && + !vnet_buffer (b)->ip.reass.is_non_first_fragment) + { + if (!is_icmp_inner_ip4) + { // regular case + ip->src_address = f->rewrite.saddr; + ip->dst_address = f->rewrite.daddr; + udp->src_port = f->rewrite.sport; + udp->dst_port = f->rewrite.dport; + } + else + { // icmp inner ip4 - reversed saddr/daddr + ip->src_address = f->rewrite.daddr; + ip->dst_address = f->rewrite.saddr; + udp->src_port = f->rewrite.dport; + udp->dst_port = f->rewrite.sport; + } + + if (NAT_PROTOCOL_TCP == proto) + { + ip_csum_t tcp_sum = tcp->checksum; + tcp_sum = ip_csum_sub_even (tcp_sum, f->l3_csum_delta); + tcp_sum = ip_csum_sub_even (tcp_sum, f->l4_csum_delta); + mss_clamping (sm->mss_clamping, tcp, &tcp_sum); + tcp->checksum = ip_csum_fold (tcp_sum); + } + else if (proto == NAT_PROTOCOL_UDP && udp->checksum) + { + ip_csum_t udp_sum = udp->checksum; + udp_sum = ip_csum_sub_even (udp_sum, f->l3_csum_delta); + udp_sum = ip_csum_sub_even (udp_sum, f->l4_csum_delta); + udp->checksum = ip_csum_fold (udp_sum); + } + } + else + { + if (!is_icmp_inner_ip4) + { // regular case + ip->src_address = f->rewrite.saddr; + ip->dst_address = f->rewrite.daddr; + } + else + { // icmp inner ip4 - reversed saddr/daddr + ip->src_address = f->rewrite.daddr; + ip->dst_address = f->rewrite.saddr; + } + } + + ip_csum_t ip_sum = ip->checksum; + ip_sum = ip_csum_sub_even (ip_sum, f->l3_csum_delta); + ip->checksum = ip_csum_fold 
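/* A minimal illustrative sketch, not part of the original patch: the
 * deltas applied in the translate path are precomputed once per flow by
 * nat_6t_l3_l4_csum_calc() above, so the per-packet cost of the
 * incremental (RFC 1624 style) checksum update is a single
 * subtract-and-fold. For one rewritten 32-bit field the whole cycle,
 * mirroring the operations used above, looks like this: */
static inline u16
example_apply_csum_delta (u16 old_csum, u32 old_field, u32 new_field)
{
  /* build the delta exactly as nat_6t_l3_l4_csum_calc does ... */
  ip_csum_t delta = 0;
  delta = ip_csum_add_even (delta, new_field);
  delta = ip_csum_sub_even (delta, old_field);

  /* ... and apply it the way nat_6t_flow_ip4_translate does per packet */
  ip_csum_t sum = old_csum;
  sum = ip_csum_sub_even (sum, delta);
  return ip_csum_fold (sum);
}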
(ip_sum); + ASSERT (ip->checksum == ip4_header_checksum (ip)); +} + +static_always_inline int +nat_6t_flow_icmp_translate (snat_main_t *sm, vlib_buffer_t *b, + ip4_header_t *ip, nat_6t_flow_t *f) +{ + if (IP_PROTOCOL_ICMP != ip->protocol) + return NAT_ED_TRNSL_ERR_TRANSLATION_FAILED; + + icmp46_header_t *icmp = ip4_next_header (ip); + icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1); + + if ((!vnet_buffer (b)->ip.reass.is_non_first_fragment)) + { + if (icmp->checksum == 0) + icmp->checksum = 0xffff; + + if (!icmp_type_is_error_message (icmp->type)) + { + if ((f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE) && + (f->rewrite.icmp_id != echo->identifier)) + { + ip_csum_t sum = icmp->checksum; + sum = ip_csum_update (sum, echo->identifier, f->rewrite.icmp_id, + icmp_echo_header_t, + identifier /* changed member */); + echo->identifier = f->rewrite.icmp_id; + icmp->checksum = ip_csum_fold (sum); + } + } + else + { + // errors are not fragmented + ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1); + + if (!ip4_header_checksum_is_valid (inner_ip)) + { + return NAT_ED_TRNSL_ERR_TRANSLATION_FAILED; + } + + nat_protocol_t inner_proto = + ip_proto_to_nat_proto (inner_ip->protocol); + + ip_csum_t icmp_sum = icmp->checksum; + + switch (inner_proto) + { + case NAT_PROTOCOL_UDP: + case NAT_PROTOCOL_TCP: + nat_6t_flow_ip4_translate (sm, b, inner_ip, f, inner_proto, + 1 /* is_icmp_inner_ip4 */); + icmp_sum = ip_csum_sub_even (icmp_sum, f->l3_csum_delta); + icmp->checksum = ip_csum_fold (icmp_sum); + break; + case NAT_PROTOCOL_ICMP: + if (f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE) + { + icmp46_header_t *inner_icmp = ip4_next_header (inner_ip); + icmp_echo_header_t *inner_echo = + (icmp_echo_header_t *) (inner_icmp + 1); + if (f->rewrite.icmp_id != inner_echo->identifier) + { + ip_csum_t sum = icmp->checksum; + sum = ip_csum_update ( + sum, inner_echo->identifier, f->rewrite.icmp_id, + icmp_echo_header_t, identifier /* changed member */); + icmp->checksum = ip_csum_fold (sum); + ip_csum_t inner_sum = inner_icmp->checksum; + inner_sum = ip_csum_update ( + sum, inner_echo->identifier, f->rewrite.icmp_id, + icmp_echo_header_t, identifier /* changed member */); + inner_icmp->checksum = ip_csum_fold (inner_sum); + inner_echo->identifier = f->rewrite.icmp_id; + } + } + break; + default: + clib_warning ("unexpected NAT protocol value `%d'", inner_proto); + return NAT_ED_TRNSL_ERR_TRANSLATION_FAILED; + } + } + } + return NAT_ED_TRNSL_ERR_SUCCESS; +} + +nat_translation_error_e +nat_6t_flow_buf_translate (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip, + nat_6t_flow_t *f, nat_protocol_t proto, + int is_output_feature) +{ + if (!is_output_feature && f->ops & NAT_FLOW_OP_TXFIB_REWRITE) + { + vnet_buffer (b)->sw_if_index[VLIB_TX] = f->rewrite.fib_index; + } + + nat_6t_flow_ip4_translate (sm, b, ip, f, proto, 0 /* is_icmp_inner_ip4 */); + + if (NAT_PROTOCOL_ICMP == proto) + { + return nat_6t_flow_icmp_translate (sm, b, ip, f); + } + + return NAT_ED_TRNSL_ERR_SUCCESS; +} + +u8 * +format_nat_6t (u8 *s, va_list *args) +{ + nat_6t_t *t = va_arg (*args, nat_6t_t *); + + s = format (s, "saddr %U sport %u daddr %U dport %u proto %U fib_idx %u", + format_ip4_address, t->saddr.as_u8, + clib_net_to_host_u16 (t->sport), format_ip4_address, + t->daddr.as_u8, clib_net_to_host_u16 (t->dport), + format_ip_protocol, t->proto, t->fib_index); + return s; +} + +u8 * +format_nat_ed_translation_error (u8 *s, va_list *args) +{ + nat_translation_error_e e = va_arg (*args, nat_translation_error_e); + + switch (e) + { + case 
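/* A minimal illustrative sketch, not part of the original patch: how a
 * data-path node is expected to drive the translation helpers above. For
 * ICMP error messages the embedded inner IPv4 header is rewritten with
 * source and destination swapped (is_icmp_inner_ip4 = 1) and the outer
 * ICMP checksum is patched with the same precomputed L3 delta; callers
 * only need the buffer-level entry point. The wrapper name is
 * hypothetical. */
static inline int
example_translate_buffer (snat_main_t *sm, vlib_buffer_t *b,
			  ip4_header_t *ip, nat_6t_flow_t *f,
			  nat_protocol_t proto)
{
  nat_translation_error_e rc =
    nat_6t_flow_buf_translate (sm, b, ip, f, proto,
			       0 /* is_output_feature */);
  /* anything other than success would typically be handled as a drop */
  return rc == NAT_ED_TRNSL_ERR_SUCCESS ? 0 : -1;
}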
NAT_ED_TRNSL_ERR_SUCCESS: + s = format (s, "success"); + break; + case NAT_ED_TRNSL_ERR_TRANSLATION_FAILED: + s = format (s, "translation-failed"); + break; + case NAT_ED_TRNSL_ERR_FLOW_MISMATCH: + s = format (s, "flow-mismatch"); + break; + } + return s; +} + +u8 * +format_nat_6t_flow (u8 *s, va_list *args) +{ + nat_6t_flow_t *f = va_arg (*args, nat_6t_flow_t *); + + s = format (s, "match: %U ", format_nat_6t, &f->match); + int r = 0; + if (f->ops & NAT_FLOW_OP_SADDR_REWRITE) + { + s = format (s, "rewrite: saddr %U ", format_ip4_address, + f->rewrite.saddr.as_u8); + r = 1; + } + if (f->ops & NAT_FLOW_OP_SPORT_REWRITE) + { + if (!r) + { + s = format (s, "rewrite: "); + r = 1; + } + s = format (s, "sport %u ", clib_net_to_host_u16 (f->rewrite.sport)); + } + if (f->ops & NAT_FLOW_OP_DADDR_REWRITE) + { + if (!r) + { + s = format (s, "rewrite: "); + r = 1; + } + s = format (s, "daddr %U ", format_ip4_address, f->rewrite.daddr.as_u8); + } + if (f->ops & NAT_FLOW_OP_DPORT_REWRITE) + { + if (!r) + { + s = format (s, "rewrite: "); + r = 1; + } + s = format (s, "dport %u ", clib_net_to_host_u16 (f->rewrite.dport)); + } + if (f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE) + { + if (!r) + { + s = format (s, "rewrite: "); + r = 1; + } + s = format (s, "icmp-id %u ", clib_net_to_host_u16 (f->rewrite.icmp_id)); + } + if (f->ops & NAT_FLOW_OP_TXFIB_REWRITE) + { + if (!r) + { + s = format (s, "rewrite: "); + r = 1; + } + s = format (s, "txfib %u ", f->rewrite.fib_index); + } + return s; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/nat/nat44-ed/nat44_ed.h b/src/plugins/nat/nat44-ed/nat44_ed.h new file mode 100644 index 00000000000..10d1207fec8 --- /dev/null +++ b/src/plugins/nat/nat44-ed/nat44_ed.h @@ -0,0 +1,1183 @@ +/* + * Copyright (c) 2020 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/** + * @file nat.c + * NAT plugin global declarations + */ +#ifndef __included_nat44_ed_h__ +#define __included_nat44_ed_h__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/* default number of worker handoff frame queue elements */ +#define NAT_FQ_NELTS_DEFAULT 64 + +/* NAT buffer flags */ +#define SNAT_FLAG_HAIRPINNING (1 << 0) + +/* NAT44 API Configuration flags */ +#define foreach_nat44_config_flag \ + _(0x00, IS_ENDPOINT_INDEPENDENT) \ + _(0x01, IS_ENDPOINT_DEPENDENT) \ + _(0x02, IS_STATIC_MAPPING_ONLY) \ + _(0x04, IS_CONNECTION_TRACKING) \ + _(0x08, IS_OUT2IN_DPO) + +typedef enum nat44_config_flags_t_ +{ +#define _(n,f) NAT44_API_##f = n, + foreach_nat44_config_flag +#undef _ +} nat44_config_flags_t; + +typedef struct +{ + /* nat44 plugin features */ + u8 static_mapping_only; + u8 connection_tracking; + + u32 inside_vrf; + u32 outside_vrf; + + /* maximum number of sessions */ + u32 sessions; + +} nat44_config_t; + +typedef enum +{ + NAT_NEXT_DROP, + NAT_NEXT_ICMP_ERROR, + NAT_NEXT_IN2OUT_ED_FAST_PATH, + NAT_NEXT_IN2OUT_ED_SLOW_PATH, + NAT_NEXT_IN2OUT_ED_OUTPUT_FAST_PATH, + NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH, + NAT_NEXT_OUT2IN_ED_FAST_PATH, + NAT_NEXT_OUT2IN_ED_SLOW_PATH, + NAT_NEXT_IN2OUT_CLASSIFY, + NAT_NEXT_OUT2IN_CLASSIFY, + NAT_N_NEXT, +} nat_next_t; + +typedef struct +{ + u32 next_index; + u32 arc_next_index; +} nat_pre_trace_t; + +/* External address and port allocation modes */ +#define foreach_nat_addr_and_port_alloc_alg \ + _(0, DEFAULT, "default") \ + _(1, MAPE, "map-e") \ + _(2, RANGE, "port-range") + +typedef enum +{ +#define _(v, N, s) NAT_ADDR_AND_PORT_ALLOC_ALG_##N = v, + foreach_nat_addr_and_port_alloc_alg +#undef _ +} nat_addr_and_port_alloc_alg_t; + +/* Session state */ +#define foreach_snat_session_state \ + _(0, UNKNOWN, "unknown") \ + _(1, UDP_ACTIVE, "udp-active") \ + _(2, TCP_SYN_SENT, "tcp-syn-sent") \ + _(3, TCP_ESTABLISHED, "tcp-established") \ + _(4, TCP_FIN_WAIT, "tcp-fin-wait") \ + _(5, TCP_CLOSE_WAIT, "tcp-close-wait") \ + _(6, TCP_CLOSING, "tcp-closing") \ + _(7, TCP_LAST_ACK, "tcp-last-ack") \ + _(8, TCP_CLOSED, "tcp-closed") \ + _(9, ICMP_ACTIVE, "icmp-active") + +typedef enum +{ +#define _(v, N, s) SNAT_SESSION_##N = v, + foreach_snat_session_state +#undef _ +} snat_session_state_t; + +#define foreach_nat_in2out_ed_error \ +_(UNSUPPORTED_PROTOCOL, "unsupported protocol") \ +_(OUT_OF_PORTS, "out of ports") \ +_(BAD_ICMP_TYPE, "unsupported ICMP type") \ +_(MAX_SESSIONS_EXCEEDED, "maximum sessions exceeded") \ +_(NON_SYN, "non-SYN packet try to create session") \ +_(TCP_CLOSED, "drops due to TCP in transitory timeout") + +typedef enum +{ +#define _(sym,str) NAT_IN2OUT_ED_ERROR_##sym, + foreach_nat_in2out_ed_error +#undef _ + NAT_IN2OUT_ED_N_ERROR, +} nat_in2out_ed_error_t; + +#define foreach_nat_out2in_ed_error \ + _ (UNSUPPORTED_PROTOCOL, "unsupported protocol") \ + _ (OUT_OF_PORTS, "out of ports") \ + _ (BAD_ICMP_TYPE, "unsupported ICMP type") \ + _ (NO_TRANSLATION, "no translation") \ + _ (MAX_SESSIONS_EXCEEDED, "maximum sessions exceeded") \ + _ (NON_SYN, "non-SYN packet try to create session") \ + _ (TCP_CLOSED, "drops due to TCP in transitory timeout") \ + _ (HASH_ADD_FAILED, "hash table add failed") + +typedef enum +{ +#define _(sym,str) NAT_OUT2IN_ED_ERROR_##sym, + foreach_nat_out2in_ed_error +#undef _ + NAT_OUT2IN_ED_N_ERROR, +} nat_out2in_ed_error_t; + + +/* Endpoint dependent TCP session state */ +#define NAT44_SES_I2O_FIN 1 +#define 
NAT44_SES_O2I_FIN 2 +#define NAT44_SES_I2O_FIN_ACK 4 +#define NAT44_SES_O2I_FIN_ACK 8 +#define NAT44_SES_I2O_SYN 16 +#define NAT44_SES_O2I_SYN 32 +#define NAT44_SES_RST 64 + +/* Session flags */ +#define SNAT_SESSION_FLAG_STATIC_MAPPING (1 << 0) +#define SNAT_SESSION_FLAG_UNKNOWN_PROTO (1 << 1) +#define SNAT_SESSION_FLAG_LOAD_BALANCING (1 << 2) +#define SNAT_SESSION_FLAG_TWICE_NAT (1 << 3) +#define SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT (1 << 4) +#define SNAT_SESSION_FLAG_FWD_BYPASS (1 << 5) +#define SNAT_SESSION_FLAG_AFFINITY (1 << 6) +#define SNAT_SESSION_FLAG_EXACT_ADDRESS (1 << 7) +#define SNAT_SESSION_FLAG_HAIRPINNING (1 << 8) + +/* NAT interface flags */ +#define NAT_INTERFACE_FLAG_IS_INSIDE 1 +#define NAT_INTERFACE_FLAG_IS_OUTSIDE 2 + +/* Static mapping flags */ +#define NAT_STATIC_MAPPING_FLAG_ADDR_ONLY 1 +#define NAT_STATIC_MAPPING_FLAG_OUT2IN_ONLY 2 +#define NAT_STATIC_MAPPING_FLAG_IDENTITY_NAT 4 +#define NAT_STATIC_MAPPING_FLAG_LB 8 +#define NAT_STATIC_MAPPING_FLAG_EXACT_ADDRESS 16 + +/* *INDENT-OFF* */ +typedef CLIB_PACKED(struct +{ + // number of sessions in this vrf + u32 ses_count; + + u32 rx_fib_index; + u32 tx_fib_index; + + // is this vrf expired + u8 expired; +}) per_vrf_sessions_t; +/* *INDENT-ON* */ + +typedef struct +{ + ip4_address_t saddr, daddr; + u32 fib_index; + u16 sport, dport; + u16 icmp_id; + u8 proto; +} nat_6t_t; + +typedef struct +{ +#define NAT_FLOW_OP_SADDR_REWRITE (1 << 1) +#define NAT_FLOW_OP_SPORT_REWRITE (1 << 2) +#define NAT_FLOW_OP_DADDR_REWRITE (1 << 3) +#define NAT_FLOW_OP_DPORT_REWRITE (1 << 4) +#define NAT_FLOW_OP_ICMP_ID_REWRITE (1 << 5) +#define NAT_FLOW_OP_TXFIB_REWRITE (1 << 6) + int ops; + nat_6t_t match; + nat_6t_t rewrite; + uword l3_csum_delta; + uword l4_csum_delta; +} nat_6t_flow_t; + +void nat44_ed_forwarding_enable_disable (u8 is_enable); + +always_inline void +nat_6t_flow_saddr_rewrite_set (nat_6t_flow_t *f, u32 saddr) +{ + f->ops |= NAT_FLOW_OP_SADDR_REWRITE; + f->rewrite.saddr.as_u32 = saddr; +} + +always_inline void +nat_6t_flow_daddr_rewrite_set (nat_6t_flow_t *f, u32 daddr) +{ + f->ops |= NAT_FLOW_OP_DADDR_REWRITE; + f->rewrite.daddr.as_u32 = daddr; +} + +always_inline void +nat_6t_flow_sport_rewrite_set (nat_6t_flow_t *f, u32 sport) +{ + f->ops |= NAT_FLOW_OP_SPORT_REWRITE; + f->rewrite.sport = sport; +} + +always_inline void +nat_6t_flow_dport_rewrite_set (nat_6t_flow_t *f, u32 dport) +{ + f->ops |= NAT_FLOW_OP_DPORT_REWRITE; + f->rewrite.dport = dport; +} + +always_inline void +nat_6t_flow_txfib_rewrite_set (nat_6t_flow_t *f, u32 tx_fib_index) +{ + f->ops |= NAT_FLOW_OP_TXFIB_REWRITE; + f->rewrite.fib_index = tx_fib_index; +} + +always_inline void +nat_6t_flow_icmp_id_rewrite_set (nat_6t_flow_t *f, u16 id) +{ + f->ops |= NAT_FLOW_OP_ICMP_ID_REWRITE; + f->rewrite.icmp_id = id; +} + +/* *INDENT-OFF* */ +typedef CLIB_PACKED(struct +{ + /* Outside network tuple */ + struct + { + ip4_address_t addr; + u32 fib_index; + u16 port; + } out2in; + + /* Inside network tuple */ + struct + { + ip4_address_t addr; + u32 fib_index; + u16 port; + } in2out; + + nat_protocol_t nat_proto; + + nat_6t_flow_t i2o; + nat_6t_flow_t o2i; + + /* Flags */ + u32 flags; + + /* head of LRU list in which this session is tracked */ + u32 lru_head_index; + /* index in global LRU list */ + u32 lru_index; + f64 last_lru_update; + + /* Last heard timer */ + f64 last_heard; + + /* Last HA refresh */ + f64 ha_last_refreshed; + + /* Counters */ + u64 total_bytes; + u32 total_pkts; + + /* External host address and port */ + ip4_address_t ext_host_addr; + u16 
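/* A minimal illustrative sketch, not part of the original patch: how the
 * nat_6t_flow_t setters defined above are meant to be combined when a
 * session is created. For a plain in2out flow only the source side is
 * rewritten; the helper name is hypothetical and all values are assumed
 * to already be in network byte order. */
static inline void
example_fill_i2o_flow (nat_6t_flow_t *f, ip4_address_t in_addr, u16 in_port,
		       ip4_address_t out_addr, u16 out_port,
		       ip4_address_t dst_addr, u16 dst_port,
		       u32 rx_fib_index, u8 ip_proto)
{
  clib_memset (f, 0, sizeof (*f));

  /* what the packet looks like when it arrives on the inside */
  f->match.saddr = in_addr;
  f->match.sport = in_port;
  f->match.daddr = dst_addr;
  f->match.dport = dst_port;
  f->match.proto = ip_proto;
  f->match.fib_index = rx_fib_index;

  /* what should be rewritten before it leaves on the outside */
  nat_6t_flow_saddr_rewrite_set (f, out_addr.as_u32);
  nat_6t_flow_sport_rewrite_set (f, out_port);

  /* precompute the checksum deltas consumed by the translate path */
  nat_6t_l3_l4_csum_calc (f);
}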
ext_host_port; + + /* External host address and port after translation */ + ip4_address_t ext_host_nat_addr; + u16 ext_host_nat_port; + + /* TCP session state */ + u8 state; + u32 i2o_fin_seq; + u32 o2i_fin_seq; + u64 tcp_closed_timestamp; + + /* per vrf sessions index */ + u32 per_vrf_sessions_index; + +}) snat_session_t; +/* *INDENT-ON* */ + +typedef struct +{ + ip4_address_t addr; + u32 fib_index; +/* *INDENT-OFF* */ +#define _(N, i, n, s) \ + u32 busy_##n##_ports; \ + u32 * busy_##n##_ports_per_thread; \ + u32 busy_##n##_port_refcounts[65535]; + foreach_nat_protocol +#undef _ +/* *INDENT-ON* */ +} snat_address_t; + +typedef struct +{ + u32 fib_index; + u32 ref_count; +} nat_fib_t; + +typedef struct +{ + u32 fib_index; + u32 refcount; +} nat_outside_fib_t; + +typedef struct +{ + /* backend IP address */ + ip4_address_t addr; + /* backend port number */ + u16 port; + /* probability of the backend to be randomly matched */ + u8 probability; + u8 prefix; + /* backend FIB table */ + u32 vrf_id; + u32 fib_index; +} nat44_lb_addr_port_t; + +typedef enum +{ + /* twice-nat disabled */ + TWICE_NAT_DISABLED, + /* twice-nat enabled */ + TWICE_NAT, + /* twice-nat only when src IP equals dst IP after translation */ + TWICE_NAT_SELF, +} twice_nat_type_t; + +typedef enum +{ + /* no load-balancing */ + NO_LB_NAT, + /* load-balancing */ + LB_NAT, + /* load-balancing with affinity */ + AFFINITY_LB_NAT, +} lb_nat_type_t; + +typedef struct +{ + /* prefered pool address */ + ip4_address_t pool_addr; + /* local IP address */ + ip4_address_t local_addr; + /* external IP address */ + ip4_address_t external_addr; + /* local port */ + u16 local_port; + /* external port */ + u16 external_port; + /* is twice-nat */ + twice_nat_type_t twice_nat; + /* local FIB table */ + u32 vrf_id; + u32 fib_index; + /* protocol */ + nat_protocol_t proto; + /* 0 = disabled, otherwise client IP affinity sticky time in seconds */ + u32 affinity; + /* worker threads used by backends/local host */ + u32 *workers; + /* opaque string tag */ + u8 *tag; + /* backends for load-balancing mode */ + nat44_lb_addr_port_t *locals; + /* affinity per service lis */ + u32 affinity_per_service_list_head_index; + /* flags */ + u32 flags; +} snat_static_mapping_t; + +typedef struct +{ + u32 sw_if_index; + u8 flags; +} snat_interface_t; + +typedef struct +{ + ip4_address_t l_addr; + ip4_address_t pool_addr; + u16 l_port; + u16 e_port; + u32 sw_if_index; + u32 vrf_id; + nat_protocol_t proto; + u32 flags; + int addr_only; + int twice_nat; + int out2in_only; + int identity_nat; + int exact; + u8 *tag; +} snat_static_map_resolve_t; + +typedef struct +{ + /* Session pool */ + snat_session_t *sessions; + + /* Pool of doubly-linked list elements */ + dlist_elt_t *list_pool; + + /* LRU session list - head is stale, tail is fresh */ + dlist_elt_t *lru_pool; + u32 tcp_trans_lru_head_index; + u32 tcp_estab_lru_head_index; + u32 udp_lru_head_index; + u32 icmp_lru_head_index; + u32 unk_proto_lru_head_index; + + /* NAT thread index */ + u32 snat_thread_index; + + /* real thread index */ + u32 thread_index; + + per_vrf_sessions_t *per_vrf_sessions_vec; + +} snat_main_per_thread_data_t; + +struct snat_main_s; + +/* Return worker thread index for given packet */ +typedef u32 (snat_get_worker_in2out_function_t) (ip4_header_t * ip, + u32 rx_fib_index, + u8 is_output); + +typedef u32 (snat_get_worker_out2in_function_t) (vlib_buffer_t * b, + ip4_header_t * ip, + u32 rx_fib_index, + u8 is_output); + +/* NAT address and port allocation function */ +typedef int 
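/* A minimal illustrative sketch, not part of the original patch: the
 * foreach_nat_protocol block in snat_address_t above expands once per
 * protocol of the NAT library (assumed here to be udp, tcp, icmp and
 * other, matching the NAT_PROTOCOL_* values used earlier in this patch),
 * so for TCP alone the per-address port bookkeeping amounts to roughly: */
typedef struct
{
  ip4_address_t addr;
  u32 fib_index;

  u32 busy_tcp_ports;			/* total TCP ports in use on this address */
  u32 *busy_tcp_ports_per_thread;	/* per-worker usage counters */
  u32 busy_tcp_port_refcounts[65535];	/* per-port reference counts */
  /* ... plus the same three fields again for udp, icmp and other ... */
} example_snat_address_expanded_t;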
(nat_alloc_out_addr_and_port_function_t) (snat_address_t * + addresses, + u32 fib_index, + u32 thread_index, + nat_protocol_t proto, + ip4_address_t * addr, + u16 * port, + u16 port_per_thread, + u32 snat_thread_index); + +typedef struct snat_main_s +{ + /* Thread settings */ + u32 num_workers; + u32 first_worker_index; + u32 *workers; + snat_get_worker_in2out_function_t *worker_in2out_cb; + snat_get_worker_out2in_function_t *worker_out2in_cb; + u16 port_per_thread; + + /* Per thread data */ + snat_main_per_thread_data_t *per_thread_data; + + /* Find a static mapping by local */ + clib_bihash_8_8_t static_mapping_by_local; + + /* Find a static mapping by external */ + clib_bihash_8_8_t static_mapping_by_external; + + /* Static mapping pool */ + snat_static_mapping_t *static_mappings; + + /* Endpoint independent lookup tables */ + clib_bihash_8_8_t in2out; + clib_bihash_8_8_t out2in; + + /* Endpoint dependent lookup table */ + clib_bihash_16_8_t flow_hash; + + /* Interface pool */ + snat_interface_t *interfaces; + snat_interface_t *output_feature_interfaces; + + /* Vector of outside addresses */ + snat_address_t *addresses; + /* Address and port allocation function */ + nat_alloc_out_addr_and_port_function_t *alloc_addr_and_port; + /* Address and port allocation type */ + nat_addr_and_port_alloc_alg_t addr_and_port_alloc_alg; + /* Port set parameters (MAP-E) */ + u8 psid_offset; + u8 psid_length; + u16 psid; + /* Port range parameters */ + u16 start_port; + u16 end_port; + + /* vector of fibs */ + nat_fib_t *fibs; + + /* vector of outside fibs */ + nat_outside_fib_t *outside_fibs; + + /* Vector of twice NAT addresses for external hosts */ + snat_address_t *twice_nat_addresses; + + /* sw_if_indices whose intfc addresses should be auto-added */ + u32 *auto_add_sw_if_indices; + u32 *auto_add_sw_if_indices_twice_nat; + + /* vector of interface address static mappings to resolve. 
*/ + snat_static_map_resolve_t *to_resolve; + + /* Randomize port allocation order */ + u32 random_seed; + + /* Worker handoff frame-queue index */ + u32 fq_in2out_index; + u32 fq_in2out_output_index; + u32 fq_out2in_index; + + u32 out2in_node_index; + u32 in2out_node_index; + u32 in2out_output_node_index; + + nat44_config_t rconfig; + //nat44_config_t cconfig; + + /* If forwarding is enabled */ + u8 forwarding_enabled; + + /* static mapping config */ + u8 static_mapping_only; + u8 static_mapping_connection_tracking; + + /* Is translation memory size calculated or user defined */ + u8 translation_memory_size_set; + + u32 translation_buckets; + u32 max_translations_per_thread; + u32 *max_translations_per_fib; + + u32 outside_vrf_id; + u32 outside_fib_index; + u32 inside_vrf_id; + u32 inside_fib_index; + + nat_timeouts_t timeouts; + + /* TCP MSS clamping */ + u16 mss_clamping; + + /* counters */ + vlib_simple_counter_main_t total_sessions; + +#define _(x) vlib_simple_counter_main_t x; + struct + { + struct + { + struct + { + foreach_nat_counter; + } in2out; + + struct + { + foreach_nat_counter; + } out2in; + } fastpath; + + struct + { + struct + { + foreach_nat_counter; + } in2out; + + struct + { + foreach_nat_counter; + } out2in; + } slowpath; + + vlib_simple_counter_main_t hairpinning; + } counters; +#undef _ + + /* API message ID base */ + u16 msg_id_base; + + /* log class */ + vlib_log_class_t log_class; + /* logging level */ + u8 log_level; + + /* convenience */ + api_main_t *api_main; + ip4_main_t *ip4_main; + ip_lookup_main_t *ip4_lookup_main; + + fib_source_t fib_src_hi; + fib_source_t fib_src_low; + + /* pat - dynamic mapping enabled or conneciton tracking */ + u8 pat; + + /* number of worker handoff frame queue elements */ + u32 frame_queue_nelts; + + /* nat44 plugin enabled */ + u8 enabled; + + vnet_main_t *vnet_main; + +} snat_main_t; + +typedef struct +{ + u32 thread_index; + f64 now; +} nat44_is_idle_session_ctx_t; + +typedef struct +{ + u32 cached_sw_if_index; + u32 cached_ip4_address; +} snat_runtime_t; + +extern snat_main_t snat_main; + +// nat pre ed next_node feature classification +extern vlib_node_registration_t nat_default_node; +extern vlib_node_registration_t nat_pre_in2out_node; +extern vlib_node_registration_t nat_pre_out2in_node; + +extern vlib_node_registration_t snat_in2out_node; +extern vlib_node_registration_t snat_in2out_output_node; +extern vlib_node_registration_t snat_out2in_node; +extern vlib_node_registration_t snat_in2out_worker_handoff_node; +extern vlib_node_registration_t snat_in2out_output_worker_handoff_node; +extern vlib_node_registration_t snat_out2in_worker_handoff_node; +extern vlib_node_registration_t nat44_ed_in2out_node; +extern vlib_node_registration_t nat44_ed_in2out_output_node; +extern vlib_node_registration_t nat44_ed_out2in_node; + +extern fib_source_t nat_fib_src_hi; +extern fib_source_t nat_fib_src_low; + +/* format functions */ +format_function_t format_snat_static_mapping; +format_function_t format_snat_static_map_to_resolve; +format_function_t format_snat_session; +format_function_t format_snat_key; +format_function_t format_static_mapping_key; +format_function_t format_nat_protocol; +format_function_t format_nat_addr_and_port_alloc_alg; +/* unformat functions */ +unformat_function_t unformat_nat_protocol; + +/** \brief Check if SNAT session is created from static mapping. 
+ @param s SNAT session + @return 1 if SNAT session is created from static mapping otherwise 0 +*/ +#define snat_is_session_static(s) (s->flags & SNAT_SESSION_FLAG_STATIC_MAPPING) + +/** \brief Check if SNAT session for unknown protocol. + @param s SNAT session + @return 1 if SNAT session for unknown protocol otherwise 0 +*/ +#define snat_is_unk_proto_session(s) (s->flags & SNAT_SESSION_FLAG_UNKNOWN_PROTO) + +/** \brief Check if NAT session is twice NAT. + @param s NAT session + @return 1 if NAT session is twice NAT +*/ +#define is_twice_nat_session(s) (s->flags & SNAT_SESSION_FLAG_TWICE_NAT) + +/** \brief Check if NAT session is load-balancing. + @param s NAT session + @return 1 if NAT session is load-balancing +*/ +#define is_lb_session(s) (s->flags & SNAT_SESSION_FLAG_LOAD_BALANCING) + +/** \brief Check if NAT session is forwarding bypass. + @param s NAT session + @return 1 if NAT session is load-balancing +*/ +#define is_fwd_bypass_session(s) (s->flags & SNAT_SESSION_FLAG_FWD_BYPASS) + +/** \brief Check if NAT session is endpoint dependent. + @param s NAT session + @return 1 if NAT session is endpoint dependent +*/ +#define is_ed_session(s) (s->flags & SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT) + +/** \brief Check if NAT session has affinity record. + @param s NAT session + @return 1 if NAT session has affinity record +*/ +#define is_affinity_sessions(s) (s->flags & SNAT_SESSION_FLAG_AFFINITY) + +/** \brief Check if exact pool address should be used. + @param s SNAT session + @return 1 if exact pool address or 0 +*/ +#define is_exact_address_session(s) (s->flags & SNAT_SESSION_FLAG_EXACT_ADDRESS) + +/** \brief Check if NAT interface is inside. + @param i NAT interface + @return 1 if inside interface +*/ +#define nat_interface_is_inside(i) i->flags & NAT_INTERFACE_FLAG_IS_INSIDE + +/** \brief Check if NAT interface is outside. + @param i NAT interface + @return 1 if outside interface +*/ +#define nat_interface_is_outside(i) i->flags & NAT_INTERFACE_FLAG_IS_OUTSIDE + +/** \brief Check if NAT44 endpoint-dependent TCP session is closed. + @param s NAT session + @return 1 if session is closed +*/ +#define nat44_is_ses_closed(s) s->state == 0xf + +/** \brief Check if NAT static mapping is address only (1:1NAT). + @param sm NAT static mapping + @return 1 if 1:1NAT, 0 if 1:1NAPT +*/ +#define is_addr_only_static_mapping(sm) (sm->flags & NAT_STATIC_MAPPING_FLAG_ADDR_ONLY) + +/** \brief Check if NAT static mapping match only out2in direction. + @param sm NAT static mapping + @return 1 if rule match only out2in direction +*/ +#define is_out2in_only_static_mapping(sm) (sm->flags & NAT_STATIC_MAPPING_FLAG_OUT2IN_ONLY) + +/** \brief Check if NAT static mapping is identity NAT. + @param sm NAT static mapping + @return 1 if identity NAT +*/ +#define is_identity_static_mapping(sm) (sm->flags & NAT_STATIC_MAPPING_FLAG_IDENTITY_NAT) + +/** \brief Check if NAT static mapping is load-balancing. + @param sm NAT static mapping + @return 1 if load-balancing +*/ +#define is_lb_static_mapping(sm) (sm->flags & NAT_STATIC_MAPPING_FLAG_LB) + +/** \brief Check if exact pool address should be used. 
+ @param s SNAT session + @return 1 if exact pool address or 0 +*/ +#define is_exact_address(s) (s->flags & NAT_STATIC_MAPPING_FLAG_EXACT_ADDRESS) + +/** \brief Check if client initiating TCP connection (received SYN from client) + @param t TCP header + @return 1 if client initiating TCP connection +*/ +always_inline bool +tcp_flags_is_init (u8 f) +{ + return (f & TCP_FLAG_SYN) && !(f & TCP_FLAG_ACK); +} + +/* logging */ +#define nat_log_err(...) \ + vlib_log(VLIB_LOG_LEVEL_ERR, snat_main.log_class, __VA_ARGS__) +#define nat_log_warn(...) \ + vlib_log(VLIB_LOG_LEVEL_WARNING, snat_main.log_class, __VA_ARGS__) +#define nat_log_notice(...) \ + vlib_log(VLIB_LOG_LEVEL_NOTICE, snat_main.log_class, __VA_ARGS__) +#define nat_log_info(...) \ + vlib_log(VLIB_LOG_LEVEL_INFO, snat_main.log_class, __VA_ARGS__) +#define nat_log_debug(...)\ + vlib_log(VLIB_LOG_LEVEL_DEBUG, snat_main.log_class, __VA_ARGS__) + +/** + * @brief Enable NAT44 plugin + * + * @param c nat44_config_t + * + * @return 0 on success, non-zero value otherwise + */ +int nat44_plugin_enable (nat44_config_t c); + +/** + * @brief Disable NAT44 plugin + * + * @return 0 on success, non-zero value otherwise + */ +int nat44_plugin_disable (); + +/** + * @brief Add external address to NAT44 pool + * + * @param sm snat global configuration data + * @param addr IPv4 address + * @param vrf_id VRF id of tenant, ~0 means independent of VRF + * @param twice_nat 1 if twice NAT address + * + * @return 0 on success, non-zero value otherwise + */ +int snat_add_address (snat_main_t * sm, ip4_address_t * addr, u32 vrf_id, + u8 twice_nat); + +/** + * @brief Delete external address from NAT44 pool + * + * @param sm snat global configuration data + * @param addr IPv4 address + * @param delete_sm 1 if delete static mapping using address + * @param twice_nat 1 if twice NAT address + * + * @return 0 on success, non-zero value otherwise + */ +int snat_del_address (snat_main_t * sm, ip4_address_t addr, u8 delete_sm, + u8 twice_nat); + +/** + * @brief Add/delete external address to FIB DPO (out2in DPO mode) + * + * @param addr IPv4 address + * @param is_add 1 = add, 0 = delete + * + * @return 0 on success, non-zero value otherwise + */ +void nat44_add_del_address_dpo (ip4_address_t addr, u8 is_add); + +/** + * @brief Add/delete NAT44 static mapping + * + * @param l_addr local IPv4 address + * @param e_addr external IPv4 address + * @param l_port local port number + * @param e_port external port number + * @param vrf_id local VRF ID + * @param addr_only 1 = 1:1NAT, 0 = 1:1NAPT + * @param sw_if_index use interface address as external IPv4 address + * @param proto L4 protocol + * @param is_add 1 = add, 0 = delete + * @param twice_nat twice-nat mode + * @param out2in_only if 1 rule match only out2in direction + * @param tag opaque string tag + * @param identity_nat identity NAT + * @param pool_addr pool IPv4 address + * @param exact 1 = exact pool address + * + * @return 0 on success, non-zero value otherwise + */ +int snat_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr, + u16 l_port, u16 e_port, u32 vrf_id, + int addr_only, u32 sw_if_index, + nat_protocol_t proto, int is_add, + twice_nat_type_t twice_nat, u8 out2in_only, + u8 * tag, u8 identity_nat, + ip4_address_t pool_addr, int exact); + +/** + * @brief Add/delete static mapping with load-balancing (multiple backends) + * + * @param e_addr external IPv4 address + * @param e_port external port number + * @param proto L4 protocol + * @param locals list of local backends + * @param is_add 1 = add, 0 = 
delete + * @param twice_nat twice-nat mode + * @param out2in_only if 1 rule match only out2in direction + * @param tag opaque string tag + * @param affinity 0 = disabled, otherwise client IP affinity sticky time + * + * @return 0 on success, non-zero value otherwise + */ +int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, + nat_protocol_t proto, + nat44_lb_addr_port_t * locals, u8 is_add, + twice_nat_type_t twice_nat, + u8 out2in_only, u8 * tag, u32 affinity); + +int nat44_lb_static_mapping_add_del_local (ip4_address_t e_addr, u16 e_port, + ip4_address_t l_addr, u16 l_port, + nat_protocol_t proto, u32 vrf_id, + u8 probability, u8 is_add); + +clib_error_t *nat44_api_hookup (vlib_main_t * vm); + +/** + * @brief Set NAT plugin workers + * + * @param bitmap NAT workers bitmap + * + * @return 0 on success, non-zero value otherwise + */ +int snat_set_workers (uword * bitmap); + +/** + * @brief Set NAT plugin number of frame queue elements + * + * @param frame_queue_nelts number of worker handoff frame queue elements + * + * @return 0 on success, non-zero value otherwise + */ +int snat_set_frame_queue_nelts (u32 frame_queue_nelts); + +/** + * @brief Enable/disable NAT44 feature on the interface + * + * @param sw_if_index software index of the interface + * @param is_inside 1 = inside, 0 = outside + * @param is_del 1 = delete, 0 = add + * + * @return 0 on success, non-zero value otherwise + */ +int snat_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del); + +/** + * @brief Enable/disable NAT44 output feature on the interface (postrouting NAT) + * + * @param sw_if_index software index of the interface + * @param is_inside 1 = inside, 0 = outside + * @param is_del 1 = delete, 0 = add + * + * @return 0 on success, non-zero value otherwise + */ +int snat_interface_add_del_output_feature (u32 sw_if_index, u8 is_inside, + int is_del); + +/** + * @brief Add/delete NAT44 pool address from specific interface + * + * @param sw_if_index software index of the interface + * @param is_del 1 = delete, 0 = add + * @param twice_nat 1 = twice NAT address for external hosts + * + * @return 0 on success, non-zero value otherwise + */ +int snat_add_interface_address (snat_main_t * sm, u32 sw_if_index, int is_del, + u8 twice_nat); + +/** + * @brief Delete NAT44 endpoint-dependent session + * + * @param sm snat global configuration data + * @param addr IPv4 address + * @param port L4 port number + * @param proto L4 protocol + * @param vrf_id VRF ID + * @param is_in 1 = inside network address and port pair, 0 = outside + * + * @return 0 on success, non-zero value otherwise + */ +int nat44_del_ed_session (snat_main_t * sm, ip4_address_t * addr, u16 port, + ip4_address_t * eh_addr, u16 eh_port, u8 proto, + u32 vrf_id, int is_in); + +/** + * @brief Free NAT44 session data (lookup keys, external address port) + * + * @param sm snat global configuration data + * @param s NAT session + * @param thread_index thread index + * @param is_ha is HA event + */ +void nat_free_session_data (snat_main_t * sm, snat_session_t * s, + u32 thread_index, u8 is_ha); + +/** + * @brief Set NAT44 session limit (session limit, vrf id) + * + * @param session_limit Session limit + * @param vrf_id VRF id + * @return 0 on success, non-zero value otherwise + */ +int nat44_set_session_limit (u32 session_limit, u32 vrf_id); + +/** + * @brief Update NAT44 session limit flushing all data (session limit, vrf id) + * + * @param session_limit Session limit + * @param vrf_id VRF id + * @return 0 on success, non-zero value 
otherwise + */ +int nat44_update_session_limit (u32 session_limit, u32 vrf_id); + +/** + * @brief Free outside address and port pair + * + * @param addresses vector of outside addresses + * @param thread_index thread index + * @param key address, port and protocol + */ +void +snat_free_outside_address_and_port (snat_address_t * addresses, + u32 thread_index, + ip4_address_t * addr, + u16 port, nat_protocol_t protocol); + +void expire_per_vrf_sessions (u32 fib_index); + +/** + * @brief Match NAT44 static mapping. + * + * @param key address and port to match + * @param addr external/local address of the matched mapping + * @param port port of the matched mapping + * @param fib_index fib index of the matched mapping + * @param by_external if 0 match by local address otherwise match by external + * address + * @param is_addr_only 1 if matched mapping is address only + * @param twice_nat matched mapping is twice NAT type + * @param lb 1 if matched mapping is load-balanced + * @param ext_host_addr external host address + * @param is_identity_nat 1 if indentity mapping + * @param out if !=0 set to pointer of the mapping structure + * + * @returns 0 if match found otherwise 1. + */ +int snat_static_mapping_match (snat_main_t * sm, + ip4_address_t match_addr, + u16 match_port, + u32 match_fib_index, + nat_protocol_t match_protocol, + ip4_address_t * mapping_addr, + u16 * mapping_port, + u32 * mapping_fib_index, + u8 by_external, + u8 * is_addr_only, + twice_nat_type_t * twice_nat, + lb_nat_type_t * lb, + ip4_address_t * ext_host_addr, + u8 * is_identity_nat, + snat_static_mapping_t ** out); + +/** + * @brief Add/del NAT address to FIB. + * + * Add the external NAT address to the FIB as receive entries. This ensures + * that VPP will reply to ARP for this address and we don't need to enable + * proxy ARP on the outside interface. + * + * @param addr IPv4 address + * @param plen address prefix length + * @param sw_if_index software index of the outside interface + * @param is_add 0 = delete, 1 = add. + */ +void snat_add_del_addr_to_fib (ip4_address_t * addr, + u8 p_len, u32 sw_if_index, int is_add); + +#if 0 +void +nat_ha_sadd_ed_cb (ip4_address_t * in_addr, u16 in_port, + ip4_address_t * out_addr, u16 out_port, + ip4_address_t * eh_addr, u16 eh_port, + ip4_address_t * ehn_addr, u16 ehn_port, u8 proto, + u32 fib_index, u16 flags, u32 thread_index); + +void +nat_ha_sdel_ed_cb (ip4_address_t * out_addr, u16 out_port, + ip4_address_t * eh_addr, u16 eh_port, u8 proto, + u32 fib_index, u32 ti); + +void +nat_ha_sref_ed_cb (ip4_address_t * out_addr, u16 out_port, + ip4_address_t * eh_addr, u16 eh_port, u8 proto, + u32 fib_index, u32 total_pkts, u64 total_bytes, + u32 thread_index); +#endif + +int nat_set_outside_address_and_port (snat_address_t *addresses, + u32 thread_index, ip4_address_t addr, + u16 port, nat_protocol_t protocol); + +/* + * Why is this here? Because we don't need to touch this layer to + * simply reply to an icmp. We need to change id to a unique + * value to NAT an echo request/reply. 
+ */ + +typedef struct +{ + u16 identifier; + u16 sequence; +} icmp_echo_header_t; + +typedef struct +{ + u16 src_port, dst_port; +} tcp_udp_header_t; + +u32 get_thread_idx_by_port (u16 e_port); + +u8 *format_static_mapping_kvp (u8 *s, va_list *args); + +u8 *format_session_kvp (u8 *s, va_list *args); + +u32 nat_calc_bihash_buckets (u32 n_elts); + +void nat44_addresses_free (snat_address_t **addresses); + +void nat44_ed_sessions_clear (); + +int nat44_ed_set_frame_queue_nelts (u32 frame_queue_nelts); + +typedef enum +{ + NAT_ED_TRNSL_ERR_SUCCESS = 0, + NAT_ED_TRNSL_ERR_TRANSLATION_FAILED = 1, + NAT_ED_TRNSL_ERR_FLOW_MISMATCH = 2, +} nat_translation_error_e; + +nat_translation_error_e +nat_6t_flow_buf_translate (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip, + nat_6t_flow_t *f, nat_protocol_t proto, + int is_output_feature); + +void nat_6t_l3_l4_csum_calc (nat_6t_flow_t *f); + +format_function_t format_nat_ed_translation_error; +format_function_t format_nat_6t_flow; +format_function_t format_ed_session_kvp; + +#endif /* __included_nat44_ed_h__ */ +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/nat/nat44-ed/nat44_ed_affinity.c b/src/plugins/nat/nat44-ed/nat44_ed_affinity.c new file mode 100644 index 00000000000..6debe401292 --- /dev/null +++ b/src/plugins/nat/nat44-ed/nat44_ed_affinity.c @@ -0,0 +1,288 @@ +/* + * Copyright (c) 2018 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/** + * @file + * @brief NAT plugin client-IP based session affinity for load-balancing + */ + +#include + +#include +#include + +nat_affinity_main_t nat_affinity_main; + +#define AFFINITY_HASH_BUCKETS 65536 +#define AFFINITY_HASH_MEMORY (2 << 25) + +u8 * +format_affinity_kvp (u8 * s, va_list * args) +{ + clib_bihash_kv_16_8_t *v = va_arg (*args, clib_bihash_kv_16_8_t *); + nat_affinity_key_t k; + + k.as_u64[0] = v->key[0]; + k.as_u64[1] = v->key[1]; + + s = format (s, "client %U backend %U:%d proto %U index %llu", + format_ip4_address, &k.client_addr, + format_ip4_address, &k.service_addr, + clib_net_to_host_u16 (k.service_port), + format_nat_protocol, k.proto); + + return s; +} + +void +nat_affinity_enable () +{ + nat_affinity_main_t *nam = &nat_affinity_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + + if (tm->n_vlib_mains > 1) + clib_spinlock_init (&nam->affinity_lock); + clib_bihash_init_16_8 (&nam->affinity_hash, "nat-affinity", + AFFINITY_HASH_BUCKETS, AFFINITY_HASH_MEMORY); + clib_bihash_set_kvp_format_fn_16_8 (&nam->affinity_hash, + format_affinity_kvp); +} + +void +nat_affinity_disable () +{ + nat_affinity_main_t *nam = &nat_affinity_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + + if (tm->n_vlib_mains > 1) + clib_spinlock_free (&nam->affinity_lock); + clib_bihash_free_16_8 (&nam->affinity_hash); +} + +clib_error_t * +nat_affinity_init (vlib_main_t * vm) +{ + nat_affinity_main_t *nam = &nat_affinity_main; + nam->vlib_main = vm; + return 0; +} + +static_always_inline void +make_affinity_kv (clib_bihash_kv_16_8_t * kv, ip4_address_t client_addr, + ip4_address_t service_addr, u8 proto, u16 service_port) +{ + nat_affinity_key_t *key = (nat_affinity_key_t *) kv->key; + + key->client_addr = client_addr; + key->service_addr = service_addr; + key->proto = proto; + key->service_port = service_port; + + kv->value = ~0ULL; +} + +u32 +nat_affinity_get_per_service_list_head_index (void) +{ + nat_affinity_main_t *nam = &nat_affinity_main; + dlist_elt_t *head_elt; + + clib_spinlock_lock_if_init (&nam->affinity_lock); + + pool_get (nam->list_pool, head_elt); + clib_dlist_init (nam->list_pool, head_elt - nam->list_pool); + + clib_spinlock_unlock_if_init (&nam->affinity_lock); + + return head_elt - nam->list_pool; +} + +void +nat_affinity_flush_service (u32 affinity_per_service_list_head_index) +{ + snat_main_t *sm = &snat_main; + nat_affinity_main_t *nam = &nat_affinity_main; + u32 elt_index; + dlist_elt_t *elt; + nat_affinity_t *a; + clib_bihash_kv_16_8_t kv; + + clib_spinlock_lock_if_init (&nam->affinity_lock); + + while ((elt_index = + clib_dlist_remove_head (nam->list_pool, + affinity_per_service_list_head_index)) != + ~0) + { + elt = pool_elt_at_index (nam->list_pool, elt_index); + a = pool_elt_at_index (nam->affinity_pool, elt->value); + kv.key[0] = a->key.as_u64[0]; + kv.key[1] = a->key.as_u64[1]; + pool_put_index (nam->affinity_pool, elt->value); + if (clib_bihash_add_del_16_8 (&nam->affinity_hash, &kv, 0)) + nat_elog_warn (sm, "affinity key del failed"); + pool_put_index (nam->list_pool, elt_index); + } + pool_put_index (nam->list_pool, affinity_per_service_list_head_index); + + clib_spinlock_unlock_if_init (&nam->affinity_lock); +} + +int +nat_affinity_find_and_lock (ip4_address_t client_addr, + ip4_address_t service_addr, u8 proto, + u16 service_port, u8 * backend_index) +{ + snat_main_t *sm = &snat_main; + nat_affinity_main_t *nam = &nat_affinity_main; + clib_bihash_kv_16_8_t kv, value; + nat_affinity_t *a; + int rv = 0; + + make_affinity_kv (&kv, 
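/* A minimal illustrative sketch, not part of the original patch: the
 * intended find / create / unlock cycle around the helpers in this file
 * when a load-balanced static mapping has client-IP affinity configured.
 * The wrapper and its locals are hypothetical; sticky_time and the
 * per-service list head index come from the static mapping, and the
 * session teardown path later drops the reference with
 * nat_affinity_unlock(). */
static u8
example_pick_backend_with_affinity (ip4_address_t client, ip4_address_t svc,
				    u8 proto, u16 svc_port, u32 sticky_time,
				    u32 per_service_list_head_index,
				    u8 fallback_backend)
{
  u8 backend;

  /* reuse the previously chosen backend for this client if one is cached */
  if (!nat_affinity_find_and_lock (client, svc, proto, svc_port, &backend))
    return backend;

  /* otherwise pick one (e.g. by configured probability) and remember it */
  backend = fallback_backend;
  nat_affinity_create_and_lock (client, svc, proto, svc_port, backend,
				sticky_time, per_service_list_head_index);
  return backend;
}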
client_addr, service_addr, proto, service_port); + clib_spinlock_lock_if_init (&nam->affinity_lock); + if (clib_bihash_search_16_8 (&nam->affinity_hash, &kv, &value)) + { + rv = 1; + goto unlock; + } + + a = pool_elt_at_index (nam->affinity_pool, value.value); + /* if already expired delete */ + if (a->ref_cnt == 0) + { + if (a->expire < vlib_time_now (nam->vlib_main)) + { + clib_dlist_remove (nam->list_pool, a->per_service_index); + pool_put_index (nam->list_pool, a->per_service_index); + pool_put_index (nam->affinity_pool, value.value); + if (clib_bihash_add_del_16_8 (&nam->affinity_hash, &kv, 0)) + nat_elog_warn (sm, "affinity key del failed"); + rv = 1; + goto unlock; + } + } + a->ref_cnt++; + *backend_index = a->backend_index; + +unlock: + clib_spinlock_unlock_if_init (&nam->affinity_lock); + return rv; +} + +static int +affinity_is_expired_cb (clib_bihash_kv_16_8_t * kv, void *arg) +{ + snat_main_t *sm = &snat_main; + nat_affinity_main_t *nam = &nat_affinity_main; + nat_affinity_t *a; + + a = pool_elt_at_index (nam->affinity_pool, kv->value); + if (a->ref_cnt == 0) + { + if (a->expire < vlib_time_now (nam->vlib_main)) + { + clib_dlist_remove (nam->list_pool, a->per_service_index); + pool_put_index (nam->list_pool, a->per_service_index); + pool_put_index (nam->affinity_pool, kv->value); + if (clib_bihash_add_del_16_8 (&nam->affinity_hash, kv, 0)) + nat_elog_warn (sm, "affinity key del failed"); + return 1; + } + } + + return 0; +} + +int +nat_affinity_create_and_lock (ip4_address_t client_addr, + ip4_address_t service_addr, u8 proto, + u16 service_port, u8 backend_index, + u32 sticky_time, + u32 affinity_per_service_list_head_index) +{ + snat_main_t *sm = &snat_main; + nat_affinity_main_t *nam = &nat_affinity_main; + clib_bihash_kv_16_8_t kv, value; + nat_affinity_t *a; + dlist_elt_t *list_elt; + int rv = 0; + + make_affinity_kv (&kv, client_addr, service_addr, proto, service_port); + clib_spinlock_lock_if_init (&nam->affinity_lock); + if (!clib_bihash_search_16_8 (&nam->affinity_hash, &kv, &value)) + { + rv = 1; + nat_elog_notice (sm, "affinity key already exist"); + goto unlock; + } + + pool_get (nam->affinity_pool, a); + kv.value = a - nam->affinity_pool; + rv = + clib_bihash_add_or_overwrite_stale_16_8 (&nam->affinity_hash, &kv, + affinity_is_expired_cb, NULL); + if (rv) + { + nat_elog_notice (sm, "affinity key add failed"); + pool_put (nam->affinity_pool, a); + goto unlock; + } + + pool_get (nam->list_pool, list_elt); + clib_dlist_init (nam->list_pool, list_elt - nam->list_pool); + list_elt->value = a - nam->affinity_pool; + a->per_service_index = list_elt - nam->list_pool; + a->backend_index = backend_index; + a->ref_cnt = 1; + a->sticky_time = sticky_time; + a->key.as_u64[0] = kv.key[0]; + a->key.as_u64[1] = kv.key[1]; + clib_dlist_addtail (nam->list_pool, affinity_per_service_list_head_index, + a->per_service_index); + +unlock: + clib_spinlock_unlock_if_init (&nam->affinity_lock); + return rv; +} + +void +nat_affinity_unlock (ip4_address_t client_addr, ip4_address_t service_addr, + u8 proto, u16 service_port) +{ + nat_affinity_main_t *nam = &nat_affinity_main; + clib_bihash_kv_16_8_t kv, value; + nat_affinity_t *a; + + make_affinity_kv (&kv, client_addr, service_addr, proto, service_port); + clib_spinlock_lock_if_init (&nam->affinity_lock); + if (clib_bihash_search_16_8 (&nam->affinity_hash, &kv, &value)) + goto unlock; + + a = pool_elt_at_index (nam->affinity_pool, value.value); + a->ref_cnt--; + if (a->ref_cnt == 0) + a->expire = (u64) a->sticky_time + vlib_time_now 
(nam->vlib_main); + +unlock: + clib_spinlock_unlock_if_init (&nam->affinity_lock); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/nat/nat44-ed/nat44_ed_affinity.h b/src/plugins/nat/nat44-ed/nat44_ed_affinity.h new file mode 100644 index 00000000000..2cfa9d29eb0 --- /dev/null +++ b/src/plugins/nat/nat44-ed/nat44_ed_affinity.h @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2018 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @file + * @brief NAT plugin client-IP based session affinity for load-balancing + */ + +#ifndef __included_nat44_ed_affinity_h__ +#define __included_nat44_ed_affinity_h__ + +#include +#include +#include + +typedef struct +{ + union + { + struct + { + ip4_address_t service_addr; + ip4_address_t client_addr; + /* align by making this 4 octets even though its a 1 octet field */ + u32 proto; + /* align by making this 4 octets even though its a 2 octets field */ + u32 service_port; + }; + u64 as_u64[2]; + }; +} nat_affinity_key_t; + +/* *INDENT-OFF* */ +typedef CLIB_PACKED(struct +{ + nat_affinity_key_t key; + u32 sticky_time; + u32 ref_cnt; + u32 per_service_index; + u8 backend_index; + f64 expire; +}) nat_affinity_t; +/* *INDENT-ON* */ + +typedef struct +{ + nat_affinity_t *affinity_pool; + clib_bihash_16_8_t affinity_hash; + clib_spinlock_t affinity_lock; + dlist_elt_t *list_pool; + vlib_main_t *vlib_main; +} nat_affinity_main_t; + +extern nat_affinity_main_t nat_affinity_main; + +/** + * @brief Get new affinity per service list head index. + * + * @returns new affinity per service list head index. + */ +u32 nat_affinity_get_per_service_list_head_index (void); + +/** + * @brief Flush all service affinity data. + * + * @param affinity_per_service_list_head_index Per sevice list head index. + */ +void nat_affinity_flush_service (u32 affinity_per_service_list_head_index); + +/** + * @brief NAT affinity enable + */ +void nat_affinity_enable (); + +/** + * @brief NAT affinity disable + */ +void nat_affinity_disable (); + +/** + * @brief Initialize NAT client-IP based affinity. + * + * @param vm vlib main. + * + * @return error code. + */ +clib_error_t *nat_affinity_init (vlib_main_t * vm); + +/** + * @brief Find service backend index for client-IP and take a reference + * counting lock. + * + * @param client_addr Client IP address. + * @param service_addr Service IP address. + * @param proto IP protocol number. + * @param service_port Service L4 port number. + * @param backend_index Service backend index for client-IP if found. + * + * @return 0 on success, non-zero value otherwise. + */ +int nat_affinity_find_and_lock (ip4_address_t client_addr, + ip4_address_t service_addr, u8 proto, + u16 service_port, u8 * backend_index); + +/** + * @brief Create affinity record and take reference counting lock. + * @param client_addr Client IP address. + * @param service_addr Service IP address. + * @param proto IP protocol number. 
+ * @param service_port Service L4 port number. + * @param backend_index Service backend index for client-IP. + * @param sticky_time Affinity sticky time in seconds. + * @param affinity_per_service_list_head_index Per sevice list head index. + * + * @return 0 on success, non-zero value otherwise. + */ +int nat_affinity_create_and_lock (ip4_address_t client_addr, + ip4_address_t service_addr, u8 proto, + u16 service_port, u8 backend_index, + u32 sticky_time, + u32 affinity_per_service_list_head_index); +/** + * @brief Release a reference counting lock for affinity. + * + * @param client_addr Client IP address. + * @param service_addr Service IP address. + * @param proto IP protocol number. + */ +void nat_affinity_unlock (ip4_address_t client_addr, + ip4_address_t service_addr, u8 proto, + u16 service_port); + +#endif /* __included_nat44_ed_affinity_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/nat/nat44-ed/nat44_ed_api.c b/src/plugins/nat/nat44-ed/nat44_ed_api.c new file mode 100644 index 00000000000..ea812da9633 --- /dev/null +++ b/src/plugins/nat/nat44-ed/nat44_ed_api.c @@ -0,0 +1,1541 @@ +/* + * Copyright (c) 2020 Cisco and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/** + * @file + * @brief NAT44 plugin API implementation + */ + +#include +#include + +#include + +#include +#include + +#include + +#include +#include + +#define REPLY_MSG_ID_BASE sm->msg_id_base +#include + +static void +vl_api_nat_control_ping_t_handler (vl_api_nat_control_ping_t * mp) +{ + vl_api_nat_control_ping_reply_t *rmp; + snat_main_t *sm = &snat_main; + int rv = 0; + + REPLY_MACRO2 (VL_API_NAT_CONTROL_PING_REPLY, + ({ + rmp->vpe_pid = ntohl (getpid ()); + })); +} + +static void +vl_api_nat_show_config_t_handler (vl_api_nat_show_config_t * mp) +{ + vl_api_nat_show_config_reply_t *rmp; + snat_main_t *sm = &snat_main; + int rv = 0; + + REPLY_MACRO2_ZERO (VL_API_NAT_SHOW_CONFIG_REPLY, + ({ + rmp->translation_buckets = htonl (sm->translation_buckets); + rmp->user_buckets = 0; + rmp->max_translations_per_user = 0; + rmp->outside_vrf_id = htonl (sm->outside_vrf_id); + rmp->inside_vrf_id = htonl (sm->inside_vrf_id); + rmp->static_mapping_only = sm->static_mapping_only; + rmp->static_mapping_connection_tracking = + sm->static_mapping_connection_tracking; + rmp->endpoint_dependent = 1; + rmp->out2in_dpo = 0; + })); +} + +static void +vl_api_nat_show_config_2_t_handler (vl_api_nat_show_config_2_t * mp) +{ + vl_api_nat_show_config_2_reply_t *rmp; + snat_main_t *sm = &snat_main; + int rv = 0; + + REPLY_MACRO2_ZERO (VL_API_NAT_SHOW_CONFIG_2_REPLY, + ({ + rmp->translation_buckets = htonl (sm->translation_buckets); + rmp->user_buckets = 0; + rmp->max_translations_per_user = 0; + rmp->outside_vrf_id = htonl (sm->outside_vrf_id); + rmp->inside_vrf_id = htonl (sm->inside_vrf_id); + rmp->static_mapping_only = sm->static_mapping_only; + rmp->static_mapping_connection_tracking = + sm->static_mapping_connection_tracking; + rmp->endpoint_dependent = 1; + rmp->out2in_dpo = 0; + rmp->max_translations_per_thread = + clib_net_to_host_u32 (sm->max_translations_per_thread); + rmp->max_users_per_thread = 0; + })); +} + +static void +vl_api_nat44_show_running_config_t_handler (vl_api_nat44_show_running_config_t + * mp) +{ + vl_api_nat44_show_running_config_reply_t *rmp; + snat_main_t *sm = &snat_main; + nat44_config_t *rc = &sm->rconfig; + int rv = 0; + + REPLY_MACRO2_ZERO (VL_API_NAT44_SHOW_RUNNING_CONFIG_REPLY, + ({ + rmp->inside_vrf = htonl (rc->inside_vrf); + rmp->outside_vrf = htonl (rc->outside_vrf); + + rmp->sessions = htonl (rc->sessions); + rmp->translation_buckets = htonl (sm->translation_buckets); + + // OBSOLETE + rmp->users = 0; + rmp->user_buckets = 0; + rmp->user_sessions = 0; + + rmp->timeouts.udp = htonl (sm->timeouts.udp); + rmp->timeouts.tcp_established = htonl (sm->timeouts.tcp.established); + rmp->timeouts.tcp_transitory = htonl (sm->timeouts.tcp.transitory); + rmp->timeouts.icmp = htonl (sm->timeouts.icmp); + + rmp->forwarding_enabled = sm->forwarding_enabled == 1; + // consider how to split functionality between subplugins + rmp->ipfix_logging_enabled = nat_ipfix_logging_enabled (); + rmp->flags |= NAT44_IS_ENDPOINT_DEPENDENT; + if (rc->static_mapping_only) + rmp->flags |= NAT44_IS_STATIC_MAPPING_ONLY; + if (rc->connection_tracking) + rmp->flags |= NAT44_IS_CONNECTION_TRACKING; + })); +} + +static void +vl_api_nat_set_workers_t_handler (vl_api_nat_set_workers_t * mp) +{ + snat_main_t *sm = &snat_main; + vl_api_nat_set_workers_reply_t *rmp; + int rv = 0; + uword *bitmap = 0; + u64 mask; + + mask = clib_net_to_host_u64 (mp->worker_mask); + + if (sm->num_workers < 2) + { + rv = VNET_API_ERROR_FEATURE_DISABLED; + goto send_reply; + } + + bitmap = clib_bitmap_set_multiple (bitmap, 0, 
mask, BITS (mask)); + rv = snat_set_workers (bitmap); + clib_bitmap_free (bitmap); + +send_reply: + REPLY_MACRO (VL_API_NAT_SET_WORKERS_REPLY); +} + +static void +send_nat_worker_details (u32 worker_index, vl_api_registration_t * reg, + u32 context) +{ + vl_api_nat_worker_details_t *rmp; + snat_main_t *sm = &snat_main; + vlib_worker_thread_t *w = + vlib_worker_threads + worker_index + sm->first_worker_index; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + clib_memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_NAT_WORKER_DETAILS + sm->msg_id_base); + rmp->context = context; + rmp->worker_index = htonl (worker_index); + rmp->lcore_id = htonl (w->cpu_id); + strncpy ((char *) rmp->name, (char *) w->name, ARRAY_LEN (rmp->name) - 1); + + vl_api_send_msg (reg, (u8 *) rmp); +} + +static void +vl_api_nat_worker_dump_t_handler (vl_api_nat_worker_dump_t * mp) +{ + vl_api_registration_t *reg; + snat_main_t *sm = &snat_main; + u32 *worker_index; + + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + + vec_foreach (worker_index, sm->workers) + { + send_nat_worker_details (*worker_index, reg, mp->context); + } +} + +static void +vl_api_nat44_session_cleanup_t_handler (vl_api_nat44_session_cleanup_t * mp) +{ + snat_main_t *sm = &snat_main; + vl_api_nat44_session_cleanup_reply_t *rmp; + int rv = VNET_API_ERROR_UNSUPPORTED; + REPLY_MACRO (VL_API_NAT44_SESSION_CLEANUP_REPLY); +} + +static void +vl_api_nat44_set_session_limit_t_handler (vl_api_nat44_set_session_limit_t * + mp) +{ + snat_main_t *sm = &snat_main; + vl_api_nat44_set_session_limit_reply_t *rmp; + int rv = 0; + + rv = nat44_set_session_limit + (ntohl (mp->session_limit), ntohl (mp->vrf_id)); + + REPLY_MACRO (VL_API_NAT44_SET_SESSION_LIMIT_REPLY); +} + +static void +vl_api_nat_set_log_level_t_handler (vl_api_nat_set_log_level_t * mp) +{ + snat_main_t *sm = &snat_main; + vl_api_nat_set_log_level_reply_t *rmp; + int rv = 0; + + if (sm->log_level > NAT_LOG_DEBUG) + rv = VNET_API_ERROR_UNSUPPORTED; + else + sm->log_level = mp->log_level; + + REPLY_MACRO (VL_API_NAT_SET_WORKERS_REPLY); +} + +static void + vl_api_nat44_plugin_enable_disable_t_handler + (vl_api_nat44_plugin_enable_disable_t * mp) +{ + snat_main_t *sm = &snat_main; + nat44_config_t c = { 0 }; + vl_api_nat44_plugin_enable_disable_reply_t *rmp; + int rv = 0; + + if (mp->enable) + { + if (mp->flags & NAT44_API_IS_OUT2IN_DPO || mp->users || + mp->user_sessions) + { + rv = VNET_API_ERROR_UNSUPPORTED; + } + else + { + c.static_mapping_only = mp->flags & NAT44_API_IS_STATIC_MAPPING_ONLY; + c.connection_tracking = mp->flags & NAT44_API_IS_CONNECTION_TRACKING; + + c.inside_vrf = ntohl (mp->inside_vrf); + c.outside_vrf = ntohl (mp->outside_vrf); + + c.sessions = ntohl (mp->sessions); + + rv = nat44_plugin_enable (c); + } + } + else + { + rv = nat44_plugin_disable (); + } + + REPLY_MACRO (VL_API_NAT44_PLUGIN_ENABLE_DISABLE_REPLY); +} + +static void +vl_api_nat_ipfix_enable_disable_t_handler (vl_api_nat_ipfix_enable_disable_t * + mp) +{ + snat_main_t *sm = &snat_main; + vl_api_nat_ipfix_enable_disable_reply_t *rmp; + int rv = 0; + + rv = nat_ipfix_logging_enable_disable (mp->enable, + clib_host_to_net_u32 + (mp->domain_id), + clib_host_to_net_u16 (mp->src_port)); + + REPLY_MACRO (VL_API_NAT_IPFIX_ENABLE_DISABLE_REPLY); +} + +static void +vl_api_nat_set_timeouts_t_handler (vl_api_nat_set_timeouts_t * mp) +{ + snat_main_t *sm = &snat_main; + vl_api_nat_set_timeouts_reply_t *rmp; + int rv = 0; + + sm->timeouts.udp = ntohl (mp->udp); + 
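+  /* Like the UDP timeout above, the remaining timeout values arrive from
+     the API client in network byte order and are stored in host order;
+     nat_get_timeouts performs the reverse conversion (htonl) when
+     reporting them back. */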
sm->timeouts.tcp.established = ntohl (mp->tcp_established); + sm->timeouts.tcp.transitory = ntohl (mp->tcp_transitory); + sm->timeouts.icmp = ntohl (mp->icmp); + + REPLY_MACRO (VL_API_NAT_SET_TIMEOUTS_REPLY); +} + +static void +vl_api_nat_get_timeouts_t_handler (vl_api_nat_get_timeouts_t * mp) +{ + snat_main_t *sm = &snat_main; + vl_api_nat_get_timeouts_reply_t *rmp; + int rv = 0; + + REPLY_MACRO2 (VL_API_NAT_GET_TIMEOUTS_REPLY, + ({ + rmp->udp = htonl (sm->timeouts.udp); + rmp->tcp_established = htonl (sm->timeouts.tcp.established); + rmp->tcp_transitory = htonl (sm->timeouts.tcp.transitory); + rmp->icmp = htonl (sm->timeouts.icmp); + })) +} + +static void + vl_api_nat_set_addr_and_port_alloc_alg_t_handler + (vl_api_nat_set_addr_and_port_alloc_alg_t * mp) +{ + snat_main_t *sm = &snat_main; + vl_api_nat_set_addr_and_port_alloc_alg_reply_t *rmp; + int rv = VNET_API_ERROR_UNSUPPORTED; + REPLY_MACRO (VL_API_NAT_SET_ADDR_AND_PORT_ALLOC_ALG_REPLY); +} + +static void + vl_api_nat_get_addr_and_port_alloc_alg_t_handler + (vl_api_nat_get_addr_and_port_alloc_alg_t * mp) +{ + snat_main_t *sm = &snat_main; + vl_api_nat_get_addr_and_port_alloc_alg_reply_t *rmp; + int rv = VNET_API_ERROR_UNSUPPORTED; + REPLY_MACRO (VL_API_NAT_GET_ADDR_AND_PORT_ALLOC_ALG_REPLY); +} + +static void +vl_api_nat_set_mss_clamping_t_handler (vl_api_nat_set_mss_clamping_t * mp) +{ + snat_main_t *sm = &snat_main; + vl_api_nat_set_mss_clamping_reply_t *rmp; + int rv = 0; + + if (mp->enable) + sm->mss_clamping = ntohs (mp->mss_value); + else + sm->mss_clamping = 0; + + REPLY_MACRO (VL_API_NAT_SET_MSS_CLAMPING_REPLY); +} + +static void +vl_api_nat_get_mss_clamping_t_handler (vl_api_nat_get_mss_clamping_t * mp) +{ + snat_main_t *sm = &snat_main; + vl_api_nat_get_mss_clamping_reply_t *rmp; + int rv = 0; + + REPLY_MACRO2 (VL_API_NAT_GET_MSS_CLAMPING_REPLY, + ({ + rmp->enable = sm->mss_clamping ? 
1 : 0; + rmp->mss_value = htons (sm->mss_clamping); + })) +} + +static void +vl_api_nat_ha_set_listener_t_handler (vl_api_nat_ha_set_listener_t * mp) +{ + snat_main_t *sm = &snat_main; + vl_api_nat_ha_set_listener_reply_t *rmp; + int rv = VNET_API_ERROR_UNSUPPORTED; + REPLY_MACRO (VL_API_NAT_HA_SET_LISTENER_REPLY); +} + +static void +vl_api_nat_ha_get_listener_t_handler (vl_api_nat_ha_get_listener_t * mp) +{ + snat_main_t *sm = &snat_main; + vl_api_nat_ha_get_listener_reply_t *rmp; + int rv = VNET_API_ERROR_UNSUPPORTED; + REPLY_MACRO (VL_API_NAT_HA_GET_LISTENER_REPLY); +} + +static void +vl_api_nat_ha_set_failover_t_handler (vl_api_nat_ha_set_failover_t * mp) +{ + snat_main_t *sm = &snat_main; + vl_api_nat_ha_set_failover_reply_t *rmp; + int rv = VNET_API_ERROR_UNSUPPORTED; + REPLY_MACRO (VL_API_NAT_HA_SET_FAILOVER_REPLY); +} + +static void +vl_api_nat_ha_get_failover_t_handler (vl_api_nat_ha_get_failover_t * mp) +{ + snat_main_t *sm = &snat_main; + vl_api_nat_ha_get_failover_reply_t *rmp; + int rv = VNET_API_ERROR_UNSUPPORTED; + REPLY_MACRO (VL_API_NAT_HA_GET_FAILOVER_REPLY); +} + +static void +vl_api_nat_ha_flush_t_handler (vl_api_nat_ha_flush_t * mp) +{ + snat_main_t *sm = &snat_main; + vl_api_nat_ha_flush_reply_t *rmp; + int rv = VNET_API_ERROR_UNSUPPORTED; + REPLY_MACRO (VL_API_NAT_HA_FLUSH_REPLY); +} + +static void +vl_api_nat_ha_resync_t_handler (vl_api_nat_ha_resync_t * mp) +{ + snat_main_t *sm = &snat_main; + vl_api_nat_ha_resync_reply_t *rmp; + int rv = VNET_API_ERROR_UNSUPPORTED; + REPLY_MACRO (VL_API_NAT_HA_RESYNC_REPLY); +} + +static void +vl_api_nat44_del_user_t_handler (vl_api_nat44_del_user_t * mp) +{ + snat_main_t *sm = &snat_main; + vl_api_nat44_del_user_reply_t *rmp; + int rv = VNET_API_ERROR_UNSUPPORTED; + REPLY_MACRO (VL_API_NAT44_DEL_USER_REPLY); +} + +static void + vl_api_nat44_add_del_address_range_t_handler + (vl_api_nat44_add_del_address_range_t * mp) +{ + snat_main_t *sm = &snat_main; + vl_api_nat44_add_del_address_range_reply_t *rmp; + ip4_address_t this_addr; + u8 is_add, twice_nat; + u32 start_host_order, end_host_order; + u32 vrf_id; + int i, count; + int rv = 0; + u32 *tmp; + + if (sm->static_mapping_only) + { + rv = VNET_API_ERROR_FEATURE_DISABLED; + goto send_reply; + } + + is_add = mp->is_add; + twice_nat = mp->flags & NAT_API_IS_TWICE_NAT; + + tmp = (u32 *) mp->first_ip_address; + start_host_order = clib_host_to_net_u32 (tmp[0]); + tmp = (u32 *) mp->last_ip_address; + end_host_order = clib_host_to_net_u32 (tmp[0]); + + count = (end_host_order - start_host_order) + 1; + + vrf_id = clib_host_to_net_u32 (mp->vrf_id); + + if (count > 1024) + nat_log_info ("%U - %U, %d addresses...", + format_ip4_address, mp->first_ip_address, + format_ip4_address, mp->last_ip_address, count); + + memcpy (&this_addr.as_u8, mp->first_ip_address, 4); + + for (i = 0; i < count; i++) + { + if (is_add) + rv = snat_add_address (sm, &this_addr, vrf_id, twice_nat); + else + rv = snat_del_address (sm, this_addr, 0, twice_nat); + + if (rv) + goto send_reply; + + increment_v4_address (&this_addr); + } + +send_reply: + REPLY_MACRO (VL_API_NAT44_ADD_DEL_ADDRESS_RANGE_REPLY); +} + +static void +send_nat44_address_details (snat_address_t * a, + vl_api_registration_t * reg, u32 context, + u8 twice_nat) +{ + vl_api_nat44_address_details_t *rmp; + snat_main_t *sm = &snat_main; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + clib_memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_NAT44_ADDRESS_DETAILS + sm->msg_id_base); + clib_memcpy (rmp->ip_address, &(a->addr), 4); + if 
(a->fib_index != ~0) + { + fib_table_t *fib = fib_table_get (a->fib_index, FIB_PROTOCOL_IP4); + rmp->vrf_id = ntohl (fib->ft_table_id); + } + else + rmp->vrf_id = ~0; + if (twice_nat) + rmp->flags |= NAT_API_IS_TWICE_NAT; + rmp->context = context; + + vl_api_send_msg (reg, (u8 *) rmp); +} + +static void +vl_api_nat44_address_dump_t_handler (vl_api_nat44_address_dump_t * mp) +{ + vl_api_registration_t *reg; + snat_main_t *sm = &snat_main; + snat_address_t *a; + + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + + vec_foreach (a, sm->addresses) + send_nat44_address_details (a, reg, mp->context, 0); + vec_foreach (a, sm->twice_nat_addresses) + send_nat44_address_details (a, reg, mp->context, 1); +} + +static void + vl_api_nat44_interface_add_del_feature_t_handler + (vl_api_nat44_interface_add_del_feature_t * mp) +{ + snat_main_t *sm = &snat_main; + vl_api_nat44_interface_add_del_feature_reply_t *rmp; + u32 sw_if_index = ntohl (mp->sw_if_index); + u8 is_del; + int rv = 0; + + is_del = !mp->is_add; + + VALIDATE_SW_IF_INDEX (mp); + + rv = + snat_interface_add_del (sw_if_index, mp->flags & NAT_API_IS_INSIDE, + is_del); + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_NAT44_INTERFACE_ADD_DEL_FEATURE_REPLY); +} + +static void +send_nat44_interface_details (snat_interface_t * i, + vl_api_registration_t * reg, u32 context) +{ + vl_api_nat44_interface_details_t *rmp; + snat_main_t *sm = &snat_main; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + clib_memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_NAT44_INTERFACE_DETAILS + sm->msg_id_base); + rmp->sw_if_index = ntohl (i->sw_if_index); + + if (nat_interface_is_inside (i)) + rmp->flags |= NAT_API_IS_INSIDE; + if (nat_interface_is_outside (i)) + rmp->flags |= NAT_API_IS_OUTSIDE; + + rmp->context = context; + + vl_api_send_msg (reg, (u8 *) rmp); +} + +static void +vl_api_nat44_interface_dump_t_handler (vl_api_nat44_interface_dump_t * mp) +{ + vl_api_registration_t *reg; + snat_main_t *sm = &snat_main; + snat_interface_t *i; + + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + + pool_foreach (i, sm->interfaces) + { + send_nat44_interface_details(i, reg, mp->context); + } +} + +static void + vl_api_nat44_interface_add_del_output_feature_t_handler + (vl_api_nat44_interface_add_del_output_feature_t * mp) +{ + snat_main_t *sm = &snat_main; + vl_api_nat44_interface_add_del_output_feature_reply_t *rmp; + u32 sw_if_index = ntohl (mp->sw_if_index); + int rv = 0; + + VALIDATE_SW_IF_INDEX (mp); + + rv = snat_interface_add_del_output_feature (sw_if_index, + mp->flags & NAT_API_IS_INSIDE, + !mp->is_add); + + BAD_SW_IF_INDEX_LABEL; + REPLY_MACRO (VL_API_NAT44_INTERFACE_ADD_DEL_OUTPUT_FEATURE_REPLY); +} + +static void +send_nat44_interface_output_feature_details (snat_interface_t * i, + vl_api_registration_t * reg, + u32 context) +{ + vl_api_nat44_interface_output_feature_details_t *rmp; + snat_main_t *sm = &snat_main; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + clib_memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = + ntohs (VL_API_NAT44_INTERFACE_OUTPUT_FEATURE_DETAILS + sm->msg_id_base); + rmp->sw_if_index = ntohl (i->sw_if_index); + rmp->context = context; + + if (nat_interface_is_inside (i)) + rmp->flags |= NAT_API_IS_INSIDE; + + vl_api_send_msg (reg, (u8 *) rmp); +} + +static void + vl_api_nat44_interface_output_feature_dump_t_handler + (vl_api_nat44_interface_output_feature_dump_t * mp) +{ + vl_api_registration_t *reg; + snat_main_t *sm = &snat_main; + snat_interface_t 
*i; + + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + + pool_foreach (i, sm->output_feature_interfaces) + { + send_nat44_interface_output_feature_details (i, reg, mp->context); + } +} + +static void + vl_api_nat44_add_del_static_mapping_t_handler + (vl_api_nat44_add_del_static_mapping_t * mp) +{ + snat_main_t *sm = &snat_main; + vl_api_nat44_add_del_static_mapping_reply_t *rmp; + ip4_address_t local_addr, external_addr, pool_addr = { 0 }; + u16 local_port = 0, external_port = 0; + u32 vrf_id, external_sw_if_index; + twice_nat_type_t twice_nat = TWICE_NAT_DISABLED; + int rv = 0; + nat_protocol_t proto; + u8 *tag = 0; + + memcpy (&local_addr.as_u8, mp->local_ip_address, 4); + memcpy (&external_addr.as_u8, mp->external_ip_address, 4); + + if (!(mp->flags & NAT_API_IS_ADDR_ONLY)) + { + local_port = mp->local_port; + external_port = mp->external_port; + } + + vrf_id = clib_net_to_host_u32 (mp->vrf_id); + external_sw_if_index = clib_net_to_host_u32 (mp->external_sw_if_index); + proto = ip_proto_to_nat_proto (mp->protocol); + + if (mp->flags & NAT_API_IS_TWICE_NAT) + twice_nat = TWICE_NAT; + else if (mp->flags & NAT_API_IS_SELF_TWICE_NAT) + twice_nat = TWICE_NAT_SELF; + mp->tag[sizeof (mp->tag) - 1] = 0; + tag = format (0, "%s", mp->tag); + vec_terminate_c_string (tag); + + rv = snat_add_static_mapping ( + local_addr, external_addr, local_port, external_port, vrf_id, + mp->flags & NAT_API_IS_ADDR_ONLY, external_sw_if_index, proto, mp->is_add, + twice_nat, mp->flags & NAT_API_IS_OUT2IN_ONLY, tag, 0, pool_addr, 0); + vec_free (tag); + + REPLY_MACRO (VL_API_NAT44_ADD_DEL_STATIC_MAPPING_REPLY); +} + +static void + vl_api_nat44_add_del_static_mapping_v2_t_handler + (vl_api_nat44_add_del_static_mapping_v2_t * mp) +{ + snat_main_t *sm = &snat_main; + vl_api_nat44_add_del_static_mapping_v2_reply_t *rmp; + ip4_address_t local_addr, external_addr, pool_addr; + u16 local_port = 0, external_port = 0; + u32 vrf_id, external_sw_if_index; + twice_nat_type_t twice_nat = TWICE_NAT_DISABLED; + int rv = 0; + nat_protocol_t proto; + u8 *tag = 0; + + memcpy (&pool_addr.as_u8, mp->pool_ip_address, 4); + memcpy (&local_addr.as_u8, mp->local_ip_address, 4); + memcpy (&external_addr.as_u8, mp->external_ip_address, 4); + + if (!(mp->flags & NAT_API_IS_ADDR_ONLY)) + { + local_port = mp->local_port; + external_port = mp->external_port; + } + + vrf_id = clib_net_to_host_u32 (mp->vrf_id); + external_sw_if_index = clib_net_to_host_u32 (mp->external_sw_if_index); + proto = ip_proto_to_nat_proto (mp->protocol); + + if (mp->flags & NAT_API_IS_TWICE_NAT) + twice_nat = TWICE_NAT; + else if (mp->flags & NAT_API_IS_SELF_TWICE_NAT) + twice_nat = TWICE_NAT_SELF; + mp->tag[sizeof (mp->tag) - 1] = 0; + tag = format (0, "%s", mp->tag); + vec_terminate_c_string (tag); + + rv = snat_add_static_mapping (local_addr, external_addr, local_port, + external_port, vrf_id, + mp->flags & NAT_API_IS_ADDR_ONLY, + external_sw_if_index, proto, + mp->is_add, twice_nat, + mp->flags & NAT_API_IS_OUT2IN_ONLY, tag, 0, + pool_addr, mp->match_pool); + vec_free (tag); + + REPLY_MACRO (VL_API_NAT44_ADD_DEL_STATIC_MAPPING_V2_REPLY); +} + +static void +send_nat44_static_mapping_details (snat_static_mapping_t * m, + vl_api_registration_t * reg, u32 context) +{ + vl_api_nat44_static_mapping_details_t *rmp; + snat_main_t *sm = &snat_main; + u32 len = sizeof (*rmp); + + rmp = vl_msg_api_alloc (len); + clib_memset (rmp, 0, len); + rmp->_vl_msg_id = + ntohs (VL_API_NAT44_STATIC_MAPPING_DETAILS + sm->msg_id_base); + + clib_memcpy 
(rmp->local_ip_address, &(m->local_addr), 4); + clib_memcpy (rmp->external_ip_address, &(m->external_addr), 4); + rmp->external_sw_if_index = ~0; + rmp->vrf_id = htonl (m->vrf_id); + rmp->context = context; + + if (m->twice_nat == TWICE_NAT) + rmp->flags |= NAT_API_IS_TWICE_NAT; + else if (m->twice_nat == TWICE_NAT_SELF) + rmp->flags |= NAT_API_IS_SELF_TWICE_NAT; + + if (is_out2in_only_static_mapping (m)) + rmp->flags |= NAT_API_IS_OUT2IN_ONLY; + + if (is_addr_only_static_mapping (m)) + { + rmp->flags |= NAT_API_IS_ADDR_ONLY; + } + else + { + rmp->protocol = nat_proto_to_ip_proto (m->proto); + rmp->external_port = m->external_port; + rmp->local_port = m->local_port; + } + + if (m->tag) + strncpy ((char *) rmp->tag, (char *) m->tag, vec_len (m->tag)); + + vl_api_send_msg (reg, (u8 *) rmp); +} + +static void +send_nat44_static_map_resolve_details (snat_static_map_resolve_t * m, + vl_api_registration_t * reg, + u32 context) +{ + vl_api_nat44_static_mapping_details_t *rmp; + snat_main_t *sm = &snat_main; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + clib_memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = + ntohs (VL_API_NAT44_STATIC_MAPPING_DETAILS + sm->msg_id_base); + clib_memcpy (rmp->local_ip_address, &(m->l_addr), 4); + rmp->external_sw_if_index = htonl (m->sw_if_index); + rmp->vrf_id = htonl (m->vrf_id); + rmp->context = context; + + if (m->twice_nat) + rmp->flags |= NAT_API_IS_TWICE_NAT; + + if (m->addr_only) + { + rmp->flags |= NAT_API_IS_ADDR_ONLY; + } + else + { + rmp->protocol = nat_proto_to_ip_proto (m->proto); + rmp->external_port = m->e_port; + rmp->local_port = m->l_port; + } + if (m->tag) + strncpy ((char *) rmp->tag, (char *) m->tag, vec_len (m->tag)); + + vl_api_send_msg (reg, (u8 *) rmp); +} + +static void +vl_api_nat44_static_mapping_dump_t_handler (vl_api_nat44_static_mapping_dump_t + * mp) +{ + vl_api_registration_t *reg; + snat_main_t *sm = &snat_main; + snat_static_mapping_t *m; + snat_static_map_resolve_t *rp; + int j; + + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + + pool_foreach (m, sm->static_mappings) + { + if (!is_identity_static_mapping(m) && !is_lb_static_mapping (m)) + send_nat44_static_mapping_details (m, reg, mp->context); + } + + for (j = 0; j < vec_len (sm->to_resolve); j++) + { + rp = sm->to_resolve + j; + if (!rp->identity_nat) + send_nat44_static_map_resolve_details (rp, reg, mp->context); + } +} + +static void + vl_api_nat44_add_del_identity_mapping_t_handler + (vl_api_nat44_add_del_identity_mapping_t * mp) +{ + snat_main_t *sm = &snat_main; + vl_api_nat44_add_del_identity_mapping_reply_t *rmp; + ip4_address_t addr, pool_addr = { 0 }; + u16 port = 0; + u32 vrf_id, sw_if_index; + int rv = 0; + nat_protocol_t proto = NAT_PROTOCOL_OTHER; + u8 *tag = 0; + + if (!(mp->flags & NAT_API_IS_ADDR_ONLY)) + { + port = mp->port; + proto = ip_proto_to_nat_proto (mp->protocol); + } + vrf_id = clib_net_to_host_u32 (mp->vrf_id); + sw_if_index = clib_net_to_host_u32 (mp->sw_if_index); + if (sw_if_index != ~0) + addr.as_u32 = 0; + else + memcpy (&addr.as_u8, mp->ip_address, 4); + mp->tag[sizeof (mp->tag) - 1] = 0; + tag = format (0, "%s", mp->tag); + vec_terminate_c_string (tag); + + rv = + snat_add_static_mapping (addr, addr, port, port, vrf_id, + mp->flags & NAT_API_IS_ADDR_ONLY, sw_if_index, + proto, mp->is_add, 0, 0, tag, 1, pool_addr, 0); + vec_free (tag); + + REPLY_MACRO (VL_API_NAT44_ADD_DEL_IDENTITY_MAPPING_REPLY); +} + +static void +send_nat44_identity_mapping_details (snat_static_mapping_t * m, int index, + 
vl_api_registration_t * reg, u32 context) +{ + vl_api_nat44_identity_mapping_details_t *rmp; + snat_main_t *sm = &snat_main; + nat44_lb_addr_port_t *local = pool_elt_at_index (m->locals, index); + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + clib_memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = + ntohs (VL_API_NAT44_IDENTITY_MAPPING_DETAILS + sm->msg_id_base); + + if (is_addr_only_static_mapping (m)) + rmp->flags |= NAT_API_IS_ADDR_ONLY; + + clib_memcpy (rmp->ip_address, &(m->local_addr), 4); + rmp->port = m->local_port; + rmp->sw_if_index = ~0; + rmp->vrf_id = htonl (local->vrf_id); + rmp->protocol = nat_proto_to_ip_proto (m->proto); + rmp->context = context; + if (m->tag) + strncpy ((char *) rmp->tag, (char *) m->tag, vec_len (m->tag)); + + vl_api_send_msg (reg, (u8 *) rmp); +} + +static void +send_nat44_identity_map_resolve_details (snat_static_map_resolve_t * m, + vl_api_registration_t * reg, + u32 context) +{ + vl_api_nat44_identity_mapping_details_t *rmp; + snat_main_t *sm = &snat_main; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + clib_memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = + ntohs (VL_API_NAT44_IDENTITY_MAPPING_DETAILS + sm->msg_id_base); + + if (m->addr_only) + rmp->flags = (vl_api_nat_config_flags_t) NAT_API_IS_ADDR_ONLY; + + rmp->port = m->l_port; + rmp->sw_if_index = htonl (m->sw_if_index); + rmp->vrf_id = htonl (m->vrf_id); + rmp->protocol = nat_proto_to_ip_proto (m->proto); + rmp->context = context; + if (m->tag) + strncpy ((char *) rmp->tag, (char *) m->tag, vec_len (m->tag)); + + vl_api_send_msg (reg, (u8 *) rmp); +} + +static void + vl_api_nat44_identity_mapping_dump_t_handler + (vl_api_nat44_identity_mapping_dump_t * mp) +{ + vl_api_registration_t *reg; + snat_main_t *sm = &snat_main; + snat_static_mapping_t *m; + snat_static_map_resolve_t *rp; + int j; + + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + + pool_foreach (m, sm->static_mappings) + { + if (is_identity_static_mapping(m) && !is_lb_static_mapping (m)) + { + pool_foreach_index (j, m->locals) + { + send_nat44_identity_mapping_details (m, j, reg, mp->context); + } + } + } + + for (j = 0; j < vec_len (sm->to_resolve); j++) + { + rp = sm->to_resolve + j; + if (rp->identity_nat) + send_nat44_identity_map_resolve_details (rp, reg, mp->context); + } +} + +static void + vl_api_nat44_add_del_interface_addr_t_handler + (vl_api_nat44_add_del_interface_addr_t * mp) +{ + snat_main_t *sm = &snat_main; + vl_api_nat44_add_del_interface_addr_reply_t *rmp; + u32 sw_if_index = ntohl (mp->sw_if_index); + int rv = 0; + u8 is_del; + + if (sm->static_mapping_only) + { + rv = VNET_API_ERROR_FEATURE_DISABLED; + goto send_reply; + } + + is_del = !mp->is_add; + + VALIDATE_SW_IF_INDEX (mp); + + rv = snat_add_interface_address (sm, sw_if_index, is_del, + mp->flags & NAT_API_IS_TWICE_NAT); + + BAD_SW_IF_INDEX_LABEL; + +send_reply: + REPLY_MACRO (VL_API_NAT44_ADD_DEL_INTERFACE_ADDR_REPLY); +} + +static void +send_nat44_interface_addr_details (u32 sw_if_index, + vl_api_registration_t * reg, u32 context, + u8 twice_nat) +{ + vl_api_nat44_interface_addr_details_t *rmp; + snat_main_t *sm = &snat_main; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + clib_memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = + ntohs (VL_API_NAT44_INTERFACE_ADDR_DETAILS + sm->msg_id_base); + rmp->sw_if_index = ntohl (sw_if_index); + + if (twice_nat) + rmp->flags = (vl_api_nat_config_flags_t) NAT_API_IS_TWICE_NAT; + rmp->context = context; + + vl_api_send_msg (reg, (u8 *) rmp); +} + +static void 
+vl_api_nat44_interface_addr_dump_t_handler (vl_api_nat44_interface_addr_dump_t + * mp) +{ + vl_api_registration_t *reg; + snat_main_t *sm = &snat_main; + u32 *i; + + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + + vec_foreach (i, sm->auto_add_sw_if_indices) + { + send_nat44_interface_addr_details (*i, reg, mp->context, 0); + } + vec_foreach (i, sm->auto_add_sw_if_indices_twice_nat) + { + send_nat44_interface_addr_details (*i, reg, mp->context, 1); + } +} + +/* user (internal host) key */ +typedef struct +{ + union + { + struct + { + ip4_address_t addr; + u32 fib_index; + }; + u64 as_u64; + }; +} snat_user_key_t; + +typedef struct +{ + ip4_address_t addr; + u32 fib_index; + u32 nsessions; + u32 nstaticsessions; +} snat_user_t; + +typedef struct +{ + u32 user_buckets; + snat_user_t *users; + clib_bihash_8_8_t user_hash; +} user_create_helper_t; + +static void +send_nat44_user_details (snat_user_t * u, vl_api_registration_t * reg, + u32 context) +{ + vl_api_nat44_user_details_t *rmp; + snat_main_t *sm = &snat_main; + ip4_main_t *im = &ip4_main; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + clib_memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_NAT44_USER_DETAILS + sm->msg_id_base); + + if (!pool_is_free_index (im->fibs, u->fib_index)) + { + fib_table_t *fib = fib_table_get (u->fib_index, FIB_PROTOCOL_IP4); + rmp->vrf_id = ntohl (fib->ft_table_id); + } + + clib_memcpy (rmp->ip_address, &(u->addr), 4); + rmp->nsessions = ntohl (u->nsessions); + rmp->nstaticsessions = ntohl (u->nstaticsessions); + rmp->context = context; + + vl_api_send_msg (reg, (u8 *) rmp); +} + +static void +nat_ed_user_create_helper (user_create_helper_t *uch, snat_session_t *s) +{ + snat_user_key_t k; + k.addr = s->in2out.addr; + k.fib_index = s->in2out.fib_index; + clib_bihash_kv_8_8_t key, value; + key.key = k.as_u64; + snat_user_t *u; + + if (clib_bihash_search_8_8 (&uch->user_hash, &key, &value)) + { + pool_get (uch->users, u); + u->addr = k.addr; + u->fib_index = k.fib_index; + u->nsessions = 0; + u->nstaticsessions = 0; + key.value = u - uch->users; + clib_bihash_add_del_8_8 (&uch->user_hash, &key, 1); + } + else + { + u = pool_elt_at_index (uch->users, value.value); + } + if (snat_is_session_static (s)) + { + ++u->nstaticsessions; + } + else + { + ++u->nsessions; + } +} + +u8 * +format_user_kvp (u8 *s, va_list *args) +{ + clib_bihash_kv_8_8_t *v = va_arg (*args, clib_bihash_kv_8_8_t *); + snat_user_key_t k; + k.as_u64 = v->key; + s = format (s, "%U fib %d user-index %llu", format_ip4_address, &k.addr, + k.fib_index, v->value); + return s; +} + +static void +nat_ed_users_create (snat_main_per_thread_data_t *tsm, + user_create_helper_t *uch) +{ + snat_session_t *s; + clib_bihash_init_8_8 (&uch->user_hash, "users", uch->user_buckets, 0); + clib_bihash_set_kvp_format_fn_8_8 (&uch->user_hash, format_user_kvp); + pool_foreach (s, tsm->sessions) + { + nat_ed_user_create_helper (uch, s); + } +} + +static void +nat_ed_users_destroy (user_create_helper_t *uch) +{ + pool_free (uch->users); + clib_bihash_free_8_8 (&uch->user_hash); +} + +static void +vl_api_nat44_user_dump_t_handler (vl_api_nat44_user_dump_t * mp) +{ + user_create_helper_t uch; + vl_api_registration_t *reg; + snat_main_t *sm = &snat_main; + snat_main_per_thread_data_t *tsm; + snat_user_t *u; + + clib_memset (&uch, 0, sizeof (uch)); + + uch.user_buckets = nat_calc_bihash_buckets (1024); + + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + + vec_foreach (tsm, 
sm->per_thread_data) + { + nat_ed_users_create (tsm, &uch); + pool_foreach (u, uch.users) + { + send_nat44_user_details (u, reg, mp->context); + } + nat_ed_users_destroy (&uch); + } +} + +static void +send_nat44_user_session_details (snat_session_t * s, + vl_api_registration_t * reg, u32 context) +{ + vl_api_nat44_user_session_details_t *rmp; + snat_main_t *sm = &snat_main; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + clib_memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = + ntohs (VL_API_NAT44_USER_SESSION_DETAILS + sm->msg_id_base); + clib_memcpy (rmp->outside_ip_address, (&s->out2in.addr), 4); + clib_memcpy (rmp->inside_ip_address, (&s->in2out.addr), 4); + + if (snat_is_session_static (s)) + rmp->flags |= NAT_API_IS_STATIC; + + if (is_twice_nat_session (s)) + rmp->flags |= NAT_API_IS_TWICE_NAT; + + if (is_ed_session (s) || is_fwd_bypass_session (s)) + rmp->flags |= NAT_API_IS_EXT_HOST_VALID; + + rmp->last_heard = clib_host_to_net_u64 ((u64) s->last_heard); + rmp->total_bytes = clib_host_to_net_u64 (s->total_bytes); + rmp->total_pkts = ntohl (s->total_pkts); + rmp->context = context; + if (snat_is_unk_proto_session (s)) + { + rmp->outside_port = 0; + rmp->inside_port = 0; + rmp->protocol = ntohs (s->in2out.port); + } + else + { + rmp->outside_port = s->out2in.port; + rmp->inside_port = s->in2out.port; + rmp->protocol = ntohs (nat_proto_to_ip_proto (s->nat_proto)); + } + if (is_ed_session (s) || is_fwd_bypass_session (s)) + { + clib_memcpy (rmp->ext_host_address, &s->ext_host_addr, 4); + rmp->ext_host_port = s->ext_host_port; + if (is_twice_nat_session (s)) + { + clib_memcpy (rmp->ext_host_nat_address, &s->ext_host_nat_addr, 4); + rmp->ext_host_nat_port = s->ext_host_nat_port; + } + } + + vl_api_send_msg (reg, (u8 *) rmp); +} + +static void +vl_api_nat44_user_session_dump_t_handler (vl_api_nat44_user_session_dump_t * + mp) +{ + snat_main_per_thread_data_t *tsm; + snat_main_t *sm = &snat_main; + vl_api_registration_t *reg; + snat_user_key_t ukey; + snat_session_t *s; + ip4_header_t ip; + + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + + clib_memcpy (&ukey.addr, mp->ip_address, 4); + ip.src_address.as_u32 = ukey.addr.as_u32; + ukey.fib_index = fib_table_find (FIB_PROTOCOL_IP4, ntohl (mp->vrf_id)); + if (sm->num_workers > 1) + tsm = + vec_elt_at_index (sm->per_thread_data, + sm->worker_in2out_cb (&ip, ukey.fib_index, 0)); + else + tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers); + + pool_foreach (s, tsm->sessions) { + if (s->in2out.addr.as_u32 == ukey.addr.as_u32) + { + send_nat44_user_session_details (s, reg, mp->context); + } + } +} + +static nat44_lb_addr_port_t * +unformat_nat44_lb_addr_port (vl_api_nat44_lb_addr_port_t * addr_port_pairs, + u32 addr_port_pair_num) +{ + u8 i; + nat44_lb_addr_port_t *lb_addr_port_pairs = 0, lb_addr_port; + vl_api_nat44_lb_addr_port_t *ap; + + for (i = 0; i < addr_port_pair_num; i++) + { + ap = &addr_port_pairs[i]; + clib_memset (&lb_addr_port, 0, sizeof (lb_addr_port)); + clib_memcpy (&lb_addr_port.addr, ap->addr, 4); + lb_addr_port.port = ap->port; + lb_addr_port.probability = ap->probability; + lb_addr_port.vrf_id = clib_net_to_host_u32 (ap->vrf_id); + vec_add1 (lb_addr_port_pairs, lb_addr_port); + } + + return lb_addr_port_pairs; +} + +static void + vl_api_nat44_add_del_lb_static_mapping_t_handler + (vl_api_nat44_add_del_lb_static_mapping_t * mp) +{ + snat_main_t *sm = &snat_main; + vl_api_nat44_add_del_lb_static_mapping_reply_t *rmp; + twice_nat_type_t twice_nat = TWICE_NAT_DISABLED; + int rv = 0; 
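+  /* The variable-length locals array carried in the message is unpacked
+     below into a nat44_lb_addr_port_t vector, the tag is NUL-terminated,
+     and both vectors are freed again once
+     nat44_add_del_lb_static_mapping returns. */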
+ nat44_lb_addr_port_t *locals = 0; + ip4_address_t e_addr; + nat_protocol_t proto; + u8 *tag = 0; + + locals = + unformat_nat44_lb_addr_port (mp->locals, + clib_net_to_host_u32 (mp->local_num)); + clib_memcpy (&e_addr, mp->external_addr, 4); + proto = ip_proto_to_nat_proto (mp->protocol); + + if (mp->flags & NAT_API_IS_TWICE_NAT) + twice_nat = TWICE_NAT; + else if (mp->flags & NAT_API_IS_SELF_TWICE_NAT) + twice_nat = TWICE_NAT_SELF; + mp->tag[sizeof (mp->tag) - 1] = 0; + tag = format (0, "%s", mp->tag); + vec_terminate_c_string (tag); + + rv = + nat44_add_del_lb_static_mapping (e_addr, + mp->external_port, + proto, locals, mp->is_add, + twice_nat, + mp->flags & NAT_API_IS_OUT2IN_ONLY, tag, + clib_net_to_host_u32 (mp->affinity)); + + vec_free (locals); + vec_free (tag); + REPLY_MACRO (VL_API_NAT44_ADD_DEL_LB_STATIC_MAPPING_REPLY); +} + +static void + vl_api_nat44_lb_static_mapping_add_del_local_t_handler + (vl_api_nat44_lb_static_mapping_add_del_local_t * mp) +{ + snat_main_t *sm = &snat_main; + vl_api_nat44_lb_static_mapping_add_del_local_reply_t *rmp; + int rv = 0; + ip4_address_t e_addr, l_addr; + nat_protocol_t proto; + + clib_memcpy (&e_addr, mp->external_addr, 4); + clib_memcpy (&l_addr, mp->local.addr, 4); + proto = ip_proto_to_nat_proto (mp->protocol); + + rv = + nat44_lb_static_mapping_add_del_local (e_addr, + mp->external_port, + l_addr, + mp->local.port, + proto, + clib_net_to_host_u32 (mp-> + local.vrf_id), + mp->local.probability, mp->is_add); + + REPLY_MACRO (VL_API_NAT44_LB_STATIC_MAPPING_ADD_DEL_LOCAL_REPLY); +} + +static void +send_nat44_lb_static_mapping_details (snat_static_mapping_t * m, + vl_api_registration_t * reg, + u32 context) +{ + vl_api_nat44_lb_static_mapping_details_t *rmp; + snat_main_t *sm = &snat_main; + nat44_lb_addr_port_t *ap; + vl_api_nat44_lb_addr_port_t *locals; + u32 local_num = 0; + + rmp = + vl_msg_api_alloc (sizeof (*rmp) + + (pool_elts (m->locals) * + sizeof (nat44_lb_addr_port_t))); + clib_memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = + ntohs (VL_API_NAT44_LB_STATIC_MAPPING_DETAILS + sm->msg_id_base); + + clib_memcpy (rmp->external_addr, &(m->external_addr), 4); + rmp->external_port = m->external_port; + rmp->protocol = nat_proto_to_ip_proto (m->proto); + rmp->context = context; + + if (m->twice_nat == TWICE_NAT) + rmp->flags |= NAT_API_IS_TWICE_NAT; + else if (m->twice_nat == TWICE_NAT_SELF) + rmp->flags |= NAT_API_IS_SELF_TWICE_NAT; + if (is_out2in_only_static_mapping (m)) + rmp->flags |= NAT_API_IS_OUT2IN_ONLY; + if (m->tag) + strncpy ((char *) rmp->tag, (char *) m->tag, vec_len (m->tag)); + + locals = (vl_api_nat44_lb_addr_port_t *) rmp->locals; + pool_foreach (ap, m->locals) + { + clib_memcpy (locals->addr, &(ap->addr), 4); + locals->port = ap->port; + locals->probability = ap->probability; + locals->vrf_id = ntohl (ap->vrf_id); + locals++; + local_num++; + } + rmp->local_num = ntohl (local_num); + + vl_api_send_msg (reg, (u8 *) rmp); +} + +static void + vl_api_nat44_lb_static_mapping_dump_t_handler + (vl_api_nat44_lb_static_mapping_dump_t * mp) +{ + vl_api_registration_t *reg; + snat_main_t *sm = &snat_main; + snat_static_mapping_t *m; + + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + + pool_foreach (m, sm->static_mappings) + { + if (is_lb_static_mapping(m)) + send_nat44_lb_static_mapping_details (m, reg, mp->context); + } +} + +static void +vl_api_nat44_del_session_t_handler (vl_api_nat44_del_session_t * mp) +{ + snat_main_t *sm = &snat_main; + vl_api_nat44_del_session_reply_t *rmp; + 
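+  /* An endpoint-dependent session is identified by its full tuple: local
+     address/port, external host address/port, protocol and VRF.  The port
+     fields are passed through as received, only vrf_id is converted to
+     host order, and NAT_API_IS_INSIDE selects the lookup direction for
+     nat44_del_ed_session below. */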
ip4_address_t addr, eh_addr; + u16 port, eh_port; + u32 vrf_id; + int rv = 0; + u8 is_in; + + memcpy (&addr.as_u8, mp->address, 4); + port = mp->port; + vrf_id = clib_net_to_host_u32 (mp->vrf_id); + memcpy (&eh_addr.as_u8, mp->ext_host_address, 4); + eh_port = mp->ext_host_port; + + is_in = mp->flags & NAT_API_IS_INSIDE; + + rv = nat44_del_ed_session (sm, &addr, port, &eh_addr, eh_port, mp->protocol, + vrf_id, is_in); + + REPLY_MACRO (VL_API_NAT44_DEL_SESSION_REPLY); +} + +static void + vl_api_nat44_forwarding_enable_disable_t_handler + (vl_api_nat44_forwarding_enable_disable_t * mp) +{ + vl_api_nat44_forwarding_enable_disable_reply_t *rmp; + snat_main_t *sm = &snat_main; + int rv = 0; + nat44_ed_forwarding_enable_disable (mp->enable); + REPLY_MACRO (VL_API_NAT44_FORWARDING_ENABLE_DISABLE_REPLY); +} + +static void + vl_api_nat44_forwarding_is_enabled_t_handler + (vl_api_nat44_forwarding_is_enabled_t * mp) +{ + vl_api_registration_t *reg; + snat_main_t *sm = &snat_main; + vl_api_nat44_forwarding_is_enabled_reply_t *rmp; + + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + clib_memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = + ntohs (VL_API_NAT44_FORWARDING_IS_ENABLED_REPLY + sm->msg_id_base); + rmp->context = mp->context; + + rmp->enabled = sm->forwarding_enabled; + + vl_api_send_msg (reg, (u8 *) rmp); +} + +static void +vl_api_nat44_ed_set_fq_options_t_handler (vl_api_nat44_ed_set_fq_options_t *mp) +{ + snat_main_t *sm = &snat_main; + vl_api_nat44_ed_set_fq_options_reply_t *rmp; + int rv = 0; + u32 frame_queue_nelts = ntohl (mp->frame_queue_nelts); + rv = nat44_ed_set_frame_queue_nelts (frame_queue_nelts); + REPLY_MACRO (VL_API_NAT44_ED_SET_FQ_OPTIONS_REPLY); +} + +static void +vl_api_nat44_ed_show_fq_options_t_handler ( + vl_api_nat44_ed_show_fq_options_t *mp) +{ + snat_main_t *sm = &snat_main; + vl_api_nat44_ed_show_fq_options_reply_t *rmp; + int rv = 0; + /* clang-format off */ + REPLY_MACRO2_ZERO (VL_API_NAT44_ED_SHOW_FQ_OPTIONS_REPLY, + ({ + rmp->frame_queue_nelts = htonl (sm->frame_queue_nelts); + })); + /* clang-format on */ +} + +/* API definitions */ +#include +#include + +/* Set up the API message handling tables */ +clib_error_t * +nat44_api_hookup (vlib_main_t * vm) +{ + snat_main_t *sm = &snat_main; + sm->msg_id_base = setup_message_id_table (); + return 0; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/nat/nat44-ed/nat44_ed_classify.c b/src/plugins/nat/nat44-ed/nat44_ed_classify.c new file mode 100644 index 00000000000..5a9f4e42657 --- /dev/null +++ b/src/plugins/nat/nat44-ed/nat44_ed_classify.c @@ -0,0 +1,362 @@ +/* + * Copyright (c) 2018 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/** + * @file + * @brief Classify for one armed NAT44 (in+out interface) + */ + +#include +#include +#include + +#include +#include + +#define foreach_nat44_classify_error \ +_(NEXT_IN2OUT, "next in2out") \ +_(NEXT_OUT2IN, "next out2in") \ +_(FRAG_CACHED, "fragment cached") + +typedef enum +{ +#define _(sym,str) NAT44_CLASSIFY_ERROR_##sym, + foreach_nat44_classify_error +#undef _ + NAT44_CLASSIFY_N_ERROR, +} nat44_classify_error_t; + +typedef enum +{ + NAT44_CLASSIFY_NEXT_IN2OUT, + NAT44_CLASSIFY_NEXT_OUT2IN, + NAT44_CLASSIFY_NEXT_DROP, + NAT44_CLASSIFY_N_NEXT, +} nat44_classify_next_t; + +typedef struct +{ + u8 next_in2out; + u8 cached; +} nat44_classify_trace_t; + +static u8 * +format_nat44_classify_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + nat44_classify_trace_t *t = va_arg (*args, nat44_classify_trace_t *); + char *next; + + if (t->cached) + s = format (s, "nat44-classify: fragment cached"); + else + { + next = t->next_in2out ? "nat44-ed-in2out" : "nat44-ed-out2in"; + s = format (s, "nat44-classify: next %s", next); + } + + return s; +} + +static inline uword +nat44_handoff_classify_node_fn_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, *from, *to_next; + nat44_classify_next_t next_index; + snat_main_t *sm = &snat_main; + snat_static_mapping_t *m; + u32 next_in2out = 0, next_out2in = 0; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0 = NAT_NEXT_IN2OUT_CLASSIFY; + ip4_header_t *ip0; + snat_address_t *ap; + clib_bihash_kv_8_8_t kv0, value0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + ip0 = vlib_buffer_get_current (b0); + + vec_foreach (ap, sm->addresses) + { + if (ip0->dst_address.as_u32 == ap->addr.as_u32) + { + next0 = NAT_NEXT_OUT2IN_CLASSIFY; + goto enqueue0; + } + } + + if (PREDICT_FALSE (pool_elts (sm->static_mappings))) + { + init_nat_k (&kv0, ip0->dst_address, 0, 0, 0); + /* try to classify the fragment based on IP header alone */ + if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, + &kv0, &value0)) + { + m = pool_elt_at_index (sm->static_mappings, value0.value); + if (m->local_addr.as_u32 != m->external_addr.as_u32) + next0 = NAT_NEXT_OUT2IN_CLASSIFY; + goto enqueue0; + } + init_nat_k (&kv0, ip0->dst_address, + vnet_buffer (b0)->ip.reass.l4_dst_port, 0, + ip_proto_to_nat_proto (ip0->protocol)); + if (!clib_bihash_search_8_8 + (&sm->static_mapping_by_external, &kv0, &value0)) + { + m = pool_elt_at_index (sm->static_mappings, value0.value); + if (m->local_addr.as_u32 != m->external_addr.as_u32) + next0 = NAT_NEXT_OUT2IN_CLASSIFY; + } + } + + enqueue0: + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + nat44_classify_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->cached = 0; + t->next_in2out = next0 == NAT_NEXT_IN2OUT_CLASSIFY ? 
1 : 0; + } + + next_in2out += next0 == NAT_NEXT_IN2OUT_CLASSIFY; + next_out2in += next0 == NAT_NEXT_OUT2IN_CLASSIFY; + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, node->node_index, + NAT44_CLASSIFY_ERROR_NEXT_IN2OUT, next_in2out); + vlib_node_increment_counter (vm, node->node_index, + NAT44_CLASSIFY_ERROR_NEXT_OUT2IN, next_out2in); + return frame->n_vectors; +} + +static inline uword +nat44_ed_classify_node_fn_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, *from, *to_next; + nat44_classify_next_t next_index; + snat_main_t *sm = &snat_main; + snat_static_mapping_t *m; + u32 next_in2out = 0, next_out2in = 0; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0 = NAT_NEXT_IN2OUT_ED_FAST_PATH; + u32 sw_if_index0, rx_fib_index0; + ip4_header_t *ip0; + snat_address_t *ap; + clib_bihash_kv_8_8_t kv0, value0; + clib_bihash_kv_16_8_t ed_kv0, ed_value0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + ip0 = vlib_buffer_get_current (b0); + + u32 arc_next; + vnet_feature_next (&arc_next, b0); + vnet_buffer2 (b0)->nat.arc_next = arc_next; + + if (ip0->protocol != IP_PROTOCOL_ICMP) + { + /* process leading fragment/whole packet (with L4 header) */ + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + rx_fib_index0 = + fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, + sw_if_index0); + init_ed_k (&ed_kv0, ip0->src_address, + vnet_buffer (b0)->ip.reass.l4_src_port, + ip0->dst_address, + vnet_buffer (b0)->ip.reass.l4_dst_port, + rx_fib_index0, ip0->protocol); + /* process whole packet */ + if (!clib_bihash_search_16_8 (&sm->flow_hash, &ed_kv0, + &ed_value0)) + { + ASSERT (vm->thread_index == + ed_value_get_thread_index (&ed_value0)); + snat_main_per_thread_data_t *tsm = + &sm->per_thread_data[vm->thread_index]; + snat_session_t *s = pool_elt_at_index ( + tsm->sessions, ed_value_get_session_index (&ed_value0)); + clib_bihash_kv_16_8_t i2o_kv; + nat_6t_flow_to_ed_k (&i2o_kv, &s->i2o); + vnet_buffer2 (b0)->nat.cached_session_index = + ed_value_get_session_index (&ed_value0); + if (i2o_kv.key[0] == ed_kv0.key[0] && + i2o_kv.key[1] == ed_kv0.key[1]) + { + next0 = NAT_NEXT_IN2OUT_ED_FAST_PATH; + } + else + { + next0 = NAT_NEXT_OUT2IN_ED_FAST_PATH; + } + + goto enqueue0; + } + /* session doesn't exist so continue in code */ + } + + vec_foreach (ap, sm->addresses) + { + if (ip0->dst_address.as_u32 == ap->addr.as_u32) + { + next0 = NAT_NEXT_OUT2IN_ED_FAST_PATH; + goto enqueue0; + } + } + + if (PREDICT_FALSE (pool_elts (sm->static_mappings))) + { + init_nat_k (&kv0, ip0->dst_address, 0, 0, 0); + /* try to classify the fragment based on IP header alone */ + if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, + &kv0, &value0)) + { + m = pool_elt_at_index (sm->static_mappings, value0.value); + if (m->local_addr.as_u32 != m->external_addr.as_u32) + next0 = 
NAT_NEXT_OUT2IN_ED_FAST_PATH; + goto enqueue0; + } + init_nat_k (&kv0, ip0->dst_address, + vnet_buffer (b0)->ip.reass.l4_dst_port, 0, + ip_proto_to_nat_proto (ip0->protocol)); + if (!clib_bihash_search_8_8 + (&sm->static_mapping_by_external, &kv0, &value0)) + { + m = pool_elt_at_index (sm->static_mappings, value0.value); + if (m->local_addr.as_u32 != m->external_addr.as_u32) + next0 = NAT_NEXT_OUT2IN_ED_FAST_PATH; + } + } + + enqueue0: + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + nat44_classify_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->cached = 0; + t->next_in2out = next0 == NAT_NEXT_IN2OUT_ED_FAST_PATH ? 1 : 0; + } + + next_in2out += next0 == NAT_NEXT_IN2OUT_ED_FAST_PATH; + next_out2in += next0 == NAT_NEXT_OUT2IN_ED_FAST_PATH; + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, node->node_index, + NAT44_CLASSIFY_ERROR_NEXT_IN2OUT, next_in2out); + vlib_node_increment_counter (vm, node->node_index, + NAT44_CLASSIFY_ERROR_NEXT_OUT2IN, next_out2in); + return frame->n_vectors; +} + +VLIB_NODE_FN (nat44_ed_classify_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return nat44_ed_classify_node_fn_inline (vm, node, frame); +} + +VLIB_REGISTER_NODE (nat44_ed_classify_node) = { + .name = "nat44-ed-classify", + .vector_size = sizeof (u32), + .sibling_of = "nat-default", + .format_trace = format_nat44_classify_trace, + .type = VLIB_NODE_TYPE_INTERNAL, +}; + +VLIB_NODE_FN (nat44_handoff_classify_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return nat44_handoff_classify_node_fn_inline (vm, node, frame); +} + +VLIB_REGISTER_NODE (nat44_handoff_classify_node) = { + .name = "nat44-handoff-classify", + .vector_size = sizeof (u32), + .sibling_of = "nat-default", + .format_trace = format_nat44_classify_trace, + .type = VLIB_NODE_TYPE_INTERNAL, +}; + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/nat/nat44-ed/nat44_ed_cli.c b/src/plugins/nat/nat44-ed/nat44_ed_cli.c new file mode 100644 index 00000000000..714b410bbd3 --- /dev/null +++ b/src/plugins/nat/nat44-ed/nat44_ed_cli.c @@ -0,0 +1,2029 @@ +/* + * Copyright (c) 2018 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/** + * @file + * @brief NAT44 CLI + */ + +#include + +#include +#include +#include + +#include +#include +#include + +static clib_error_t * +nat44_enable_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + snat_main_t *sm = &snat_main; + unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *error = 0; + + nat44_config_t c = { 0 }; + u8 mode_set = 0; + + if (sm->enabled) + return clib_error_return (0, "nat44 already enabled"); + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + { + if (nat44_plugin_enable (c) != 0) + return clib_error_return (0, "nat44 enable failed"); + return 0; + } + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (!mode_set && unformat (line_input, "static-mapping-only")) + { + mode_set = 1; + c.static_mapping_only = 1; + if (unformat (line_input, "connection-tracking")) + { + c.connection_tracking = 1; + } + } + else if (unformat (line_input, "inside-vrf %u", &c.inside_vrf)); + else if (unformat (line_input, "outside-vrf %u", &c.outside_vrf)); + else if (unformat (line_input, "sessions %u", &c.sessions)); + else + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (!c.sessions) + { + error = clib_error_return (0, "number of sessions is required"); + goto done; + } + + if (nat44_plugin_enable (c) != 0) + error = clib_error_return (0, "nat44 enable failed"); +done: + unformat_free (line_input); + return error; +} + +static clib_error_t * +nat44_disable_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + snat_main_t *sm = &snat_main; + clib_error_t *error = 0; + + if (!sm->enabled) + return clib_error_return (0, "nat44 already disabled"); + + if (nat44_plugin_disable () != 0) + error = clib_error_return (0, "nat44 disable failed"); + + return error; +} + +static clib_error_t * +set_workers_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + uword *bitmap = 0; + int rv = 0; + clib_error_t *error = 0; + + /* Get a line of input. 
*/ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "%U", unformat_bitmap_list, &bitmap)) + ; + else + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (bitmap == 0) + { + error = clib_error_return (0, "List of workers must be specified."); + goto done; + } + + rv = snat_set_workers (bitmap); + + clib_bitmap_free (bitmap); + + switch (rv) + { + case VNET_API_ERROR_INVALID_WORKER: + error = clib_error_return (0, "Invalid worker(s)."); + goto done; + case VNET_API_ERROR_FEATURE_DISABLED: + error = clib_error_return (0, + "Supported only if 2 or more workes available."); + goto done; + default: + break; + } + +done: + unformat_free (line_input); + + return error; +} + +static clib_error_t * +nat_show_workers_commnad_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + snat_main_t *sm = &snat_main; + u32 *worker; + + if (sm->num_workers > 1) + { + vlib_cli_output (vm, "%d workers", vec_len (sm->workers)); + vec_foreach (worker, sm->workers) + { + vlib_worker_thread_t *w = + vlib_worker_threads + *worker + sm->first_worker_index; + vlib_cli_output (vm, " %s", w->name); + } + } + + return 0; +} + +static clib_error_t * +snat_set_log_level_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + snat_main_t *sm = &snat_main; + u8 log_level = NAT_LOG_NONE; + clib_error_t *error = 0; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + if (!unformat (line_input, "%d", &log_level)) + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } + if (log_level > NAT_LOG_DEBUG) + { + error = clib_error_return (0, "unknown logging level '%d'", log_level); + goto done; + } + sm->log_level = log_level; + +done: + unformat_free (line_input); + + return error; +} + +static clib_error_t * +snat_ipfix_logging_enable_disable_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + u32 domain_id = 0; + u32 src_port = 0; + u8 enable = 1; + int rv = 0; + clib_error_t *error = 0; + + /* Get a line of input. 
*/ + if (!unformat_user (input, unformat_line_input, line_input)) + { + rv = nat_ipfix_logging_enable_disable (enable, domain_id, + (u16) src_port); + if (rv) + return clib_error_return (0, "ipfix logging enable failed"); + return 0; + } + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "domain %d", &domain_id)) + ; + else if (unformat (line_input, "src-port %d", &src_port)) + ; + else if (unformat (line_input, "disable")) + enable = 0; + else + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } + } + + rv = nat_ipfix_logging_enable_disable (enable, domain_id, (u16) src_port); + + if (rv) + { + error = clib_error_return (0, "ipfix logging enable failed"); + goto done; + } + +done: + unformat_free (line_input); + + return error; +} + +static clib_error_t * +nat44_show_hash_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + snat_main_t *sm = &snat_main; + nat_affinity_main_t *nam = &nat_affinity_main; + int i; + int verbose = 0; + + if (unformat (input, "detail")) + verbose = 1; + else if (unformat (input, "verbose")) + verbose = 2; + + vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->static_mapping_by_local, + verbose); + vlib_cli_output (vm, "%U", + format_bihash_8_8, &sm->static_mapping_by_external, + verbose); + vlib_cli_output (vm, "%U", format_bihash_16_8, &sm->flow_hash, verbose); + vec_foreach_index (i, sm->per_thread_data) + { + vlib_cli_output (vm, "-------- thread %d %s --------\n", + i, vlib_worker_threads[i].name); + vlib_cli_output (vm, "%U", format_bihash_16_8, &sm->flow_hash, verbose); + } + + vlib_cli_output (vm, "%U", format_bihash_16_8, &nam->affinity_hash, + verbose); + + vlib_cli_output (vm, "-------- hash table parameters --------\n"); + vlib_cli_output (vm, "translation buckets: %u", sm->translation_buckets); + return 0; +} + +static clib_error_t * +nat_set_mss_clamping_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + snat_main_t *sm = &snat_main; + clib_error_t *error = 0; + u32 mss; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "disable")) + sm->mss_clamping = 0; + else if (unformat (line_input, "%d", &mss)) + sm->mss_clamping = (u16) mss; + else + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } + } + +done: + unformat_free (line_input); + + return error; +} + +static clib_error_t * +nat_show_mss_clamping_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + snat_main_t *sm = &snat_main; + + if (sm->mss_clamping) + vlib_cli_output (vm, "mss-clamping %d", sm->mss_clamping); + else + vlib_cli_output (vm, "mss-clamping disabled"); + + return 0; +} + +static clib_error_t * +add_address_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + snat_main_t *sm = &snat_main; + ip4_address_t start_addr, end_addr, this_addr; + u32 start_host_order, end_host_order; + u32 vrf_id = ~0; + int i, count; + int is_add = 1; + int rv = 0; + clib_error_t *error = 0; + u8 twice_nat = 0; + + /* Get a line of input. 
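+     Accepts a single pool address or an inclusive range (first - last),
+     plus optional tenant-vrf, twice-nat and del flags.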
*/ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "%U - %U", + unformat_ip4_address, &start_addr, + unformat_ip4_address, &end_addr)) + ; + else if (unformat (line_input, "tenant-vrf %u", &vrf_id)) + ; + else if (unformat (line_input, "%U", unformat_ip4_address, &start_addr)) + end_addr = start_addr; + else if (unformat (line_input, "twice-nat")) + twice_nat = 1; + else if (unformat (line_input, "del")) + is_add = 0; + else + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (sm->static_mapping_only) + { + error = clib_error_return (0, "static mapping only mode"); + goto done; + } + + start_host_order = clib_host_to_net_u32 (start_addr.as_u32); + end_host_order = clib_host_to_net_u32 (end_addr.as_u32); + + if (end_host_order < start_host_order) + { + error = clib_error_return (0, "end address less than start address"); + goto done; + } + + count = (end_host_order - start_host_order) + 1; + + if (count > 1024) + nat_log_info ("%U - %U, %d addresses...", + format_ip4_address, &start_addr, + format_ip4_address, &end_addr, count); + + this_addr = start_addr; + + for (i = 0; i < count; i++) + { + if (is_add) + rv = snat_add_address (sm, &this_addr, vrf_id, twice_nat); + else + rv = snat_del_address (sm, this_addr, 0, twice_nat); + + switch (rv) + { + case VNET_API_ERROR_VALUE_EXIST: + error = clib_error_return (0, "NAT address already in use."); + goto done; + case VNET_API_ERROR_NO_SUCH_ENTRY: + error = clib_error_return (0, "NAT address not exist."); + goto done; + case VNET_API_ERROR_UNSPECIFIED: + error = + clib_error_return (0, "NAT address used in static mapping."); + goto done; + default: + break; + } + + increment_v4_address (&this_addr); + } + +done: + unformat_free (line_input); + + return error; +} + +static void +nat44_show_lru_summary (vlib_main_t * vm, snat_main_per_thread_data_t * tsm, + u64 now, u64 sess_timeout_time) +{ + snat_main_t *sm = &snat_main; + dlist_elt_t *oldest_elt; + snat_session_t *s; + u32 oldest_index; + +#define _(n, d) \ + oldest_index = \ + clib_dlist_remove_head (tsm->lru_pool, tsm->n##_lru_head_index); \ + if (~0 != oldest_index) \ + { \ + oldest_elt = pool_elt_at_index (tsm->lru_pool, oldest_index); \ + s = pool_elt_at_index (tsm->sessions, oldest_elt->value); \ + sess_timeout_time = \ + s->last_heard + (f64)nat44_session_get_timeout (sm, s); \ + vlib_cli_output (vm, d " LRU min session timeout %llu (now %llu)", \ + sess_timeout_time, now); \ + clib_dlist_addhead (tsm->lru_pool, tsm->n##_lru_head_index, \ + oldest_index); \ + } + _(tcp_estab, "established tcp"); + _(tcp_trans, "transitory tcp"); + _(udp, "udp"); + _(unk_proto, "unknown protocol"); + _(icmp, "icmp"); +#undef _ +} + +static clib_error_t * +nat44_show_summary_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + snat_main_per_thread_data_t *tsm; + snat_main_t *sm = &snat_main; + snat_session_t *s; + + u32 count = 0; + + u64 now = vlib_time_now (vm); + u64 sess_timeout_time = 0; + + u32 udp_sessions = 0; + u32 tcp_sessions = 0; + u32 icmp_sessions = 0; + + u32 timed_out = 0; + u32 transitory = 0; + u32 transitory_wait_closed = 0; + u32 transitory_closed = 0; + u32 established = 0; + + u32 fib; + + for (fib = 0; fib < vec_len (sm->max_translations_per_fib); fib++) + vlib_cli_output (vm, "max translations per thread: %u fib %u", + 
sm->max_translations_per_fib[fib], fib); + + if (sm->num_workers > 1) + { + vec_foreach (tsm, sm->per_thread_data) + { + pool_foreach (s, tsm->sessions) + { + sess_timeout_time = s->last_heard + + (f64) nat44_session_get_timeout (sm, s); + if (now >= sess_timeout_time) + timed_out++; + + switch (s->nat_proto) + { + case NAT_PROTOCOL_ICMP: + icmp_sessions++; + break; + case NAT_PROTOCOL_TCP: + tcp_sessions++; + if (s->state) + { + if (s->tcp_closed_timestamp) + { + if (now >= s->tcp_closed_timestamp) + { + ++transitory_closed; + } + else + { + ++transitory_wait_closed; + } + } + transitory++; + } + else + established++; + break; + case NAT_PROTOCOL_UDP: + default: + udp_sessions++; + break; + } + } + nat44_show_lru_summary (vm, tsm, now, sess_timeout_time); + count += pool_elts (tsm->sessions); + } + } + else + { + tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers); + pool_foreach (s, tsm->sessions) + { + sess_timeout_time = s->last_heard + + (f64) nat44_session_get_timeout (sm, s); + if (now >= sess_timeout_time) + timed_out++; + + switch (s->nat_proto) + { + case NAT_PROTOCOL_ICMP: + icmp_sessions++; + break; + case NAT_PROTOCOL_TCP: + tcp_sessions++; + if (s->state) + { + if (s->tcp_closed_timestamp) + { + if (now >= s->tcp_closed_timestamp) + { + ++transitory_closed; + } + else + { + ++transitory_wait_closed; + } + } + transitory++; + } + else + established++; + break; + case NAT_PROTOCOL_UDP: + default: + udp_sessions++; + break; + } + } + nat44_show_lru_summary (vm, tsm, now, sess_timeout_time); + count = pool_elts (tsm->sessions); + } + + vlib_cli_output (vm, "total timed out sessions: %u", timed_out); + vlib_cli_output (vm, "total sessions: %u", count); + vlib_cli_output (vm, "total tcp sessions: %u", tcp_sessions); + vlib_cli_output (vm, "total tcp established sessions: %u", established); + vlib_cli_output (vm, "total tcp transitory sessions: %u", transitory); + vlib_cli_output (vm, "total tcp transitory (WAIT-CLOSED) sessions: %u", + transitory_wait_closed); + vlib_cli_output (vm, "total tcp transitory (CLOSED) sessions: %u", + transitory_closed); + vlib_cli_output (vm, "total udp sessions: %u", udp_sessions); + vlib_cli_output (vm, "total icmp sessions: %u", icmp_sessions); + return 0; +} + +static clib_error_t * +nat44_show_addresses_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + snat_main_t *sm = &snat_main; + snat_address_t *ap; + + vlib_cli_output (vm, "NAT44 pool addresses:"); + vec_foreach (ap, sm->addresses) + { + vlib_cli_output (vm, "%U", format_ip4_address, &ap->addr); + if (ap->fib_index != ~0) + vlib_cli_output (vm, " tenant VRF: %u", + fib_table_get(ap->fib_index, FIB_PROTOCOL_IP4)->ft_table_id); + else + vlib_cli_output (vm, " tenant VRF independent"); + #define _(N, i, n, s) \ + vlib_cli_output (vm, " %d busy %s ports", ap->busy_##n##_ports, s); + foreach_nat_protocol + #undef _ + } + vlib_cli_output (vm, "NAT44 twice-nat pool addresses:"); + vec_foreach (ap, sm->twice_nat_addresses) + { + vlib_cli_output (vm, "%U", format_ip4_address, &ap->addr); + if (ap->fib_index != ~0) + vlib_cli_output (vm, " tenant VRF: %u", + fib_table_get(ap->fib_index, FIB_PROTOCOL_IP4)->ft_table_id); + else + vlib_cli_output (vm, " tenant VRF independent"); + #define _(N, i, n, s) \ + vlib_cli_output (vm, " %d busy %s ports", ap->busy_##n##_ports, s); + foreach_nat_protocol + #undef _ + } + return 0; +} + +static clib_error_t * +snat_feature_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + 
unformat_input_t _line_input, *line_input = &_line_input; + vnet_main_t *vnm = vnet_get_main (); + clib_error_t *error = 0; + u32 sw_if_index; + u32 *inside_sw_if_indices = 0; + u32 *outside_sw_if_indices = 0; + u8 is_output_feature = 0; + int is_del = 0; + int i; + + sw_if_index = ~0; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "in %U", unformat_vnet_sw_interface, + vnm, &sw_if_index)) + vec_add1 (inside_sw_if_indices, sw_if_index); + else if (unformat (line_input, "out %U", unformat_vnet_sw_interface, + vnm, &sw_if_index)) + vec_add1 (outside_sw_if_indices, sw_if_index); + else if (unformat (line_input, "output-feature")) + is_output_feature = 1; + else if (unformat (line_input, "del")) + is_del = 1; + else + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (vec_len (inside_sw_if_indices)) + { + for (i = 0; i < vec_len (inside_sw_if_indices); i++) + { + sw_if_index = inside_sw_if_indices[i]; + if (is_output_feature) + { + if (snat_interface_add_del_output_feature + (sw_if_index, 1, is_del)) + { + error = clib_error_return (0, "%s %U failed", + is_del ? "del" : "add", + format_vnet_sw_if_index_name, + vnm, sw_if_index); + goto done; + } + } + else + { + if (snat_interface_add_del (sw_if_index, 1, is_del)) + { + error = clib_error_return (0, "%s %U failed", + is_del ? "del" : "add", + format_vnet_sw_if_index_name, + vnm, sw_if_index); + goto done; + } + } + } + } + + if (vec_len (outside_sw_if_indices)) + { + for (i = 0; i < vec_len (outside_sw_if_indices); i++) + { + sw_if_index = outside_sw_if_indices[i]; + if (is_output_feature) + { + if (snat_interface_add_del_output_feature + (sw_if_index, 0, is_del)) + { + error = clib_error_return (0, "%s %U failed", + is_del ? "del" : "add", + format_vnet_sw_if_index_name, + vnm, sw_if_index); + goto done; + } + } + else + { + if (snat_interface_add_del (sw_if_index, 0, is_del)) + { + error = clib_error_return (0, "%s %U failed", + is_del ? "del" : "add", + format_vnet_sw_if_index_name, + vnm, sw_if_index); + goto done; + } + } + } + } + +done: + unformat_free (line_input); + vec_free (inside_sw_if_indices); + vec_free (outside_sw_if_indices); + + return error; +} + +static clib_error_t * +nat44_show_interfaces_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + snat_main_t *sm = &snat_main; + snat_interface_t *i; + vnet_main_t *vnm = vnet_get_main (); + + vlib_cli_output (vm, "NAT44 interfaces:"); + pool_foreach (i, sm->interfaces) + { + vlib_cli_output (vm, " %U %s", format_vnet_sw_if_index_name, vnm, + i->sw_if_index, + (nat_interface_is_inside(i) && + nat_interface_is_outside(i)) ? "in out" : + (nat_interface_is_inside(i) ? "in" : "out")); + } + + pool_foreach (i, sm->output_feature_interfaces) + { + vlib_cli_output (vm, " %U output-feature %s", + format_vnet_sw_if_index_name, vnm, + i->sw_if_index, + (nat_interface_is_inside(i) && + nat_interface_is_outside(i)) ? "in out" : + (nat_interface_is_inside(i) ? 
"in" : "out")); + } + + return 0; +} + +static clib_error_t * +add_static_mapping_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *error = 0; + ip4_address_t l_addr, e_addr, exact_addr; + u32 l_port = 0, e_port = 0, vrf_id = ~0; + int is_add = 1, addr_only = 1, rv, exact = 0; + u32 sw_if_index = ~0; + vnet_main_t *vnm = vnet_get_main (); + nat_protocol_t proto = NAT_PROTOCOL_OTHER; + u8 proto_set = 0; + twice_nat_type_t twice_nat = TWICE_NAT_DISABLED; + u8 out2in_only = 0; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "local %U %u", unformat_ip4_address, &l_addr, + &l_port)) + addr_only = 0; + else + if (unformat (line_input, "local %U", unformat_ip4_address, &l_addr)) + ; + else if (unformat (line_input, "external %U %u", unformat_ip4_address, + &e_addr, &e_port)) + addr_only = 0; + else if (unformat (line_input, "external %U", unformat_ip4_address, + &e_addr)) + ; + else if (unformat (line_input, "external %U %u", + unformat_vnet_sw_interface, vnm, &sw_if_index, + &e_port)) + addr_only = 0; + else if (unformat (line_input, "external %U", + unformat_vnet_sw_interface, vnm, &sw_if_index)) + ; + else if (unformat (line_input, "exact %U", unformat_ip4_address, + &exact_addr)) + exact = 1; + else if (unformat (line_input, "vrf %u", &vrf_id)) + ; + else if (unformat (line_input, "%U", unformat_nat_protocol, &proto)) + proto_set = 1; + else if (unformat (line_input, "twice-nat")) + twice_nat = TWICE_NAT; + else if (unformat (line_input, "self-twice-nat")) + twice_nat = TWICE_NAT_SELF; + else if (unformat (line_input, "out2in-only")) + out2in_only = 1; + else if (unformat (line_input, "del")) + is_add = 0; + else + { + error = clib_error_return (0, "unknown input: '%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (twice_nat && addr_only) + { + error = clib_error_return (0, "twice NAT only for 1:1 NAPT"); + goto done; + } + + if (addr_only) + { + if (proto_set) + { + error = + clib_error_return (0, + "address only mapping doesn't support protocol"); + goto done; + } + } + else if (!proto_set) + { + error = clib_error_return (0, "protocol is required"); + goto done; + } + + rv = snat_add_static_mapping ( + l_addr, e_addr, clib_host_to_net_u16 (l_port), + clib_host_to_net_u16 (e_port), vrf_id, addr_only, sw_if_index, proto, + is_add, twice_nat, out2in_only, 0, 0, exact_addr, exact); + + switch (rv) + { + case VNET_API_ERROR_INVALID_VALUE: + error = clib_error_return (0, "External port already in use."); + goto done; + case VNET_API_ERROR_NO_SUCH_ENTRY: + if (is_add) + error = clib_error_return (0, "External address must be allocated."); + else + error = clib_error_return (0, "Mapping not exist."); + goto done; + case VNET_API_ERROR_NO_SUCH_FIB: + error = clib_error_return (0, "No such VRF id."); + goto done; + case VNET_API_ERROR_VALUE_EXIST: + error = clib_error_return (0, "Mapping already exist."); + goto done; + default: + break; + } + +done: + unformat_free (line_input); + + return error; +} + +static clib_error_t * +add_identity_mapping_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *error = 0; + ip4_address_t addr, pool_addr = { 0 }; + u32 port = 0, vrf_id = ~0; + int is_add = 1; + 
int addr_only = 1; + u32 sw_if_index = ~0; + vnet_main_t *vnm = vnet_get_main (); + int rv; + nat_protocol_t proto; + + addr.as_u32 = 0; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "%U", unformat_ip4_address, &addr)) + ; + else if (unformat (line_input, "external %U", + unformat_vnet_sw_interface, vnm, &sw_if_index)) + ; + else if (unformat (line_input, "vrf %u", &vrf_id)) + ; + else if (unformat (line_input, "%U %u", unformat_nat_protocol, &proto, + &port)) + addr_only = 0; + else if (unformat (line_input, "del")) + is_add = 0; + else + { + error = clib_error_return (0, "unknown input: '%U'", + format_unformat_error, line_input); + goto done; + } + } + + rv = snat_add_static_mapping ( + addr, addr, clib_host_to_net_u16 (port), clib_host_to_net_u16 (port), + vrf_id, addr_only, sw_if_index, proto, is_add, 0, 0, 0, 1, pool_addr, 0); + + switch (rv) + { + case VNET_API_ERROR_INVALID_VALUE: + error = clib_error_return (0, "External port already in use."); + goto done; + case VNET_API_ERROR_NO_SUCH_ENTRY: + if (is_add) + error = clib_error_return (0, "External address must be allocated."); + else + error = clib_error_return (0, "Mapping not exist."); + goto done; + case VNET_API_ERROR_NO_SUCH_FIB: + error = clib_error_return (0, "No such VRF id."); + goto done; + case VNET_API_ERROR_VALUE_EXIST: + error = clib_error_return (0, "Mapping already exist."); + goto done; + default: + break; + } + +done: + unformat_free (line_input); + + return error; +} + +static clib_error_t * +add_lb_static_mapping_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *error = 0; + ip4_address_t l_addr, e_addr; + u32 l_port = 0, e_port = 0, vrf_id = 0, probability = 0, affinity = 0; + int is_add = 1; + int rv; + nat_protocol_t proto; + u8 proto_set = 0; + nat44_lb_addr_port_t *locals = 0, local; + twice_nat_type_t twice_nat = TWICE_NAT_DISABLED; + u8 out2in_only = 0; + + /* Get a line of input. 
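+     Each local clause adds one back-end with its probability; at least two
+     back-ends and a protocol are required (checked below).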
*/ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "local %U:%u probability %u", + unformat_ip4_address, &l_addr, &l_port, &probability)) + { + clib_memset (&local, 0, sizeof (local)); + local.addr = l_addr; + local.port = (u16) l_port; + local.probability = (u8) probability; + vec_add1 (locals, local); + } + else if (unformat (line_input, "local %U:%u vrf %u probability %u", + unformat_ip4_address, &l_addr, &l_port, &vrf_id, + &probability)) + { + clib_memset (&local, 0, sizeof (local)); + local.addr = l_addr; + local.port = (u16) l_port; + local.probability = (u8) probability; + local.vrf_id = vrf_id; + vec_add1 (locals, local); + } + else if (unformat (line_input, "external %U:%u", unformat_ip4_address, + &e_addr, &e_port)) + ; + else if (unformat (line_input, "protocol %U", unformat_nat_protocol, + &proto)) + proto_set = 1; + else if (unformat (line_input, "twice-nat")) + twice_nat = TWICE_NAT; + else if (unformat (line_input, "self-twice-nat")) + twice_nat = TWICE_NAT_SELF; + else if (unformat (line_input, "out2in-only")) + out2in_only = 1; + else if (unformat (line_input, "del")) + is_add = 0; + else if (unformat (line_input, "affinity %u", &affinity)) + ; + else + { + error = clib_error_return (0, "unknown input: '%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (vec_len (locals) < 2) + { + error = clib_error_return (0, "at least two local must be set"); + goto done; + } + + if (!proto_set) + { + error = clib_error_return (0, "missing protocol"); + goto done; + } + + rv = nat44_add_del_lb_static_mapping (e_addr, (u16) e_port, proto, locals, + is_add, twice_nat, out2in_only, 0, + affinity); + + switch (rv) + { + case VNET_API_ERROR_INVALID_VALUE: + error = clib_error_return (0, "External port already in use."); + goto done; + case VNET_API_ERROR_NO_SUCH_ENTRY: + if (is_add) + error = clib_error_return (0, "External address must be allocated."); + else + error = clib_error_return (0, "Mapping not exist."); + goto done; + case VNET_API_ERROR_VALUE_EXIST: + error = clib_error_return (0, "Mapping already exist."); + goto done; + default: + break; + } + +done: + unformat_free (line_input); + vec_free (locals); + + return error; +} + +static clib_error_t * +add_lb_backend_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *error = 0; + ip4_address_t l_addr, e_addr; + u32 l_port = 0, e_port = 0, vrf_id = 0, probability = 0; + int is_add = 1; + int rv; + nat_protocol_t proto; + u8 proto_set = 0; + + /* Get a line of input. 
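+     Adds or deletes a single back-end of an existing load-balancing static
+     mapping; local, external and protocol are all required.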
*/ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "local %U:%u probability %u", + unformat_ip4_address, &l_addr, &l_port, &probability)) + ; + else if (unformat (line_input, "local %U:%u vrf %u probability %u", + unformat_ip4_address, &l_addr, &l_port, &vrf_id, + &probability)) + ; + else if (unformat (line_input, "external %U:%u", unformat_ip4_address, + &e_addr, &e_port)) + ; + else if (unformat (line_input, "protocol %U", unformat_nat_protocol, + &proto)) + proto_set = 1; + else if (unformat (line_input, "del")) + is_add = 0; + else + { + error = clib_error_return (0, "unknown input: '%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (!l_port || !e_port) + { + error = clib_error_return (0, "local or external must be set"); + goto done; + } + + if (!proto_set) + { + error = clib_error_return (0, "missing protocol"); + goto done; + } + + rv = + nat44_lb_static_mapping_add_del_local (e_addr, (u16) e_port, l_addr, + l_port, proto, vrf_id, probability, + is_add); + + switch (rv) + { + case VNET_API_ERROR_INVALID_VALUE: + error = clib_error_return (0, "External is not load-balancing static " + "mapping."); + goto done; + case VNET_API_ERROR_NO_SUCH_ENTRY: + error = clib_error_return (0, "Mapping or back-end not exist."); + goto done; + case VNET_API_ERROR_VALUE_EXIST: + error = clib_error_return (0, "Back-end already exist."); + goto done; + case VNET_API_ERROR_UNSPECIFIED: + error = clib_error_return (0, "At least two back-ends must remain"); + goto done; + default: + break; + } + +done: + unformat_free (line_input); + + return error; +} + +static clib_error_t * +nat44_show_static_mappings_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + snat_main_t *sm = &snat_main; + snat_static_mapping_t *m; + snat_static_map_resolve_t *rp; + + vlib_cli_output (vm, "NAT44 static mappings:"); + pool_foreach (m, sm->static_mappings) + { + vlib_cli_output (vm, " %U", format_snat_static_mapping, m); + } + vec_foreach (rp, sm->to_resolve) + vlib_cli_output (vm, " %U", format_snat_static_map_to_resolve, rp); + + return 0; +} + +static clib_error_t * +snat_add_interface_address_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + snat_main_t *sm = &snat_main; + unformat_input_t _line_input, *line_input = &_line_input; + u32 sw_if_index; + int rv; + int is_del = 0; + clib_error_t *error = 0; + u8 twice_nat = 0; + + /* Get a line of input. 
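+     Registers (or with del, removes) the interface so that its address is
+     used as a NAT pool address; twice-nat selects the twice-NAT pool.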
*/ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "%U", unformat_vnet_sw_interface, + sm->vnet_main, &sw_if_index)) + ; + else if (unformat (line_input, "twice-nat")) + twice_nat = 1; + else if (unformat (line_input, "del")) + is_del = 1; + else + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } + } + + rv = snat_add_interface_address (sm, sw_if_index, is_del, twice_nat); + + switch (rv) + { + case 0: + break; + + default: + error = clib_error_return (0, "snat_add_interface_address returned %d", + rv); + goto done; + } + +done: + unformat_free (line_input); + + return error; +} + +static clib_error_t * +nat44_show_interface_address_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + snat_main_t *sm = &snat_main; + vnet_main_t *vnm = vnet_get_main (); + u32 *sw_if_index; + + vlib_cli_output (vm, "NAT44 pool address interfaces:"); + vec_foreach (sw_if_index, sm->auto_add_sw_if_indices) + { + vlib_cli_output (vm, " %U", format_vnet_sw_if_index_name, vnm, + *sw_if_index); + } + vlib_cli_output (vm, "NAT44 twice-nat pool address interfaces:"); + vec_foreach (sw_if_index, sm->auto_add_sw_if_indices_twice_nat) + { + vlib_cli_output (vm, " %U", format_vnet_sw_if_index_name, vnm, + *sw_if_index); + } + + return 0; +} + +static clib_error_t * +nat44_show_sessions_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *error = 0; + snat_main_per_thread_data_t *tsm; + snat_main_t *sm = &snat_main; + + int i = 0; + + if (!unformat_user (input, unformat_line_input, line_input)) + goto print; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + break; + } + unformat_free (line_input); + +print: + vlib_cli_output (vm, "NAT44 ED sessions:"); + + vec_foreach_index (i, sm->per_thread_data) + { + tsm = vec_elt_at_index (sm->per_thread_data, i); + + vlib_cli_output (vm, "-------- thread %d %s: %d sessions --------\n", + i, vlib_worker_threads[i].name, + pool_elts (tsm->sessions)); + + snat_session_t *s; + pool_foreach (s, tsm->sessions) + { + vlib_cli_output (vm, " %U\n", format_snat_session, tsm, s); + } + } + return error; +} + +static clib_error_t * +nat44_set_session_limit_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *error = 0; + + u32 session_limit = 0, vrf_id = 0; + + /* Get a line of input. 
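+     A non-zero session limit is required; the optional vrf id selects the
+     FIB table the limit applies to.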
*/ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "%u", &session_limit)) + ; + else if (unformat (line_input, "vrf %u", &vrf_id)) + ; + else + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (!session_limit) + error = clib_error_return (0, "missing value of session limit"); + else if (nat44_update_session_limit (session_limit, vrf_id)) + error = clib_error_return (0, "nat44_set_session_limit failed"); + +done: + unformat_free (line_input); + + return error; +} + +static clib_error_t * +nat44_del_session_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + snat_main_t *sm = &snat_main; + unformat_input_t _line_input, *line_input = &_line_input; + u32 port = 0, eh_port = 0, vrf_id = sm->outside_vrf_id; + clib_error_t *error = 0; + ip4_address_t addr, eh_addr; + nat_protocol_t proto; + int is_in = 0; + int rv; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "%U:%u %U", unformat_ip4_address, &addr, &port, + unformat_nat_protocol, &proto)) + ; + else if (unformat (line_input, "in")) + { + is_in = 1; + vrf_id = sm->inside_vrf_id; + } + else if (unformat (line_input, "out")) + { + is_in = 0; + vrf_id = sm->outside_vrf_id; + } + else if (unformat (line_input, "vrf %u", &vrf_id)) + ; + else if (unformat (line_input, "external-host %U:%u", + unformat_ip4_address, &eh_addr, &eh_port)) + ; + else + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } + } + + rv = + nat44_del_ed_session (sm, &addr, clib_host_to_net_u16 (port), &eh_addr, + clib_host_to_net_u16 (eh_port), + nat_proto_to_ip_proto (proto), vrf_id, is_in); + + switch (rv) + { + case 0: + break; + + default: + error = clib_error_return (0, "nat44_del_session returned %d", rv); + goto done; + } + +done: + unformat_free (line_input); + + return error; +} + +static clib_error_t * +snat_forwarding_set_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + snat_main_t *sm = &snat_main; + unformat_input_t _line_input, *line_input = &_line_input; + u8 forwarding_enable; + u8 forwarding_enable_set = 0; + clib_error_t *error = 0; + + /* Get a line of input. 
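+     Exactly one of enable or disable is expected; the flag controls whether
+     packets matching no session or static mapping are forwarded instead of
+     dropped.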
*/ + if (!unformat_user (input, unformat_line_input, line_input)) + return clib_error_return (0, "'enable' or 'disable' expected"); + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (!forwarding_enable_set && unformat (line_input, "enable")) + { + forwarding_enable = 1; + forwarding_enable_set = 1; + } + else if (!forwarding_enable_set && unformat (line_input, "disable")) + { + forwarding_enable = 0; + forwarding_enable_set = 1; + } + else + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (!forwarding_enable_set) + { + error = clib_error_return (0, "'enable' or 'disable' expected"); + goto done; + } + + sm->forwarding_enabled = forwarding_enable; + +done: + unformat_free (line_input); + + return error; +} + +static clib_error_t * +set_timeout_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + snat_main_t *sm = &snat_main; + unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *error = 0; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "udp %u", &sm->timeouts.udp)); + else if (unformat (line_input, "tcp-established %u", + &sm->timeouts.tcp.established)); + else if (unformat (line_input, "tcp-transitory %u", + &sm->timeouts.tcp.transitory)); + else if (unformat (line_input, "icmp %u", &sm->timeouts.icmp)); + else if (unformat (line_input, "reset")) + nat_reset_timeouts (&sm->timeouts); + else + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } + } +done: + unformat_free (line_input); + return error; +} + +static clib_error_t * +nat_show_timeouts_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + snat_main_t *sm = &snat_main; + + vlib_cli_output (vm, "udp timeout: %dsec", sm->timeouts.udp); + vlib_cli_output (vm, "tcp-established timeout: %dsec", + sm->timeouts.tcp.established); + vlib_cli_output (vm, "tcp-transitory timeout: %dsec", + sm->timeouts.tcp.transitory); + vlib_cli_output (vm, "icmp timeout: %dsec", sm->timeouts.icmp); + + return 0; +} + +static clib_error_t * +set_frame_queue_nelts_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *error = 0; + u32 frame_queue_nelts = 0; + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "%u", &frame_queue_nelts)) + ; + else + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } + } + if (!frame_queue_nelts) + { + error = clib_error_return (0, "frame_queue_nelts cannot be zero"); + goto done; + } + if (snat_set_frame_queue_nelts (frame_queue_nelts) != 0) + { + error = clib_error_return (0, "snat_set_frame_queue_nelts failed"); + goto done; + } +done: + unformat_free (line_input); + return error; +} + +/*? 
+ * @cliexpar + * @cliexstart{nat44 enable} + * Enable nat44 plugin + * To enable nat44, use: + * vpp# nat44 enable sessions + * To enable nat44 static mapping only, use: + * vpp# nat44 enable sessions static-mapping + * To enable nat44 static mapping with connection tracking, use: + * vpp# nat44 enable sessions static-mapping connection-tracking + * To set inside-vrf outside-vrf, use: + * vpp# nat44 enable sessions inside-vrf outside-vrf + * @cliexend +?*/ +VLIB_CLI_COMMAND (nat44_enable_command, static) = { + .path = "nat44 enable", + .short_help = + "nat44 enable sessions [static-mappig-only " + "[connection-tracking]] [inside-vrf ] [outside-vrf ]", + .function = nat44_enable_command_fn, +}; + +/*? + * @cliexpar + * @cliexstart{nat44 disable} + * Disable nat44 plugin + * To disable nat44, use: + * vpp# nat44 disable + * @cliexend +?*/ +VLIB_CLI_COMMAND (nat44_disable_command, static) = { + .path = "nat44 disable", + .short_help = "nat44 disable", + .function = nat44_disable_command_fn, +}; + +/*? + * @cliexpar + * @cliexstart{set snat workers} + * Set NAT workers if 2 or more workers available, use: + * vpp# set snat workers 0-2,5 + * @cliexend +?*/ +VLIB_CLI_COMMAND (set_workers_command, static) = { + .path = "set nat workers", + .function = set_workers_command_fn, + .short_help = "set nat workers ", +}; + +/*? + * @cliexpar + * @cliexstart{show nat workers} + * Show NAT workers. + * vpp# show nat workers: + * 2 workers + * vpp_wk_0 + * vpp_wk_1 + * @cliexend +?*/ +VLIB_CLI_COMMAND (nat_show_workers_command, static) = { + .path = "show nat workers", + .short_help = "show nat workers", + .function = nat_show_workers_commnad_fn, +}; + +/*? + * @cliexpar + * @cliexstart{set nat timeout} + * Set values of timeouts for NAT sessions (in seconds), use: + * vpp# set nat timeout udp 120 tcp-established 7500 tcp-transitory 250 icmp 90 + * To reset default values use: + * vpp# set nat timeout reset + * @cliexend +?*/ +VLIB_CLI_COMMAND (set_timeout_command, static) = { + .path = "set nat timeout", + .function = set_timeout_command_fn, + .short_help = + "set nat timeout [udp | tcp-established " + "tcp-transitory | icmp | reset]", +}; + +/*? + * @cliexpar + * @cliexstart{show nat timeouts} + * Show values of timeouts for NAT sessions. + * vpp# show nat timeouts + * udp timeout: 300sec + * tcp-established timeout: 7440sec + * tcp-transitory timeout: 240sec + * icmp timeout: 60sec + * @cliexend +?*/ +VLIB_CLI_COMMAND (nat_show_timeouts_command, static) = { + .path = "show nat timeouts", + .short_help = "show nat timeouts", + .function = nat_show_timeouts_command_fn, +}; + +/*? + * @cliexpar + * @cliexstart{set nat frame-queue-nelts} + * Set number of worker handoff frame queue elements. + * @cliexend +?*/ +VLIB_CLI_COMMAND (set_frame_queue_nelts_command, static) = { + .path = "set nat frame-queue-nelts", + .function = set_frame_queue_nelts_command_fn, + .short_help = "set nat frame-queue-nelts ", +}; + +/*? + * @cliexpar + * @cliexstart{nat set logging level} + * To set NAT logging level use: + * Set nat logging level + * @cliexend +?*/ +VLIB_CLI_COMMAND (snat_set_log_level_command, static) = { + .path = "nat set logging level", + .function = snat_set_log_level_command_fn, + .short_help = "nat set logging level ", +}; + +/*? 
+ * @cliexpar + * @cliexstart{snat ipfix logging} + * To enable NAT IPFIX logging use: + * vpp# nat ipfix logging + * To set IPFIX exporter use: + * vpp# set ipfix exporter collector 10.10.10.3 src 10.10.10.1 + * @cliexend +?*/ +VLIB_CLI_COMMAND (snat_ipfix_logging_enable_disable_command, static) = { + .path = "nat ipfix logging", + .function = snat_ipfix_logging_enable_disable_command_fn, + .short_help = "nat ipfix logging [domain ] [src-port ] [disable]", +}; + +/*? + * @cliexpar + * @cliexstart{nat mss-clamping} + * Set TCP MSS rewriting configuration + * To enable TCP MSS rewriting use: + * vpp# nat mss-clamping 1452 + * To disbale TCP MSS rewriting use: + * vpp# nat mss-clamping disable + * @cliexend +?*/ +VLIB_CLI_COMMAND (nat_set_mss_clamping_command, static) = { + .path = "nat mss-clamping", + .short_help = "nat mss-clamping |disable", + .function = nat_set_mss_clamping_command_fn, +}; + +/*? + * @cliexpar + * @cliexstart{show nat mss-clamping} + * Show TCP MSS rewriting configuration + * @cliexend +?*/ +VLIB_CLI_COMMAND (nat_show_mss_clamping_command, static) = { + .path = "show nat mss-clamping", + .short_help = "show nat mss-clamping", + .function = nat_show_mss_clamping_command_fn, +}; + +/*? + * @cliexpar + * @cliexstart{show nat44 hash tables} + * Show NAT44 hash tables + * @cliexend +?*/ +VLIB_CLI_COMMAND (nat44_show_hash, static) = { + .path = "show nat44 hash tables", + .short_help = "show nat44 hash tables [detail|verbose]", + .function = nat44_show_hash_command_fn, +}; + +/*? + * @cliexpar + * @cliexstart{nat44 add address} + * Add/delete NAT44 pool address. + * To add NAT44 pool address use: + * vpp# nat44 add address 172.16.1.3 + * vpp# nat44 add address 172.16.2.2 - 172.16.2.24 + * To add NAT44 pool address for specific tenant (identified by VRF id) use: + * vpp# nat44 add address 172.16.1.3 tenant-vrf 10 + * @cliexend +?*/ +VLIB_CLI_COMMAND (add_address_command, static) = { + .path = "nat44 add address", + .short_help = "nat44 add address [- ] " + "[tenant-vrf ] [twice-nat] [del]", + .function = add_address_command_fn, +}; + +/*? + * @cliexpar + * @cliexstart{show nat44 summary} + * Show NAT44 summary + * vpp# show nat44 summary + * @cliexend +?*/ +VLIB_CLI_COMMAND (nat44_show_summary_command, static) = { + .path = "show nat44 summary", + .short_help = "show nat44 summary", + .function = nat44_show_summary_command_fn, +}; + +/*? + * @cliexpar + * @cliexstart{show nat44 addresses} + * Show NAT44 pool addresses. + * vpp# show nat44 addresses + * NAT44 pool addresses: + * 172.16.2.2 + * tenant VRF independent + * 10 busy udp ports + * 0 busy tcp ports + * 0 busy icmp ports + * 172.16.1.3 + * tenant VRF: 10 + * 0 busy udp ports + * 2 busy tcp ports + * 0 busy icmp ports + * NAT44 twice-nat pool addresses: + * 10.20.30.72 + * tenant VRF independent + * 0 busy udp ports + * 0 busy tcp ports + * 0 busy icmp ports + * @cliexend +?*/ +VLIB_CLI_COMMAND (nat44_show_addresses_command, static) = { + .path = "show nat44 addresses", + .short_help = "show nat44 addresses", + .function = nat44_show_addresses_command_fn, +}; + +/*? + * @cliexpar + * @cliexstart{set interface nat44} + * Enable/disable NAT44 feature on the interface. 
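+ * An interface can be inside (in), outside (out), or both; output-feature
+ * attaches the translation on the interface output path instead.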
+ * To enable NAT44 feature with local network interface use: + * vpp# set interface nat44 in GigabitEthernet0/8/0 + * To enable NAT44 feature with external network interface use: + * vpp# set interface nat44 out GigabitEthernet0/a/0 + * @cliexend +?*/ +VLIB_CLI_COMMAND (set_interface_snat_command, static) = { + .path = "set interface nat44", + .function = snat_feature_command_fn, + .short_help = "set interface nat44 in out [output-feature] " + "[del]", +}; + +/*? + * @cliexpar + * @cliexstart{show nat44 interfaces} + * Show interfaces with NAT44 feature. + * vpp# show nat44 interfaces + * NAT44 interfaces: + * GigabitEthernet0/8/0 in + * GigabitEthernet0/a/0 out + * @cliexend +?*/ +VLIB_CLI_COMMAND (nat44_show_interfaces_command, static) = { + .path = "show nat44 interfaces", + .short_help = "show nat44 interfaces", + .function = nat44_show_interfaces_command_fn, +}; + +/*? + * @cliexpar + * @cliexstart{nat44 add static mapping} + * Static mapping allows hosts on the external network to initiate connection + * to to the local network host. + * To create static mapping between local host address 10.0.0.3 port 6303 and + * external address 4.4.4.4 port 3606 for TCP protocol use: + * vpp# nat44 add static mapping tcp local 10.0.0.3 6303 external 4.4.4.4 3606 + * If not runnig "static mapping only" NAT plugin mode use before: + * vpp# nat44 add address 4.4.4.4 + * To create address only static mapping between local and external address use: + * vpp# nat44 add static mapping local 10.0.0.3 external 4.4.4.4 + * To create ICMP static mapping between local and external with ICMP echo + * identifier 10 use: + * vpp# nat44 add static mapping icmp local 10.0.0.3 10 external 4.4.4.4 10 + * To force use of specific pool address, vrf independent + * vpp# nat44 add static mapping local 10.0.0.2 1234 external 10.0.2.2 1234 twice-nat exact 10.0.1.2 + * @cliexend +?*/ +VLIB_CLI_COMMAND (add_static_mapping_command, static) = { + .path = "nat44 add static mapping", + .function = add_static_mapping_command_fn, + .short_help = + "nat44 add static mapping tcp|udp|icmp local [] " + "external [] [vrf ] [twice-nat|self-twice-nat] " + "[out2in-only] [exact ] [del]", +}; + +/*? + * @cliexpar + * @cliexstart{nat44 add identity mapping} + * Identity mapping translate an IP address to itself. + * To create identity mapping for address 10.0.0.3 port 6303 for TCP protocol + * use: + * vpp# nat44 add identity mapping 10.0.0.3 tcp 6303 + * To create identity mapping for address 10.0.0.3 use: + * vpp# nat44 add identity mapping 10.0.0.3 + * To create identity mapping for DHCP addressed interface use: + * vpp# nat44 add identity mapping external GigabitEthernet0/a/0 tcp 3606 + * @cliexend +?*/ +VLIB_CLI_COMMAND (add_identity_mapping_command, static) = { + .path = "nat44 add identity mapping", + .function = add_identity_mapping_command_fn, + .short_help = "nat44 add identity mapping |external " + "[ ] [vrf ] [del]", +}; + +/*? + * @cliexpar + * @cliexstart{nat44 add load-balancing static mapping} + * Service load balancing using NAT44 + * To add static mapping with load balancing for service with external IP + * address 1.2.3.4 and TCP port 80 and mapped to 2 local servers + * 10.100.10.10:8080 and 10.100.10.20:8080 with probability 80% resp. 
20% use: + * vpp# nat44 add load-balancing static mapping protocol tcp external 1.2.3.4:80 local 10.100.10.10:8080 probability 80 local 10.100.10.20:8080 probability 20 + * @cliexend +?*/ +VLIB_CLI_COMMAND (add_lb_static_mapping_command, static) = { + .path = "nat44 add load-balancing static mapping", + .function = add_lb_static_mapping_command_fn, + .short_help = + "nat44 add load-balancing static mapping protocol tcp|udp " + "external : local : [vrf ] " + "probability [twice-nat|self-twice-nat] [out2in-only] " + "[affinity ] [del]", +}; + +/*? + * @cliexpar + * @cliexstart{nat44 add load-balancing static mapping} + * Modify service load balancing using NAT44 + * To add new back-end server 10.100.10.30:8080 for service load balancing + * static mapping with external IP address 1.2.3.4 and TCP port 80 use: + * vpp# nat44 add load-balancing back-end protocol tcp external 1.2.3.4:80 local 10.100.10.30:8080 probability 25 + * @cliexend +?*/ +VLIB_CLI_COMMAND (add_lb_backend_command, static) = { + .path = "nat44 add load-balancing back-end", + .function = add_lb_backend_command_fn, + .short_help = + "nat44 add load-balancing back-end protocol tcp|udp " + "external : local : [vrf ] " + "probability [del]", +}; + +/*? + * @cliexpar + * @cliexstart{show nat44 static mappings} + * Show NAT44 static mappings. + * vpp# show nat44 static mappings + * NAT44 static mappings: + * local 10.0.0.3 external 4.4.4.4 vrf 0 + * tcp local 192.168.0.4:6303 external 4.4.4.3:3606 vrf 0 + * tcp vrf 0 external 1.2.3.4:80 out2in-only + * local 10.100.10.10:8080 probability 80 + * local 10.100.10.20:8080 probability 20 + * tcp local 10.100.3.8:8080 external 169.10.10.1:80 vrf 0 twice-nat + * tcp local 10.0.0.10:3603 external GigabitEthernet0/a/0:6306 vrf 10 + * @cliexend +?*/ +VLIB_CLI_COMMAND (nat44_show_static_mappings_command, static) = { + .path = "show nat44 static mappings", + .short_help = "show nat44 static mappings", + .function = nat44_show_static_mappings_command_fn, +}; + +/*? + * @cliexpar + * @cliexstart{nat44 add interface address} + * Use NAT44 pool address from specific interfce + * To add NAT44 pool address from specific interface use: + * vpp# nat44 add interface address GigabitEthernet0/8/0 + * @cliexend +?*/ +VLIB_CLI_COMMAND (snat_add_interface_address_command, static) = { + .path = "nat44 add interface address", + .short_help = "nat44 add interface address [twice-nat] [del]", + .function = snat_add_interface_address_command_fn, +}; + +/*? + * @cliexpar + * @cliexstart{show nat44 interface address} + * Show NAT44 pool address interfaces + * vpp# show nat44 interface address + * NAT44 pool address interfaces: + * GigabitEthernet0/a/0 + * NAT44 twice-nat pool address interfaces: + * GigabitEthernet0/8/0 + * @cliexend +?*/ +VLIB_CLI_COMMAND (nat44_show_interface_address_command, static) = { + .path = "show nat44 interface address", + .short_help = "show nat44 interface address", + .function = nat44_show_interface_address_command_fn, +}; + +/*? + * @cliexpar + * @cliexstart{show nat44 sessions} + * Show NAT44 sessions. + * @cliexend +?*/ +VLIB_CLI_COMMAND (nat44_show_sessions_command, static) = { + .path = "show nat44 sessions", + .short_help = "show nat44 sessions [detail|metrics]", + .function = nat44_show_sessions_command_fn, +}; + +/*? + * @cliexpar + * @cliexstart{set nat44 session limit} + * Set NAT44 session limit. 
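+ * To set the NAT44 session limit (e.g. to 10000 sessions) use:
+ * vpp# set nat44 session limit 10000
+ * To set the limit for a specific VRF use:
+ * vpp# set nat44 session limit 10000 vrf 10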
+ * @cliexend +?*/ +VLIB_CLI_COMMAND (nat44_set_session_limit_command, static) = { + .path = "set nat44 session limit", + .short_help = "set nat44 session limit [vrf ]", + .function = nat44_set_session_limit_command_fn, +}; + +/*? + * @cliexpar + * @cliexstart{nat44 del session} + * To administratively delete NAT44 session by inside address and port use: + * vpp# nat44 del session in 10.0.0.3:6303 tcp + * To administratively delete NAT44 session by outside address and port use: + * vpp# nat44 del session out 1.0.0.3:6033 udp + * @cliexend +?*/ +VLIB_CLI_COMMAND (nat44_del_session_command, static) = { + .path = "nat44 del session", + .short_help = "nat44 del session in|out : tcp|udp|icmp [vrf ] [external-host :]", + .function = nat44_del_session_command_fn, +}; + +/*? + * @cliexpar + * @cliexstart{nat44 forwarding} + * Enable or disable forwarding + * Forward packets which don't match existing translation + * or static mapping instead of dropping them. + * To enable forwarding, use: + * vpp# nat44 forwarding enable + * To disable forwarding, use: + * vpp# nat44 forwarding disable + * @cliexend +?*/ +VLIB_CLI_COMMAND (snat_forwarding_set_command, static) = { + .path = "nat44 forwarding", + .short_help = "nat44 forwarding enable|disable", + .function = snat_forwarding_set_command_fn, +}; + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/nat/nat44-ed/nat44_ed_format.c b/src/plugins/nat/nat44-ed/nat44_ed_format.c new file mode 100644 index 00000000000..29fd1129f32 --- /dev/null +++ b/src/plugins/nat/nat44-ed/nat44_ed_format.c @@ -0,0 +1,277 @@ +/* + * Copyright (c) 2018 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/** + * @file + * @brief NAT formatting + */ + +#include +#include + +uword +unformat_nat_protocol (unformat_input_t * input, va_list * args) +{ + u32 *r = va_arg (*args, u32 *); + + if (0); +#define _(N, i, n, s) else if (unformat (input, s)) *r = NAT_PROTOCOL_##N; + foreach_nat_protocol +#undef _ + else + return 0; + return 1; +} + +u8 * +format_nat_protocol (u8 * s, va_list * args) +{ + u32 i = va_arg (*args, u32); + u8 *t = 0; + + switch (i) + { +#define _(N, j, n, str) case NAT_PROTOCOL_##N: t = (u8 *) str; break; + foreach_nat_protocol +#undef _ + default: + s = format (s, "unknown"); + return s; + } + s = format (s, "%s", t); + return s; +} + +u8 * +format_nat_addr_and_port_alloc_alg (u8 * s, va_list * args) +{ + u32 i = va_arg (*args, u32); + u8 *t = 0; + + switch (i) + { +#define _(v, N, s) case NAT_ADDR_AND_PORT_ALLOC_ALG_##N: t = (u8 *) s; break; + foreach_nat_addr_and_port_alloc_alg +#undef _ + default: + s = format (s, "unknown"); + return s; + } + s = format (s, "%s", t); + return s; +} + +u8 * +format_snat_key (u8 * s, va_list * args) +{ + u64 key = va_arg (*args, u64); + + ip4_address_t addr; + u16 port; + nat_protocol_t protocol; + u32 fib_index; + + split_nat_key (key, &addr, &port, &fib_index, &protocol); + + s = format (s, "%U proto %U port %d fib %d", + format_ip4_address, &addr, + format_nat_protocol, protocol, + clib_net_to_host_u16 (port), fib_index); + return s; +} + +u8 * +format_snat_session_state (u8 * s, va_list * args) +{ + u32 i = va_arg (*args, u32); + u8 *t = 0; + + switch (i) + { +#define _(v, N, str) case SNAT_SESSION_##N: t = (u8 *) str; break; + foreach_snat_session_state +#undef _ + default: + t = format (t, "unknown"); + } + s = format (s, "%s", t); + return s; +} + +u8 * +format_snat_session (u8 * s, va_list * args) +{ + snat_main_per_thread_data_t *tsm = + va_arg (*args, snat_main_per_thread_data_t *); + snat_session_t *sess = va_arg (*args, snat_session_t *); + + if (snat_is_unk_proto_session (sess)) + { + s = format (s, " i2o %U proto %u fib %u\n", + format_ip4_address, &sess->in2out.addr, + sess->in2out.port, sess->in2out.fib_index); + s = + format (s, " o2i %U proto %u fib %u\n", format_ip4_address, + &sess->out2in.addr, sess->out2in.port, sess->out2in.fib_index); + } + else + { + s = format (s, " i2o %U proto %U port %d fib %d\n", + format_ip4_address, &sess->in2out.addr, + format_nat_protocol, sess->nat_proto, + clib_net_to_host_u16 (sess->in2out.port), + sess->in2out.fib_index); + s = format (s, " o2i %U proto %U port %d fib %d\n", + format_ip4_address, &sess->out2in.addr, format_nat_protocol, + sess->nat_proto, clib_net_to_host_u16 (sess->out2in.port), + sess->out2in.fib_index); + } + if (is_ed_session (sess) || is_fwd_bypass_session (sess)) + { + if (is_twice_nat_session (sess)) + { + s = format (s, " external host o2i %U:%d i2o %U:%d\n", + format_ip4_address, &sess->ext_host_addr, + clib_net_to_host_u16 (sess->ext_host_port), + format_ip4_address, &sess->ext_host_nat_addr, + clib_net_to_host_u16 (sess->ext_host_nat_port)); + } + else + { + if (sess->ext_host_addr.as_u32) + s = format (s, " external host %U:%u\n", + format_ip4_address, &sess->ext_host_addr, + clib_net_to_host_u16 (sess->ext_host_port)); + } + s = format (s, " i2o flow: %U\n", format_nat_6t_flow, &sess->i2o); + s = format (s, " o2i flow: %U\n", format_nat_6t_flow, &sess->o2i); + } + s = format (s, " index %llu\n", sess - tsm->sessions); + s = format (s, " last heard %.2f\n", sess->last_heard); + s = format (s, " total pkts %d, total bytes %lld\n", + sess->total_pkts, 
sess->total_bytes); + if (snat_is_session_static (sess)) + s = format (s, " static translation\n"); + else + s = format (s, " dynamic translation\n"); + if (is_fwd_bypass_session (sess)) + s = format (s, " forwarding-bypass\n"); + if (is_lb_session (sess)) + s = format (s, " load-balancing\n"); + if (is_twice_nat_session (sess)) + s = format (s, " twice-nat\n"); + return s; +} + +u8 * +format_snat_static_mapping (u8 * s, va_list * args) +{ + snat_static_mapping_t *m = va_arg (*args, snat_static_mapping_t *); + nat44_lb_addr_port_t *local; + + if (is_identity_static_mapping (m)) + { + if (is_addr_only_static_mapping (m)) + s = format (s, "identity mapping %U", + format_ip4_address, &m->local_addr); + else + s = format (s, "identity mapping %U %U:%d", + format_nat_protocol, m->proto, + format_ip4_address, &m->local_addr, + clib_net_to_host_u16 (m->local_port)); + + /* *INDENT-OFF* */ + pool_foreach (local, m->locals) + { + s = format (s, " vrf %d", local->vrf_id); + } + /* *INDENT-ON* */ + + return s; + } + + if (is_addr_only_static_mapping (m)) + s = format (s, "local %U external %U vrf %d %s %s", + format_ip4_address, &m->local_addr, + format_ip4_address, &m->external_addr, + m->vrf_id, + m->twice_nat == TWICE_NAT ? "twice-nat" : + m->twice_nat == TWICE_NAT_SELF ? "self-twice-nat" : "", + is_out2in_only_static_mapping (m) ? "out2in-only" : ""); + else + { + if (is_lb_static_mapping (m)) + { + s = format (s, "%U external %U:%d %s %s", + format_nat_protocol, m->proto, + format_ip4_address, &m->external_addr, + clib_net_to_host_u16 (m->external_port), + m->twice_nat == TWICE_NAT ? "twice-nat" : + m->twice_nat == TWICE_NAT_SELF ? "self-twice-nat" : "", + is_out2in_only_static_mapping (m) ? "out2in-only" : ""); + + /* *INDENT-OFF* */ + pool_foreach (local, m->locals) + { + s = format (s, "\n local %U:%d vrf %d probability %d\%", + format_ip4_address, &local->addr, + clib_net_to_host_u16 (local->port), + local->vrf_id, local->probability); + } + /* *INDENT-ON* */ + + } + else + s = format (s, "%U local %U:%d external %U:%d vrf %d %s %s", + format_nat_protocol, m->proto, + format_ip4_address, &m->local_addr, + clib_net_to_host_u16 (m->local_port), + format_ip4_address, &m->external_addr, + clib_net_to_host_u16 (m->external_port), + m->vrf_id, + m->twice_nat == TWICE_NAT ? "twice-nat" : + m->twice_nat == TWICE_NAT_SELF ? "self-twice-nat" : "", + is_out2in_only_static_mapping (m) ? "out2in-only" : ""); + } + return s; +} + +u8 * +format_snat_static_map_to_resolve (u8 * s, va_list * args) +{ + snat_static_map_resolve_t *m = va_arg (*args, snat_static_map_resolve_t *); + vnet_main_t *vnm = vnet_get_main (); + + if (m->addr_only) + s = format (s, "local %U external %U vrf %d", + format_ip4_address, &m->l_addr, + format_vnet_sw_if_index_name, vnm, m->sw_if_index, m->vrf_id); + else + s = format (s, "%U local %U:%d external %U:%d vrf %d", + format_nat_protocol, m->proto, + format_ip4_address, &m->l_addr, + clib_net_to_host_u16 (m->l_port), + format_vnet_sw_if_index_name, vnm, m->sw_if_index, + clib_net_to_host_u16 (m->e_port), m->vrf_id); + + return s; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/nat/nat44-ed/nat44_ed_handoff.c b/src/plugins/nat/nat44-ed/nat44_ed_handoff.c new file mode 100644 index 00000000000..6715ce2f2c4 --- /dev/null +++ b/src/plugins/nat/nat44-ed/nat44_ed_handoff.c @@ -0,0 +1,344 @@ +/* + * Copyright (c) 2018 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @file + * @brief NAT44 worker handoff + */ + +#include +#include +#include +#include +#include + +#include +#include + +typedef struct +{ + u32 next_worker_index; + u32 trace_index; + u8 in2out; + u8 output; +} nat44_handoff_trace_t; + +#define foreach_nat44_handoff_error \ + _ (CONGESTION_DROP, "congestion drop") \ + _ (SAME_WORKER, "same worker") \ + _ (DO_HANDOFF, "do handoff") + +typedef enum +{ +#define _(sym, str) NAT44_HANDOFF_ERROR_##sym, + foreach_nat44_handoff_error +#undef _ + NAT44_HANDOFF_N_ERROR, +} nat44_handoff_error_t; + +static char *nat44_handoff_error_strings[] = { +#define _(sym,string) string, + foreach_nat44_handoff_error +#undef _ +}; + +static u8 * +format_nat44_handoff_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + nat44_handoff_trace_t *t = va_arg (*args, nat44_handoff_trace_t *); + char *tag, *output; + + tag = t->in2out ? "IN2OUT" : "OUT2IN"; + output = t->output ? "OUTPUT-FEATURE" : ""; + s = + format (s, "NAT44_%s_WORKER_HANDOFF %s: next-worker %d trace index %d", + tag, output, t->next_worker_index, t->trace_index); + + return s; +} + +static inline uword +nat44_worker_handoff_fn_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, u8 is_output, + u8 is_in2out) +{ + u32 n_enq, n_left_from, *from, do_handoff = 0, same_worker = 0; + + u16 thread_indices[VLIB_FRAME_SIZE], *ti = thread_indices; + vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs; + snat_main_t *sm = &snat_main; + + u32 fq_index, thread_index = vm->thread_index; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + vlib_get_buffers (vm, from, b, n_left_from); + + if (is_in2out) + { + fq_index = is_output ? 
sm->fq_in2out_output_index : sm->fq_in2out_index; + } + else + { + fq_index = sm->fq_out2in_index; + } + + while (n_left_from >= 4) + { + u32 arc_next0, arc_next1, arc_next2, arc_next3; + u32 sw_if_index0, sw_if_index1, sw_if_index2, sw_if_index3; + u32 rx_fib_index0, rx_fib_index1, rx_fib_index2, rx_fib_index3; + u32 iph_offset0 = 0, iph_offset1 = 0, iph_offset2 = 0, iph_offset3 = 0; + ip4_header_t *ip0, *ip1, *ip2, *ip3; + + if (PREDICT_TRUE (n_left_from >= 8)) + { + vlib_prefetch_buffer_header (b[4], LOAD); + vlib_prefetch_buffer_header (b[5], LOAD); + vlib_prefetch_buffer_header (b[6], LOAD); + vlib_prefetch_buffer_header (b[7], LOAD); + CLIB_PREFETCH (&b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (&b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (&b[6]->data, CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (&b[7]->data, CLIB_CACHE_LINE_BYTES, LOAD); + } + + if (is_output) + { + iph_offset0 = vnet_buffer (b[0])->ip.save_rewrite_length; + iph_offset1 = vnet_buffer (b[1])->ip.save_rewrite_length; + iph_offset2 = vnet_buffer (b[2])->ip.save_rewrite_length; + iph_offset3 = vnet_buffer (b[3])->ip.save_rewrite_length; + } + + ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b[0]) + + iph_offset0); + ip1 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b[1]) + + iph_offset1); + ip2 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b[2]) + + iph_offset2); + ip3 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b[3]) + + iph_offset3); + + vnet_feature_next (&arc_next0, b[0]); + vnet_feature_next (&arc_next1, b[1]); + vnet_feature_next (&arc_next2, b[2]); + vnet_feature_next (&arc_next3, b[3]); + + vnet_buffer2 (b[0])->nat.arc_next = arc_next0; + vnet_buffer2 (b[1])->nat.arc_next = arc_next1; + vnet_buffer2 (b[2])->nat.arc_next = arc_next2; + vnet_buffer2 (b[3])->nat.arc_next = arc_next3; + + sw_if_index0 = vnet_buffer (b[0])->sw_if_index[VLIB_RX]; + sw_if_index1 = vnet_buffer (b[1])->sw_if_index[VLIB_RX]; + sw_if_index2 = vnet_buffer (b[2])->sw_if_index[VLIB_RX]; + sw_if_index3 = vnet_buffer (b[3])->sw_if_index[VLIB_RX]; + + rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0); + rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index1); + rx_fib_index2 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index2); + rx_fib_index3 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index3); + + if (is_in2out) + { + ti[0] = sm->worker_in2out_cb (ip0, rx_fib_index0, is_output); + ti[1] = sm->worker_in2out_cb (ip1, rx_fib_index1, is_output); + ti[2] = sm->worker_in2out_cb (ip2, rx_fib_index2, is_output); + ti[3] = sm->worker_in2out_cb (ip3, rx_fib_index3, is_output); + } + else + { + ti[0] = sm->worker_out2in_cb (b[0], ip0, rx_fib_index0, is_output); + ti[1] = sm->worker_out2in_cb (b[1], ip1, rx_fib_index1, is_output); + ti[2] = sm->worker_out2in_cb (b[2], ip2, rx_fib_index2, is_output); + ti[3] = sm->worker_out2in_cb (b[3], ip3, rx_fib_index3, is_output); + } + + if (ti[0] == thread_index) + same_worker++; + else + do_handoff++; + + if (ti[1] == thread_index) + same_worker++; + else + do_handoff++; + + if (ti[2] == thread_index) + same_worker++; + else + do_handoff++; + + if (ti[3] == thread_index) + same_worker++; + else + do_handoff++; + + b += 4; + ti += 4; + n_left_from -= 4; + } + + while (n_left_from > 0) + { + u32 arc_next0; + u32 sw_if_index0; + u32 rx_fib_index0; + u32 iph_offset0 = 0; + ip4_header_t *ip0; + + + if (is_output) + iph_offset0 = vnet_buffer (b[0])->ip.save_rewrite_length; + + ip0 = (ip4_header_t *) ((u8 *) 
vlib_buffer_get_current (b[0]) + + iph_offset0); + + vnet_feature_next (&arc_next0, b[0]); + vnet_buffer2 (b[0])->nat.arc_next = arc_next0; + + sw_if_index0 = vnet_buffer (b[0])->sw_if_index[VLIB_RX]; + rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0); + + if (is_in2out) + { + ti[0] = sm->worker_in2out_cb (ip0, rx_fib_index0, is_output); + } + else + { + ti[0] = sm->worker_out2in_cb (b[0], ip0, rx_fib_index0, is_output); + } + + if (ti[0] == thread_index) + same_worker++; + else + do_handoff++; + + b += 1; + ti += 1; + n_left_from -= 1; + } + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) + { + u32 i; + b = bufs; + ti = thread_indices; + + for (i = 0; i < frame->n_vectors; i++) + { + if (b[0]->flags & VLIB_BUFFER_IS_TRACED) + { + nat44_handoff_trace_t *t = + vlib_add_trace (vm, node, b[0], sizeof (*t)); + t->next_worker_index = ti[0]; + t->trace_index = vlib_buffer_get_trace_index (b[0]); + t->in2out = is_in2out; + t->output = is_output; + + b += 1; + ti += 1; + } + else + break; + } + } + + n_enq = vlib_buffer_enqueue_to_thread (vm, fq_index, from, thread_indices, + frame->n_vectors, 1); + + if (n_enq < frame->n_vectors) + { + vlib_node_increment_counter (vm, node->node_index, + NAT44_HANDOFF_ERROR_CONGESTION_DROP, + frame->n_vectors - n_enq); + } + + vlib_node_increment_counter (vm, node->node_index, + NAT44_HANDOFF_ERROR_SAME_WORKER, same_worker); + vlib_node_increment_counter (vm, node->node_index, + NAT44_HANDOFF_ERROR_DO_HANDOFF, do_handoff); + return frame->n_vectors; +} + +VLIB_NODE_FN (snat_in2out_worker_handoff_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return nat44_worker_handoff_fn_inline (vm, node, frame, 0, 1); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = { + .name = "nat44-in2out-worker-handoff", + .vector_size = sizeof (u32), + .sibling_of = "nat-default", + .format_trace = format_nat44_handoff_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN(nat44_handoff_error_strings), + .error_strings = nat44_handoff_error_strings, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FN (snat_in2out_output_worker_handoff_node) (vlib_main_t * vm, + vlib_node_runtime_t * + node, + vlib_frame_t * frame) +{ + return nat44_worker_handoff_fn_inline (vm, node, frame, 1, 1); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (snat_in2out_output_worker_handoff_node) = { + .name = "nat44-in2out-output-worker-handoff", + .vector_size = sizeof (u32), + .sibling_of = "nat-default", + .format_trace = format_nat44_handoff_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN(nat44_handoff_error_strings), + .error_strings = nat44_handoff_error_strings, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FN (snat_out2in_worker_handoff_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return nat44_worker_handoff_fn_inline (vm, node, frame, 0, 0); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (snat_out2in_worker_handoff_node) = { + .name = "nat44-out2in-worker-handoff", + .vector_size = sizeof (u32), + .sibling_of = "nat-default", + .format_trace = format_nat44_handoff_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN(nat44_handoff_error_strings), + .error_strings = nat44_handoff_error_strings, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/nat/nat44-ed/nat44_ed_in2out.c b/src/plugins/nat/nat44-ed/nat44_ed_in2out.c new file mode 100644 
index 00000000000..784dea0faf6 --- /dev/null +++ b/src/plugins/nat/nat44-ed/nat44_ed_in2out.c @@ -0,0 +1,1579 @@ +/* + * Copyright (c) 2018 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @file + * @brief NAT44 endpoint-dependent inside to outside network translation + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +/* number of attempts to get a port for ED overloading algorithm, if rolling + * a dice this many times doesn't produce a free port, it's treated + * as if there were no free ports available to conserve resources */ +#define ED_PORT_ALLOC_ATTEMPTS (10) + +static char *nat_in2out_ed_error_strings[] = { +#define _(sym,string) string, + foreach_nat_in2out_ed_error +#undef _ +}; + +typedef struct +{ + u32 sw_if_index; + u32 next_index; + u32 session_index; + nat_translation_error_e translation_error; + nat_6t_flow_t i2of; + nat_6t_flow_t o2if; + clib_bihash_kv_16_8_t search_key; + u8 is_slow_path; + u8 translation_via_i2of; + u8 lookup_skipped; +} nat_in2out_ed_trace_t; + +static u8 * +format_nat_in2out_ed_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + nat_in2out_ed_trace_t *t = va_arg (*args, nat_in2out_ed_trace_t *); + char *tag; + + tag = + t->is_slow_path ? "NAT44_IN2OUT_ED_SLOW_PATH" : + "NAT44_IN2OUT_ED_FAST_PATH"; + + s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag, + t->sw_if_index, t->next_index, t->session_index); + if (~0 != t->session_index) + { + s = format (s, ", translation result '%U' via %s", + format_nat_ed_translation_error, t->translation_error, + t->translation_via_i2of ? "i2of" : "o2if"); + s = format (s, "\n i2of %U", format_nat_6t_flow, &t->i2of); + s = format (s, "\n o2if %U", format_nat_6t_flow, &t->o2if); + } + if (!t->is_slow_path) + { + if (t->lookup_skipped) + { + s = format (s, "\n lookup skipped - cached session index used"); + } + else + { + s = format (s, "\n search key %U", format_ed_session_kvp, + &t->search_key); + } + } + + return s; +} + +/** + * @brief Check if packet should be translated + * + * Packets aimed at outside interface and external address with active session + * should be translated. 
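+ *
+ * In practice the packet is translated (return 0) only when its destination
+ * is not one of the router's own interface addresses and the route to the
+ * destination, looked up in the RX FIB or, failing that, in the configured
+ * outside FIBs, resolves through an interface that is configured as a NAT
+ * outside interface. In every other case the packet is left alone (return 1).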
+ * + * @param sm NAT main + * @param rt NAT runtime data + * @param sw_if_index0 index of the inside interface + * @param ip0 IPv4 header + * @param proto0 NAT protocol + * @param rx_fib_index0 RX FIB index + * + * @returns 0 if packet should be translated otherwise 1 + */ +static inline int +snat_not_translate_fast (snat_main_t *sm, vlib_node_runtime_t *node, + u32 sw_if_index0, ip4_header_t *ip0, u32 proto0, + u32 rx_fib_index0) +{ + fib_node_index_t fei = FIB_NODE_INDEX_INVALID; + nat_outside_fib_t *outside_fib; + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP4, + .fp_len = 32, + .fp_addr = { + .ip4.as_u32 = ip0->dst_address.as_u32, + } + , + }; + + /* Don't NAT packet aimed at the intfc address */ + if (PREDICT_FALSE ( + is_interface_addr (sm, node, sw_if_index0, ip0->dst_address.as_u32))) + return 1; + + fei = fib_table_lookup (rx_fib_index0, &pfx); + if (FIB_NODE_INDEX_INVALID != fei) + { + u32 sw_if_index = fib_entry_get_resolving_interface (fei); + if (sw_if_index == ~0) + { + vec_foreach (outside_fib, sm->outside_fibs) + { + fei = fib_table_lookup (outside_fib->fib_index, &pfx); + if (FIB_NODE_INDEX_INVALID != fei) + { + sw_if_index = fib_entry_get_resolving_interface (fei); + if (sw_if_index != ~0) + break; + } + } + } + if (sw_if_index == ~0) + return 1; + + snat_interface_t *i; + pool_foreach (i, sm->interfaces) + { + /* NAT packet aimed at outside interface */ + if ((nat_interface_is_outside (i)) && + (sw_if_index == i->sw_if_index)) + return 0; + } + } + + return 1; +} + +static int +nat_ed_alloc_addr_and_port (snat_main_t *sm, u32 rx_fib_index, u32 nat_proto, + u32 thread_index, ip4_address_t r_addr, u16 r_port, + u8 proto, u16 port_per_thread, + u32 snat_thread_index, snat_session_t *s, + ip4_address_t *outside_addr, u16 *outside_port) +{ + int i; + snat_address_t *a, *ga = 0; + + const u16 port_thread_offset = (port_per_thread * snat_thread_index) + 1024; + + for (i = 0; i < vec_len (sm->addresses); i++) + { + a = sm->addresses + i; + switch (nat_proto) + { +#define _(N, j, n, unused) \ + case NAT_PROTOCOL_##N: \ + if (a->fib_index == rx_fib_index) \ + { \ + s->o2i.match.daddr = a->addr; \ + /* first try port suggested by caller */ \ + u16 port = clib_net_to_host_u16 (*outside_port); \ + u16 port_offset = port - port_thread_offset; \ + if (port <= port_thread_offset || \ + port > port_thread_offset + port_per_thread) \ + { \ + /* need to pick a different port, suggested port doesn't fit in \ + * this thread's port range */ \ + port_offset = snat_random_port (0, port_per_thread - 1); \ + port = port_thread_offset + port_offset; \ + } \ + u16 attempts = ED_PORT_ALLOC_ATTEMPTS; \ + do \ + { \ + if (NAT_PROTOCOL_ICMP == nat_proto) \ + { \ + s->o2i.match.sport = clib_host_to_net_u16 (port); \ + } \ + s->o2i.match.dport = clib_host_to_net_u16 (port); \ + if (0 == \ + nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2)) \ + { \ + ++a->busy_##n##_port_refcounts[port]; \ + a->busy_##n##_ports_per_thread[thread_index]++; \ + a->busy_##n##_ports++; \ + *outside_addr = a->addr; \ + *outside_port = clib_host_to_net_u16 (port); \ + return 0; \ + } \ + port_offset = snat_random_port (0, port_per_thread - 1); \ + port = port_thread_offset + port_offset; \ + --attempts; \ + } \ + while (attempts > 0); \ + } \ + else if (a->fib_index == ~0) \ + { \ + ga = a; \ + } \ + break; + + foreach_nat_protocol; + default: + nat_elog_info (sm, "unknown protocol"); + return 1; + } + } + + if (ga) + { + /* fake fib_index to reuse macro */ + rx_fib_index = ~0; + a = ga; + switch (nat_proto) + { 
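+      /* The _() macro defined for the first address walk above is still in
+       * scope, so foreach_nat_protocol expands to the same per-protocol
+       * allocation body here. rx_fib_index has been forced to ~0, which makes
+       * the fib_index comparison match the fib-agnostic address (ga) found
+       * during the first pass. */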
+ foreach_nat_protocol; + default: + nat_elog_info (sm, "unknown protocol"); + return 1; + } + } + +#undef _ + + /* Totally out of translations to use... */ + nat_ipfix_logging_addresses_exhausted (thread_index, 0); + return 1; +} + +static_always_inline u32 +nat_outside_fib_index_lookup (snat_main_t * sm, ip4_address_t addr) +{ + fib_node_index_t fei = FIB_NODE_INDEX_INVALID; + nat_outside_fib_t *outside_fib; + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP4, + .fp_len = 32, + .fp_addr = {.ip4.as_u32 = addr.as_u32,} + , + }; + // TODO: multiple vrfs none can resolve addr + vec_foreach (outside_fib, sm->outside_fibs) + { + fei = fib_table_lookup (outside_fib->fib_index, &pfx); + if (FIB_NODE_INDEX_INVALID != fei) + { + if (fib_entry_get_resolving_interface (fei) != ~0) + { + return outside_fib->fib_index; + } + } + } + return ~0; +} + +static_always_inline int +nat44_ed_external_sm_lookup (snat_main_t *sm, ip4_address_t match_addr, + u16 match_port, nat_protocol_t match_protocol, + u32 match_fib_index, ip4_address_t *daddr, + u16 *dport) +{ + clib_bihash_kv_8_8_t kv, value; + init_nat_k (&kv, match_addr, match_port, match_fib_index, match_protocol); + if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value)) + { + /* Try address only mapping */ + init_nat_k (&kv, match_addr, 0, 0, 0); + if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, + &value)) + return 0; + } + + snat_static_mapping_t *m = + pool_elt_at_index (sm->static_mappings, value.value); + *daddr = m->local_addr; + if (dport) + { + /* Address only mapping doesn't change port */ + *dport = is_addr_only_static_mapping (m) ? match_port : m->local_port; + } + return 1; +} + +static u32 +slow_path_ed (snat_main_t *sm, vlib_buffer_t *b, ip4_address_t l_addr, + ip4_address_t r_addr, u16 l_port, u16 r_port, u8 proto, + u32 rx_fib_index, snat_session_t **sessionp, + vlib_node_runtime_t *node, u32 next, u32 thread_index, f64 now) +{ + snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; + ip4_address_t outside_addr; + u16 outside_port; + u32 outside_fib_index; + u8 is_identity_nat; + + u32 nat_proto = ip_proto_to_nat_proto (proto); + snat_session_t *s = NULL; + lb_nat_type_t lb = 0; + ip4_address_t daddr = r_addr; + u16 dport = r_port; + + if (PREDICT_TRUE (nat_proto == NAT_PROTOCOL_TCP)) + { + if (PREDICT_FALSE + (!tcp_flags_is_init + (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))) + { + b->error = node->errors[NAT_IN2OUT_ED_ERROR_NON_SYN]; + return NAT_NEXT_DROP; + } + } + + if (PREDICT_FALSE + (nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index))) + { + if (!nat_lru_free_one (sm, thread_index, now)) + { + b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED]; + nat_ipfix_logging_max_sessions (thread_index, + sm->max_translations_per_thread); + nat_elog_notice (sm, "maximum sessions exceeded"); + return NAT_NEXT_DROP; + } + } + + outside_fib_index = sm->outside_fib_index; + + switch (vec_len (sm->outside_fibs)) + { + case 0: + outside_fib_index = sm->outside_fib_index; + break; + case 1: + outside_fib_index = sm->outside_fibs[0].fib_index; + break; + default: + outside_fib_index = nat_outside_fib_index_lookup (sm, r_addr); + break; + } + + ip4_address_t sm_addr; + u16 sm_port; + u32 sm_fib_index; + /* First try to match static mapping by local address and port */ + int is_sm; + if (snat_static_mapping_match (sm, l_addr, l_port, rx_fib_index, nat_proto, + &sm_addr, &sm_port, &sm_fib_index, 0, 0, 0, + &lb, 0, &is_identity_nat, 0)) + { + is_sm = 0; + } 
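+  /* snat_static_mapping_match () returns 0 on a hit, so is_sm is set only
+   * when a static mapping covers this local address/port. Identity mappings
+   * are dealt with right below by skipping session creation altogether. */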
+ else + { + is_sm = 1; + } + + if (PREDICT_FALSE (is_sm && is_identity_nat)) + { + *sessionp = NULL; + return next; + } + + s = nat_ed_session_alloc (sm, thread_index, now, proto); + ASSERT (s); + + if (!is_sm) + { + s->in2out.addr = l_addr; + s->in2out.port = l_port; + s->nat_proto = nat_proto; + s->in2out.fib_index = rx_fib_index; + s->out2in.fib_index = outside_fib_index; + + // suggest using local port to allocation function + outside_port = l_port; + + // hairpinning? + int is_hairpinning = nat44_ed_external_sm_lookup ( + sm, r_addr, r_port, nat_proto, outside_fib_index, &daddr, &dport); + s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING; + + // destination addr/port updated with real values in + // nat_ed_alloc_addr_and_port + nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, daddr, 0, + s->out2in.fib_index, proto); + nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32); + if (NAT_PROTOCOL_ICMP == nat_proto) + { + nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port); + } + else + { + nat_6t_flow_dport_rewrite_set (&s->o2i, l_port); + } + nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index); + + if (nat_ed_alloc_addr_and_port ( + sm, rx_fib_index, nat_proto, thread_index, daddr, dport, proto, + sm->port_per_thread, tsm->snat_thread_index, s, &outside_addr, + &outside_port)) + { + nat_elog_notice (sm, "addresses exhausted"); + b->error = node->errors[NAT_IN2OUT_ED_ERROR_OUT_OF_PORTS]; + nat_ed_session_delete (sm, s, thread_index, 1); + return NAT_NEXT_DROP; + } + s->out2in.addr = outside_addr; + s->out2in.port = outside_port; + } + else + { + // static mapping + s->out2in.addr = outside_addr = sm_addr; + s->out2in.port = outside_port = sm_port; + s->in2out.addr = l_addr; + s->in2out.port = l_port; + s->nat_proto = nat_proto; + s->in2out.fib_index = rx_fib_index; + s->out2in.fib_index = outside_fib_index; + s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING; + + // hairpinning? 
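+      /* If the destination is itself the external side of another static
+       * mapping, rewrite it to that mapping's local address/port so that two
+       * inside hosts can reach each other via their external addresses. For
+       * example (made-up addresses): 192.0.2.2 sending to 203.0.113.10:80,
+       * which statically maps to 192.0.2.3:8080, gets its destination
+       * rewritten to 192.0.2.3:8080 and the session is flagged as hairpinned.
+       * The 0/1 lookup result is multiplied into the flag word below instead
+       * of branching. */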
+ int is_hairpinning = nat44_ed_external_sm_lookup ( + sm, r_addr, r_port, nat_proto, outside_fib_index, &daddr, &dport); + s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING; + + if (NAT_PROTOCOL_ICMP == nat_proto) + { + nat_6t_o2i_flow_init (sm, thread_index, s, daddr, sm_port, sm_addr, + sm_port, s->out2in.fib_index, proto); + nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port); + } + else + { + nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, sm_addr, + sm_port, s->out2in.fib_index, proto); + nat_6t_flow_dport_rewrite_set (&s->o2i, l_port); + } + nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32); + nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index); + if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2)) + { + nat_elog_notice (sm, "out2in key add failed"); + goto error; + } + } + + if (lb) + s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING; + s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT; + s->ext_host_addr = r_addr; + s->ext_host_port = r_port; + + nat_6t_i2o_flow_init (sm, thread_index, s, l_addr, l_port, r_addr, r_port, + rx_fib_index, proto); + nat_6t_flow_saddr_rewrite_set (&s->i2o, outside_addr.as_u32); + nat_6t_flow_daddr_rewrite_set (&s->i2o, daddr.as_u32); + if (NAT_PROTOCOL_ICMP == nat_proto) + { + nat_6t_flow_icmp_id_rewrite_set (&s->i2o, outside_port); + } + else + { + nat_6t_flow_sport_rewrite_set (&s->i2o, outside_port); + nat_6t_flow_dport_rewrite_set (&s->i2o, dport); + } + nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index); + + if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1)) + { + nat_elog_notice (sm, "in2out key add failed"); + goto error; + } + + /* log NAT event */ + nat_ipfix_logging_nat44_ses_create (thread_index, + s->in2out.addr.as_u32, + s->out2in.addr.as_u32, + s->nat_proto, + s->in2out.port, + s->out2in.port, s->in2out.fib_index); + + nat_syslog_nat44_sadd (0, s->in2out.fib_index, &s->in2out.addr, + s->in2out.port, &s->ext_host_nat_addr, + s->ext_host_nat_port, &s->out2in.addr, s->out2in.port, + &s->ext_host_addr, s->ext_host_port, s->nat_proto, 0); + + per_vrf_sessions_register_session (s, thread_index); + + *sessionp = s; + return next; +error: + if (s) + { + if (!is_sm) + { + snat_free_outside_address_and_port (sm->addresses, thread_index, + &outside_addr, outside_port, + nat_proto); + } + nat_ed_session_delete (sm, s, thread_index, 1); + } + *sessionp = s = NULL; + return NAT_NEXT_DROP; +} + +static_always_inline int +nat44_ed_not_translate (snat_main_t * sm, vlib_node_runtime_t * node, + u32 sw_if_index, ip4_header_t * ip, u32 proto, + u32 rx_fib_index, u32 thread_index) +{ + udp_header_t *udp = ip4_next_header (ip); + clib_bihash_kv_16_8_t kv, value; + + init_ed_k (&kv, ip->dst_address, udp->dst_port, ip->src_address, + udp->src_port, sm->outside_fib_index, ip->protocol); + + /* NAT packet aimed at external address if has active sessions */ + if (clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value)) + { + /* or is static mappings */ + ip4_address_t placeholder_addr; + u16 placeholder_port; + u32 placeholder_fib_index; + if (!snat_static_mapping_match + (sm, ip->dst_address, udp->dst_port, sm->outside_fib_index, proto, + &placeholder_addr, &placeholder_port, &placeholder_fib_index, 1, 0, + 0, 0, 0, 0, 0)) + return 0; + } + else + return 0; + + if (sm->forwarding_enabled) + return 1; + + return snat_not_translate_fast (sm, node, sw_if_index, ip, proto, + rx_fib_index); +} + +static_always_inline int +nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip, + u32 thread_index, f64 now, 
+ vlib_main_t * vm, vlib_buffer_t * b) +{ + clib_bihash_kv_16_8_t kv, value; + snat_session_t *s = 0; + snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; + + if (!sm->forwarding_enabled) + return 0; + + if (ip->protocol == IP_PROTOCOL_ICMP) + { + ip4_address_t lookup_saddr, lookup_daddr; + u16 lookup_sport, lookup_dport; + u8 lookup_protocol; + if (nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr, + &lookup_sport, &lookup_daddr, + &lookup_dport, &lookup_protocol)) + return 0; + init_ed_k (&kv, lookup_saddr, lookup_sport, lookup_daddr, lookup_dport, + 0, lookup_protocol); + } + else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP) + { + init_ed_k (&kv, ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port, + ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port, 0, + ip->protocol); + } + else + { + init_ed_k (&kv, ip->src_address, 0, ip->dst_address, 0, 0, + ip->protocol); + } + + if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value)) + { + ASSERT (thread_index == ed_value_get_thread_index (&value)); + s = + pool_elt_at_index (tsm->sessions, + ed_value_get_session_index (&value)); + + if (is_fwd_bypass_session (s)) + { + if (ip->protocol == IP_PROTOCOL_TCP) + { + nat44_set_tcp_session_state_i2o (sm, now, s, b, thread_index); + } + /* Accounting */ + nat44_session_update_counters (s, now, + vlib_buffer_length_in_chain (vm, b), + thread_index); + /* Per-user LRU list maintenance */ + nat44_session_update_lru (sm, s, thread_index); + return 1; + } + else + return 0; + } + + return 0; +} + +static_always_inline int +nat44_ed_not_translate_output_feature (snat_main_t * sm, ip4_header_t * ip, + u16 src_port, u16 dst_port, + u32 thread_index, u32 rx_sw_if_index, + u32 tx_sw_if_index, f64 now) +{ + clib_bihash_kv_16_8_t kv, value; + snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; + snat_interface_t *i; + snat_session_t *s; + u32 rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (rx_sw_if_index); + u32 tx_fib_index = ip4_fib_table_get_index_for_sw_if_index (tx_sw_if_index); + + /* src NAT check */ + init_ed_k (&kv, ip->src_address, src_port, ip->dst_address, dst_port, + tx_fib_index, ip->protocol); + if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value)) + { + ASSERT (thread_index == ed_value_get_thread_index (&value)); + s = + pool_elt_at_index (tsm->sessions, + ed_value_get_session_index (&value)); + if (nat44_is_ses_closed (s) + && (!s->tcp_closed_timestamp || now >= s->tcp_closed_timestamp)) + { + nat_free_session_data (sm, s, thread_index, 0); + nat_ed_session_delete (sm, s, thread_index, 1); + } + return 1; + } + + /* dst NAT check */ + init_ed_k (&kv, ip->dst_address, dst_port, ip->src_address, src_port, + rx_fib_index, ip->protocol); + if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value)) + { + ASSERT (thread_index == ed_value_get_thread_index (&value)); + s = + pool_elt_at_index (tsm->sessions, + ed_value_get_session_index (&value)); + + if (is_fwd_bypass_session (s)) + return 0; + + /* hairpinning */ + pool_foreach (i, sm->output_feature_interfaces) + { + if ((nat_interface_is_inside (i)) && (rx_sw_if_index == i->sw_if_index)) + return 0; + } + return 1; + } + + return 0; +} + +static inline u32 +icmp_in2out_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip, + icmp46_header_t *icmp, u32 sw_if_index, + u32 rx_fib_index, vlib_node_runtime_t *node, + u32 next, f64 now, u32 thread_index, + nat_protocol_t nat_proto, snat_session_t **s_p) +{ + vlib_main_t *vm = vlib_get_main (); + 
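+  /* ICMP slow path: first derive a pseudo 5-tuple (for echo request/reply the
+   * ICMP identifier stands in for the port, for error messages the tuple is
+   * taken from the embedded inner IP header), then run the same not-translate
+   * checks as TCP/UDP. ICMP error messages are dropped here rather than
+   * creating a new session; for everything else a session is built via
+   * slow_path_ed () and the ICMP checksum is verified over the whole message
+   * for non-fragmented packets. */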
u16 checksum; + int err; + snat_session_t *s = NULL; + u8 lookup_protocol = ip->protocol; + u16 lookup_sport, lookup_dport; + ip4_address_t lookup_saddr, lookup_daddr; + + err = nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr, + &lookup_sport, &lookup_daddr, + &lookup_dport, &lookup_protocol); + if (err != 0) + { + b->error = node->errors[err]; + return NAT_NEXT_DROP; + } + + if (vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0) + { + if (PREDICT_FALSE (nat44_ed_not_translate_output_feature ( + sm, ip, lookup_sport, lookup_dport, thread_index, sw_if_index, + vnet_buffer (b)->sw_if_index[VLIB_TX], now))) + { + return next; + } + } + else + { + if (PREDICT_FALSE (nat44_ed_not_translate (sm, node, sw_if_index, ip, + NAT_PROTOCOL_ICMP, + rx_fib_index, thread_index))) + { + return next; + } + } + + if (PREDICT_FALSE (icmp_type_is_error_message ( + vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))) + { + b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE]; + return NAT_NEXT_DROP; + } + + next = slow_path_ed (sm, b, ip->src_address, ip->dst_address, lookup_sport, + lookup_dport, ip->protocol, rx_fib_index, &s, node, + next, thread_index, vlib_time_now (vm)); + + if (NAT_NEXT_DROP == next) + goto out; + + if (PREDICT_TRUE (!ip4_is_fragment (ip))) + { + ip_csum_t sum = ip_incremental_checksum_buffer ( + vm, b, (u8 *) icmp - (u8 *) vlib_buffer_get_current (b), + ntohs (ip->length) - ip4_header_bytes (ip), 0); + checksum = ~ip_csum_fold (sum); + if (PREDICT_FALSE (checksum != 0 && checksum != 0xffff)) + { + next = NAT_NEXT_DROP; + goto out; + } + } + +out: + if (PREDICT_TRUE (next != NAT_NEXT_DROP && s)) + { + /* Accounting */ + nat44_session_update_counters ( + s, now, vlib_buffer_length_in_chain (vm, b), thread_index); + /* Per-user LRU list maintenance */ + nat44_session_update_lru (sm, s, thread_index); + } + *s_p = s; + return next; +} + +static snat_session_t * +nat44_ed_in2out_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b, + ip4_header_t *ip, u32 rx_fib_index, + u32 thread_index, f64 now, + vlib_main_t *vm, + vlib_node_runtime_t *node) +{ + clib_bihash_kv_8_8_t kv, value; + clib_bihash_kv_16_8_t s_kv, s_value; + snat_static_mapping_t *m = NULL; + snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; + snat_session_t *s = NULL; + u32 outside_fib_index = sm->outside_fib_index; + int i; + ip4_address_t new_src_addr = { 0 }; + ip4_address_t new_dst_addr = ip->dst_address; + + if (PREDICT_FALSE ( + nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index))) + { + b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED]; + nat_ipfix_logging_max_sessions (thread_index, + sm->max_translations_per_thread); + nat_elog_notice (sm, "maximum sessions exceeded"); + return 0; + } + + switch (vec_len (sm->outside_fibs)) + { + case 0: + outside_fib_index = sm->outside_fib_index; + break; + case 1: + outside_fib_index = sm->outside_fibs[0].fib_index; + break; + default: + outside_fib_index = nat_outside_fib_index_lookup (sm, ip->dst_address); + break; + } + + init_nat_k (&kv, ip->src_address, 0, rx_fib_index, 0); + + /* Try to find static mapping first */ + if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value)) + { + m = pool_elt_at_index (sm->static_mappings, value.value); + new_src_addr = m->external_addr; + } + else + { + pool_foreach (s, tsm->sessions) + { + if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32) + { + init_ed_k (&s_kv, s->out2in.addr, 0, ip->dst_address, 0, + outside_fib_index, ip->protocol); + if 
(clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value)) + { + new_src_addr = s->out2in.addr; + } + break; + } + } + + if (!new_src_addr.as_u32) + { + for (i = 0; i < vec_len (sm->addresses); i++) + { + init_ed_k (&s_kv, sm->addresses[i].addr, 0, ip->dst_address, 0, + outside_fib_index, ip->protocol); + if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value)) + { + new_src_addr = sm->addresses[i].addr; + } + } + } + } + + if (!new_src_addr.as_u32) + { + // could not allocate address for translation ... + return 0; + } + + s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol); + if (!s) + { + b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED]; + nat_elog_warn (sm, "create NAT session failed"); + return 0; + } + + nat_6t_i2o_flow_init (sm, thread_index, s, ip->src_address, 0, + ip->dst_address, 0, rx_fib_index, ip->protocol); + nat_6t_flow_saddr_rewrite_set (&s->i2o, new_src_addr.as_u32); + nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index); + + // hairpinning? + int is_hairpinning = + nat44_ed_external_sm_lookup (sm, ip->dst_address, 0, NAT_PROTOCOL_OTHER, + outside_fib_index, &new_dst_addr, NULL); + s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING; + + nat_6t_flow_daddr_rewrite_set (&s->i2o, new_dst_addr.as_u32); + nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index); + + nat_6t_o2i_flow_init (sm, thread_index, s, new_dst_addr, 0, new_src_addr, 0, + outside_fib_index, ip->protocol); + nat_6t_flow_saddr_rewrite_set (&s->o2i, ip->dst_address.as_u32); + nat_6t_flow_daddr_rewrite_set (&s->o2i, ip->src_address.as_u32); + nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index); + + s->ext_host_addr.as_u32 = ip->dst_address.as_u32; + s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO; + s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT; + s->out2in.addr.as_u32 = new_src_addr.as_u32; + s->out2in.fib_index = outside_fib_index; + s->in2out.addr.as_u32 = ip->src_address.as_u32; + s->in2out.fib_index = rx_fib_index; + s->in2out.port = s->out2in.port = ip->protocol; + if (m) + s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING; + + if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1)) + { + nat_elog_notice (sm, "in2out flow hash add failed"); + nat_ed_session_delete (sm, s, thread_index, 1); + return NULL; + } + + if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 1)) + { + nat_elog_notice (sm, "out2in flow hash add failed"); + nat_ed_session_delete (sm, s, thread_index, 1); + return NULL; + } + + per_vrf_sessions_register_session (s, thread_index); + + /* Accounting */ + nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b), + thread_index); + /* Per-user LRU list maintenance */ + nat44_session_update_lru (sm, s, thread_index); + + return s; +} + +static inline uword +nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + int is_output_feature) +{ + u32 n_left_from, *from; + snat_main_t *sm = &snat_main; + f64 now = vlib_time_now (vm); + u32 thread_index = vm->thread_index; + snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; + u32 def_slow = is_output_feature ? 
NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH + : NAT_NEXT_IN2OUT_ED_SLOW_PATH; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs; + u16 nexts[VLIB_FRAME_SIZE], *next = nexts; + vlib_get_buffers (vm, from, b, n_left_from); + + while (n_left_from > 0) + { + vlib_buffer_t *b0; + u32 sw_if_index0, rx_fib_index0, iph_offset0 = 0; + nat_protocol_t proto0; + ip4_header_t *ip0; + snat_session_t *s0 = 0; + clib_bihash_kv_16_8_t kv0, value0; + nat_translation_error_e translation_error = NAT_ED_TRNSL_ERR_SUCCESS; + nat_6t_flow_t *f = 0; + ip4_address_t lookup_saddr, lookup_daddr; + u16 lookup_sport, lookup_dport; + u8 lookup_protocol; + int lookup_skipped = 0; + + b0 = *b; + b++; + + /* Prefetch next iteration. */ + if (PREDICT_TRUE (n_left_from >= 2)) + { + vlib_buffer_t *p2; + + p2 = *b; + + vlib_prefetch_buffer_header (p2, LOAD); + + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD); + } + + if (is_output_feature) + { + iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length; + } + + next[0] = vnet_buffer2 (b0)->nat.arc_next; + + ip0 = + (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + iph_offset0); + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + rx_fib_index0 = + fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index0); + + if (PREDICT_FALSE (ip0->ttl == 1)) + { + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, + 0); + next[0] = NAT_NEXT_ICMP_ERROR; + goto trace0; + } + + proto0 = ip_proto_to_nat_proto (ip0->protocol); + + if (is_output_feature) + { + if (PREDICT_FALSE + (nat_not_translate_output_feature_fwd + (sm, ip0, thread_index, now, vm, b0))) + goto trace0; + } + + if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP)) + { + if (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_request && + vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_reply && + !icmp_type_is_error_message ( + vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags)) + { + b0->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE]; + next[0] = NAT_NEXT_DROP; + goto trace0; + } + int err = nat_get_icmp_session_lookup_values ( + b0, ip0, &lookup_saddr, &lookup_sport, &lookup_daddr, + &lookup_dport, &lookup_protocol); + if (err != 0) + { + b0->error = node->errors[err]; + next[0] = NAT_NEXT_DROP; + goto trace0; + } + } + else + { + lookup_protocol = ip0->protocol; + lookup_saddr = ip0->src_address; + lookup_daddr = ip0->dst_address; + lookup_sport = vnet_buffer (b0)->ip.reass.l4_src_port; + lookup_dport = vnet_buffer (b0)->ip.reass.l4_dst_port; + } + + /* there might be a stashed index in vnet_buffer2 from handoff or + * classify node, see if it can be used */ + if (!pool_is_free_index (tsm->sessions, + vnet_buffer2 (b0)->nat.cached_session_index)) + { + s0 = pool_elt_at_index (tsm->sessions, + vnet_buffer2 (b0)->nat.cached_session_index); + if (PREDICT_TRUE ( + nat_6t_flow_match (&s0->i2o, b0, lookup_saddr, lookup_sport, + lookup_daddr, lookup_dport, lookup_protocol, + rx_fib_index0) + // for some hairpinning cases there are two "i2i" flows instead + // of i2o and o2i as both hosts are on inside + || (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING && + nat_6t_flow_match ( + &s0->o2i, b0, lookup_saddr, lookup_sport, lookup_daddr, + lookup_dport, lookup_protocol, rx_fib_index0)))) + { + /* yes, this is the droid we're looking for */ + lookup_skipped = 1; + goto skip_lookup; + } + s0 = NULL; + 
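+	  /* the stashed index is stale or belongs to a different flow, fall
+	   * back to the full flow hash lookup below */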
} + + init_ed_k (&kv0, ip0->src_address, lookup_sport, ip0->dst_address, + lookup_dport, rx_fib_index0, lookup_protocol); + + // lookup flow + if (clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0)) + { + // flow does not exist go slow path + next[0] = def_slow; + goto trace0; + } + + ASSERT (thread_index == ed_value_get_thread_index (&value0)); + s0 = + pool_elt_at_index (tsm->sessions, + ed_value_get_session_index (&value0)); + + skip_lookup: + + if (PREDICT_FALSE (per_vrf_sessions_is_expired (s0, thread_index))) + { + // session is closed, go slow path + nat_free_session_data (sm, s0, thread_index, 0); + nat_ed_session_delete (sm, s0, thread_index, 1); + next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH; + goto trace0; + } + + if (s0->tcp_closed_timestamp) + { + if (now >= s0->tcp_closed_timestamp) + { + // session is closed, go slow path, freed in slow path + next[0] = def_slow; + } + else + { + // session in transitory timeout, drop + b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TCP_CLOSED]; + next[0] = NAT_NEXT_DROP; + } + goto trace0; + } + + // drop if session expired + u64 sess_timeout_time; + sess_timeout_time = + s0->last_heard + (f64) nat44_session_get_timeout (sm, s0); + if (now >= sess_timeout_time) + { + nat_free_session_data (sm, s0, thread_index, 0); + nat_ed_session_delete (sm, s0, thread_index, 1); + // session is closed, go slow path + next[0] = def_slow; + goto trace0; + } + + b0->flags |= VNET_BUFFER_F_IS_NATED; + + if (nat_6t_flow_match (&s0->i2o, b0, lookup_saddr, lookup_sport, + lookup_daddr, lookup_dport, lookup_protocol, + rx_fib_index0)) + { + f = &s0->i2o; + } + else if (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING && + nat_6t_flow_match (&s0->o2i, b0, lookup_saddr, lookup_sport, + lookup_daddr, lookup_dport, lookup_protocol, + rx_fib_index0)) + { + f = &s0->o2i; + } + else + { + translation_error = NAT_ED_TRNSL_ERR_FLOW_MISMATCH; + nat_free_session_data (sm, s0, thread_index, 0); + nat_ed_session_delete (sm, s0, thread_index, 1); + next[0] = NAT_NEXT_DROP; + goto trace0; + } + + if (NAT_ED_TRNSL_ERR_SUCCESS != + (translation_error = nat_6t_flow_buf_translate ( + sm, b0, ip0, f, proto0, is_output_feature))) + { + nat_free_session_data (sm, s0, thread_index, 0); + nat_ed_session_delete (sm, s0, thread_index, 1); + next[0] = NAT_NEXT_DROP; + goto trace0; + } + + switch (proto0) + { + case NAT_PROTOCOL_TCP: + vlib_increment_simple_counter (&sm->counters.fastpath.in2out.tcp, + thread_index, sw_if_index0, 1); + nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index); + break; + case NAT_PROTOCOL_UDP: + vlib_increment_simple_counter (&sm->counters.fastpath.in2out.udp, + thread_index, sw_if_index0, 1); + break; + case NAT_PROTOCOL_ICMP: + vlib_increment_simple_counter (&sm->counters.fastpath.in2out.icmp, + thread_index, sw_if_index0, 1); + break; + case NAT_PROTOCOL_OTHER: + vlib_increment_simple_counter (&sm->counters.fastpath.in2out.other, + thread_index, sw_if_index0, 1); + break; + } + + /* Accounting */ + nat44_session_update_counters (s0, now, + vlib_buffer_length_in_chain (vm, b0), + thread_index); + /* Per-user LRU list maintenance */ + nat44_session_update_lru (sm, s0, thread_index); + + trace0: + if (PREDICT_FALSE + ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + nat_in2out_ed_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next[0]; + t->is_slow_path = 0; + t->translation_error = translation_error; + t->lookup_skipped = lookup_skipped; + clib_memcpy 
(&t->search_key, &kv0, sizeof (t->search_key)); + + if (s0) + { + t->session_index = s0 - tsm->sessions; + clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of)); + clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if)); + t->translation_via_i2of = (&s0->i2o == f); + } + else + { + t->session_index = ~0; + } + } + + if (next[0] == NAT_NEXT_DROP) + { + vlib_increment_simple_counter (&sm->counters.fastpath.in2out.drops, + thread_index, sw_if_index0, 1); + } + + n_left_from--; + next++; + } + + vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts, + frame->n_vectors); + return frame->n_vectors; +} + +static inline uword +nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + int is_output_feature) +{ + u32 n_left_from, *from; + snat_main_t *sm = &snat_main; + f64 now = vlib_time_now (vm); + u32 thread_index = vm->thread_index; + snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs; + u16 nexts[VLIB_FRAME_SIZE], *next = nexts; + vlib_get_buffers (vm, from, b, n_left_from); + + while (n_left_from > 0) + { + vlib_buffer_t *b0; + u32 sw_if_index0, rx_fib_index0, iph_offset0 = 0; + nat_protocol_t proto0; + ip4_header_t *ip0; + udp_header_t *udp0; + icmp46_header_t *icmp0; + snat_session_t *s0 = 0; + clib_bihash_kv_16_8_t kv0, value0; + int translation_error = NAT_ED_TRNSL_ERR_SUCCESS; + + b0 = *b; + + if (is_output_feature) + iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length; + + next[0] = vnet_buffer2 (b0)->nat.arc_next; + + ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + + iph_offset0); + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + rx_fib_index0 = + fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index0); + + if (PREDICT_FALSE (ip0->ttl == 1)) + { + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, + 0); + next[0] = NAT_NEXT_ICMP_ERROR; + goto trace0; + } + + udp0 = ip4_next_header (ip0); + icmp0 = (icmp46_header_t *) udp0; + proto0 = ip_proto_to_nat_proto (ip0->protocol); + + if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER)) + { + s0 = nat44_ed_in2out_slowpath_unknown_proto ( + sm, b0, ip0, rx_fib_index0, thread_index, now, vm, node); + if (!s0) + next[0] = NAT_NEXT_DROP; + + if (NAT_ED_TRNSL_ERR_SUCCESS != + (translation_error = nat_6t_flow_buf_translate ( + sm, b0, ip0, &s0->i2o, proto0, is_output_feature))) + { + goto trace0; + } + + vlib_increment_simple_counter (&sm->counters.slowpath.in2out.other, + thread_index, sw_if_index0, 1); + goto trace0; + } + + if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP)) + { + next[0] = icmp_in2out_ed_slow_path (sm, b0, ip0, icmp0, sw_if_index0, + rx_fib_index0, node, next[0], + now, thread_index, proto0, &s0); + if (NAT_NEXT_DROP != next[0] && s0 && + NAT_ED_TRNSL_ERR_SUCCESS != + (translation_error = nat_6t_flow_buf_translate ( + sm, b0, ip0, &s0->i2o, proto0, is_output_feature))) + { + goto trace0; + } + + vlib_increment_simple_counter (&sm->counters.slowpath.in2out.icmp, + thread_index, sw_if_index0, 1); + goto trace0; + } + + init_ed_k (&kv0, ip0->src_address, + vnet_buffer (b0)->ip.reass.l4_src_port, ip0->dst_address, + vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0, + ip0->protocol); + if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0)) + { + ASSERT (thread_index == 
ed_value_get_thread_index (&value0)); + s0 = + pool_elt_at_index (tsm->sessions, + ed_value_get_session_index (&value0)); + + if (s0->tcp_closed_timestamp && now >= s0->tcp_closed_timestamp) + { + nat_free_session_data (sm, s0, thread_index, 0); + nat_ed_session_delete (sm, s0, thread_index, 1); + s0 = NULL; + } + } + + if (!s0) + { + if (is_output_feature) + { + if (PREDICT_FALSE + (nat44_ed_not_translate_output_feature + (sm, ip0, vnet_buffer (b0)->ip.reass.l4_src_port, + vnet_buffer (b0)->ip.reass.l4_dst_port, thread_index, + sw_if_index0, vnet_buffer (b0)->sw_if_index[VLIB_TX], + now))) + goto trace0; + + /* + * Send DHCP packets to the ipv4 stack, or we won't + * be able to use dhcp client on the outside interface + */ + if (PREDICT_FALSE + (proto0 == NAT_PROTOCOL_UDP + && (vnet_buffer (b0)->ip.reass.l4_dst_port == + clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_server)) + && ip0->dst_address.as_u32 == 0xffffffff)) + goto trace0; + } + else + { + if (PREDICT_FALSE + (nat44_ed_not_translate + (sm, node, sw_if_index0, ip0, proto0, rx_fib_index0, + thread_index))) + goto trace0; + } + + next[0] = + slow_path_ed (sm, b0, ip0->src_address, ip0->dst_address, + vnet_buffer (b0)->ip.reass.l4_src_port, + vnet_buffer (b0)->ip.reass.l4_dst_port, + ip0->protocol, rx_fib_index0, &s0, node, next[0], + thread_index, now); + + if (PREDICT_FALSE (next[0] == NAT_NEXT_DROP)) + goto trace0; + + if (PREDICT_FALSE (!s0)) + goto trace0; + + } + + b0->flags |= VNET_BUFFER_F_IS_NATED; + + if (NAT_ED_TRNSL_ERR_SUCCESS != + (translation_error = nat_6t_flow_buf_translate ( + sm, b0, ip0, &s0->i2o, proto0, is_output_feature))) + { + nat_free_session_data (sm, s0, thread_index, 0); + nat_ed_session_delete (sm, s0, thread_index, 1); + s0 = NULL; + goto trace0; + } + + if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP)) + { + vlib_increment_simple_counter (&sm->counters.slowpath.in2out.tcp, + thread_index, sw_if_index0, 1); + nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index); + } + else + { + vlib_increment_simple_counter (&sm->counters.slowpath.in2out.udp, + thread_index, sw_if_index0, 1); + } + + /* Accounting */ + nat44_session_update_counters (s0, now, + vlib_buffer_length_in_chain + (vm, b0), thread_index); + /* Per-user LRU list maintenance */ + nat44_session_update_lru (sm, s0, thread_index); + + trace0: + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + nat_in2out_ed_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next[0]; + t->is_slow_path = 1; + t->translation_error = translation_error; + clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key)); + + if (s0) + { + t->session_index = s0 - tsm->sessions; + clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of)); + clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if)); + t->translation_via_i2of = 1; + } + + else + { + t->session_index = ~0; + } + } + + if (next[0] == NAT_NEXT_DROP) + { + vlib_increment_simple_counter (&sm->counters.slowpath.in2out.drops, + thread_index, sw_if_index0, 1); + } + + n_left_from--; + next++; + b++; + } + + vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts, + frame->n_vectors); + + return frame->n_vectors; +} + +VLIB_NODE_FN (nat44_ed_in2out_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 0); +} + +VLIB_REGISTER_NODE (nat44_ed_in2out_node) = { + .name = "nat44-ed-in2out", + .vector_size = sizeof (u32), + 
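+  /* sibling_of shares the next-node table of "nat-default", so the
+   * NAT_NEXT_* indices used by the node function above remain valid here */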
.sibling_of = "nat-default", + .format_trace = format_nat_in2out_ed_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings), + .error_strings = nat_in2out_ed_error_strings, + .runtime_data_bytes = sizeof (snat_runtime_t), +}; + +VLIB_NODE_FN (nat44_ed_in2out_output_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return nat44_ed_in2out_fast_path_node_fn_inline (vm, node, frame, 1); +} + +VLIB_REGISTER_NODE (nat44_ed_in2out_output_node) = { + .name = "nat44-ed-in2out-output", + .vector_size = sizeof (u32), + .sibling_of = "nat-default", + .format_trace = format_nat_in2out_ed_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings), + .error_strings = nat_in2out_ed_error_strings, + .runtime_data_bytes = sizeof (snat_runtime_t), +}; + +VLIB_NODE_FN (nat44_ed_in2out_slowpath_node) (vlib_main_t * vm, + vlib_node_runtime_t * + node, vlib_frame_t * frame) +{ + return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 0); +} + +VLIB_REGISTER_NODE (nat44_ed_in2out_slowpath_node) = { + .name = "nat44-ed-in2out-slowpath", + .vector_size = sizeof (u32), + .sibling_of = "nat-default", + .format_trace = format_nat_in2out_ed_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings), + .error_strings = nat_in2out_ed_error_strings, + .runtime_data_bytes = sizeof (snat_runtime_t), +}; + +VLIB_NODE_FN (nat44_ed_in2out_output_slowpath_node) (vlib_main_t * vm, + vlib_node_runtime_t + * node, + vlib_frame_t * frame) +{ + return nat44_ed_in2out_slow_path_node_fn_inline (vm, node, frame, 1); +} + +VLIB_REGISTER_NODE (nat44_ed_in2out_output_slowpath_node) = { + .name = "nat44-ed-in2out-output-slowpath", + .vector_size = sizeof (u32), + .sibling_of = "nat-default", + .format_trace = format_nat_in2out_ed_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN (nat_in2out_ed_error_strings), + .error_strings = nat_in2out_ed_error_strings, + .runtime_data_bytes = sizeof (snat_runtime_t), +}; + +static u8 * +format_nat_pre_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + nat_pre_trace_t *t = va_arg (*args, nat_pre_trace_t *); + return format (s, "in2out next_index %d arc_next_index %d", t->next_index, + t->arc_next_index); +} + +VLIB_NODE_FN (nat_pre_in2out_node) + (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return nat_pre_node_fn_inline (vm, node, frame, + NAT_NEXT_IN2OUT_ED_FAST_PATH); +} + +VLIB_NODE_FN (nat_pre_in2out_output_node) + (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return nat_pre_node_fn_inline (vm, node, frame, + NAT_NEXT_IN2OUT_ED_OUTPUT_FAST_PATH); +} + +VLIB_REGISTER_NODE (nat_pre_in2out_node) = { + .name = "nat-pre-in2out", + .vector_size = sizeof (u32), + .sibling_of = "nat-default", + .format_trace = format_nat_pre_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = 0, +}; + +VLIB_REGISTER_NODE (nat_pre_in2out_output_node) = { + .name = "nat-pre-in2out-output", + .vector_size = sizeof (u32), + .sibling_of = "nat-default", + .format_trace = format_nat_pre_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = 0, +}; + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/nat/nat44-ed/nat44_ed_inlines.h b/src/plugins/nat/nat44-ed/nat44_ed_inlines.h new file mode 
100644 index 00000000000..0623940cb6d --- /dev/null +++ b/src/plugins/nat/nat44-ed/nat44_ed_inlines.h @@ -0,0 +1,869 @@ +/* + * Copyright (c) 2018 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @brief The NAT inline functions + */ + +#ifndef __included_nat44_ed_inlines_h__ +#define __included_nat44_ed_inlines_h__ + +#include +#include +#include + +#include +#include + +always_inline u64 +calc_nat_key (ip4_address_t addr, u16 port, u32 fib_index, u8 proto) +{ + ASSERT (fib_index <= (1 << 14) - 1); + ASSERT (proto <= (1 << 3) - 1); + return (u64) addr.as_u32 << 32 | (u64) port << 16 | fib_index << 3 | + (proto & 0x7); +} + +always_inline void +split_nat_key (u64 key, ip4_address_t *addr, u16 *port, u32 *fib_index, + nat_protocol_t *proto) +{ + if (addr) + { + addr->as_u32 = key >> 32; + } + if (port) + { + *port = (key >> 16) & (u16) ~0; + } + if (fib_index) + { + *fib_index = key >> 3 & ((1 << 13) - 1); + } + if (proto) + { + *proto = key & 0x7; + } +} + +always_inline void +init_nat_k (clib_bihash_kv_8_8_t *kv, ip4_address_t addr, u16 port, + u32 fib_index, nat_protocol_t proto) +{ + kv->key = calc_nat_key (addr, port, fib_index, proto); + kv->value = ~0ULL; +} + +always_inline void +init_nat_kv (clib_bihash_kv_8_8_t *kv, ip4_address_t addr, u16 port, + u32 fib_index, nat_protocol_t proto, u32 thread_index, + u32 session_index) +{ + init_nat_k (kv, addr, port, fib_index, proto); + kv->value = (u64) thread_index << 32 | session_index; +} + +always_inline void +init_nat_i2o_k (clib_bihash_kv_8_8_t *kv, snat_session_t *s) +{ + return init_nat_k (kv, s->in2out.addr, s->in2out.port, s->in2out.fib_index, + s->nat_proto); +} + +always_inline void +init_nat_i2o_kv (clib_bihash_kv_8_8_t *kv, snat_session_t *s, u32 thread_index, + u32 session_index) +{ + init_nat_k (kv, s->in2out.addr, s->in2out.port, s->in2out.fib_index, + s->nat_proto); + kv->value = (u64) thread_index << 32 | session_index; +} + +always_inline void +init_nat_o2i_k (clib_bihash_kv_8_8_t *kv, snat_session_t *s) +{ + return init_nat_k (kv, s->out2in.addr, s->out2in.port, s->out2in.fib_index, + s->nat_proto); +} + +always_inline void +init_nat_o2i_kv (clib_bihash_kv_8_8_t *kv, snat_session_t *s, u32 thread_index, + u32 session_index) +{ + init_nat_k (kv, s->out2in.addr, s->out2in.port, s->out2in.fib_index, + s->nat_proto); + kv->value = (u64) thread_index << 32 | session_index; +} + +always_inline u32 +nat_value_get_thread_index (clib_bihash_kv_8_8_t *value) +{ + return value->value >> 32; +} + +always_inline u32 +nat_value_get_session_index (clib_bihash_kv_8_8_t *value) +{ + return value->value & ~(u32) 0; +} + +always_inline void +init_ed_k (clib_bihash_kv_16_8_t *kv, ip4_address_t l_addr, u16 l_port, + ip4_address_t r_addr, u16 r_port, u32 fib_index, u8 proto) +{ + kv->key[0] = (u64) r_addr.as_u32 << 32 | l_addr.as_u32; + kv->key[1] = + (u64) r_port << 48 | (u64) l_port << 32 | fib_index << 8 | proto; +} + +always_inline void +init_ed_kv (clib_bihash_kv_16_8_t *kv, ip4_address_t 
l_addr, u16 l_port, + ip4_address_t r_addr, u16 r_port, u32 fib_index, u8 proto, + u32 thread_index, u32 session_index) +{ + init_ed_k (kv, l_addr, l_port, r_addr, r_port, fib_index, proto); + kv->value = (u64) thread_index << 32 | session_index; +} + +always_inline u32 +ed_value_get_thread_index (clib_bihash_kv_16_8_t *value) +{ + return value->value >> 32; +} + +always_inline u32 +ed_value_get_session_index (clib_bihash_kv_16_8_t *value) +{ + return value->value & ~(u32) 0; +} + +always_inline void +split_ed_kv (clib_bihash_kv_16_8_t *kv, ip4_address_t *l_addr, + ip4_address_t *r_addr, u8 *proto, u32 *fib_index, u16 *l_port, + u16 *r_port) +{ + if (l_addr) + { + l_addr->as_u32 = kv->key[0] & (u32) ~0; + } + if (r_addr) + { + r_addr->as_u32 = kv->key[0] >> 32; + } + if (r_port) + { + *r_port = kv->key[1] >> 48; + } + if (l_port) + { + *l_port = (kv->key[1] >> 32) & (u16) ~0; + } + if (fib_index) + { + *fib_index = (kv->key[1] >> 8) & ((1 << 24) - 1); + } + if (proto) + { + *proto = kv->key[1] & (u8) ~0; + } +} + +static_always_inline int +nat_get_icmp_session_lookup_values (vlib_buffer_t *b, ip4_header_t *ip0, + ip4_address_t *lookup_saddr, + u16 *lookup_sport, + ip4_address_t *lookup_daddr, + u16 *lookup_dport, u8 *lookup_protocol) +{ + icmp46_header_t *icmp0; + icmp_echo_header_t *echo0, *inner_echo0 = 0; + ip4_header_t *inner_ip0 = 0; + void *l4_header = 0; + icmp46_header_t *inner_icmp0; + + icmp0 = (icmp46_header_t *) ip4_next_header (ip0); + echo0 = (icmp_echo_header_t *) (icmp0 + 1); + + // avoid warning about unused variables in caller by setting to bogus values + *lookup_sport = 0; + *lookup_dport = 0; + + if (!icmp_type_is_error_message ( + vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)) + { + *lookup_protocol = IP_PROTOCOL_ICMP; + lookup_saddr->as_u32 = ip0->src_address.as_u32; + *lookup_sport = vnet_buffer (b)->ip.reass.l4_src_port; + lookup_daddr->as_u32 = ip0->dst_address.as_u32; + *lookup_dport = vnet_buffer (b)->ip.reass.l4_dst_port; + } + else + { + inner_ip0 = (ip4_header_t *) (echo0 + 1); + l4_header = ip4_next_header (inner_ip0); + *lookup_protocol = inner_ip0->protocol; + lookup_saddr->as_u32 = inner_ip0->dst_address.as_u32; + lookup_daddr->as_u32 = inner_ip0->src_address.as_u32; + switch (ip_proto_to_nat_proto (inner_ip0->protocol)) + { + case NAT_PROTOCOL_ICMP: + inner_icmp0 = (icmp46_header_t *) l4_header; + inner_echo0 = (icmp_echo_header_t *) (inner_icmp0 + 1); + *lookup_sport = inner_echo0->identifier; + *lookup_dport = inner_echo0->identifier; + break; + case NAT_PROTOCOL_UDP: + case NAT_PROTOCOL_TCP: + *lookup_sport = ((tcp_udp_header_t *) l4_header)->dst_port; + *lookup_dport = ((tcp_udp_header_t *) l4_header)->src_port; + break; + default: + return NAT_IN2OUT_ED_ERROR_UNSUPPORTED_PROTOCOL; + } + } + return 0; +} + +always_inline u32 +nat44_session_get_timeout (snat_main_t *sm, snat_session_t *s) +{ + switch (s->nat_proto) + { + case NAT_PROTOCOL_ICMP: + return sm->timeouts.icmp; + case NAT_PROTOCOL_UDP: + return sm->timeouts.udp; + case NAT_PROTOCOL_TCP: + { + if (s->state) + return sm->timeouts.tcp.transitory; + else + return sm->timeouts.tcp.established; + } + default: + return sm->timeouts.udp; + } + + return 0; +} + +static_always_inline u8 +nat44_ed_maximum_sessions_exceeded (snat_main_t *sm, u32 fib_index, + u32 thread_index) +{ + u32 translations; + translations = pool_elts (sm->per_thread_data[thread_index].sessions); + if (vec_len (sm->max_translations_per_fib) <= fib_index) + fib_index = 0; + return translations >= 
sm->max_translations_per_fib[fib_index]; +} + +static_always_inline int +nat_ed_lru_insert (snat_main_per_thread_data_t *tsm, snat_session_t *s, + f64 now, u8 proto) +{ + dlist_elt_t *lru_list_elt; + pool_get (tsm->lru_pool, lru_list_elt); + s->lru_index = lru_list_elt - tsm->lru_pool; + switch (proto) + { + case IP_PROTOCOL_UDP: + s->lru_head_index = tsm->udp_lru_head_index; + break; + case IP_PROTOCOL_TCP: + s->lru_head_index = tsm->tcp_trans_lru_head_index; + break; + case IP_PROTOCOL_ICMP: + s->lru_head_index = tsm->icmp_lru_head_index; + break; + default: + s->lru_head_index = tsm->unk_proto_lru_head_index; + break; + } + clib_dlist_addtail (tsm->lru_pool, s->lru_head_index, s->lru_index); + lru_list_elt->value = s - tsm->sessions; + s->last_lru_update = now; + return 1; +} + +static_always_inline void +nat_6t_flow_to_ed_k (clib_bihash_kv_16_8_t *kv, nat_6t_flow_t *f) +{ + init_ed_k (kv, f->match.saddr, f->match.sport, f->match.daddr, + f->match.dport, f->match.fib_index, f->match.proto); +} + +static_always_inline void +nat_6t_flow_to_ed_kv (clib_bihash_kv_16_8_t *kv, nat_6t_flow_t *f, + u32 thread_idx, u32 session_idx) +{ + init_ed_kv (kv, f->match.saddr, f->match.sport, f->match.daddr, + f->match.dport, f->match.fib_index, f->match.proto, thread_idx, + session_idx); +} + +static_always_inline int +nat_ed_ses_i2o_flow_hash_add_del (snat_main_t *sm, u32 thread_idx, + snat_session_t *s, int is_add) +{ + snat_main_per_thread_data_t *tsm = + vec_elt_at_index (sm->per_thread_data, thread_idx); + clib_bihash_kv_16_8_t kv; + if (0 == is_add) + { + nat_6t_flow_to_ed_k (&kv, &s->i2o); + } + else + { + nat_6t_flow_to_ed_kv (&kv, &s->i2o, thread_idx, s - tsm->sessions); + nat_6t_l3_l4_csum_calc (&s->i2o); + } + return clib_bihash_add_del_16_8 (&sm->flow_hash, &kv, is_add); +} + +static_always_inline int +nat_ed_ses_o2i_flow_hash_add_del (snat_main_t *sm, u32 thread_idx, + snat_session_t *s, int is_add) +{ + snat_main_per_thread_data_t *tsm = + vec_elt_at_index (sm->per_thread_data, thread_idx); + clib_bihash_kv_16_8_t kv; + if (0 == is_add) + { + nat_6t_flow_to_ed_k (&kv, &s->o2i); + } + else + { + nat_6t_flow_to_ed_kv (&kv, &s->o2i, thread_idx, s - tsm->sessions); + nat_6t_l3_l4_csum_calc (&s->o2i); + } + return clib_bihash_add_del_16_8 (&sm->flow_hash, &kv, is_add); +} + +always_inline void +nat_ed_session_delete (snat_main_t *sm, snat_session_t *ses, u32 thread_index, + int lru_delete + /* delete from global LRU list */) +{ + snat_main_per_thread_data_t *tsm = + vec_elt_at_index (sm->per_thread_data, thread_index); + + if (lru_delete) + { + clib_dlist_remove (tsm->lru_pool, ses->lru_index); + } + pool_put_index (tsm->lru_pool, ses->lru_index); + if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, ses, 0)) + nat_elog_warn (sm, "flow hash del failed"); + if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, ses, 0)) + nat_elog_warn (sm, "flow hash del failed"); + pool_put (tsm->sessions, ses); + vlib_set_simple_counter (&sm->total_sessions, thread_index, 0, + pool_elts (tsm->sessions)); +} + +static_always_inline int +nat_lru_free_one_with_head (snat_main_t *sm, int thread_index, f64 now, + u32 head_index) +{ + snat_session_t *s = NULL; + dlist_elt_t *oldest_elt; + f64 sess_timeout_time; + u32 oldest_index; + snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; + oldest_index = clib_dlist_remove_head (tsm->lru_pool, head_index); + if (~0 != oldest_index) + { + oldest_elt = pool_elt_at_index (tsm->lru_pool, oldest_index); + s = pool_elt_at_index (tsm->sessions, 
oldest_elt->value); + + sess_timeout_time = + s->last_heard + (f64) nat44_session_get_timeout (sm, s); + if (now >= sess_timeout_time || + (s->tcp_closed_timestamp && now >= s->tcp_closed_timestamp)) + { + nat_free_session_data (sm, s, thread_index, 0); + nat_ed_session_delete (sm, s, thread_index, 0); + return 1; + } + else + { + clib_dlist_addhead (tsm->lru_pool, head_index, oldest_index); + } + } + return 0; +} + +static_always_inline int +nat_lru_free_one (snat_main_t *sm, int thread_index, f64 now) +{ + snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; + int rc = 0; +#define _(p) \ + if ((rc = nat_lru_free_one_with_head (sm, thread_index, now, \ + tsm->p##_lru_head_index))) \ + { \ + return rc; \ + } + _ (tcp_trans); + _ (udp); + _ (unk_proto); + _ (icmp); + _ (tcp_estab); +#undef _ + return 0; +} + +static_always_inline snat_session_t * +nat_ed_session_alloc (snat_main_t *sm, u32 thread_index, f64 now, u8 proto) +{ + snat_session_t *s; + snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; + + nat_lru_free_one (sm, thread_index, now); + + pool_get (tsm->sessions, s); + clib_memset (s, 0, sizeof (*s)); + + nat_ed_lru_insert (tsm, s, now, proto); + + s->ha_last_refreshed = now; + vlib_set_simple_counter (&sm->total_sessions, thread_index, 0, + pool_elts (tsm->sessions)); + return s; +} + +// slow path +static_always_inline void +per_vrf_sessions_cleanup (u32 thread_index) +{ + snat_main_t *sm = &snat_main; + snat_main_per_thread_data_t *tsm = + vec_elt_at_index (sm->per_thread_data, thread_index); + per_vrf_sessions_t *per_vrf_sessions; + u32 *to_free = 0, *i; + + vec_foreach (per_vrf_sessions, tsm->per_vrf_sessions_vec) + { + if (per_vrf_sessions->expired) + { + if (per_vrf_sessions->ses_count == 0) + { + vec_add1 (to_free, per_vrf_sessions - tsm->per_vrf_sessions_vec); + } + } + } + + if (vec_len (to_free)) + { + vec_foreach (i, to_free) + { + vec_del1 (tsm->per_vrf_sessions_vec, *i); + } + } + + vec_free (to_free); +} + +// slow path +static_always_inline void +per_vrf_sessions_register_session (snat_session_t *s, u32 thread_index) +{ + snat_main_t *sm = &snat_main; + snat_main_per_thread_data_t *tsm = + vec_elt_at_index (sm->per_thread_data, thread_index); + per_vrf_sessions_t *per_vrf_sessions; + + per_vrf_sessions_cleanup (thread_index); + + // s->per_vrf_sessions_index == ~0 ... 
reuse of old session + + vec_foreach (per_vrf_sessions, tsm->per_vrf_sessions_vec) + { + // ignore already expired registrations + if (per_vrf_sessions->expired) + continue; + + if ((s->in2out.fib_index == per_vrf_sessions->rx_fib_index) && + (s->out2in.fib_index == per_vrf_sessions->tx_fib_index)) + { + goto done; + } + if ((s->in2out.fib_index == per_vrf_sessions->tx_fib_index) && + (s->out2in.fib_index == per_vrf_sessions->rx_fib_index)) + { + goto done; + } + } + + // create a new registration + vec_add2 (tsm->per_vrf_sessions_vec, per_vrf_sessions, 1); + clib_memset (per_vrf_sessions, 0, sizeof (*per_vrf_sessions)); + + per_vrf_sessions->rx_fib_index = s->in2out.fib_index; + per_vrf_sessions->tx_fib_index = s->out2in.fib_index; + +done: + s->per_vrf_sessions_index = per_vrf_sessions - tsm->per_vrf_sessions_vec; + per_vrf_sessions->ses_count++; +} + +// fast path +static_always_inline void +per_vrf_sessions_unregister_session (snat_session_t *s, u32 thread_index) +{ + snat_main_t *sm = &snat_main; + snat_main_per_thread_data_t *tsm; + per_vrf_sessions_t *per_vrf_sessions; + + ASSERT (s->per_vrf_sessions_index != ~0); + + tsm = vec_elt_at_index (sm->per_thread_data, thread_index); + per_vrf_sessions = + vec_elt_at_index (tsm->per_vrf_sessions_vec, s->per_vrf_sessions_index); + + ASSERT (per_vrf_sessions->ses_count != 0); + + per_vrf_sessions->ses_count--; + s->per_vrf_sessions_index = ~0; +} + +// fast path +static_always_inline u8 +per_vrf_sessions_is_expired (snat_session_t *s, u32 thread_index) +{ + snat_main_t *sm = &snat_main; + snat_main_per_thread_data_t *tsm; + per_vrf_sessions_t *per_vrf_sessions; + + ASSERT (s->per_vrf_sessions_index != ~0); + + tsm = vec_elt_at_index (sm->per_thread_data, thread_index); + per_vrf_sessions = + vec_elt_at_index (tsm->per_vrf_sessions_vec, s->per_vrf_sessions_index); + return per_vrf_sessions->expired; +} + +static_always_inline void +nat_6t_flow_init (nat_6t_flow_t *f, u32 thread_idx, ip4_address_t saddr, + u16 sport, ip4_address_t daddr, u16 dport, u32 fib_index, + u8 proto, u32 session_idx) +{ + clib_memset (f, 0, sizeof (*f)); + f->match.saddr = saddr; + f->match.sport = sport; + f->match.daddr = daddr; + f->match.dport = dport; + f->match.proto = proto; + f->match.fib_index = fib_index; +} + +static_always_inline void +nat_6t_i2o_flow_init (snat_main_t *sm, u32 thread_idx, snat_session_t *s, + ip4_address_t saddr, u16 sport, ip4_address_t daddr, + u16 dport, u32 fib_index, u8 proto) +{ + snat_main_per_thread_data_t *tsm = + vec_elt_at_index (sm->per_thread_data, thread_idx); + nat_6t_flow_init (&s->i2o, thread_idx, saddr, sport, daddr, dport, fib_index, + proto, s - tsm->sessions); +} + +static_always_inline void +nat_6t_o2i_flow_init (snat_main_t *sm, u32 thread_idx, snat_session_t *s, + ip4_address_t saddr, u16 sport, ip4_address_t daddr, + u16 dport, u32 fib_index, u8 proto) +{ + snat_main_per_thread_data_t *tsm = + vec_elt_at_index (sm->per_thread_data, thread_idx); + nat_6t_flow_init (&s->o2i, thread_idx, saddr, sport, daddr, dport, fib_index, + proto, s - tsm->sessions); +} + +static_always_inline int +nat_6t_flow_match (nat_6t_flow_t *f, vlib_buffer_t *b, ip4_address_t saddr, + u16 sport, ip4_address_t daddr, u16 dport, u8 protocol, + u32 fib_index) +{ + return f->match.daddr.as_u32 == daddr.as_u32 && + f->match.dport == vnet_buffer (b)->ip.reass.l4_dst_port && + f->match.proto == protocol && f->match.fib_index == fib_index && + f->match.saddr.as_u32 == saddr.as_u32 && + f->match.sport == vnet_buffer (b)->ip.reass.l4_src_port; +} + 
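
The inline helpers above reduce every NAT44-ED lookup to a single clib_bihash_kv_16_8_t: init_ed_k () packs the local and remote addresses into key[0] and the two ports, FIB index and IP protocol into key[1], while init_ed_kv () stores the owning worker in the upper 32 bits of the value and the session pool index in the lower 32 bits (unpacked again by ed_value_get_thread_index () / ed_value_get_session_index ()). The following is a minimal, compile-anywhere sketch of that bit layout only; it uses plain stdint types and hypothetical names (kv16_8_t, ed_kv_pack) as stand-ins for the VPP structures and is not part of this patch.

#include <stdint.h>
#include <stdio.h>

/* stand-in for clib_bihash_kv_16_8_t: 16-byte key, 8-byte value */
typedef struct
{
  uint64_t key[2];
  uint64_t value;
} kv16_8_t;

/* mirrors the bit layout used by init_ed_k () / init_ed_kv () above */
static void
ed_kv_pack (kv16_8_t *kv, uint32_t l_addr, uint16_t l_port, uint32_t r_addr,
	    uint16_t r_port, uint32_t fib_index, uint8_t proto,
	    uint32_t thread_index, uint32_t session_index)
{
  kv->key[0] = (uint64_t) r_addr << 32 | l_addr;
  kv->key[1] =
    (uint64_t) r_port << 48 | (uint64_t) l_port << 32 | fib_index << 8 | proto;
  kv->value = (uint64_t) thread_index << 32 | session_index;
}

int
main (void)
{
  kv16_8_t kv;
  /* 10.0.0.1:12345 -> 8.8.8.8:53, fib 0, UDP, owned by worker 2, session 42 */
  ed_kv_pack (&kv, 0x0a000001, 12345, 0x08080808, 53, 0, 17, 2, 42);
  /* same unpacking as ed_value_get_thread_index () / _session_index () */
  printf ("thread %u, session %u\n", (unsigned) (kv.value >> 32),
	  (unsigned) (kv.value & 0xffffffffULL));
  return 0;
}

Keeping both directions of a session in one flow hash keyed this way is what lets the fast-path nodes below resolve in2out and out2in traffic with a single bihash search per packet.
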
+static inline uword +nat_pre_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, u32 def_next) +{ + u32 n_left_from, *from; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs; + u16 nexts[VLIB_FRAME_SIZE], *next = nexts; + vlib_get_buffers (vm, from, b, n_left_from); + + while (n_left_from >= 2) + { + u32 next0, next1; + u32 arc_next0, arc_next1; + vlib_buffer_t *b0, *b1; + + b0 = *b; + b++; + b1 = *b; + b++; + + /* Prefetch next iteration. */ + if (PREDICT_TRUE (n_left_from >= 4)) + { + vlib_buffer_t *p2, *p3; + + p2 = *b; + p3 = *(b + 1); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, LOAD); + } + + next0 = def_next; + next1 = def_next; + + vnet_feature_next (&arc_next0, b0); + vnet_feature_next (&arc_next1, b1); + + vnet_buffer2 (b0)->nat.arc_next = arc_next0; + vnet_buffer2 (b1)->nat.arc_next = arc_next1; + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + nat_pre_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->next_index = next0; + t->arc_next_index = arc_next0; + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) + { + nat_pre_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->next_index = next1; + t->arc_next_index = arc_next1; + } + } + + n_left_from -= 2; + next[0] = next0; + next[1] = next1; + next += 2; + } + + while (n_left_from > 0) + { + u32 next0; + u32 arc_next0; + vlib_buffer_t *b0; + + b0 = *b; + b++; + + next0 = def_next; + vnet_feature_next (&arc_next0, b0); + vnet_buffer2 (b0)->nat.arc_next = arc_next0; + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) && + (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + nat_pre_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->next_index = next0; + t->arc_next_index = arc_next0; + } + + n_left_from--; + next[0] = next0; + next++; + } + vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts, + frame->n_vectors); + + return frame->n_vectors; +} + +static_always_inline u16 +snat_random_port (u16 min, u16 max) +{ + snat_main_t *sm = &snat_main; + u32 rwide; + u16 r; + + rwide = random_u32 (&sm->random_seed); + r = rwide & 0xFFFF; + if (r >= min && r <= max) + return r; + + return min + (rwide % (max - min + 1)); +} + +always_inline u8 +is_interface_addr (snat_main_t *sm, vlib_node_runtime_t *node, + u32 sw_if_index0, u32 ip4_addr) +{ + snat_runtime_t *rt = (snat_runtime_t *) node->runtime_data; + ip4_address_t *first_int_addr; + + if (PREDICT_FALSE (rt->cached_sw_if_index != sw_if_index0)) + { + first_int_addr = ip4_interface_first_address ( + sm->ip4_main, sw_if_index0, 0 /* just want the address */); + rt->cached_sw_if_index = sw_if_index0; + if (first_int_addr) + rt->cached_ip4_address = first_int_addr->as_u32; + else + rt->cached_ip4_address = 0; + } + + if (PREDICT_FALSE (ip4_addr == rt->cached_ip4_address)) + return 1; + else + return 0; +} + +always_inline void +nat44_set_tcp_session_state_i2o (snat_main_t *sm, f64 now, snat_session_t *ses, + vlib_buffer_t *b, u32 thread_index) +{ + snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; + u8 tcp_flags = vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags; + u32 tcp_ack_number = vnet_buffer (b)->ip.reass.tcp_ack_number; + u32 tcp_seq_number = vnet_buffer (b)->ip.reass.tcp_seq_number; + if ((ses->state == 0) && 
(tcp_flags & TCP_FLAG_RST)) + ses->state = NAT44_SES_RST; + if ((ses->state == NAT44_SES_RST) && !(tcp_flags & TCP_FLAG_RST)) + ses->state = 0; + if ((tcp_flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_I2O_SYN) && + (ses->state & NAT44_SES_O2I_SYN)) + ses->state = 0; + if (tcp_flags & TCP_FLAG_SYN) + ses->state |= NAT44_SES_I2O_SYN; + if (tcp_flags & TCP_FLAG_FIN) + { + ses->i2o_fin_seq = clib_net_to_host_u32 (tcp_seq_number); + ses->state |= NAT44_SES_I2O_FIN; + } + if ((tcp_flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_O2I_FIN)) + { + if (clib_net_to_host_u32 (tcp_ack_number) > ses->o2i_fin_seq) + { + ses->state |= NAT44_SES_O2I_FIN_ACK; + if (nat44_is_ses_closed (ses)) + { // if session is now closed, save the timestamp + ses->tcp_closed_timestamp = now + sm->timeouts.tcp.transitory; + ses->last_lru_update = now; + } + } + } + + // move the session to proper LRU + if (ses->state) + { + ses->lru_head_index = tsm->tcp_trans_lru_head_index; + } + else + { + ses->lru_head_index = tsm->tcp_estab_lru_head_index; + } + clib_dlist_remove (tsm->lru_pool, ses->lru_index); + clib_dlist_addtail (tsm->lru_pool, ses->lru_head_index, ses->lru_index); +} + +always_inline void +nat44_set_tcp_session_state_o2i (snat_main_t *sm, f64 now, snat_session_t *ses, + u8 tcp_flags, u32 tcp_ack_number, + u32 tcp_seq_number, u32 thread_index) +{ + snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; + if ((ses->state == 0) && (tcp_flags & TCP_FLAG_RST)) + ses->state = NAT44_SES_RST; + if ((ses->state == NAT44_SES_RST) && !(tcp_flags & TCP_FLAG_RST)) + ses->state = 0; + if ((tcp_flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_I2O_SYN) && + (ses->state & NAT44_SES_O2I_SYN)) + ses->state = 0; + if (tcp_flags & TCP_FLAG_SYN) + ses->state |= NAT44_SES_O2I_SYN; + if (tcp_flags & TCP_FLAG_FIN) + { + ses->o2i_fin_seq = clib_net_to_host_u32 (tcp_seq_number); + ses->state |= NAT44_SES_O2I_FIN; + } + if ((tcp_flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_I2O_FIN)) + { + if (clib_net_to_host_u32 (tcp_ack_number) > ses->i2o_fin_seq) + ses->state |= NAT44_SES_I2O_FIN_ACK; + if (nat44_is_ses_closed (ses)) + { // if session is now closed, save the timestamp + ses->tcp_closed_timestamp = now + sm->timeouts.tcp.transitory; + ses->last_lru_update = now; + } + } + // move the session to proper LRU + if (ses->state) + { + ses->lru_head_index = tsm->tcp_trans_lru_head_index; + } + else + { + ses->lru_head_index = tsm->tcp_estab_lru_head_index; + } + clib_dlist_remove (tsm->lru_pool, ses->lru_index); + clib_dlist_addtail (tsm->lru_pool, ses->lru_head_index, ses->lru_index); +} + +always_inline void +nat44_session_update_counters (snat_session_t *s, f64 now, uword bytes, + u32 thread_index) +{ + s->last_heard = now; + s->total_pkts++; + s->total_bytes += bytes; +} + +/** \brief Per-user LRU list maintenance */ +always_inline void +nat44_session_update_lru (snat_main_t *sm, snat_session_t *s, u32 thread_index) +{ + /* don't update too often - timeout is in magnitude of seconds anyway */ + if (s->last_heard > s->last_lru_update + 1) + { + clib_dlist_remove (sm->per_thread_data[thread_index].lru_pool, + s->lru_index); + clib_dlist_addtail (sm->per_thread_data[thread_index].lru_pool, + s->lru_head_index, s->lru_index); + s->last_lru_update = s->last_heard; + } +} + +#endif /* __included_nat44_ed_inlines_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/nat/nat44-ed/nat44_ed_out2in.c 
b/src/plugins/nat/nat44-ed/nat44_ed_out2in.c new file mode 100644 index 00000000000..4d354d3e8ec --- /dev/null +++ b/src/plugins/nat/nat44-ed/nat44_ed_out2in.c @@ -0,0 +1,1443 @@ +/* + * Copyright (c) 2018 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @file + * @brief NAT44 endpoint-dependent outside to inside network translation + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +static char *nat_out2in_ed_error_strings[] = { +#define _(sym,string) string, + foreach_nat_out2in_ed_error +#undef _ +}; + +typedef struct +{ + u32 sw_if_index; + u32 next_index; + u32 session_index; + nat_translation_error_e translation_error; + nat_6t_flow_t i2of; + nat_6t_flow_t o2if; + clib_bihash_kv_16_8_t search_key; + u8 is_slow_path; + u8 translation_via_i2of; + u8 lookup_skipped; +} nat44_ed_out2in_trace_t; + +static u8 * +format_nat44_ed_out2in_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + nat44_ed_out2in_trace_t *t = va_arg (*args, nat44_ed_out2in_trace_t *); + char *tag; + + tag = + t->is_slow_path ? "NAT44_OUT2IN_ED_SLOW_PATH" : + "NAT44_OUT2IN_ED_FAST_PATH"; + + s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag, + t->sw_if_index, t->next_index, t->session_index); + if (~0 != t->session_index) + { + s = format (s, ", translation result '%U' via %s", + format_nat_ed_translation_error, t->translation_error, + t->translation_via_i2of ? 
"i2of" : "o2if"); + s = format (s, "\n i2of %U", format_nat_6t_flow, &t->i2of); + s = format (s, "\n o2if %U", format_nat_6t_flow, &t->o2if); + } + if (!t->is_slow_path) + { + if (t->lookup_skipped) + { + s = format (s, "\n lookup skipped - cached session index used"); + } + else + { + s = format (s, "\n search key %U", format_ed_session_kvp, + &t->search_key); + } + } + + return s; +} + +static int +next_src_nat (snat_main_t *sm, ip4_header_t *ip, u16 src_port, u16 dst_port, + u32 thread_index, u32 rx_fib_index) +{ + clib_bihash_kv_16_8_t kv, value; + + init_ed_k (&kv, ip->src_address, src_port, ip->dst_address, dst_port, + rx_fib_index, ip->protocol); + if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value)) + return 1; + + return 0; +} + +static void create_bypass_for_fwd (snat_main_t *sm, vlib_buffer_t *b, + snat_session_t *s, ip4_header_t *ip, + u32 rx_fib_index, u32 thread_index); + +static snat_session_t *create_session_for_static_mapping_ed ( + snat_main_t *sm, vlib_buffer_t *b, ip4_address_t i2o_addr, u16 i2o_port, + u32 i2o_fib_index, ip4_address_t o2i_addr, u16 o2i_port, u32 o2i_fib_index, + nat_protocol_t nat_proto, vlib_node_runtime_t *node, u32 rx_fib_index, + u32 thread_index, twice_nat_type_t twice_nat, lb_nat_type_t lb_nat, f64 now, + snat_static_mapping_t *mapping); + +static inline u32 +icmp_out2in_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip, + icmp46_header_t *icmp, u32 sw_if_index, + u32 rx_fib_index, vlib_node_runtime_t *node, + u32 next, f64 now, u32 thread_index, + snat_session_t **s_p) +{ + vlib_main_t *vm = vlib_get_main (); + + ip_csum_t sum; + u16 checksum; + + snat_session_t *s = 0; + u8 is_addr_only, identity_nat; + ip4_address_t sm_addr; + u16 sm_port; + u32 sm_fib_index; + snat_static_mapping_t *m; + u8 lookup_protocol; + ip4_address_t lookup_saddr, lookup_daddr; + u16 lookup_sport, lookup_dport; + + sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX]; + rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index); + + if (nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr, &lookup_sport, + &lookup_daddr, &lookup_dport, + &lookup_protocol)) + { + b->error = node->errors[NAT_OUT2IN_ED_ERROR_UNSUPPORTED_PROTOCOL]; + next = NAT_NEXT_DROP; + goto out; + } + + if (snat_static_mapping_match ( + sm, ip->dst_address, lookup_sport, rx_fib_index, + ip_proto_to_nat_proto (ip->protocol), &sm_addr, &sm_port, + &sm_fib_index, 1, &is_addr_only, 0, 0, 0, &identity_nat, &m)) + { + // static mapping not matched + if (!sm->forwarding_enabled) + { + /* Don't NAT packet aimed at the intfc address */ + if (!is_interface_addr (sm, node, sw_if_index, + ip->dst_address.as_u32)) + { + b->error = node->errors[NAT_OUT2IN_ED_ERROR_NO_TRANSLATION]; + next = NAT_NEXT_DROP; + } + } + else + { + if (next_src_nat (sm, ip, lookup_sport, lookup_dport, thread_index, + rx_fib_index)) + { + next = NAT_NEXT_IN2OUT_ED_FAST_PATH; + } + else + { + create_bypass_for_fwd (sm, b, s, ip, rx_fib_index, thread_index); + } + } + goto out; + } + + if (PREDICT_FALSE (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_reply && + (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_request || + !is_addr_only))) + { + b->error = node->errors[NAT_OUT2IN_ED_ERROR_BAD_ICMP_TYPE]; + next = NAT_NEXT_DROP; + goto out; + } + + if (PREDICT_FALSE (identity_nat)) + { + goto out; + } + + /* Create session initiated by host from external network */ + s = create_session_for_static_mapping_ed ( + sm, b, sm_addr, sm_port, sm_fib_index, ip->dst_address, 
lookup_sport, + rx_fib_index, ip_proto_to_nat_proto (lookup_protocol), node, rx_fib_index, + thread_index, 0, 0, vlib_time_now (vm), m); + if (!s) + next = NAT_NEXT_DROP; + + if (PREDICT_TRUE (!ip4_is_fragment (ip))) + { + sum = ip_incremental_checksum_buffer ( + vm, b, (u8 *) icmp - (u8 *) vlib_buffer_get_current (b), + ntohs (ip->length) - ip4_header_bytes (ip), 0); + checksum = ~ip_csum_fold (sum); + if (checksum != 0 && checksum != 0xffff) + { + next = NAT_NEXT_DROP; + goto out; + } + } + + if (PREDICT_TRUE (next != NAT_NEXT_DROP && s)) + { + /* Accounting */ + nat44_session_update_counters ( + s, now, vlib_buffer_length_in_chain (vm, b), thread_index); + /* Per-user LRU list maintenance */ + nat44_session_update_lru (sm, s, thread_index); + } +out: + if (NAT_NEXT_DROP == next && s) + { + nat_ed_session_delete (sm, s, thread_index, 1); + s = 0; + } + *s_p = s; + return next; +} + +// allocate exact address based on preference +static_always_inline int +nat_alloc_addr_and_port_exact (snat_address_t * a, + u32 thread_index, + nat_protocol_t proto, + ip4_address_t * addr, + u16 * port, + u16 port_per_thread, u32 snat_thread_index) +{ + snat_main_t *sm = &snat_main; + u32 portnum; + + switch (proto) + { +#define _(N, j, n, s) \ + case NAT_PROTOCOL_##N: \ + if (a->busy_##n##_ports_per_thread[thread_index] < port_per_thread) \ + { \ + while (1) \ + { \ + portnum = (port_per_thread * \ + snat_thread_index) + \ + snat_random_port(0, port_per_thread - 1) + 1024; \ + if (a->busy_##n##_port_refcounts[portnum]) \ + continue; \ + --a->busy_##n##_port_refcounts[portnum]; \ + a->busy_##n##_ports_per_thread[thread_index]++; \ + a->busy_##n##_ports++; \ + *addr = a->addr; \ + *port = clib_host_to_net_u16(portnum); \ + return 0; \ + } \ + } \ + break; + foreach_nat_protocol +#undef _ + default : nat_elog_info (sm, "unknown protocol"); + return 1; + } + + /* Totally out of translations to use... 
*/ + nat_ipfix_logging_addresses_exhausted (thread_index, 0); + return 1; +} + +static_always_inline int +nat44_ed_alloc_outside_addr_and_port (snat_address_t *addresses, u32 fib_index, + u32 thread_index, nat_protocol_t proto, + ip4_address_t *addr, u16 *port, + u16 port_per_thread, + u32 snat_thread_index) +{ + snat_main_t *sm = &snat_main; + snat_address_t *a, *ga = 0; + u32 portnum; + int i; + + for (i = 0; i < vec_len (addresses); i++) + { + a = addresses + i; + switch (proto) + { +#define _(N, j, n, s) \ + case NAT_PROTOCOL_##N: \ + if (a->busy_##n##_ports_per_thread[thread_index] < port_per_thread) \ + { \ + if (a->fib_index == fib_index) \ + { \ + while (1) \ + { \ + portnum = (port_per_thread * snat_thread_index) + \ + snat_random_port (0, port_per_thread - 1) + 1024; \ + if (a->busy_##n##_port_refcounts[portnum]) \ + continue; \ + --a->busy_##n##_port_refcounts[portnum]; \ + a->busy_##n##_ports_per_thread[thread_index]++; \ + a->busy_##n##_ports++; \ + *addr = a->addr; \ + *port = clib_host_to_net_u16 (portnum); \ + return 0; \ + } \ + } \ + else if (a->fib_index == ~0) \ + { \ + ga = a; \ + } \ + } \ + break; + foreach_nat_protocol +#undef _ + default : nat_elog_info (sm, "unknown protocol"); + return 1; + } + } + + if (ga) + { + a = ga; + switch (proto) + { +#define _(N, j, n, s) \ + case NAT_PROTOCOL_##N: \ + while (1) \ + { \ + portnum = (port_per_thread * snat_thread_index) + \ + snat_random_port (0, port_per_thread - 1) + 1024; \ + if (a->busy_##n##_port_refcounts[portnum]) \ + continue; \ + ++a->busy_##n##_port_refcounts[portnum]; \ + a->busy_##n##_ports_per_thread[thread_index]++; \ + a->busy_##n##_ports++; \ + *addr = a->addr; \ + *port = clib_host_to_net_u16 (portnum); \ + return 0; \ + } + break; + foreach_nat_protocol +#undef _ + default : nat_elog_info (sm, "unknown protocol"); + return 1; + } + } + + /* Totally out of translations to use... */ + nat_ipfix_logging_addresses_exhausted (thread_index, 0); + return 1; +} + +static snat_session_t * +create_session_for_static_mapping_ed ( + snat_main_t *sm, vlib_buffer_t *b, ip4_address_t i2o_addr, u16 i2o_port, + u32 i2o_fib_index, ip4_address_t o2i_addr, u16 o2i_port, u32 o2i_fib_index, + nat_protocol_t nat_proto, vlib_node_runtime_t *node, u32 rx_fib_index, + u32 thread_index, twice_nat_type_t twice_nat, lb_nat_type_t lb_nat, f64 now, + snat_static_mapping_t *mapping) +{ + snat_session_t *s; + ip4_header_t *ip; + udp_header_t *udp; + snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; + + if (PREDICT_FALSE + (nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index))) + { + b->error = node->errors[NAT_OUT2IN_ED_ERROR_MAX_SESSIONS_EXCEEDED]; + nat_elog_notice (sm, "maximum sessions exceeded"); + return 0; + } + + s = nat_ed_session_alloc (sm, thread_index, now, nat_proto); + if (!s) + { + b->error = node->errors[NAT_OUT2IN_ED_ERROR_MAX_SESSIONS_EXCEEDED]; + nat_elog_warn (sm, "create NAT session failed"); + return 0; + } + + ip = vlib_buffer_get_current (b); + udp = ip4_next_header (ip); + + s->ext_host_addr.as_u32 = ip->src_address.as_u32; + s->ext_host_port = nat_proto == NAT_PROTOCOL_ICMP ? 
0 : udp->src_port; + s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING; + if (lb_nat) + s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING; + if (lb_nat == AFFINITY_LB_NAT) + s->flags |= SNAT_SESSION_FLAG_AFFINITY; + s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT; + s->out2in.addr = o2i_addr; + s->out2in.port = o2i_port; + s->out2in.fib_index = o2i_fib_index; + s->in2out.addr = i2o_addr; + s->in2out.port = i2o_port; + s->in2out.fib_index = i2o_fib_index; + s->nat_proto = nat_proto; + + if (NAT_PROTOCOL_ICMP == nat_proto) + { + nat_6t_o2i_flow_init (sm, thread_index, s, s->ext_host_addr, o2i_port, + o2i_addr, o2i_port, o2i_fib_index, ip->protocol); + nat_6t_flow_icmp_id_rewrite_set (&s->o2i, i2o_port); + } + else + { + nat_6t_o2i_flow_init (sm, thread_index, s, s->ext_host_addr, + s->ext_host_port, o2i_addr, o2i_port, + o2i_fib_index, ip->protocol); + nat_6t_flow_dport_rewrite_set (&s->o2i, i2o_port); + } + nat_6t_flow_daddr_rewrite_set (&s->o2i, i2o_addr.as_u32); + nat_6t_flow_txfib_rewrite_set (&s->o2i, i2o_fib_index); + + if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 1)) + { + b->error = node->errors[NAT_OUT2IN_ED_ERROR_HASH_ADD_FAILED]; + nat_ed_session_delete (sm, s, thread_index, 1); + nat_elog_warn (sm, "out2in flow hash add failed"); + return 0; + } + + if (twice_nat == TWICE_NAT || (twice_nat == TWICE_NAT_SELF && + ip->src_address.as_u32 == i2o_addr.as_u32)) + { + int rc = 0; + snat_address_t *filter = 0; + + // if exact address is specified use this address + if (is_exact_address (mapping)) + { + snat_address_t *ap; + vec_foreach (ap, sm->twice_nat_addresses) + { + if (mapping->pool_addr.as_u32 == ap->addr.as_u32) + { + filter = ap; + break; + } + } + } + + if (filter) + { + rc = nat_alloc_addr_and_port_exact (filter, + thread_index, + nat_proto, + &s->ext_host_nat_addr, + &s->ext_host_nat_port, + sm->port_per_thread, + tsm->snat_thread_index); + s->flags |= SNAT_SESSION_FLAG_EXACT_ADDRESS; + } + else + { + rc = nat44_ed_alloc_outside_addr_and_port ( + sm->twice_nat_addresses, 0, thread_index, nat_proto, + &s->ext_host_nat_addr, &s->ext_host_nat_port, sm->port_per_thread, + tsm->snat_thread_index); + } + + if (rc) + { + b->error = node->errors[NAT_OUT2IN_ED_ERROR_OUT_OF_PORTS]; + if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 0)) + { + nat_elog_warn (sm, "out2in flow hash del failed"); + } + snat_free_outside_address_and_port ( + sm->twice_nat_addresses, thread_index, &s->ext_host_nat_addr, + s->ext_host_nat_port, s->nat_proto); + nat_ed_session_delete (sm, s, thread_index, 1); + return 0; + } + + s->flags |= SNAT_SESSION_FLAG_TWICE_NAT; + + nat_6t_flow_saddr_rewrite_set (&s->o2i, s->ext_host_nat_addr.as_u32); + if (NAT_PROTOCOL_ICMP == nat_proto) + { + nat_6t_flow_icmp_id_rewrite_set (&s->o2i, s->ext_host_nat_port); + } + else + { + nat_6t_flow_sport_rewrite_set (&s->o2i, s->ext_host_nat_port); + } + + nat_6t_l3_l4_csum_calc (&s->o2i); + + nat_6t_i2o_flow_init (sm, thread_index, s, i2o_addr, i2o_port, + s->ext_host_nat_addr, s->ext_host_nat_port, + i2o_fib_index, ip->protocol); + nat_6t_flow_daddr_rewrite_set (&s->i2o, s->ext_host_addr.as_u32); + if (NAT_PROTOCOL_ICMP == nat_proto) + { + nat_6t_flow_icmp_id_rewrite_set (&s->i2o, s->ext_host_port); + } + else + { + nat_6t_flow_dport_rewrite_set (&s->i2o, s->ext_host_port); + } + } + else + { + if (NAT_PROTOCOL_ICMP == nat_proto) + { + nat_6t_i2o_flow_init (sm, thread_index, s, i2o_addr, i2o_port, + s->ext_host_addr, i2o_port, i2o_fib_index, + ip->protocol); + } + else + { + nat_6t_i2o_flow_init (sm, thread_index, 
s, i2o_addr, i2o_port, + s->ext_host_addr, s->ext_host_port, + i2o_fib_index, ip->protocol); + } + } + + nat_6t_flow_saddr_rewrite_set (&s->i2o, o2i_addr.as_u32); + if (NAT_PROTOCOL_ICMP == nat_proto) + { + nat_6t_flow_icmp_id_rewrite_set (&s->i2o, o2i_port); + } + else + { + nat_6t_flow_sport_rewrite_set (&s->i2o, o2i_port); + } + + if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1)) + { + nat_elog_notice (sm, "in2out flow hash add failed"); + if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 0)) + { + nat_elog_warn (sm, "out2in flow hash del failed"); + } + nat_ed_session_delete (sm, s, thread_index, 1); + return 0; + } + + nat_ipfix_logging_nat44_ses_create (thread_index, + s->in2out.addr.as_u32, + s->out2in.addr.as_u32, + s->nat_proto, + s->in2out.port, + s->out2in.port, s->in2out.fib_index); + + nat_syslog_nat44_sadd (0, s->in2out.fib_index, &s->in2out.addr, + s->in2out.port, &s->ext_host_nat_addr, + s->ext_host_nat_port, &s->out2in.addr, s->out2in.port, + &s->ext_host_addr, s->ext_host_port, s->nat_proto, + is_twice_nat_session (s)); + + per_vrf_sessions_register_session (s, thread_index); + + return s; +} + +static void +create_bypass_for_fwd (snat_main_t *sm, vlib_buffer_t *b, snat_session_t *s, + ip4_header_t *ip, u32 rx_fib_index, u32 thread_index) +{ + clib_bihash_kv_16_8_t kv, value; + udp_header_t *udp; + snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; + vlib_main_t *vm = vlib_get_main (); + f64 now = vlib_time_now (vm); + u16 lookup_sport, lookup_dport; + u8 lookup_protocol; + ip4_address_t lookup_saddr, lookup_daddr; + + if (ip->protocol == IP_PROTOCOL_ICMP) + { + if (nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr, + &lookup_sport, &lookup_daddr, + &lookup_dport, &lookup_protocol)) + return; + } + else + { + if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP) + { + udp = ip4_next_header (ip); + lookup_sport = udp->dst_port; + lookup_dport = udp->src_port; + } + else + { + lookup_sport = 0; + lookup_dport = 0; + } + lookup_saddr.as_u32 = ip->dst_address.as_u32; + lookup_daddr.as_u32 = ip->src_address.as_u32; + lookup_protocol = ip->protocol; + } + + init_ed_k (&kv, lookup_saddr, lookup_sport, lookup_daddr, lookup_dport, + rx_fib_index, lookup_protocol); + + if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value)) + { + ASSERT (thread_index == ed_value_get_thread_index (&value)); + s = + pool_elt_at_index (tsm->sessions, + ed_value_get_session_index (&value)); + } + else if (ip->protocol == IP_PROTOCOL_ICMP && + icmp_type_is_error_message + (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)) + { + return; + } + else + { + u32 proto; + + if (PREDICT_FALSE + (nat44_ed_maximum_sessions_exceeded + (sm, rx_fib_index, thread_index))) + return; + + s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol); + if (!s) + { + nat_elog_warn (sm, "create NAT session failed"); + return; + } + + proto = ip_proto_to_nat_proto (ip->protocol); + + s->ext_host_addr = ip->src_address; + s->ext_host_port = lookup_dport; + s->flags |= SNAT_SESSION_FLAG_FWD_BYPASS; + s->out2in.addr = ip->dst_address; + s->out2in.port = lookup_sport; + s->nat_proto = proto; + if (proto == NAT_PROTOCOL_OTHER) + { + s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO; + s->out2in.port = ip->protocol; + } + s->out2in.fib_index = rx_fib_index; + s->in2out.addr = s->out2in.addr; + s->in2out.port = s->out2in.port; + s->in2out.fib_index = s->out2in.fib_index; + + nat_6t_i2o_flow_init (sm, thread_index, s, ip->dst_address, lookup_sport, + 
ip->src_address, lookup_dport, rx_fib_index, + ip->protocol); + nat_6t_flow_txfib_rewrite_set (&s->i2o, rx_fib_index); + if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1)) + { + nat_elog_notice (sm, "in2out flow add failed"); + nat_ed_session_delete (sm, s, thread_index, 1); + return; + } + + per_vrf_sessions_register_session (s, thread_index); + } + + if (ip->protocol == IP_PROTOCOL_TCP) + { + tcp_header_t *tcp = ip4_next_header (ip); + nat44_set_tcp_session_state_o2i (sm, now, s, tcp->flags, + tcp->ack_number, tcp->seq_number, + thread_index); + } + + /* Accounting */ + nat44_session_update_counters (s, now, 0, thread_index); + /* Per-user LRU list maintenance */ + nat44_session_update_lru (sm, s, thread_index); +} + +static snat_session_t * +nat44_ed_out2in_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b, + ip4_header_t *ip, u32 rx_fib_index, + u32 thread_index, f64 now, + vlib_main_t *vm, + vlib_node_runtime_t *node) +{ + clib_bihash_kv_8_8_t kv, value; + snat_static_mapping_t *m; + snat_session_t *s; + + if (PREDICT_FALSE ( + nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index))) + { + b->error = node->errors[NAT_OUT2IN_ED_ERROR_MAX_SESSIONS_EXCEEDED]; + nat_elog_notice (sm, "maximum sessions exceeded"); + return 0; + } + + init_nat_k (&kv, ip->dst_address, 0, 0, 0); + if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value)) + { + b->error = node->errors[NAT_OUT2IN_ED_ERROR_NO_TRANSLATION]; + return 0; + } + + m = pool_elt_at_index (sm->static_mappings, value.value); + + /* Create a new session */ + s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol); + if (!s) + { + b->error = node->errors[NAT_OUT2IN_ED_ERROR_MAX_SESSIONS_EXCEEDED]; + nat_elog_warn (sm, "create NAT session failed"); + return 0; + } + + s->ext_host_addr.as_u32 = ip->src_address.as_u32; + s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO; + s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING; + s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT; + s->out2in.addr.as_u32 = ip->dst_address.as_u32; + s->out2in.fib_index = rx_fib_index; + s->in2out.addr.as_u32 = m->local_addr.as_u32; + s->in2out.fib_index = m->fib_index; + s->in2out.port = s->out2in.port = ip->protocol; + + nat_6t_o2i_flow_init (sm, thread_index, s, ip->dst_address, 0, + ip->src_address, 0, m->fib_index, ip->protocol); + nat_6t_flow_saddr_rewrite_set (&s->i2o, ip->dst_address.as_u32); + if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1)) + { + nat_elog_notice (sm, "in2out key add failed"); + nat_ed_session_delete (sm, s, thread_index, 1); + return NULL; + } + + nat_6t_o2i_flow_init (sm, thread_index, s, ip->src_address, 0, + ip->dst_address, 0, rx_fib_index, ip->protocol); + nat_6t_flow_daddr_rewrite_set (&s->o2i, m->local_addr.as_u32); + nat_6t_flow_txfib_rewrite_set (&s->o2i, m->fib_index); + if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 1)) + { + nat_elog_notice (sm, "out2in flow hash add failed"); + nat_ed_session_delete (sm, s, thread_index, 1); + return NULL; + } + + per_vrf_sessions_register_session (s, thread_index); + + /* Accounting */ + nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b), + thread_index); + /* Per-user LRU list maintenance */ + nat44_session_update_lru (sm, s, thread_index); + + return s; +} + +static inline uword +nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + int is_multi_worker) +{ + u32 n_left_from, *from; + snat_main_t *sm = &snat_main; + f64 now = 
vlib_time_now (vm); + u32 thread_index = vm->thread_index; + snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs; + u16 nexts[VLIB_FRAME_SIZE], *next = nexts; + vlib_get_buffers (vm, from, b, n_left_from); + + while (n_left_from > 0) + { + vlib_buffer_t *b0; + u32 sw_if_index0, rx_fib_index0; + nat_protocol_t proto0; + ip4_header_t *ip0; + snat_session_t *s0 = 0; + clib_bihash_kv_16_8_t kv0, value0; + nat_translation_error_e translation_error = NAT_ED_TRNSL_ERR_SUCCESS; + nat_6t_flow_t *f = 0; + ip4_address_t lookup_saddr, lookup_daddr; + u16 lookup_sport, lookup_dport; + u8 lookup_protocol; + int lookup_skipped = 0; + + b0 = *b; + b++; + + lookup_sport = vnet_buffer (b0)->ip.reass.l4_src_port; + lookup_dport = vnet_buffer (b0)->ip.reass.l4_dst_port; + + /* Prefetch next iteration. */ + if (PREDICT_TRUE (n_left_from >= 2)) + { + vlib_buffer_t *p2; + + p2 = *b; + + vlib_prefetch_buffer_header (p2, LOAD); + + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD); + } + + next[0] = vnet_buffer2 (b0)->nat.arc_next; + + vnet_buffer (b0)->snat.flags = 0; + ip0 = vlib_buffer_get_current (b0); + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + rx_fib_index0 = + fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index0); + + if (PREDICT_FALSE (ip0->ttl == 1)) + { + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, + 0); + next[0] = NAT_NEXT_ICMP_ERROR; + goto trace0; + } + + proto0 = ip_proto_to_nat_proto (ip0->protocol); + + if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP)) + { + if (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_request && + vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_reply && + !icmp_type_is_error_message ( + vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags)) + { + b0->error = node->errors[NAT_OUT2IN_ED_ERROR_BAD_ICMP_TYPE]; + next[0] = NAT_NEXT_DROP; + goto trace0; + } + int err = nat_get_icmp_session_lookup_values ( + b0, ip0, &lookup_saddr, &lookup_sport, &lookup_daddr, + &lookup_dport, &lookup_protocol); + if (err != 0) + { + b0->error = node->errors[err]; + next[0] = NAT_NEXT_DROP; + goto trace0; + } + } + else + { + lookup_saddr.as_u32 = ip0->src_address.as_u32; + lookup_daddr.as_u32 = ip0->dst_address.as_u32; + lookup_protocol = ip0->protocol; + } + + /* there might be a stashed index in vnet_buffer2 from handoff or + * classify node, see if it can be used */ + if (!pool_is_free_index (tsm->sessions, + vnet_buffer2 (b0)->nat.cached_session_index)) + { + s0 = pool_elt_at_index (tsm->sessions, + vnet_buffer2 (b0)->nat.cached_session_index); + if (PREDICT_TRUE ( + nat_6t_flow_match (&s0->o2i, b0, lookup_saddr, lookup_sport, + lookup_daddr, lookup_dport, lookup_protocol, + rx_fib_index0) || + (s0->flags & SNAT_SESSION_FLAG_TWICE_NAT && + nat_6t_flow_match (&s0->i2o, b0, lookup_saddr, lookup_sport, + lookup_daddr, lookup_dport, + lookup_protocol, rx_fib_index0)))) + { + /* yes, this is the droid we're looking for */ + lookup_skipped = 1; + goto skip_lookup; + } + s0 = NULL; + } + + init_ed_k (&kv0, lookup_saddr, lookup_sport, lookup_daddr, lookup_dport, + rx_fib_index0, lookup_protocol); + + // lookup flow + if (clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0)) + { + // flow does not exist go slow path + next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH; + goto trace0; + 
} + ASSERT (thread_index == ed_value_get_thread_index (&value0)); + s0 = + pool_elt_at_index (tsm->sessions, + ed_value_get_session_index (&value0)); + skip_lookup: + + if (PREDICT_FALSE (per_vrf_sessions_is_expired (s0, thread_index))) + { + // session is closed, go slow path + nat_free_session_data (sm, s0, thread_index, 0); + nat_ed_session_delete (sm, s0, thread_index, 1); + next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH; + goto trace0; + } + + if (s0->tcp_closed_timestamp) + { + if (now >= s0->tcp_closed_timestamp) + { + // session is closed, go slow path, freed in slow path + next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH; + } + else + { + // session in transitory timeout, drop + b0->error = node->errors[NAT_OUT2IN_ED_ERROR_TCP_CLOSED]; + next[0] = NAT_NEXT_DROP; + } + goto trace0; + } + + // drop if session expired + u64 sess_timeout_time; + sess_timeout_time = + s0->last_heard + (f64) nat44_session_get_timeout (sm, s0); + if (now >= sess_timeout_time) + { + // session is closed, go slow path + nat_free_session_data (sm, s0, thread_index, 0); + nat_ed_session_delete (sm, s0, thread_index, 1); + next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH; + goto trace0; + } + + if (nat_6t_flow_match (&s0->o2i, b0, lookup_saddr, lookup_sport, + lookup_daddr, lookup_dport, lookup_protocol, + rx_fib_index0)) + { + f = &s0->o2i; + } + else if (s0->flags & SNAT_SESSION_FLAG_TWICE_NAT && + nat_6t_flow_match (&s0->i2o, b0, lookup_saddr, lookup_sport, + lookup_daddr, lookup_dport, lookup_protocol, + rx_fib_index0)) + { + f = &s0->i2o; + } + else + { + /* + * Send DHCP packets to the ipv4 stack, or we won't + * be able to use dhcp client on the outside interface + */ + if (PREDICT_FALSE ( + proto0 == NAT_PROTOCOL_UDP && + (vnet_buffer (b0)->ip.reass.l4_dst_port == + clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_client)))) + { + goto trace0; + } + + if (!sm->forwarding_enabled) + { + b0->error = node->errors[NAT_OUT2IN_ED_ERROR_NO_TRANSLATION]; + next[0] = NAT_NEXT_DROP; + goto trace0; + } + else + { + if (nat_6t_flow_match (&s0->i2o, b0, lookup_saddr, lookup_sport, + lookup_daddr, lookup_dport, + lookup_protocol, rx_fib_index0)) + { + f = &s0->i2o; + } + else + { + // FIXME TODO bypass ??? + // create_bypass_for_fwd (sm, b0, s0, ip0, rx_fib_index0, + // thread_index); + translation_error = NAT_ED_TRNSL_ERR_FLOW_MISMATCH; + nat_free_session_data (sm, s0, thread_index, 0); + nat_ed_session_delete (sm, s0, thread_index, 1); + next[0] = NAT_NEXT_DROP; + goto trace0; + } + } + } + + if (NAT_ED_TRNSL_ERR_SUCCESS != + (translation_error = nat_6t_flow_buf_translate ( + sm, b0, ip0, f, proto0, 0 /* is_output_feature */))) + { + next[0] = NAT_NEXT_DROP; + goto trace0; + } + + switch (proto0) + { + case NAT_PROTOCOL_TCP: + vlib_increment_simple_counter (&sm->counters.fastpath.out2in.tcp, + thread_index, sw_if_index0, 1); + nat44_set_tcp_session_state_o2i (sm, now, s0, + vnet_buffer (b0)->ip. + reass.icmp_type_or_tcp_flags, + vnet_buffer (b0)->ip. + reass.tcp_ack_number, + vnet_buffer (b0)->ip. 
+ reass.tcp_seq_number, + thread_index); + break; + case NAT_PROTOCOL_UDP: + vlib_increment_simple_counter (&sm->counters.fastpath.out2in.udp, + thread_index, sw_if_index0, 1); + break; + case NAT_PROTOCOL_ICMP: + vlib_increment_simple_counter (&sm->counters.fastpath.out2in.icmp, + thread_index, sw_if_index0, 1); + break; + case NAT_PROTOCOL_OTHER: + vlib_increment_simple_counter (&sm->counters.fastpath.out2in.other, + thread_index, sw_if_index0, 1); + break; + } + + /* Accounting */ + nat44_session_update_counters (s0, now, + vlib_buffer_length_in_chain (vm, b0), + thread_index); + /* Per-user LRU list maintenance */ + nat44_session_update_lru (sm, s0, thread_index); + + trace0: + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + nat44_ed_out2in_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next[0]; + t->is_slow_path = 0; + t->translation_error = translation_error; + clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key)); + t->lookup_skipped = lookup_skipped; + + if (s0) + { + t->session_index = s0 - tsm->sessions; + clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of)); + clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if)); + t->translation_via_i2of = (&s0->i2o == f); + } + else + { + t->session_index = ~0; + } + } + + if (next[0] == NAT_NEXT_DROP) + { + vlib_increment_simple_counter (&sm->counters.fastpath.out2in.drops, + thread_index, sw_if_index0, 1); + } + + n_left_from--; + next++; + } + + vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts, + frame->n_vectors); + return frame->n_vectors; +} + +static inline uword +nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, *from; + snat_main_t *sm = &snat_main; + f64 now = vlib_time_now (vm); + u32 thread_index = vm->thread_index; + snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; + snat_static_mapping_t *m; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs; + u16 nexts[VLIB_FRAME_SIZE], *next = nexts; + vlib_get_buffers (vm, from, b, n_left_from); + + while (n_left_from > 0) + { + vlib_buffer_t *b0; + u32 sw_if_index0, rx_fib_index0; + nat_protocol_t proto0; + ip4_header_t *ip0; + udp_header_t *udp0; + icmp46_header_t *icmp0; + snat_session_t *s0 = 0; + clib_bihash_kv_16_8_t kv0, value0; + lb_nat_type_t lb_nat0; + twice_nat_type_t twice_nat0; + u8 identity_nat0; + ip4_address_t sm_addr; + u16 sm_port; + u32 sm_fib_index; + nat_translation_error_e translation_error = NAT_ED_TRNSL_ERR_SUCCESS; + + b0 = *b; + next[0] = vnet_buffer2 (b0)->nat.arc_next; + + vnet_buffer (b0)->snat.flags = 0; + ip0 = vlib_buffer_get_current (b0); + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + rx_fib_index0 = + fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index0); + + if (PREDICT_FALSE (ip0->ttl == 1)) + { + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, + 0); + next[0] = NAT_NEXT_ICMP_ERROR; + goto trace0; + } + + udp0 = ip4_next_header (ip0); + icmp0 = (icmp46_header_t *) udp0; + proto0 = ip_proto_to_nat_proto (ip0->protocol); + + if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER)) + { + s0 = nat44_ed_out2in_slowpath_unknown_proto ( + sm, b0, ip0, rx_fib_index0, thread_index, now, vm, node); + if (!sm->forwarding_enabled) 
+ { + if (!s0) + next[0] = NAT_NEXT_DROP; + } + if (NAT_NEXT_DROP != next[0] && + NAT_ED_TRNSL_ERR_SUCCESS != + (translation_error = nat_6t_flow_buf_translate ( + sm, b0, ip0, &s0->o2i, proto0, 0 /* is_output_feature */))) + { + goto trace0; + } + + vlib_increment_simple_counter (&sm->counters.slowpath.out2in.other, + thread_index, sw_if_index0, 1); + goto trace0; + } + + if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP)) + { + next[0] = icmp_out2in_ed_slow_path + (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, + next[0], now, thread_index, &s0); + + if (NAT_NEXT_DROP != next[0] && s0 && + NAT_ED_TRNSL_ERR_SUCCESS != + (translation_error = nat_6t_flow_buf_translate ( + sm, b0, ip0, &s0->o2i, proto0, 0 /* is_output_feature */))) + { + goto trace0; + } + + vlib_increment_simple_counter (&sm->counters.slowpath.out2in.icmp, + thread_index, sw_if_index0, 1); + goto trace0; + } + + init_ed_k (&kv0, ip0->src_address, + vnet_buffer (b0)->ip.reass.l4_src_port, ip0->dst_address, + vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0, + ip0->protocol); + + s0 = NULL; + if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0)) + { + ASSERT (thread_index == ed_value_get_thread_index (&value0)); + s0 = + pool_elt_at_index (tsm->sessions, + ed_value_get_session_index (&value0)); + + if (s0->tcp_closed_timestamp && now >= s0->tcp_closed_timestamp) + { + nat_free_session_data (sm, s0, thread_index, 0); + nat_ed_session_delete (sm, s0, thread_index, 1); + s0 = NULL; + } + } + + if (!s0) + { + /* Try to match static mapping by external address and port, + destination address and port in packet */ + + if (snat_static_mapping_match + (sm, ip0->dst_address, + vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0, + proto0, &sm_addr, &sm_port, &sm_fib_index, 1, 0, + &twice_nat0, &lb_nat0, &ip0->src_address, &identity_nat0, &m)) + { + /* + * Send DHCP packets to the ipv4 stack, or we won't + * be able to use dhcp client on the outside interface + */ + if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_UDP + && (vnet_buffer (b0)->ip.reass.l4_dst_port == + clib_host_to_net_u16 + (UDP_DST_PORT_dhcp_to_client)))) + { + goto trace0; + } + + if (!sm->forwarding_enabled) + { + b0->error = + node->errors[NAT_OUT2IN_ED_ERROR_NO_TRANSLATION]; + next[0] = NAT_NEXT_DROP; + } + else + { + if (next_src_nat + (sm, ip0, vnet_buffer (b0)->ip.reass.l4_src_port, + vnet_buffer (b0)->ip.reass.l4_dst_port, + thread_index, rx_fib_index0)) + { + next[0] = NAT_NEXT_IN2OUT_ED_FAST_PATH; + } + else + { + create_bypass_for_fwd (sm, b0, s0, ip0, rx_fib_index0, + thread_index); + } + } + goto trace0; + } + + if (PREDICT_FALSE (identity_nat0)) + goto trace0; + + if ((proto0 == NAT_PROTOCOL_TCP) + && !tcp_flags_is_init (vnet_buffer (b0)->ip. 
+ reass.icmp_type_or_tcp_flags)) + { + b0->error = node->errors[NAT_OUT2IN_ED_ERROR_NON_SYN]; + next[0] = NAT_NEXT_DROP; + goto trace0; + } + + /* Create session initiated by host from external network */ + s0 = create_session_for_static_mapping_ed (sm, b0, + sm_addr, sm_port, + sm_fib_index, + ip0->dst_address, + vnet_buffer (b0)-> + ip.reass.l4_dst_port, + rx_fib_index0, proto0, + node, rx_fib_index0, + thread_index, twice_nat0, + lb_nat0, now, m); + if (!s0) + { + next[0] = NAT_NEXT_DROP; + goto trace0; + } + } + + if (NAT_ED_TRNSL_ERR_SUCCESS != + (translation_error = nat_6t_flow_buf_translate ( + sm, b0, ip0, &s0->o2i, proto0, 0 /* is_output_feature */))) + { + next[0] = NAT_NEXT_DROP; + goto trace0; + } + + if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP)) + { + vlib_increment_simple_counter (&sm->counters.slowpath.out2in.tcp, + thread_index, sw_if_index0, 1); + nat44_set_tcp_session_state_o2i (sm, now, s0, + vnet_buffer (b0)->ip. + reass.icmp_type_or_tcp_flags, + vnet_buffer (b0)->ip. + reass.tcp_ack_number, + vnet_buffer (b0)->ip. + reass.tcp_seq_number, + thread_index); + } + else + { + vlib_increment_simple_counter (&sm->counters.slowpath.out2in.udp, + thread_index, sw_if_index0, 1); + } + + /* Accounting */ + nat44_session_update_counters (s0, now, + vlib_buffer_length_in_chain (vm, b0), + thread_index); + /* Per-user LRU list maintenance */ + nat44_session_update_lru (sm, s0, thread_index); + + trace0: + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + nat44_ed_out2in_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next[0]; + t->is_slow_path = 1; + t->translation_error = translation_error; + clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key)); + + if (s0) + { + t->session_index = s0 - tsm->sessions; + clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of)); + clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if)); + } + else + { + t->session_index = ~0; + } + } + + if (next[0] == NAT_NEXT_DROP) + { + vlib_increment_simple_counter (&sm->counters.slowpath.out2in.drops, + thread_index, sw_if_index0, 1); + } + + n_left_from--; + next++; + b++; + } + + vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts, + frame->n_vectors); + + return frame->n_vectors; +} + +VLIB_NODE_FN (nat44_ed_out2in_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + if (snat_main.num_workers > 1) + { + return nat44_ed_out2in_fast_path_node_fn_inline (vm, node, frame, 1); + } + else + { + return nat44_ed_out2in_fast_path_node_fn_inline (vm, node, frame, 0); + } +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (nat44_ed_out2in_node) = { + .name = "nat44-ed-out2in", + .vector_size = sizeof (u32), + .sibling_of = "nat-default", + .format_trace = format_nat44_ed_out2in_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN(nat_out2in_ed_error_strings), + .error_strings = nat_out2in_ed_error_strings, + .runtime_data_bytes = sizeof (snat_runtime_t), +}; +/* *INDENT-ON* */ + +VLIB_NODE_FN (nat44_ed_out2in_slowpath_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return nat44_ed_out2in_slow_path_node_fn_inline (vm, node, frame); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (nat44_ed_out2in_slowpath_node) = { + .name = "nat44-ed-out2in-slowpath", + .vector_size = sizeof (u32), + .sibling_of = "nat-default", + .format_trace = format_nat44_ed_out2in_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = 
ARRAY_LEN(nat_out2in_ed_error_strings), + .error_strings = nat_out2in_ed_error_strings, + .runtime_data_bytes = sizeof (snat_runtime_t), +}; +/* *INDENT-ON* */ + +static u8 * +format_nat_pre_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + nat_pre_trace_t *t = va_arg (*args, nat_pre_trace_t *); + return format (s, "out2in next_index %d arc_next_index %d", t->next_index, + t->arc_next_index); +} + +VLIB_NODE_FN (nat_pre_out2in_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return nat_pre_node_fn_inline (vm, node, frame, + NAT_NEXT_OUT2IN_ED_FAST_PATH); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (nat_pre_out2in_node) = { + .name = "nat-pre-out2in", + .vector_size = sizeof (u32), + .sibling_of = "nat-default", + .format_trace = format_nat_pre_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = 0, + }; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ -- cgit 1.2.3-korg
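
One detail of the port allocators in nat44_ed_out2in.c worth calling out: both nat_alloc_addr_and_port_exact () and nat44_ed_alloc_outside_addr_and_port () derive candidate ports as (port_per_thread * snat_thread_index) + snat_random_port (0, port_per_thread - 1) + 1024, which statically partitions the dynamic port space above 1024 into one disjoint range per worker so that workers never contend for the same (address, port) pair. Below is a small stand-alone sketch of that arithmetic only; the thread count is made up and rand () stands in for snat_random_port (), so it illustrates the partitioning rather than reproducing the allocator.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int
main (void)
{
  /* illustrative values: in VPP these come from snat_main_t */
  const unsigned n_threads = 4;
  const unsigned port_per_thread = (65535 - 1024) / n_threads;

  for (unsigned thread = 0; thread < n_threads; thread++)
    {
      /* same formula as the allocation macros:
       * port_per_thread * thread + random offset in [0, port_per_thread - 1]
       * + 1024 */
      unsigned lo = 1024 + port_per_thread * thread;
      unsigned portnum = lo + (unsigned) rand () % port_per_thread;
      printf ("worker %u owns ports [%u..%u], picked %u\n", thread, lo,
	      lo + port_per_thread - 1, portnum);
    }
  return 0;
}

With four workers, for example, worker 0 only ever draws from 1024..17150 while worker 3 draws from 49405..65531, which is what the per-thread busy_*_ports_per_thread bookkeeping in the macros relies on.
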