/*
 * Copyright (c) 2016 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* NOTE(review): the header names of the 14 #include directives below were
 * lost when this file was extracted (each is a bare "#include").  Restore
 * them from the original source tree before attempting to compile. */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

/* Per-packet trace record shared by the in2out fast/slow path nodes. */
typedef struct {
  u32 sw_if_index;    /* RX interface of the traced packet */
  u32 next_index;     /* next node index chosen for the packet */
  u32 session_index;  /* NAT session used for the translation */
  u32 is_slow_path;   /* non-zero when recorded by the slow-path node */
} snat_in2out_trace_t;

/* Trace record for the worker-handoff node. */
typedef struct {
  u32 next_worker_index;  /* worker thread the packet is destined for */
  u8 do_handoff;          /* non-zero when the packet changes workers */
} snat_in2out_worker_handoff_trace_t;

/* packet trace format function */
static u8 * format_snat_in2out_trace (u8 * s, va_list * args)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
  char * tag;

  /* The same record type is emitted by both the fast and slow path. */
  tag = t->is_slow_path ? "NAT44_IN2OUT_SLOW_PATH" : "NAT44_IN2OUT_FAST_PATH";

  s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
              t->sw_if_index, t->next_index, t->session_index);

  return s;
}

/* Trace formatter for the "fast" (static-mapping-only) in2out node. */
static u8 * format_snat_in2out_fast_trace (u8 * s, va_list * args)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);

  s = format (s, "NAT44_IN2OUT_FAST: sw_if_index %d, next index %d",
              t->sw_if_index, t->next_index);
  return s;
}

/* Trace formatter for the worker-handoff node. */
static u8 * format_snat_in2out_worker_handoff_trace (u8 * s, va_list * args)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  snat_in2out_worker_handoff_trace_t * t =
    va_arg (*args, snat_in2out_worker_handoff_trace_t *);
  char * m;

  m = t->do_handoff ? "next worker" : "same worker";
  s = format (s, "NAT44_IN2OUT_WORKER_HANDOFF: %s %d", m,
              t->next_worker_index);

  return s;
}

/* Trace record for the IPv4 fragment/reassembly node. */
typedef struct {
  u32 sw_if_index;
  u32 next_index;
  u8 cached;  /* non-zero when the fragment was cached, not yet translated */
} nat44_in2out_reass_trace_t;

/* Trace formatter for the reassembly node. */
static u8 * format_nat44_in2out_reass_trace (u8 * s, va_list * args)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  nat44_in2out_reass_trace_t * t =
    va_arg (*args, nat44_in2out_reass_trace_t *);

  s = format (s, "NAT44_IN2OUT_REASS: sw_if_index %d, next index %d, status %s",
              t->sw_if_index, t->next_index,
              t->cached ? "cached" : "translated");

  return s;
}

/* Graph node registrations; the node definitions live elsewhere in this
 * file (not all of them are visible in this chunk). */
vlib_node_registration_t snat_in2out_node;
vlib_node_registration_t snat_in2out_slowpath_node;
vlib_node_registration_t snat_in2out_fast_node;
vlib_node_registration_t snat_in2out_worker_handoff_node;
vlib_node_registration_t snat_det_in2out_node;
vlib_node_registration_t snat_in2out_output_node;
vlib_node_registration_t snat_in2out_output_slowpath_node;
vlib_node_registration_t snat_in2out_output_worker_handoff_node;
vlib_node_registration_t snat_hairpin_dst_node;
vlib_node_registration_t snat_hairpin_src_node;
vlib_node_registration_t nat44_hairpinning_node;
vlib_node_registration_t nat44_in2out_reass_node;

/* Error counters shared by all in2out nodes: _(symbol, counter string). */
#define foreach_snat_in2out_error                       \
_(UNSUPPORTED_PROTOCOL, "Unsupported protocol")         \
_(IN2OUT_PACKETS, "Good in2out packets processed")      \
_(OUT_OF_PORTS, "Out of ports")                         \
_(BAD_OUTSIDE_FIB, "Outside VRF ID not found")          \
_(BAD_ICMP_TYPE, "unsupported ICMP type")               \
_(NO_TRANSLATION, "No translation")                     \
_(MAX_SESSIONS_EXCEEDED, "Maximum sessions exceeded")   \
_(DROP_FRAGMENT, "Drop fragment")                       \
_(MAX_REASS, "Maximum reassemblies exceeded")           \
_(MAX_FRAG, "Maximum fragments per reassembly exceeded")

typedef enum {
#define _(sym,str) SNAT_IN2OUT_ERROR_##sym,
  foreach_snat_in2out_error
#undef _
  SNAT_IN2OUT_N_ERROR,
} snat_in2out_error_t;

static char * snat_in2out_error_strings[] = {
#define _(sym,string) string,
  foreach_snat_in2out_error
#undef _
};

/* Next-node indices for the in2out nodes. */
typedef enum {
  SNAT_IN2OUT_NEXT_LOOKUP,
  SNAT_IN2OUT_NEXT_DROP,
  SNAT_IN2OUT_NEXT_ICMP_ERROR,
  SNAT_IN2OUT_NEXT_SLOW_PATH,
  SNAT_IN2OUT_NEXT_REASS,
  SNAT_IN2OUT_N_NEXT,
} snat_in2out_next_t;

/* Next-node indices for the hairpin source node. */
typedef enum {
  SNAT_HAIRPIN_SRC_NEXT_DROP,
  SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT,
  SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH,
  SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT,
  SNAT_HAIRPIN_SRC_N_NEXT,
} snat_hairpin_next_t;

/**
 * @brief Check if packet should be translated
 *
 * Packets aimed at outside interface and external address with active session
 * should be translated.
 *
 * @param sm NAT main
 * @param node NAT node runtime
 * @param sw_if_index0 index of the inside interface
 * @param ip0 IPv4 header
 * @param proto0 NAT protocol
 * @param rx_fib_index0 RX FIB index
 *
 * @returns 0 if packet should be translated otherwise 1
 */
static inline int
snat_not_translate_fast (snat_main_t * sm, vlib_node_runtime_t *node,
                         u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
                         u32 rx_fib_index0)
{
  fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
  /* /32 prefix for the packet's destination address. */
  fib_prefix_t pfx = {
    .fp_proto = FIB_PROTOCOL_IP4,
    .fp_len = 32,
    .fp_addr = {
        .ip4.as_u32 = ip0->dst_address.as_u32,
    },
  };

  /* Don't NAT packet aimed at the intfc address */
  if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0,
                                      ip0->dst_address.as_u32)))
    return 1;

  fei = fib_table_lookup (rx_fib_index0, &pfx);
  if (FIB_NODE_INDEX_INVALID != fei)
    {
      u32 sw_if_index = fib_entry_get_resolving_interface (fei);
      if (sw_if_index == ~0)
        {
          /* Unresolved in the RX FIB: retry in the outside FIB. */
          fei = fib_table_lookup (sm->outside_fib_index, &pfx);
          if (FIB_NODE_INDEX_INVALID != fei)
            sw_if_index = fib_entry_get_resolving_interface (fei);
        }
      snat_interface_t *i;
      pool_foreach (i, sm->interfaces,
      ({
        /* NAT packet aimed at outside interface */
        if ((nat_interface_is_outside(i)) && (sw_if_index == i->sw_if_index))
          return 0;
      }));
    }

  return 1;
}

/**
 * @brief Check whether an in2out packet can skip translation.
 *
 * @returns 0 if the packet should be translated (active out2in session or
 * static mapping exists, or it resolves via an outside interface), 1 if
 * translation can be skipped.
 */
static inline int
snat_not_translate (snat_main_t * sm, vlib_node_runtime_t *node,
                    u32 sw_if_index0, ip4_header_t * ip0, u32 proto0,
                    u32 rx_fib_index0, u32 thread_index)
{
  udp_header_t * udp0 = ip4_next_header (ip0);
  snat_session_key_t key0, sm0;
  clib_bihash_kv_8_8_t kv0, value0;

  /* Lookup key: destination address/port in the outside FIB. */
  key0.addr = ip0->dst_address;
  key0.port = udp0->dst_port;
  key0.protocol = proto0;
  key0.fib_index = sm->outside_fib_index;
  kv0.key = key0.as_u64;

  /* NAT packet aimed at external address if */
  /* has active sessions */
  if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in,
                              &kv0, &value0))
    {
      /* or is static mappings */
      if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
        return 0;
    }
  else
    return 0;

  return snat_not_translate_fast(sm, node, sw_if_index0, ip0, proto0,
                                 rx_fib_index0);
}

/**
 * @brief Slow path: allocate (or recycle) a NAT session for a new flow.
 *
 * @param sm            NAT main
 * @param b0            buffer being translated
 * @param ip0           IPv4 header of the packet
 * @param rx_fib_index0 RX FIB index
 * @param key0          in2out session key for the flow
 * @param sessionp      [out] session created/recycled for the flow
 * @param node          NAT node runtime (for error counters)
 * @param next0         next-node index to return on success
 * @param thread_index  worker thread index
 *
 * @returns next0 on success, SNAT_IN2OUT_NEXT_DROP on failure
 */
static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
                      ip4_header_t * ip0,
                      u32 rx_fib_index0,
                      snat_session_key_t * key0,
                      snat_session_t ** sessionp,
                      vlib_node_runtime_t * node,
                      u32 next0,
                      u32 thread_index)
{
  snat_user_t *u;
  snat_user_key_t user_key;
  snat_session_t *s;
  clib_bihash_kv_8_8_t kv0, value0;
  u32 oldest_per_user_translation_list_index;
  dlist_elt_t * oldest_per_user_translation_list_elt;
  dlist_elt_t * per_user_translation_list_elt;
  dlist_elt_t * per_user_list_head_elt;
  u32 session_index;
  snat_session_key_t key1;
  u32 address_index = ~0;
  u32 outside_fib_index;
  uword * p;
  udp_header_t * udp0 = ip4_next_header (ip0);

  /* Global session-table limit check. */
  if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index)))
    {
      b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED];
      return SNAT_IN2OUT_NEXT_DROP;
    }

  /* Resolve the outside VRF id to a FIB index. */
  p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id);
  if (! p)
    {
      b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_OUTSIDE_FIB];
      return SNAT_IN2OUT_NEXT_DROP;
    }
  outside_fib_index = p[0];

  key1.protocol = key0->protocol;
  user_key.addr = ip0->src_address;
  user_key.fib_index = rx_fib_index0;
  kv0.key = user_key.as_u64;

  /* Ever heard of the "user" = src ip4 address before?
   */
  if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].user_hash,
                              &kv0, &value0))
    {
      /* no, make a new one */
      pool_get (sm->per_thread_data[thread_index].users, u);
      memset (u, 0, sizeof (*u));
      u->addr = ip0->src_address;
      u->fib_index = rx_fib_index0;

      /* Anchor element for this user's per-session LRU dlist. */
      pool_get (sm->per_thread_data[thread_index].list_pool,
                per_user_list_head_elt);

      u->sessions_per_user_list_head_index = per_user_list_head_elt -
        sm->per_thread_data[thread_index].list_pool;

      clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
                       u->sessions_per_user_list_head_index);

      kv0.value = u - sm->per_thread_data[thread_index].users;

      /* add user */
      clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].user_hash,
                               &kv0, 1 /* is_add */);
    }
  else
    {
      u = pool_elt_at_index (sm->per_thread_data[thread_index].users,
                             value0.value);
    }

  /* Over quota? Recycle the least recently used dynamic translation */
  if (u->nsessions >= sm->max_translations_per_user)
    {
      /* Remove the oldest dynamic translation */
      do {
          oldest_per_user_translation_list_index =
            clib_dlist_remove_head (
              sm->per_thread_data[thread_index].list_pool,
              u->sessions_per_user_list_head_index);

          ASSERT (oldest_per_user_translation_list_index != ~0);

          /* add it back to the end of the LRU list */
          clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
                              u->sessions_per_user_list_head_index,
                              oldest_per_user_translation_list_index);

          /* Get the list element */
          oldest_per_user_translation_list_elt =
            pool_elt_at_index (sm->per_thread_data[thread_index].list_pool,
                               oldest_per_user_translation_list_index);

          /* Get the session index from the list element */
          session_index = oldest_per_user_translation_list_elt->value;

          /* Get the session */
          s = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
                                 session_index);
      /* Static sessions are never recycled; keep walking the LRU list. */
      } while (snat_is_session_static (s));

      if (snat_is_unk_proto_session (s))
        {
          clib_bihash_kv_16_8_t up_kv;
          nat_ed_ses_key_t key;

          /* Remove from lookup tables */
          key.l_addr = s->in2out.addr;
          key.r_addr = s->ext_host_addr;
          key.fib_index = s->in2out.fib_index;
          /* NOTE(review): for unknown-protocol sessions the in2out "port"
           * field appears to carry the IP protocol number -- confirm
           * against the session-creation code before changing. */
          key.proto = s->in2out.port;
          key.rsvd = 0;
          key.l_port = 0;
          up_kv.key[0] = key.as_u64[0];
          up_kv.key[1] = key.as_u64[1];
          if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &up_kv, 0))
            clib_warning ("in2out key del failed");

          key.l_addr = s->out2in.addr;
          key.fib_index = s->out2in.fib_index;
          up_kv.key[0] = key.as_u64[0];
          up_kv.key[1] = key.as_u64[1];
          if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &up_kv, 0))
            clib_warning ("out2in key del failed");
        }
      else
        {
          /* Remove in2out, out2in keys */
          kv0.key = s->in2out.as_u64;
          if (clib_bihash_add_del_8_8 (
                &sm->per_thread_data[thread_index].in2out,
                &kv0, 0 /* is_add */))
            clib_warning ("in2out key delete failed");
          kv0.key = s->out2in.as_u64;
          if (clib_bihash_add_del_8_8 (
                &sm->per_thread_data[thread_index].out2in,
                &kv0, 0 /* is_add */))
            clib_warning ("out2in key delete failed");

          /* log NAT event */
          snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
                                              s->out2in.addr.as_u32,
                                              s->in2out.protocol,
                                              s->in2out.port,
                                              s->out2in.port,
                                              s->in2out.fib_index);

          /* Return the recycled session's outside address/port pair. */
          snat_free_outside_address_and_port (sm->addresses, thread_index,
                                              &s->out2in,
                                              s->outside_address_index);
        }
      s->outside_address_index = ~0;

      if (snat_alloc_outside_address_and_port (
            sm->addresses, rx_fib_index0, thread_index, &key1,
            &address_index, sm->vrf_mode, sm->port_per_thread,
            sm->per_thread_data[thread_index].snat_thread_index))
        {
          /* We just freed an address/port pair, so allocation should
           * never fail here. */
          ASSERT(0);

          b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
          return SNAT_IN2OUT_NEXT_DROP;
        }
      s->outside_address_index = address_index;
    }
  else
    {
      u8 static_mapping = 1;

      /* First try to match static mapping by local address and port */
      if (snat_static_mapping_match (sm, *key0, &key1, 0, 0))
        {
          static_mapping = 0;
          /* Try to create dynamic translation */
          if (snat_alloc_outside_address_and_port (
                sm->addresses, rx_fib_index0, thread_index, &key1,
                &address_index, sm->vrf_mode, sm->port_per_thread,
                sm->per_thread_data[thread_index].snat_thread_index))
            {
              b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
              return SNAT_IN2OUT_NEXT_DROP;
            }
        }

      /* Create a new session */
      pool_get (sm->per_thread_data[thread_index].sessions, s);
      memset (s, 0, sizeof (*s));
      s->outside_address_index = address_index;

      if (static_mapping)
        {
          u->nstaticsessions++;
          s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
        }
      else
        {
          u->nsessions++;
        }

      /* Create list elts */
      pool_get (sm->per_thread_data[thread_index].list_pool,
                per_user_translation_list_elt);
      clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
                       per_user_translation_list_elt -
                       sm->per_thread_data[thread_index].list_pool);

      per_user_translation_list_elt->value =
        s - sm->per_thread_data[thread_index].sessions;
      s->per_user_index = per_user_translation_list_elt -
        sm->per_thread_data[thread_index].list_pool;
      s->per_user_list_head_index = u->sessions_per_user_list_head_index;

      clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
                          s->per_user_list_head_index,
                          per_user_translation_list_elt -
                          sm->per_thread_data[thread_index].list_pool);
    }

  /* Fill in the session (both for recycled and freshly created ones). */
  s->in2out = *key0;
  s->out2in = key1;
  s->out2in.protocol = key0->protocol;
  s->out2in.fib_index = outside_fib_index;
  s->ext_host_addr.as_u32 = ip0->dst_address.as_u32;
  s->ext_host_port = udp0->dst_port;
  *sessionp = s;

  /* Add to translation hashes */
  kv0.key = s->in2out.as_u64;
  kv0.value = s - sm->per_thread_data[thread_index].sessions;
  if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].in2out,
                               &kv0, 1 /* is_add */))
    clib_warning ("in2out key add failed");

  kv0.key = s->out2in.as_u64;
  kv0.value = s - sm->per_thread_data[thread_index].sessions;

  if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].out2in,
                               &kv0, 1 /* is_add */))
    clib_warning ("out2in key add failed");

  /* log NAT event */
  snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32,
                                      s->out2in.addr.as_u32,
                                      s->in2out.protocol,
                                      s->in2out.port,
                                      s->out2in.port,
                                      s->in2out.fib_index);
  return next0;
}

/* Extract the NAT session lookup key from an ICMP packet; for ICMP error
 * messages the key is taken from the embedded (inner) packet instead.
 * Returns -1 on success, otherwise a snat_in2out_error_t error code. */
static_always_inline snat_in2out_error_t
icmp_get_key(ip4_header_t *ip0, snat_session_key_t *p_key0)
{
  icmp46_header_t *icmp0;
  snat_session_key_t key0;
  icmp_echo_header_t *echo0, *inner_echo0 = 0;
  ip4_header_t *inner_ip0 = 0;
  void *l4_header = 0;
  icmp46_header_t *inner_icmp0;

  icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
  echo0 = (icmp_echo_header_t *)(icmp0+1);

  if (!icmp_is_error_message (icmp0))
    {
      /* Plain ICMP: key on outer source address + echo identifier. */
      key0.protocol = SNAT_PROTOCOL_ICMP;
      key0.addr = ip0->src_address;
      key0.port = echo0->identifier;
    }
  else
    {
      /* ICMP error: key on the embedded (inner) packet. */
      inner_ip0 = (ip4_header_t *)(echo0+1);
      l4_header = ip4_next_header (inner_ip0);
      key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
      key0.addr = inner_ip0->dst_address;
      switch (key0.protocol)
        {
        case SNAT_PROTOCOL_ICMP:
          inner_icmp0 = (icmp46_header_t*)l4_header;
          inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
          key0.port = inner_echo0->identifier;
          break;
        case SNAT_PROTOCOL_UDP:
        case SNAT_PROTOCOL_TCP:
          key0.port = ((tcp_udp_header_t*)l4_header)->dst_port;
          break;
        default:
          return SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL;
        }
    }
  *p_key0 = key0;
  return -1; /* success */
}

/**
 * Get address and port values to be used for ICMP packet translation
 * and create session if needed
 *
 * @param[in,out] sm             NAT main
 * @param[in,out] node           NAT node runtime
 * @param[in] thread_index       thread index
 * @param[in,out] b0             buffer containing packet to be translated
 * @param[in] ip0                IPv4 header of the packet
 * @param[out] p_proto           protocol used for matching
 * @param[out] p_value           address and port after NAT translation
 * @param[out] p_dont_translate  if packet should not be translated
 * @param d                      optional parameter
 * @param e                      optional parameter
 */
u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
                           u32 thread_index, vlib_buffer_t *b0,
                           ip4_header_t *ip0, u8 *p_proto,
                           snat_session_key_t *p_value,
                           u8 *p_dont_translate, void *d, void *e)
{
  icmp46_header_t *icmp0;
  u32 sw_if_index0;
  u32 rx_fib_index0;
  snat_session_key_t key0;
  snat_session_t *s0 = 0;
  u8 dont_translate = 0;
  clib_bihash_kv_8_8_t kv0, value0;
  u32 next0 = ~0;
  int err;

  icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
  sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
  rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);

  err = icmp_get_key (ip0, &key0);
  if (err != -1)
    {
      b0->error = node->errors[err];
      next0 = SNAT_IN2OUT_NEXT_DROP;
      goto out;
    }
  key0.fib_index = rx_fib_index0;

  /* NOTE(review): SOURCE is truncated mid-statement here; the remainder of
   * this function (including the "out" label) is not visible in this
   * chunk. */
  kv0.key =
## VPP Inband OAM (iOAM)    {#ioam_plugin_doc}

In-band OAM (iOAM) is an implementation study to record operational
information in the packet while the packet traverses a path between
two points in the network.

Overview of iOAM can be found in [iOAM-Devnet] page.
The following IETF drafts detail the motivation and mechanism for
recording operational information:
 - [iOAM-ietf-requirements] - Describes motivation and use cases for iOAM
 - [iOAM-ietf-data] - Describes data records that can be collected using iOAM
 - [iOAM-ietf-transport] - Lists out the transport protocols
 and mechanism to carry iOAM data records
 - [iOAM-ietf-proof-of-transit] - Describes the idea of Proof of Transit (POT)
 and mechanisms to operationalize the idea

## Terminology
In-band OAM is expected to be deployed in a specific domain rather
than on the overall Internet. The part of the network which employs in-band OAM
is referred to as **"in-band OAM-domain"**.

In-band OAM data is added to a packet on entering the in-band OAM-domain
and is removed from the packet when exiting the domain.
Within the in-band OAM-domain, network nodes that the packet traverses
may update the in-band OAM data records.

- The node which adds in-band OAM data to the packet is called the
**"in-band OAM encapsulating node"**.

- The node which removes the in-band OAM data is referred to as the
**"in-band OAM decapsulating node"**.

- Nodes within the domain which are aware of in-band OAM data and read
and/or write or process the in-band OAM data are called
**"in-band OAM transit nodes"**.

## Features supported in the current release
VPP can function as in-band OAM encapsulating, transit and decapsulating node.
In this version of VPP in-band OAM data is transported as options in an
IPv6 hop-by-hop extension header. Hence in-band OAM can be enabled
for IPv6 traffic.

The following iOAM features are supported:

- **In-band OAM Tracing** : In-band OAM supports multiple data records to be
recorded in the packet as the packet traverses the network.
These data records offer insights into the operational behavior of the network.
The following information can be collected in the tracing
data from the nodes a packet traverses:
  - Node ID
  - Ingress interface ID
  - Egress interface ID
  - Timestamp
  - Pre-configured application data

- **In-band OAM Proof of Transit (POT)**: Proof of transit iOAM data is
added to every packet for verifying that a packet traverses a specific
set of nodes.
In-band OAM data is updated at every node that is enabled with iOAM
proof of transit and is used to verify whether a packet traversed
all the specified nodes. When the verifier receives each packet,
it can validate whether the packet traversed the specified nodes.


## Configuration
Configuring iOAM involves:
- Selecting the packets for which iOAM data must be inserted, updated or removed
  - Selection of packets for iOAM data insertion on iOAM encapsulating node.
  Selection of packets is done by 5-tuple based classification
  - Selection of packets for updating iOAM data is implicitly done on the
  presence of iOAM options in the packet
  - Selection of packets for removing the iOAM data is done on 5-tuple
  based classification
- The kind of data to be collected
  - Tracing data
  - Proof of transit
- Additional details for processing iOAM data to be collected
  - For trace data - trace type, number of nodes to be recorded in the trace,
  time stamp precision, etc.
  - For POT data - configuration of POT profile required to process the POT data

The CLI for configuring iOAM is explained here followed by detailed steps
and examples to deploy iOAM on VPP as an encapsulating, transit or
decapsulating iOAM node in the subsequent sub-sections.

VPP iOAM configuration for enabling trace and POT is as follows:

    set ioam rewrite trace-type <0x1f|0x7|0x9|0x11|0x19>
    trace-elts <number of trace elements> trace-tsp <0|1|2|3>
    node-id <node ID in hex> app-data <application data in hex> [pot]

A description of each of the options of the CLI follows:
- trace-type : An entry in the "Node data List" array of the trace option
can have different formats, following the needs of a deployment.
For example: Some deployments might only be interested
in recording the node identifiers, whereas others might be interested
in recording node identifier and timestamp.
The following types are currently supported:
    - 0x1f : Node data to include hop limit (8 bits), node ID (24 bits),
    ingress and egress interface IDs (16 bits each), timestamp (32 bits),
    application data (32 bits)
    - 0x7 : Node data to include hop limit (8 bits), node ID (24 bits),
    ingress and egress interface IDs (16 bits each)
    - 0x9 : Node data to include hop limit (8 bits), node ID (24 bits),
    timestamp (32 bits)
    - 0x11: Node data to include hop limit (8 bits), node ID (24 bits),
    application data (32 bits)
    - 0x19: Node data to include hop limit (8 bits), node ID (24 bits),
    timestamp (32 bits), application data (32 bits)
- trace-elts : Defines the length of the node data array in the trace option.
- trace-tsp : Defines the timestamp precision to use with the enumerated value
              for precision as follows:
    - 0 : 32bits timestamp in seconds
    - 1 : 32bits timestamp in milliseconds
    - 2 : 32bits timestamp in microseconds
    - 3 : 32bits timestamp in nanoseconds
- node-id : Unique identifier for the node, included in the node ID
  field of the node data in trace option.
- app-data : The value configured here is included as is in
application data field of node data in trace option.
- pot : Enables POT option to be included in the iOAM options.

### Trace configuration

#### On in-band OAM encapsulating node
 - **Configure classifier and apply ACL** to select packets for
 iOAM data insertion
    - Example to enable iOAM data insertion for all the packets
    towards IPv6 address db06::06:

    vpp# classify table miss-next node ip6-lookup mask l3 ip6 dst
    
    vpp# classify session acl-hit-next node ip6-add-hop-by-hop
    table-index 0 match l3 ip6 dst db06::06
    
    vpp# set int input acl intfc GigabitEthernet0/0/0 ip6-table 0
    
 - **Enable tracing** : Specify node ID, maximum number of nodes for which
 trace data should be recorded, type of data to be included for recording,
 optionally application data to be included
    - Example to enable tracing with a maximum of 4 nodes recorded
    and the data to be recorded to include - hop limit, node id,
    ingress and egress interface IDs, timestamp (millisecond precision),
    application data (0x1234):


    vpp# set ioam rewrite trace-type 0x1f trace-elts 4 trace-tsp 1
    node-id 0x1 app-data 0x1234



#### On in-band OAM transit node
- The transit node requires trace type, timestamp precision, node ID and
optionally application data to be configured,
to update its node data in the trace option.

Example:  

    vpp# set ioam rewrite trace-type 0x1f trace-elts 4 trace-tsp 1  
    node-id 0x2 app-data 0x1234  

#### On the In-band OAM decapsulating node
- The decapsulating node similar to encapsulating node requires
**classification** of the packets to remove iOAM data from.
    - Example to decapsulate iOAM data for packets towards
    db06::06, configure classifier and enable it as an ACL as follows:


    vpp# classify table miss-next node ip6-lookup mask l3 ip6 dst

    vpp# classify session acl-hit-next node ip6-lookup table-index 0
    match l3 ip6 dst db06::06 opaque-index 100

    vpp# set int input acl intfc GigabitEthernet0/0/0 ip6-table 0


- Decapsulating node requires trace type, timestamp precision,
node ID and optionally application data to be configured,
to update its node data in the trace option before it is decapsulated.

Example:  

    vpp# set ioam rewrite trace-type 0x1f trace-elts 4  
    trace-tsp 1 node-id 0x3 app-data 0x1234  


### Proof of Transit configuration

For details on proof-of-transit,
see the IETF draft [iOAM-ietf-proof-of-transit].
To enable Proof of Transit all the nodes that participate
and hence are verified for transit need a proof of transit profile.
A script to generate a proof of transit profile as per the mechanism
described in [iOAM-ietf-proof-of-transit] will be available at [iOAM-Devnet].

The Proof of transit mechanism implemented here is based on
Shamir's Secret Sharing algorithm.
The overall algorithm uses two polynomials 
POLY-1 and POLY-2. The degree of polynomials depends on number of nodes
to be verified for transit.
POLY-1 is secret and constant. Each node gets a point on POLY-1
at setup-time and keeps it secret.
POLY-2 is public, random and per packet.
Each node is assigned a point on POLY-1 and POLY-2 with the same x index.
Each node derives its point on POLY-2 each time a packet arrives at it.
A node then contributes its points on POLY-1 and POLY-2 to construct
POLY-3 (POLY-3 = POLY-1 + POLY-2) using lagrange extrapolation and
forwards it towards the verifier by updating POT data in the packet.
The verifier constructs POLY-3 from the accumulated value from all the nodes
and its own points on POLY-1 and POLY-2 and verifies whether
POLY-3 = POLY-1 + POLY-2.  Only the verifier knows POLY-1.
The solution leverages finite field arithmetic in a field of size "prime number"
for reasons explained in description of Shamir's secret sharing algorithm.

Here is an explanation of POT profile list and profile configuration CLI to
realize the above mechanism.
It is best to use the script provided at [iOAM-Devnet] to generate
this configuration.
- **Create POT profile** : set pot profile name <string> id [0-1]  
[validator-key 0xu64] prime-number 0xu64 secret_share 0xu64  
lpc 0xu64 polynomial2 0xu64 bits-in-random [0-64]  
    - name : Profile list name.
    - id : Profile id, it can be 0 or 1.
    A maximum of two profiles can be configured per profile list.
    - validator-key : Secret key configured only on the
    verifier/decapsulating node used to compare and verify proof of transit.
    - prime-number : Prime number for finite field arithmetic as required by the
    proof of transit mechanism.
    - secret_share : Unique point for each node on the secret polynomial POLY-1.
    - lpc : Lagrange Polynomial Constant(LPC) calculated per node based on
    its point (x value used for evaluating the points on the polynomial)
    on the polynomial used in lagrange extrapolation
    for reconstructing polynomial (POLY-3).
    - polynomial2 : Is the pre-evaluated value of the point on
    2nd polynomial(POLY-2). This is unique for each node.
    It is pre-evaluated for all the coefficients of POLY-2 except
    for the constant part of the polynomial that changes per packet
    and is received as part of the POT data in the packet.
    - bits-in-random : To control the size of the random number to be
    generated. This number has to match the other numbers generated and used
    in the profile as per the algorithm.

- **Set a configured profile as active/in-use** :  
set pot profile-active name <string> ID [0-1]  
    - name : Name of the profile list to be used for computing
    POT data per packet.
    - ID : Identifier of the profile within the list to be used.

#### On In-band OAM encapsulating node
 - Configure the classifier and apply ACL to select packets for iOAM data insertion.
    - Example to enable iOAM data insertion for all the packets towards
    IPv6 address db06::06 -


    vpp# classify table miss-next node ip6-lookup mask l3 ip6 dst

    vpp# classify session acl-hit-next node
    ip6-add-hop-by-hop table-index 0 match l3 ip6 dst db06::06

    vpp# set int input acl intfc GigabitEthernet0/0/0 ip6-table 0


 - Configure the proof of transit profile list with profiles.
Each profile list referred to by a name can contain 2 profiles,
only one is in use for updating proof of transit data at any time.
    - Example profile list example with a profile generated from the
    script to verify transit through 3 nodes is:


    vpp# set pot profile name example id 0 prime-number 0x7fff0000fa884685
    secret_share 0x6c22eff0f45ec56d lpc 0x7fff0000fa884682
    polynomial2 0xffb543d4a9c bits-in-random 63

 - Enable one of the profiles from the configured profile list as active
 so that it will be used for calculating proof of transit

Example enable profile ID 0 from profile list example configured above:


    vpp# set pot profile-active name example ID 0


 - Enable POT option to be inserted


    vpp# set ioam rewrite pot


#### On in-band OAM transit node
 - Configure the proof of transit profile list with profiles for transit node.
Example:


    vpp# set pot profile name example id 0 prime-number 0x7fff0000fa884685
    secret_share 0x564cdbdec4eb625d lpc 0x1
    polynomial2 0x23f3a227186a bits-in-random 63

#### On in-band OAM decapsulating node / verifier
- The decapsulating node, similar to the encapsulating node requires
classification of the packets to remove iOAM data from.
    - Example to decapsulate iOAM data for packets towards db06::06
    configure classifier and enable it as an ACL as follows:


    vpp# classify table miss-next node ip6-lookup mask l3 ip6 dst

    vpp# classify session acl-hit-next node ip6-lookup table-index 0
    match l3 ip6 dst db06::06 opaque-index 100

    vpp# set int input acl intfc GigabitEthernet0/0/0 ip6-table 0

- To update and verify the proof of transit, POT profile list should be configured.
    - Example POT profile list configured as follows:

    vpp# set pot profile name example id 0 validator-key 0x7fff0000fa88465d
    prime-number 0x7fff0000fa884685 secret_share 0x7a08fbfc5b93116d lpc 0x3
    polynomial2 0x3ff738597ce bits-in-random 63

## Operational data

Following CLIs are available to check iOAM operation:
- To check the iOAM configuration that is in effect, use "show ioam summary"  

Example:

    vpp# show ioam summary  
                  REWRITE FLOW CONFIGS - Not configured  
     HOP BY HOP OPTIONS - TRACE CONFIG -  
                            Trace Type : 0x1f (31)  
             Trace timestamp precision : 1 (Milliseconds)  
                    Num of trace nodes : 4  
                               Node-id : 0x2 (2)  
                              App Data : 0x1234 (4660)  
                            POT OPTION - 1 (Enabled)  
    Try 'show ioam pot and show pot profile' for more information  

- To find statistics about packets for which iOAM options were
added (encapsulating node) and removed (decapsulating node) execute
*show error*

Example on encapsulating node:


    vpp# show error
       Count                    Node                  Reason
    1208804706                ip6-inacl               input ACL hits
    1208804706           ip6-add-hop-by-hop           Pkts w/ added ip6 hop-by-hop options
    
Example on decapsulating node:

    vpp# show error
       Count                    Node                  Reason
      69508569                ip6-inacl               input ACL hits
      69508569           ip6-pop-hop-by-hop           Pkts w/ removed ip6 hop-by-hop options

- To check the POT profiles use "show pot profile"

Example:

    vpp# show pot profile
    Profile list in use  : example
    POT Profile at index: 0
                     ID : 0
              Validator : False (0)
           Secret share : 0x564cdbdec4eb625d (6218586935324795485)
           Prime number : 0x7fff0000fa884685 (9223090566081300101)
    2nd polynomial(eval) : 0x23f3a227186a (39529304496234)
                     LPC : 0x1 (1)
               Bit mask : 0x7fffffffffffffff (9223372036854775807)
    Profile index in use: 0
    Pkts passed : 0x36 (54)

- To get statistics  of POT for packets use "show ioam pot"

Example at encapsulating or transit node:

    vpp# show ioam pot
     Pkts with ip6 hop-by-hop POT options - 54
     Pkts with ip6 hop-by-hop POT options but no profile set - 0
     Pkts with POT in Policy - 0
     Pkts with POT out of Policy - 0
    

Example at decapsulating/verification node:


    vpp# show ioam pot
     Pkts with ip6 hop-by-hop POT options - 54
     Pkts with ip6 hop-by-hop POT options but no profile set - 0
     Pkts with POT in Policy - 54
     Pkts with POT out of Policy - 0
    
- Tracing - enable trace of IPv6 packets to view the data inserted and
collected.

Example when the nodes are receiving data over a DPDK interface:
Enable tracing using "trace add dpdk-input 20" and
execute "show trace" to view the iOAM data collected:

  
    vpp# trace add dpdk-input 20  
      
    vpp# show trace
    
    ------------------- Start of thread 0 vpp_main -------------------  
    
    Packet 1  
      
    00:00:19:294697: dpdk-input  
      GigabitEthernetb/0/0 rx queue 0  
      buffer 0x10e6b: current data 0, length 214, free-list 0, totlen-nifb 0, trace 0x0  
      PKT MBUF: port 0, nb_segs 1, pkt_len 214  
        buf_len 2176, data_len 214, ol_flags 0x0, data_off 128, phys_addr 0xe9a35a00  
        packet_type 0x0  
      IP6: 00:50:56:9c:df:72 -> 00:50:56:9c:be:55  
      IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6  
        tos 0x00, flow label 0x0, hop limit 63, payload length 160  
    00:00:19:294737: ethernet-input  
      IP6: 00:50:56:9c:df:72 -> 00:50:56:9c:be:55  
    00:00:19:294753: ip6-input  
      IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6  
        tos 0x00, flow label 0x0, hop limit 63, payload length 160  
    00:00:19:294757: ip6-lookup  
      fib 0 adj-idx 15 : indirect via db05::2 flow hash: 0x00000000  
      IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6  
        tos 0x00, flow label 0x0, hop limit 63, payload length 160  
    00:00:19:294802: ip6-hop-by-hop  
      IP6_HOP_BY_HOP: next index 5 len 96 traced 96  Trace Type 0x1f , 1 elts left  
        [0] ttl 0x0 node ID 0x0 ingress 0x0 egress 0x0 ts 0x0  
    app 0x0  
        [1] ttl 0x3e node ID 0x3 ingress 0x1 egress 0x2 ts 0xb68c2213  
    app 0x1234  
        [2] ttl 0x3f node ID 0x2 ingress 0x1 egress 0x2 ts 0xb68c2204  
    app 0x1234  
        [3] ttl 0x40 node ID 0x1 ingress 0x5 egress 0x6 ts 0xb68c2200  
    app 0x1234  
        POT opt present  
             random = 0x577a916946071950, Cumulative = 0x10b46e78a35a392d, Index = 0x0  
    00:00:19:294810: ip6-rewrite  
      tx_sw_if_index 1 adj-idx 14 : GigabitEthernetb/0/0  
                                    IP6: 00:50:56:9c:be:55 -> 00:50:56:9c:df:72 flow hash: 0x00000000  
      IP6: 00:50:56:9c:be:55 -> 00:50:56:9c:df:72  
      IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6  
        tos 0x00, flow label 0x0, hop limit 62, payload length 160  
    00:00:19:294814: GigabitEthernetb/0/0-output  
      GigabitEthernetb/0/0  
      IP6: 00:50:56:9c:be:55 -> 00:50:56:9c:df:72  
      IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6  
        tos 0x00, flow label 0x0, hop limit 62, payload length 160  
    00:00:19:294820: GigabitEthernetb/0/0-tx    
      GigabitEthernetb/0/0 tx queue 0    
      buffer 0x10e6b: current data 0, length 214, free-list 0, totlen-nifb 0, trace 0x0    
      IP6: 00:50:56:9c:be:55 -> 00:50:56:9c:df:72
      
      IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6
      
        tos 0x00, flow label 0x0, hop limit 62, payload length 160  
    

[iOAM-Devnet]: <https://github.com/ciscodevnet/iOAM>
[iOAM-ietf-requirements]:<https://tools.ietf.org/html/draft-brockners-inband-oam-requirements-01>
[iOAM-ietf-transport]:<https://tools.ietf.org/html/draft-brockners-inband-oam-transport-01>
[iOAM-ietf-data]:<https://tools.ietf.org/html/draft-brockners-inband-oam-data-01>
[iOAM-ietf-proof-of-transit]:<https://tools.ietf.org/html/draft-brockners-proof-of-transit-01>
_TX] = sm->outside_fib_index; /* Accounting */ s->last_heard = now; s->total_pkts++; s->total_bytes += vlib_buffer_length_in_chain (vm, b); return s; } static inline uword snat_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame, int is_slow_path, int is_output_feature) { u32 n_left_from, * from, * to_next; snat_in2out_next_t next_index; u32 pkts_processed = 0; snat_main_t * sm = &snat_main; f64 now = vlib_time_now (vm); u32 stats_node_index; u32 thread_index = vlib_get_thread_index (); stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index : snat_in2out_node.index; from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; next_index = node->cached_next_index; while (n_left_from > 0) { u32 n_left_to_next; vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); while (n_left_from >= 4 && n_left_to_next >= 2) { u32 bi0, bi1; vlib_buffer_t * b0, * b1; u32 next0, next1; u32 sw_if_index0, sw_if_index1; ip4_header_t * ip0, * ip1; ip_csum_t sum0, sum1; u32 new_addr0, old_addr0, new_addr1, old_addr1; u16 old_port0, new_port0, old_port1, new_port1; udp_header_t * udp0, * udp1; tcp_header_t * tcp0, * tcp1; icmp46_header_t * icmp0, * icmp1; snat_session_key_t key0, key1; u32 rx_fib_index0, rx_fib_index1; u32 proto0, proto1; snat_session_t * s0 = 0, * s1 = 0; clib_bihash_kv_8_8_t kv0, value0, kv1, value1; u32 iph_offset0 = 0, iph_offset1 = 0; /* Prefetch next iteration. 
*/ { vlib_buffer_t * p2, * p3; p2 = vlib_get_buffer (vm, from[2]); p3 = vlib_get_buffer (vm, from[3]); vlib_prefetch_buffer_header (p2, LOAD); vlib_prefetch_buffer_header (p3, LOAD); CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); } /* speculatively enqueue b0 and b1 to the current next frame */ to_next[0] = bi0 = from[0]; to_next[1] = bi1 = from[1]; from += 2; to_next += 2; n_left_from -= 2; n_left_to_next -= 2; b0 = vlib_get_buffer (vm, bi0); b1 = vlib_get_buffer (vm, bi1); if (is_output_feature) iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length; ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + iph_offset0); udp0 = ip4_next_header (ip0); tcp0 = (tcp_header_t *) udp0; icmp0 = (icmp46_header_t *) udp0; sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, sw_if_index0); next0 = next1 = SNAT_IN2OUT_NEXT_LOOKUP; if (PREDICT_FALSE(ip0->ttl == 1)) { vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded, ICMP4_time_exceeded_ttl_exceeded_in_transit, 0); next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR; goto trace00; } proto0 = ip_proto_to_snat_proto (ip0->protocol); /* Next configured feature, probably ip4-lookup */ if (is_slow_path) { if (PREDICT_FALSE (proto0 == ~0)) { s0 = snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0, thread_index, now, vm, node); if (!s0) next0 = SNAT_IN2OUT_NEXT_DROP; goto trace00; } if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) { next0 = icmp_in2out_slow_path (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, next0, now, thread_index, &s0); goto trace00; } } else { if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP)) { next0 = SNAT_IN2OUT_NEXT_SLOW_PATH; goto trace00; } if (ip4_is_fragment (ip0)) { next0 = SNAT_IN2OUT_NEXT_REASS; goto trace00; } } key0.addr = ip0->src_address; key0.port = udp0->src_port; key0.protocol = proto0; 
key0.fib_index = rx_fib_index0; kv0.key = key0.as_u64; if (PREDICT_FALSE (clib_bihash_search_8_8 ( &sm->per_thread_data[thread_index].in2out, &kv0, &value0) != 0)) { if (is_slow_path) { if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0, proto0, rx_fib_index0, thread_index)) && !is_output_feature) goto trace00; next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0, &s0, node, next0, thread_index); if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP)) goto trace00; } else { next0 = SNAT_IN2OUT_NEXT_SLOW_PATH; goto trace00; } } else { if (PREDICT_FALSE (value0.value == ~0ULL)) { if (is_slow_path) { s0 = snat_in2out_lb(sm, b0, ip0, rx_fib_index0, thread_index, now, vm, node); if (!s0) next0 = SNAT_IN2OUT_NEXT_DROP; goto trace00; } else { next0 = SNAT_IN2OUT_NEXT_SLOW_PATH; goto trace00; } } else { s0 = pool_elt_at_index ( sm->per_thread_data[thread_index].sessions, value0.value); } } b0->flags |= VNET_BUFFER_F_IS_NATED; old_addr0 = ip0->src_address.as_u32; ip0->src_address = s0->out2in.addr; new_addr0 = ip0->src_address.as_u32; if (!is_output_feature) vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index; sum0 = ip0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, src_address /* changed member */); ip0->checksum = ip_csum_fold (sum0); if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { old_port0 = tcp0->src_port; tcp0->src_port = s0->out2in.port; new_port0 = tcp0->src_port; sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, dst_address /* changed member */); sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t /* cheat */, length /* changed member */); tcp0->checksum = ip_csum_fold(sum0); } else { old_port0 = udp0->src_port; udp0->src_port = s0->out2in.port; udp0->checksum = 0; } /* Accounting */ s0->last_heard = now; s0->total_pkts++; s0->total_bytes += vlib_buffer_length_in_chain (vm, b0); /* Per-user LRU list maintenance for dynamic translation */ if (!snat_is_session_static 
(s0)) { clib_dlist_remove (sm->per_thread_data[thread_index].list_pool, s0->per_user_index); clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool, s0->per_user_list_head_index, s0->per_user_index); } trace00: if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) && (b0->flags & VLIB_BUFFER_IS_TRACED))) { snat_in2out_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); t->is_slow_path = is_slow_path; t->sw_if_index = sw_if_index0; t->next_index = next0; t->session_index = ~0; if (s0) t->session_index = s0 - sm->per_thread_data[thread_index].sessions; } pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP; if (is_output_feature) iph_offset1 = vnet_buffer (b1)->ip.save_rewrite_length; ip1 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b1) + iph_offset1); udp1 = ip4_next_header (ip1); tcp1 = (tcp_header_t *) udp1; icmp1 = (icmp46_header_t *) udp1; sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, sw_if_index1); if (PREDICT_FALSE(ip1->ttl == 1)) { vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0; icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded, ICMP4_time_exceeded_ttl_exceeded_in_transit, 0); next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR; goto trace01; } proto1 = ip_proto_to_snat_proto (ip1->protocol); /* Next configured feature, probably ip4-lookup */ if (is_slow_path) { if (PREDICT_FALSE (proto1 == ~0)) { s1 = snat_in2out_unknown_proto (sm, b1, ip1, rx_fib_index1, thread_index, now, vm, node); if (!s1) next1 = SNAT_IN2OUT_NEXT_DROP; goto trace01; } if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP)) { next1 = icmp_in2out_slow_path (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node, next1, now, thread_index, &s1); goto trace01; } } else { if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP)) { next1 = SNAT_IN2OUT_NEXT_SLOW_PATH; goto trace01; } if (ip4_is_fragment (ip1)) { next1 = SNAT_IN2OUT_NEXT_REASS; goto trace01; } } b1->flags |= VNET_BUFFER_F_IS_NATED; key1.addr = 
ip1->src_address; key1.port = udp1->src_port; key1.protocol = proto1; key1.fib_index = rx_fib_index1; kv1.key = key1.as_u64; if (PREDICT_FALSE(clib_bihash_search_8_8 ( &sm->per_thread_data[thread_index].in2out, &kv1, &value1) != 0)) { if (is_slow_path) { if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index1, ip1, proto1, rx_fib_index1, thread_index)) && !is_output_feature) goto trace01; next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1, &s1, node, next1, thread_index); if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP)) goto trace01; } else { next1 = SNAT_IN2OUT_NEXT_SLOW_PATH; goto trace01; } } else { if (PREDICT_FALSE (value1.value == ~0ULL)) { if (is_slow_path) { s1 = snat_in2out_lb(sm, b1, ip1, rx_fib_index1, thread_index, now, vm, node); if (!s1) next1 = SNAT_IN2OUT_NEXT_DROP; goto trace01; } else { next1 = SNAT_IN2OUT_NEXT_SLOW_PATH; goto trace01; } } else { s1 = pool_elt_at_index ( sm->per_thread_data[thread_index].sessions, value1.value); } } old_addr1 = ip1->src_address.as_u32; ip1->src_address = s1->out2in.addr; new_addr1 = ip1->src_address.as_u32; if (!is_output_feature) vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index; sum1 = ip1->checksum; sum1 = ip_csum_update (sum1, old_addr1, new_addr1, ip4_header_t, src_address /* changed member */); ip1->checksum = ip_csum_fold (sum1); if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP)) { old_port1 = tcp1->src_port; tcp1->src_port = s1->out2in.port; new_port1 = tcp1->src_port; sum1 = tcp1->checksum; sum1 = ip_csum_update (sum1, old_addr1, new_addr1, ip4_header_t, dst_address /* changed member */); sum1 = ip_csum_update (sum1, old_port1, new_port1, ip4_header_t /* cheat */, length /* changed member */); tcp1->checksum = ip_csum_fold(sum1); } else { old_port1 = udp1->src_port; udp1->src_port = s1->out2in.port; udp1->checksum = 0; } /* Accounting */ s1->last_heard = now; s1->total_pkts++; s1->total_bytes += vlib_buffer_length_in_chain (vm, b1); /* Per-user LRU list maintenance for dynamic translation 
*/ if (!snat_is_session_static (s1)) { clib_dlist_remove (sm->per_thread_data[thread_index].list_pool, s1->per_user_index); clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool, s1->per_user_list_head_index, s1->per_user_index); } trace01: if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) && (b1->flags & VLIB_BUFFER_IS_TRACED))) { snat_in2out_trace_t *t = vlib_add_trace (vm, node, b1, sizeof (*t)); t->sw_if_index = sw_if_index1; t->next_index = next1; t->session_index = ~0; if (s1) t->session_index = s1 - sm->per_thread_data[thread_index].sessions; } pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP; /* verify speculative enqueues, maybe switch current next frame */ vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, n_left_to_next, bi0, bi1, next0, next1); } while (n_left_from > 0 && n_left_to_next > 0) { u32 bi0; vlib_buffer_t * b0; u32 next0; u32 sw_if_index0; ip4_header_t * ip0; ip_csum_t sum0; u32 new_addr0, old_addr0; u16 old_port0, new_port0; udp_header_t * udp0; tcp_header_t * tcp0; icmp46_header_t * icmp0; snat_session_key_t key0; u32 rx_fib_index0; u32 proto0; snat_session_t * s0 = 0; clib_bihash_kv_8_8_t kv0, value0; u32 iph_offset0 = 0; /* speculatively enqueue b0 to the current next frame */ bi0 = from[0]; to_next[0] = bi0; from += 1; to_next += 1; n_left_from -= 1; n_left_to_next -= 1; b0 = vlib_get_buffer (vm, bi0); next0 = SNAT_IN2OUT_NEXT_LOOKUP; if (is_output_feature) iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length; ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + iph_offset0); udp0 = ip4_next_header (ip0); tcp0 = (tcp_header_t *) udp0; icmp0 = (icmp46_header_t *) udp0; sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, sw_if_index0); if (PREDICT_FALSE(ip0->ttl == 1)) { vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded, ICMP4_time_exceeded_ttl_exceeded_in_transit, 0); next0 = 
SNAT_IN2OUT_NEXT_ICMP_ERROR; goto trace0; } proto0 = ip_proto_to_snat_proto (ip0->protocol); /* Next configured feature, probably ip4-lookup */ if (is_slow_path) { if (PREDICT_FALSE (proto0 == ~0)) { s0 = snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0, thread_index, now, vm, node); if (!s0) next0 = SNAT_IN2OUT_NEXT_DROP; goto trace0; } if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) { next0 = icmp_in2out_slow_path (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, next0, now, thread_index, &s0); goto trace0; } } else { if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP)) { next0 = SNAT_IN2OUT_NEXT_SLOW_PATH; goto trace0; } if (ip4_is_fragment (ip0)) { next0 = SNAT_IN2OUT_NEXT_REASS; goto trace0; } } key0.addr = ip0->src_address; key0.port = udp0->src_port; key0.protocol = proto0; key0.fib_index = rx_fib_index0; kv0.key = key0.as_u64; if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0, &value0)) { if (is_slow_path) { if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0, proto0, rx_fib_index0, thread_index)) && !is_output_feature) goto trace0; next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0, &s0, node, next0, thread_index); if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP)) goto trace0; } else { next0 = SNAT_IN2OUT_NEXT_SLOW_PATH; goto trace0; } } else { if (PREDICT_FALSE (value0.value == ~0ULL)) { if (is_slow_path) { s0 = snat_in2out_lb(sm, b0, ip0, rx_fib_index0, thread_index, now, vm, node); if (!s0) next0 = SNAT_IN2OUT_NEXT_DROP; goto trace0; } else { next0 = SNAT_IN2OUT_NEXT_SLOW_PATH; goto trace0; } } else { s0 = pool_elt_at_index ( sm->per_thread_data[thread_index].sessions, value0.value); } } b0->flags |= VNET_BUFFER_F_IS_NATED; old_addr0 = ip0->src_address.as_u32; ip0->src_address = s0->out2in.addr; new_addr0 = ip0->src_address.as_u32; if (!is_output_feature) vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index; sum0 = ip0->checksum; sum0 = ip_csum_update (sum0, old_addr0, 
new_addr0, ip4_header_t, src_address /* changed member */); ip0->checksum = ip_csum_fold (sum0); if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { old_port0 = tcp0->src_port; tcp0->src_port = s0->out2in.port; new_port0 = tcp0->src_port; sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, dst_address /* changed member */); sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t /* cheat */, length /* changed member */); tcp0->checksum = ip_csum_fold(sum0); } else { old_port0 = udp0->src_port; udp0->src_port = s0->out2in.port; udp0->checksum = 0; } /* Accounting */ s0->last_heard = now; s0->total_pkts++; s0->total_bytes += vlib_buffer_length_in_chain (vm, b0); /* Per-user LRU list maintenance for dynamic translation */ if (!snat_is_session_static (s0)) { clib_dlist_remove (sm->per_thread_data[thread_index].list_pool, s0->per_user_index); clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool, s0->per_user_list_head_index, s0->per_user_index); } trace0: if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) && (b0->flags & VLIB_BUFFER_IS_TRACED))) { snat_in2out_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); t->is_slow_path = is_slow_path; t->sw_if_index = sw_if_index0; t->next_index = next0; t->session_index = ~0; if (s0) t->session_index = s0 - sm->per_thread_data[thread_index].sessions; } pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP; /* verify speculative enqueue, maybe switch current next frame */ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, bi0, next0); } vlib_put_next_frame (vm, node, next_index, n_left_to_next); } vlib_node_increment_counter (vm, stats_node_index, SNAT_IN2OUT_ERROR_IN2OUT_PACKETS, pkts_processed); return frame->n_vectors; } static uword snat_in2out_fast_path_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */, 0); } VLIB_REGISTER_NODE (snat_in2out_node) 
/* Graph-node registrations for the four in2out variants (fast/slow path,
 * each with a plain and an interface-output-feature flavor), plus their
 * thin entry-point wrappers around snat_in2out_node_fn_inline.  The
 * output-feature variants route SNAT_IN2OUT_NEXT_LOOKUP to
 * "interface-output" instead of "ip4-lookup". */
= { .function = snat_in2out_fast_path_fn, .name = "nat44-in2out", .vector_size = sizeof (u32), .format_trace = format_snat_in2out_trace, .type = VLIB_NODE_TYPE_INTERNAL, .n_errors = ARRAY_LEN(snat_in2out_error_strings), .error_strings = snat_in2out_error_strings, .runtime_data_bytes = sizeof (snat_runtime_t), .n_next_nodes = SNAT_IN2OUT_N_NEXT, /* edit / add dispositions here */ .next_nodes = { [SNAT_IN2OUT_NEXT_DROP] = "error-drop", [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup", [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath", [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error", [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass", }, }; VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_node, snat_in2out_fast_path_fn); static uword snat_in2out_output_fast_path_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */, 1); } VLIB_REGISTER_NODE (snat_in2out_output_node) = { .function = snat_in2out_output_fast_path_fn, .name = "nat44-in2out-output", .vector_size = sizeof (u32), .format_trace = format_snat_in2out_trace, .type = VLIB_NODE_TYPE_INTERNAL, .n_errors = ARRAY_LEN(snat_in2out_error_strings), .error_strings = snat_in2out_error_strings, .runtime_data_bytes = sizeof (snat_runtime_t), .n_next_nodes = SNAT_IN2OUT_N_NEXT, /* edit / add dispositions here */ .next_nodes = { [SNAT_IN2OUT_NEXT_DROP] = "error-drop", [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output", [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath", [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error", [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass", }, }; VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_node, snat_in2out_output_fast_path_fn); static uword snat_in2out_slow_path_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */, 0); } VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = { .function = snat_in2out_slow_path_fn, .name =
/* Slow-path and output-slow-path registrations; then nat44-hairpinning, a
 * feature on the ip4-local arc that redirects locally-destined traffic whose
 * destination is behind the same NAT (see snat_hairpinning call below). */
"nat44-in2out-slowpath", .vector_size = sizeof (u32), .format_trace = format_snat_in2out_trace, .type = VLIB_NODE_TYPE_INTERNAL, .n_errors = ARRAY_LEN(snat_in2out_error_strings), .error_strings = snat_in2out_error_strings, .runtime_data_bytes = sizeof (snat_runtime_t), .n_next_nodes = SNAT_IN2OUT_N_NEXT, /* edit / add dispositions here */ .next_nodes = { [SNAT_IN2OUT_NEXT_DROP] = "error-drop", [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup", [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath", [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error", [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass", }, }; VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node, snat_in2out_slow_path_fn); static uword snat_in2out_output_slow_path_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */, 1); } VLIB_REGISTER_NODE (snat_in2out_output_slowpath_node) = { .function = snat_in2out_output_slow_path_fn, .name = "nat44-in2out-output-slowpath", .vector_size = sizeof (u32), .format_trace = format_snat_in2out_trace, .type = VLIB_NODE_TYPE_INTERNAL, .n_errors = ARRAY_LEN(snat_in2out_error_strings), .error_strings = snat_in2out_error_strings, .runtime_data_bytes = sizeof (snat_runtime_t), .n_next_nodes = SNAT_IN2OUT_N_NEXT, /* edit / add dispositions here */ .next_nodes = { [SNAT_IN2OUT_NEXT_DROP] = "error-drop", [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output", [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath", [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error", [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass", }, }; VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_slowpath_node, snat_in2out_output_slow_path_fn); extern vnet_feature_arc_registration_t vnet_feat_arc_ip4_local; static uword nat44_hairpinning_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { u32 n_left_from, * from, * to_next; snat_in2out_next_t next_index; u32 pkts_processed = 0; snat_main_t * sm = &snat_main;
/* Body of nat44-hairpinning: for each packet, look up the next feature on
 * the ip4-local arc via vnet_get_config_data; if snat_hairpinning() rewrote
 * the destination, short-circuit to ip4-lookup instead.  Followed by the
 * node registration and the start of nat44_reass_hairpinning, the hairpin
 * helper used by the fragment-reassembly path. */
vnet_feature_main_t *fm = &feature_main; u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index; vnet_feature_config_main_t *cm = &fm->feature_config_mains[arc_index]; from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; next_index = node->cached_next_index; while (n_left_from > 0) { u32 n_left_to_next; vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); while (n_left_from > 0 && n_left_to_next > 0) { u32 bi0; vlib_buffer_t * b0; u32 next0; ip4_header_t * ip0; u32 proto0; udp_header_t * udp0; tcp_header_t * tcp0; /* speculatively enqueue b0 to the current next frame */ bi0 = from[0]; to_next[0] = bi0; from += 1; to_next += 1; n_left_from -= 1; n_left_to_next -= 1; b0 = vlib_get_buffer (vm, bi0); ip0 = vlib_buffer_get_current (b0); udp0 = ip4_next_header (ip0); tcp0 = (tcp_header_t *) udp0; proto0 = ip_proto_to_snat_proto (ip0->protocol); vnet_get_config_data (&cm->config_main, &b0->current_config_index, &next0, 0); if (snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0)) next0 = SNAT_IN2OUT_NEXT_LOOKUP; pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP; /* verify speculative enqueue, maybe switch current next frame */ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, bi0, next0); } vlib_put_next_frame (vm, node, next_index, n_left_to_next); } vlib_node_increment_counter (vm, nat44_hairpinning_node.index, SNAT_IN2OUT_ERROR_IN2OUT_PACKETS, pkts_processed); return frame->n_vectors; } VLIB_REGISTER_NODE (nat44_hairpinning_node) = { .function = nat44_hairpinning_fn, .name = "nat44-hairpinning", .vector_size = sizeof (u32), .type = VLIB_NODE_TYPE_INTERNAL, .n_errors = ARRAY_LEN(snat_in2out_error_strings), .error_strings = snat_in2out_error_strings, .n_next_nodes = 2, .next_nodes = { [SNAT_IN2OUT_NEXT_DROP] = "error-drop", [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup", }, }; VLIB_NODE_FUNCTION_MULTIARCH (nat44_hairpinning_node, nat44_hairpinning_fn); static inline void nat44_reass_hairpinning (snat_main_t *sm,
/* nat44_reass_hairpinning body: decides whether the packet's destination
 * (dport in host context taken from the caller) is behind this NAT, first
 * via static-mapping match, otherwise via an out2in session lookup on the
 * owning worker (ti derived from dst_port when num_workers > 1).  If so,
 * rewrites the destination address (all fragments) and destination port
 * (first fragment only) with incremental checksum updates; non-first TCP
 * fragments fall through to the trailing else below, which fixes up only
 * the pseudo-header part of the TCP checksum. */
vlib_buffer_t * b0, ip4_header_t * ip0, u16 sport, u16 dport, u32 proto0) { snat_session_key_t key0, sm0; snat_session_t * s0; clib_bihash_kv_8_8_t kv0, value0; ip_csum_t sum0; u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si; u16 new_dst_port0, old_dst_port0; udp_header_t * udp0; tcp_header_t * tcp0; key0.addr = ip0->dst_address; key0.port = dport; key0.protocol = proto0; key0.fib_index = sm->outside_fib_index; kv0.key = key0.as_u64; udp0 = ip4_next_header (ip0); /* Check if destination is static mappings */ if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0)) { new_dst_addr0 = sm0.addr.as_u32; new_dst_port0 = sm0.port; vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index; } /* or active sessions */ else { if (sm->num_workers > 1) ti = (clib_net_to_host_u16 (udp0->dst_port) - 1024) / sm->port_per_thread; else ti = sm->num_workers; if (!clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0, &value0)) { si = value0.value; s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si); new_dst_addr0 = s0->in2out.addr.as_u32; new_dst_port0 = s0->in2out.port; vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index; } } /* Destination is behind the same NAT, use internal address and port */ if (new_dst_addr0) { old_dst_addr0 = ip0->dst_address.as_u32; ip0->dst_address.as_u32 = new_dst_addr0; sum0 = ip0->checksum; sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0, ip4_header_t, dst_address); ip0->checksum = ip_csum_fold (sum0); old_dst_port0 = dport; if (PREDICT_TRUE(new_dst_port0 != old_dst_port0 && ip4_is_first_fragment (ip0))) { if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { tcp0 = ip4_next_header (ip0); tcp0->dst = new_dst_port0; sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0, ip4_header_t, dst_address); sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0, ip4_header_t /* cheat */, length); tcp0->checksum = ip_csum_fold(sum0); } else { udp0->dst_port = new_dst_port0; udp0->checksum = 0; } } else { if
/* Tail of nat44_reass_hairpinning (TCP checksum fix-up when the port did not
 * change or the packet is a non-first fragment), then the start of the
 * nat44-in2out-reass node: translates fragmented packets by caching the
 * session resolved on the first fragment (reass0->sess_index) and re-playing
 * cached fragments through the node via fragments_to_loopback. */
(PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { tcp0 = ip4_next_header (ip0); sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0, ip4_header_t, dst_address); tcp0->checksum = ip_csum_fold(sum0); } } } } static uword nat44_in2out_reass_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { u32 n_left_from, *from, *to_next; snat_in2out_next_t next_index; u32 pkts_processed = 0; snat_main_t *sm = &snat_main; f64 now = vlib_time_now (vm); u32 thread_index = vlib_get_thread_index (); snat_main_per_thread_data_t *per_thread_data = &sm->per_thread_data[thread_index]; u32 *fragments_to_drop = 0; u32 *fragments_to_loopback = 0; from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; next_index = node->cached_next_index; while (n_left_from > 0) { u32 n_left_to_next; vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); while (n_left_from > 0 && n_left_to_next > 0) { u32 bi0, sw_if_index0, proto0, rx_fib_index0, new_addr0, old_addr0; vlib_buffer_t *b0; u32 next0; u8 cached0 = 0; ip4_header_t *ip0; nat_reass_ip4_t *reass0; udp_header_t * udp0; tcp_header_t * tcp0; snat_session_key_t key0; clib_bihash_kv_8_8_t kv0, value0; snat_session_t * s0 = 0; u16 old_port0, new_port0; ip_csum_t sum0; /* speculatively enqueue b0 to the current next frame */ bi0 = from[0]; to_next[0] = bi0; from += 1; to_next += 1; n_left_from -= 1; n_left_to_next -= 1; b0 = vlib_get_buffer (vm, bi0); next0 = SNAT_IN2OUT_NEXT_LOOKUP; sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index0); if (PREDICT_FALSE (nat_reass_is_drop_frag(0))) { next0 = SNAT_IN2OUT_NEXT_DROP; b0->error = node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT]; goto trace0; } ip0 = (ip4_header_t *) vlib_buffer_get_current (b0); udp0 = ip4_next_header (ip0); tcp0 = (tcp_header_t *) udp0; proto0 = ip_proto_to_snat_proto (ip0->protocol); reass0 = nat_ip4_reass_find_or_create
/* First fragment: do the normal in2out session lookup (or create one via
 * slow_path) and record the session in the reassembly context; any fragments
 * cached while waiting are re-queued via fragments_to_loopback.  Non-first
 * fragment with no session yet: cache the buffer (cached0 = 1, buffer is
 * un-enqueued in the trace0 epilogue below).  Otherwise translate using the
 * recorded session. */
(ip0->src_address, ip0->dst_address, ip0->fragment_id, ip0->protocol, 1, &fragments_to_drop); if (PREDICT_FALSE (!reass0)) { next0 = SNAT_IN2OUT_NEXT_DROP; b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_REASS]; goto trace0; } if (PREDICT_FALSE (ip4_is_first_fragment (ip0))) { key0.addr = ip0->src_address; key0.port = udp0->src_port; key0.protocol = proto0; key0.fib_index = rx_fib_index0; kv0.key = key0.as_u64; if (clib_bihash_search_8_8 (&per_thread_data->in2out, &kv0, &value0)) { if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0, proto0, rx_fib_index0, thread_index))) goto trace0; next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0, &s0, node, next0, thread_index); if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP)) goto trace0; reass0->sess_index = s0 - per_thread_data->sessions; } else { s0 = pool_elt_at_index (per_thread_data->sessions, value0.value); reass0->sess_index = value0.value; } nat_ip4_reass_get_frags (reass0, &fragments_to_loopback); } else { if (PREDICT_FALSE (reass0->sess_index == (u32) ~0)) { if (nat_ip4_reass_add_fragment (reass0, bi0)) { b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_FRAG]; next0 = SNAT_IN2OUT_NEXT_DROP; goto trace0; } cached0 = 1; goto trace0; } s0 = pool_elt_at_index (per_thread_data->sessions, reass0->sess_index); } old_addr0 = ip0->src_address.as_u32; ip0->src_address = s0->out2in.addr; new_addr0 = ip0->src_address.as_u32; vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index; sum0 = ip0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, src_address /* changed member */); ip0->checksum = ip_csum_fold (sum0); if (PREDICT_FALSE (ip4_is_first_fragment (ip0))) { if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { old_port0 = tcp0->src_port; tcp0->src_port = s0->out2in.port; new_port0 = tcp0->src_port; sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, dst_address /* changed member */); sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t
/* Epilogue: hairpin check, accounting, per-user LRU, trace; cached buffers
 * are pulled back off the to_next frame (n_left_to_next++/to_next--); when
 * the input frame drains, looped-back fragments are fed in again, at most
 * VLIB_FRAME_SIZE at a time. */
/* cheat */, length /* changed member */); tcp0->checksum = ip_csum_fold(sum0); } else { old_port0 = udp0->src_port; udp0->src_port = s0->out2in.port; udp0->checksum = 0; } } /* Hairpinning */ nat44_reass_hairpinning (sm, b0, ip0, s0->out2in.port, s0->ext_host_port, proto0); /* Accounting */ s0->last_heard = now; s0->total_pkts++; s0->total_bytes += vlib_buffer_length_in_chain (vm, b0); /* Per-user LRU list maintenance for dynamic translation */ if (!snat_is_session_static (s0)) { clib_dlist_remove (sm->per_thread_data[thread_index].list_pool, s0->per_user_index); clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool, s0->per_user_list_head_index, s0->per_user_index); } trace0: if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) && (b0->flags & VLIB_BUFFER_IS_TRACED))) { nat44_in2out_reass_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); t->cached = cached0; t->sw_if_index = sw_if_index0; t->next_index = next0; } if (cached0) { n_left_to_next++; to_next--; } else { pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP; /* verify speculative enqueue, maybe switch current next frame */ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, bi0, next0); } if (n_left_from == 0 && vec_len (fragments_to_loopback)) { from = vlib_frame_vector_args (frame); u32 len = vec_len (fragments_to_loopback); if (len <= VLIB_FRAME_SIZE) { clib_memcpy (from, fragments_to_loopback, sizeof (u32) * len); n_left_from = len; vec_reset_length (fragments_to_loopback); } else { clib_memcpy (from, fragments_to_loopback + (len - VLIB_FRAME_SIZE), sizeof (u32) * VLIB_FRAME_SIZE); n_left_from = VLIB_FRAME_SIZE; _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE; } } } vlib_put_next_frame (vm, node, next_index, n_left_to_next); } vlib_node_increment_counter (vm, nat44_in2out_reass_node.index, SNAT_IN2OUT_ERROR_IN2OUT_PACKETS, pkts_processed); nat_send_all_to_node (vm, fragments_to_drop, node, &node->errors[SNAT_IN2OUT_ERROR_DROP_FRAGMENT],
SNAT_IN2OUT_NEXT_DROP); vec_free (fragments_to_drop); vec_free (fragments_to_loopback); return frame->n_vectors; } VLIB_REGISTER_NODE (nat44_in2out_reass_node) = { .function = nat44_in2out_reass_node_fn, .name = "nat44-in2out-reass", .vector_size = sizeof (u32), .format_trace = format_nat44_in2out_reass_trace, .type = VLIB_NODE_TYPE_INTERNAL, .n_errors = ARRAY_LEN(snat_in2out_error_strings), .error_strings = snat_in2out_error_strings, .n_next_nodes = SNAT_IN2OUT_N_NEXT, .next_nodes = { [SNAT_IN2OUT_NEXT_DROP] = "error-drop", [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup", [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath", [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error", [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass", }, }; VLIB_NODE_FUNCTION_MULTIARCH (nat44_in2out_reass_node, nat44_in2out_reass_node_fn); /**************************/ /*** deterministic mode ***/ /**************************/ static uword snat_det_in2out_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { u32 n_left_from, * from, * to_next; snat_in2out_next_t next_index; u32 pkts_processed = 0; snat_main_t * sm = &snat_main; u32 now = (u32) vlib_time_now (vm); u32 thread_index = vlib_get_thread_index (); from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; next_index = node->cached_next_index; while (n_left_from > 0) { u32 n_left_to_next; vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); while (n_left_from >= 4 && n_left_to_next >= 2) { u32 bi0, bi1; vlib_buffer_t * b0, * b1; u32 next0, next1; u32 sw_if_index0, sw_if_index1; ip4_header_t * ip0, * ip1; ip_csum_t sum0, sum1; ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1; u16 old_port0, new_port0, lo_port0, i0; u16 old_port1, new_port1, lo_port1, i1; udp_header_t * udp0, * udp1; tcp_header_t * tcp0, * tcp1; u32 proto0, proto1; snat_det_out_key_t key0, key1; snat_det_map_t * dm0, * dm1; snat_det_session_t * ses0 = 0, * ses1 = 0; u32 rx_fib_index0, rx_fib_index1; icmp46_header_t * 
icmp0, * icmp1;

          /* Prefetch next iteration. */
          {
            vlib_buffer_t * p2, * p3;

            p2 = vlib_get_buffer (vm, from[2]);
            p3 = vlib_get_buffer (vm, from[3]);

            vlib_prefetch_buffer_header (p2, LOAD);
            vlib_prefetch_buffer_header (p3, LOAD);

            CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
            CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
          }

          /* speculatively enqueue b0 and b1 to the current next frame */
          to_next[0] = bi0 = from[0];
          to_next[1] = bi1 = from[1];
          from += 2;
          to_next += 2;
          n_left_from -= 2;
          n_left_to_next -= 2;

          b0 = vlib_get_buffer (vm, bi0);
          b1 = vlib_get_buffer (vm, bi1);

          next0 = SNAT_IN2OUT_NEXT_LOOKUP;
          next1 = SNAT_IN2OUT_NEXT_LOOKUP;

          /* udp0/tcp0 alias the same L4 header; which one is valid is
             decided later by proto0. */
          ip0 = vlib_buffer_get_current (b0);
          udp0 = ip4_next_header (ip0);
          tcp0 = (tcp_header_t *) udp0;

          sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];

          /* TTL about to expire: hand off to ICMP error node. */
          if (PREDICT_FALSE(ip0->ttl == 1))
            {
              vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
              icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
                                           ICMP4_time_exceeded_ttl_exceeded_in_transit,
                                           0);
              next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
              goto trace0;
            }

          proto0 = ip_proto_to_snat_proto (ip0->protocol);

          /* ICMP gets its own translation path (identifier-based). */
          if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
            {
              rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
              icmp0 = (icmp46_header_t *) udp0;

              next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
                                  rx_fib_index0, node, next0, thread_index,
                                  &ses0, &dm0);
              goto trace0;
            }

          /* Find the deterministic map covering this inside address. */
          dm0 = snat_det_map_by_user(sm, &ip0->src_address);
          if (PREDICT_FALSE(!dm0))
            {
              clib_warning("no match for internal host %U",
                           format_ip4_address, &ip0->src_address);
              next0 = SNAT_IN2OUT_NEXT_DROP;
              b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
              goto trace0;
            }

          /* Deterministically derive outside address and the base of the
             per-host outside port range. */
          snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);

          key0.ext_host_addr = ip0->dst_address;
          key0.ext_host_port = tcp0->dst;

          ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src,
                                         key0);
          if (PREDICT_FALSE(!ses0))
            {
              /* No session yet: probe the host's port block (starting at a
                 source-port-derived offset) for a free outside port. */
              for (i0 = 0; i0 < dm0->ports_per_host; i0++)
                {
                  key0.out_port = clib_host_to_net_u16 (lo_port0 +
                    ((i0 +
udp0->src_port; udp0->src_port = new_port0; udp0->checksum = 0; } switch(ses0->state) { case SNAT_SESSION_UDP_ACTIVE: ses0->expire = now + sm->udp_timeout; break; case SNAT_SESSION_TCP_SYN_SENT: case SNAT_SESSION_TCP_FIN_WAIT: case SNAT_SESSION_TCP_CLOSE_WAIT: case SNAT_SESSION_TCP_LAST_ACK: ses0->expire = now + sm->tcp_transitory_timeout; break; case SNAT_SESSION_TCP_ESTABLISHED: ses0->expire = now + sm->tcp_established_timeout; break; } trace0: if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) && (b0->flags & VLIB_BUFFER_IS_TRACED))) { snat_in2out_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); t->is_slow_path = 0; t->sw_if_index = sw_if_index0; t->next_index = next0; t->session_index = ~0; if (ses0) t->session_index = ses0 - dm0->sessions; } pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP; ip1 = vlib_buffer_get_current (b1); udp1 = ip4_next_header (ip1); tcp1 = (tcp_header_t *) udp1; sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; if (PREDICT_FALSE(ip1->ttl == 1)) { vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0; icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded, ICMP4_time_exceeded_ttl_exceeded_in_transit, 0); next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR; goto trace1; } proto1 = ip_proto_to_snat_proto (ip1->protocol); if (PREDICT_FALSE(proto1 == SNAT_PROTOCOL_ICMP)) { rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index1); icmp1 = (icmp46_header_t *) udp1; next1 = icmp_in2out(sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node, next1, thread_index, &ses1, &dm1); goto trace1; } dm1 = snat_det_map_by_user(sm, &ip1->src_address); if (PREDICT_FALSE(!dm1)) { clib_warning("no match for internal host %U", format_ip4_address, &ip0->src_address); next1 = SNAT_IN2OUT_NEXT_DROP; b1->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION]; goto trace1; } snat_det_forward(dm1, &ip1->src_address, &new_addr1, &lo_port1); key1.ext_host_addr = ip1->dst_address; key1.ext_host_port = tcp1->dst; ses1 = snat_det_find_ses_by_in(dm1, 
&ip1->src_address, tcp1->src, key1);
          if (PREDICT_FALSE(!ses1))
            {
              /* No session yet: probe the host's port block (starting at a
                 source-port-derived offset) for a free outside port. */
              for (i1 = 0; i1 < dm1->ports_per_host; i1++)
                {
                  key1.out_port = clib_host_to_net_u16 (lo_port1 +
                    ((i1 + clib_net_to_host_u16 (tcp1->src)) % dm1->ports_per_host));

                  if (snat_det_get_ses_by_out (dm1, &ip1->src_address,
                                               key1.as_u64))
                    continue;

                  ses1 = snat_det_ses_create(dm1, &ip1->src_address,
                                             tcp1->src, &key1);
                  break;
                }
              if (PREDICT_FALSE(!ses1))
                {
                  /* too many sessions for user, send ICMP error packet */
                  vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
                  icmp4_error_set_vnet_buffer (b1,
                                               ICMP4_destination_unreachable,
                                               ICMP4_destination_unreachable_destination_unreachable_host,
                                               0);
                  next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
                  goto trace1;
                }
            }

          new_port1 = ses1->out.out_port;

          /* Rewrite source address; fix the IP checksum incrementally. */
          old_addr1.as_u32 = ip1->src_address.as_u32;
          ip1->src_address.as_u32 = new_addr1.as_u32;
          vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->outside_fib_index;

          sum1 = ip1->checksum;
          sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
                                 ip4_header_t,
                                 src_address /* changed member */);
          ip1->checksum = ip_csum_fold (sum1);

          if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
            {
              /* Minimal TCP state tracking to pick the right timeout and
                 free ports on close. */
              if (tcp1->flags & TCP_FLAG_SYN)
                ses1->state = SNAT_SESSION_TCP_SYN_SENT;
              else if (tcp1->flags & TCP_FLAG_ACK
                       && ses1->state == SNAT_SESSION_TCP_SYN_SENT)
                ses1->state = SNAT_SESSION_TCP_ESTABLISHED;
              else if (tcp1->flags & TCP_FLAG_FIN
                       && ses1->state == SNAT_SESSION_TCP_ESTABLISHED)
                ses1->state = SNAT_SESSION_TCP_FIN_WAIT;
              else if (tcp1->flags & TCP_FLAG_ACK
                       && ses1->state == SNAT_SESSION_TCP_FIN_WAIT)
                snat_det_ses_close(dm1, ses1);
              else if (tcp1->flags & TCP_FLAG_FIN
                       && ses1->state == SNAT_SESSION_TCP_CLOSE_WAIT)
                ses1->state = SNAT_SESSION_TCP_LAST_ACK;
              else if (tcp1->flags == 0
                       && ses1->state == SNAT_SESSION_UNKNOWN)
                ses1->state = SNAT_SESSION_TCP_ESTABLISHED;

              /* Rewrite source port; fold address and port deltas into the
                 TCP checksum (pseudo-header update). */
              old_port1 = tcp1->src;
              tcp1->src = new_port1;

              sum1 = tcp1->checksum;
              sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32,
                                     ip4_header_t,
                                     dst_address /* changed member */);
              sum1 = ip_csum_update (sum1, old_port1,
 new_port1, ip4_header_t /* cheat */,
                                     length /* changed member */);
              tcp1->checksum = ip_csum_fold(sum1);
            }
          else
            {
              ses1->state = SNAT_SESSION_UDP_ACTIVE;
              old_port1 = udp1->src_port;
              udp1->src_port = new_port1;
              /* UDP checksum is optional for IPv4: clear instead of fixing. */
              udp1->checksum = 0;
            }

          /* Refresh the session expiry based on its protocol state. */
          switch(ses1->state)
            {
            case SNAT_SESSION_UDP_ACTIVE:
                ses1->expire = now + sm->udp_timeout;
                break;
            case SNAT_SESSION_TCP_SYN_SENT:
            case SNAT_SESSION_TCP_FIN_WAIT:
            case SNAT_SESSION_TCP_CLOSE_WAIT:
            case SNAT_SESSION_TCP_LAST_ACK:
                ses1->expire = now + sm->tcp_transitory_timeout;
                break;
            case SNAT_SESSION_TCP_ESTABLISHED:
                ses1->expire = now + sm->tcp_established_timeout;
                break;
            }

        trace1:
          if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
                            && (b1->flags & VLIB_BUFFER_IS_TRACED)))
            {
              snat_in2out_trace_t *t =
                 vlib_add_trace (vm, node, b1, sizeof (*t));
              t->is_slow_path = 0;
              t->sw_if_index = sw_if_index1;
              t->next_index = next1;
              t->session_index = ~0;
              if (ses1)
                t->session_index = ses1 - dm1->sessions;
            }

          pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;

          /* verify speculative enqueues, maybe switch current next frame */
          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, bi1, next0, next1);
         }

      /* Single-loop: remaining packets, one at a time. */
      while (n_left_from > 0 && n_left_to_next > 0)
        {
          u32 bi0;
          vlib_buffer_t * b0;
          u32 next0;
          u32 sw_if_index0;
          ip4_header_t * ip0;
          ip_csum_t sum0;
          ip4_address_t new_addr0, old_addr0;
          u16 old_port0, new_port0, lo_port0, i0;
          udp_header_t * udp0;
          tcp_header_t * tcp0;
          u32 proto0;
          snat_det_out_key_t key0;
          snat_det_map_t * dm0;
          snat_det_session_t * ses0 = 0;
          u32 rx_fib_index0;
          icmp46_header_t * icmp0;

          /* speculatively enqueue b0 to the current next frame */
          bi0 = from[0];
          to_next[0] = bi0;
          from += 1;
          to_next += 1;
          n_left_from -= 1;
          n_left_to_next -= 1;

          b0 = vlib_get_buffer (vm, bi0);
          next0 = SNAT_IN2OUT_NEXT_LOOKUP;

          ip0 = vlib_buffer_get_current (b0);
          udp0 = ip4_next_header (ip0);
          tcp0 = (tcp_header_t *) udp0;

          sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];

          if (PREDICT_FALSE(ip0->ttl == 1))
            {
              vnet_buffer (b0)->sw_if_index[VLIB_TX] =
(u32) ~ 0;
              icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
                                           ICMP4_time_exceeded_ttl_exceeded_in_transit,
                                           0);
              next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
              goto trace00;
            }

          proto0 = ip_proto_to_snat_proto (ip0->protocol);

          /* ICMP gets its own translation path (identifier-based). */
          if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP))
            {
              rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
              icmp0 = (icmp46_header_t *) udp0;

              next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0,
                                  rx_fib_index0, node, next0, thread_index,
                                  &ses0, &dm0);
              goto trace00;
            }

          /* Find the deterministic map covering this inside address. */
          dm0 = snat_det_map_by_user(sm, &ip0->src_address);
          if (PREDICT_FALSE(!dm0))
            {
              clib_warning("no match for internal host %U",
                           format_ip4_address, &ip0->src_address);
              next0 = SNAT_IN2OUT_NEXT_DROP;
              b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
              goto trace00;
            }

          /* Deterministically derive outside address and port-range base. */
          snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);

          key0.ext_host_addr = ip0->dst_address;
          key0.ext_host_port = tcp0->dst;

          ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src,
                                         key0);
          if (PREDICT_FALSE(!ses0))
            {
              /* Probe the host's outside-port block for a free port. */
              for (i0 = 0; i0 < dm0->ports_per_host; i0++)
                {
                  key0.out_port = clib_host_to_net_u16 (lo_port0 +
                    ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host));

                  if (snat_det_get_ses_by_out (dm0, &ip0->src_address,
                                               key0.as_u64))
                    continue;

                  ses0 = snat_det_ses_create(dm0, &ip0->src_address,
                                             tcp0->src, &key0);
                  break;
                }
              if (PREDICT_FALSE(!ses0))
                {
                  /* too many sessions for user, send ICMP error packet */
                  vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
                  icmp4_error_set_vnet_buffer (b0,
                                               ICMP4_destination_unreachable,
                                               ICMP4_destination_unreachable_destination_unreachable_host,
                                               0);
                  next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
                  goto trace00;
                }
            }

          new_port0 = ses0->out.out_port;

          /* Rewrite source address; fix the IP checksum incrementally. */
          old_addr0.as_u32 = ip0->src_address.as_u32;
          ip0->src_address.as_u32 = new_addr0.as_u32;
          vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index;

          sum0 = ip0->checksum;
          sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
                                 ip4_header_t,
                                 src_address /* changed member */);
          ip0->checksum = ip_csum_fold (sum0);

          if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
            {
              /* Minimal TCP state tracking to pick the right timeout and
                 free ports on close. */
              if (tcp0->flags & TCP_FLAG_SYN)
                ses0->state = SNAT_SESSION_TCP_SYN_SENT;
              else if (tcp0->flags & TCP_FLAG_ACK
                       && ses0->state == SNAT_SESSION_TCP_SYN_SENT)
                ses0->state = SNAT_SESSION_TCP_ESTABLISHED;
              else if (tcp0->flags & TCP_FLAG_FIN
                       && ses0->state == SNAT_SESSION_TCP_ESTABLISHED)
                ses0->state = SNAT_SESSION_TCP_FIN_WAIT;
              else if (tcp0->flags & TCP_FLAG_ACK
                       && ses0->state == SNAT_SESSION_TCP_FIN_WAIT)
                snat_det_ses_close(dm0, ses0);
              else if (tcp0->flags & TCP_FLAG_FIN
                       && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT)
                ses0->state = SNAT_SESSION_TCP_LAST_ACK;
              else if (tcp0->flags == 0
                       && ses0->state == SNAT_SESSION_UNKNOWN)
                ses0->state = SNAT_SESSION_TCP_ESTABLISHED;

              /* Rewrite source port; fold address and port deltas into the
                 TCP checksum (pseudo-header update). */
              old_port0 = tcp0->src;
              tcp0->src = new_port0;

              sum0 = tcp0->checksum;
              sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32,
                                     ip4_header_t,
                                     dst_address /* changed member */);
              sum0 = ip_csum_update (sum0, old_port0, new_port0,
                                     ip4_header_t /* cheat */,
                                     length /* changed member */);
              tcp0->checksum = ip_csum_fold(sum0);
            }
          else
            {
              ses0->state = SNAT_SESSION_UDP_ACTIVE;
              old_port0 = udp0->src_port;
              udp0->src_port = new_port0;
              /* UDP checksum is optional for IPv4: clear instead of fixing. */
              udp0->checksum = 0;
            }

          /* Refresh the session expiry based on its protocol state. */
          switch(ses0->state)
            {
            case SNAT_SESSION_UDP_ACTIVE:
                ses0->expire = now + sm->udp_timeout;
                break;
            case SNAT_SESSION_TCP_SYN_SENT:
            case SNAT_SESSION_TCP_FIN_WAIT:
            case SNAT_SESSION_TCP_CLOSE_WAIT:
            case SNAT_SESSION_TCP_LAST_ACK:
                ses0->expire = now + sm->tcp_transitory_timeout;
                break;
            case SNAT_SESSION_TCP_ESTABLISHED:
                ses0->expire = now + sm->tcp_established_timeout;
                break;
            }

        trace00:
          if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
                            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
            {
              snat_in2out_trace_t *t =
                 vlib_add_trace (vm, node, b0, sizeof (*t));
              t->is_slow_path = 0;
              t->sw_if_index = sw_if_index0;
              t->next_index = next0;
              t->session_index = ~0;
              if (ses0)
                t->session_index = ses0 - dm0->sessions;
            }

          pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;

          /* verify speculative enqueue, maybe switch current next frame */
          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, next0);
        }

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  vlib_node_increment_counter (vm, snat_det_in2out_node.index,
                               SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
                               pkts_processed);
  return frame->n_vectors;
}

VLIB_REGISTER_NODE (snat_det_in2out_node) = {
  .function = snat_det_in2out_node_fn,
  .name = "nat44-det-in2out",
  .vector_size = sizeof (u32),
  .format_trace = format_snat_in2out_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = ARRAY_LEN(snat_in2out_error_strings),
  .error_strings = snat_in2out_error_strings,
  .runtime_data_bytes = sizeof (snat_runtime_t),
  .n_next_nodes = 3,
  /* edit / add dispositions here */
  .next_nodes = {
    [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
    [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
    [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
  },
};

VLIB_NODE_FUNCTION_MULTIARCH (snat_det_in2out_node,
                              snat_det_in2out_node_fn);

/**
 * Get address and port values to be used for ICMP packet translation
 * and create session if needed
 *
 * @param[in,out] sm             NAT main
 * @param[in,out] node           NAT node runtime
 * @param[in] thread_index       thread index
 * @param[in,out] b0             buffer containing packet to be translated
 * @param[in] ip0                IPv4 header of the packet
 * @param[out] p_proto           protocol used for matching
 * @param[out] p_value           address and port after NAT translation
 * @param[out] p_dont_translate  if packet should not be translated
 * @param d                      optional parameter (snat_det_session_t **)
 * @param e                      optional parameter (snat_det_map_t **)
 *
 * @return next node index (~0 keeps the caller's default)
 */
u32 icmp_match_in2out_det(snat_main_t *sm, vlib_node_runtime_t *node,
                          u32 thread_index, vlib_buffer_t *b0,
                          ip4_header_t *ip0, u8 *p_proto,
                          snat_session_key_t *p_value,
                          u8 *p_dont_translate, void *d, void *e)
{
  icmp46_header_t *icmp0;
  u32 sw_if_index0;
  u32 rx_fib_index0;
  u8 protocol;
  snat_det_out_key_t key0;
  u8 dont_translate = 0;
  u32 next0 = ~0;
  icmp_echo_header_t *echo0, *inner_echo0 = 0;
  ip4_header_t *inner_ip0;
  void *l4_header = 0;
  icmp46_header_t *inner_icmp0;
  snat_det_map_t * dm0 = 0;
  ip4_address_t new_addr0;
  u16 lo_port0, i0;
  snat_det_session_t * ses0 = 0;
  ip4_address_t in_addr;
  u16 in_port;

  icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
  echo0 = (icmp_echo_header_t *)(icmp0+1);
  sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
  rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);

  if (!icmp_is_error_message (icmp0))
    {
      /* Plain echo: match on (source address, echo identifier). */
      protocol = SNAT_PROTOCOL_ICMP;
      in_addr = ip0->src_address;
      in_port = echo0->identifier;
    }
  else
    {
      /* ICMP error: match on the embedded original packet — its
         destination is our inside host. */
      inner_ip0 = (ip4_header_t *)(echo0+1);
      l4_header = ip4_next_header (inner_ip0);
      protocol = ip_proto_to_snat_proto (inner_ip0->protocol);
      in_addr = inner_ip0->dst_address;
      switch (protocol)
        {
        case SNAT_PROTOCOL_ICMP:
          inner_icmp0 = (icmp46_header_t*)l4_header;
          inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1);
          in_port = inner_echo0->identifier;
          break;
        case SNAT_PROTOCOL_UDP:
        case SNAT_PROTOCOL_TCP:
          in_port = ((tcp_udp_header_t*)l4_header)->dst_port;
          break;
        default:
          b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL];
          next0 = SNAT_IN2OUT_NEXT_DROP;
          goto out;
        }
    }

  dm0 = snat_det_map_by_user(sm, &in_addr);
  if (PREDICT_FALSE(!dm0))
    {
      clib_warning("no match for internal host %U",
                   format_ip4_address, &in_addr);
      /* Traffic that should not be translated passes through untouched;
         anything else is dropped. */
      if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
          IP_PROTOCOL_ICMP, rx_fib_index0)))
        {
          dont_translate = 1;
          goto out;
        }
      next0 = SNAT_IN2OUT_NEXT_DROP;
      b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
      goto out;
    }

  /* Deterministically derive outside address and port-range base. */
  snat_det_forward(dm0, &in_addr, &new_addr0, &lo_port0);

  key0.ext_host_addr = ip0->dst_address;
  key0.ext_host_port = 0;

  ses0 = snat_det_find_ses_by_in(dm0, &in_addr, in_port, key0);
  if (PREDICT_FALSE(!ses0))
    {
      if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
          IP_PROTOCOL_ICMP, rx_fib_index0)))
        {
          dont_translate = 1;
          goto out;
        }
      /* Only echo requests may create a new session. */
      if (icmp0->type != ICMP4_echo_request)
        {
          b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
          next0 = SNAT_IN2OUT_NEXT_DROP;
          goto out;
        }
      /* Probe the host's outside-port block (identifier-derived offset)
         for a free outside "port" (echo identifier). */
      for (i0 = 0; i0 < dm0->ports_per_host; i0++)
        {
          key0.out_port = clib_host_to_net_u16 (lo_port0 +
            ((i0 +
 clib_net_to_host_u16 (echo0->identifier)) % dm0->ports_per_host));

          if (snat_det_get_ses_by_out (dm0, &in_addr, key0.as_u64))
            continue;

          ses0 = snat_det_ses_create(dm0, &in_addr, echo0->identifier, &key0);
          break;
        }
      if (PREDICT_FALSE(!ses0))
        {
          next0 = SNAT_IN2OUT_NEXT_DROP;
          b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
          goto out;
        }
    }

  /* Session exists but packet is neither an echo request nor an ICMP
     error: unsupported ICMP type. */
  if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
                    !icmp_is_error_message (icmp0)))
    {
      b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
      next0 = SNAT_IN2OUT_NEXT_DROP;
      goto out;
    }

  u32 now = (u32) vlib_time_now (sm->vlib_main);

  ses0->state = SNAT_SESSION_ICMP_ACTIVE;
  ses0->expire = now + sm->icmp_timeout;

out:
  *p_proto = protocol;
  if (ses0)
    {
      p_value->addr = new_addr0;
      p_value->fib_index = sm->outside_fib_index;
      p_value->port = ses0->out.out_port;
    }
  *p_dont_translate = dont_translate;
  if (d)
    *(snat_det_session_t**)d = ses0;
  if (e)
    *(snat_det_map_t**)e = dm0;
  return next0;
}

/**********************/
/*** worker handoff ***/
/**********************/

/* Classify each packet to a worker thread via worker_in2out_cb and either
   hand it off through a frame queue (different worker) or enqueue it
   directly to the in2out node (same worker).  is_output selects the
   output-feature variant of queue and target node. */
static inline uword
snat_in2out_worker_handoff_fn_inline (vlib_main_t * vm,
                                      vlib_node_runtime_t * node,
                                      vlib_frame_t * frame,
                                      u8 is_output)
{
  snat_main_t *sm = &snat_main;
  vlib_thread_main_t *tm = vlib_get_thread_main ();
  u32 n_left_from, *from, *to_next = 0;
  /* Per-thread handoff state, lazily initialized below. */
  static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
  static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index = 0;
  vlib_frame_queue_elt_t *hf = 0;
  vlib_frame_t *f = 0;
  int i;
  u32 n_left_to_next_worker = 0, *to_next_worker = 0;
  u32 next_worker_index = 0;
  u32 current_worker_index = ~0;
  u32 thread_index = vlib_get_thread_index ();
  u32 fq_index;
  u32 to_node_index;

  ASSERT (vec_len (sm->workers));

  if (is_output)
    {
      fq_index = sm->fq_in2out_output_index;
      to_node_index = sm->in2out_output_node_index;
    }
  else
    {
      fq_index = sm->fq_in2out_index;
      to_node_index = sm->in2out_node_index;
    }

  if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
    {
      vec_validate
(handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);

      vec_validate_init_empty (congested_handoff_queue_by_worker_index,
                               sm->first_worker_index + sm->num_workers - 1,
                               (vlib_frame_queue_t *) (~0));
    }

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;

  while (n_left_from > 0)
    {
      u32 bi0;
      vlib_buffer_t *b0;
      u32 sw_if_index0;
      u32 rx_fib_index0;
      ip4_header_t * ip0;
      u8 do_handoff;

      bi0 = from[0];
      from += 1;
      n_left_from -= 1;

      b0 = vlib_get_buffer (vm, bi0);

      sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
      rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);

      ip0 = vlib_buffer_get_current (b0);

      /* Plugin-configured hash decides which worker owns this flow. */
      next_worker_index = sm->worker_in2out_cb(ip0, rx_fib_index0);

      if (PREDICT_FALSE (next_worker_index != thread_index))
        {
          do_handoff = 1;

          if (next_worker_index != current_worker_index)
            {
              /* Switching target worker: flush the partially filled
                 element of the previous worker first. */
              if (hf)
                hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;

              hf = vlib_get_worker_handoff_queue_elt (fq_index,
                                                      next_worker_index,
                                                      handoff_queue_elt_by_worker_index);

              n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
              to_next_worker = &hf->buffer_index[hf->n_vectors];
              current_worker_index = next_worker_index;
            }

          /* enqueue to correct worker thread */
          to_next_worker[0] = bi0;
          to_next_worker++;
          n_left_to_next_worker--;

          if (n_left_to_next_worker == 0)
            {
              /* Queue element is full: ship it now. */
              hf->n_vectors = VLIB_FRAME_SIZE;
              vlib_put_frame_queue_elt (hf);
              current_worker_index = ~0;
              handoff_queue_elt_by_worker_index[next_worker_index] = 0;
              hf = 0;
            }
        }
      else
        {
          do_handoff = 0;
          /* if this is 1st frame */
          if (!f)
            {
              f = vlib_get_frame_to_node (vm, to_node_index);
              to_next = vlib_frame_vector_args (f);
            }

          to_next[0] = bi0;
          to_next += 1;
          f->n_vectors++;
        }

      if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
                         && (b0->flags & VLIB_BUFFER_IS_TRACED)))
        {
          snat_in2out_worker_handoff_trace_t *t =
            vlib_add_trace (vm, node, b0, sizeof (*t));
          t->next_worker_index = next_worker_index;
          t->do_handoff = do_handoff;
        }
    }

  /* Flush the same-worker frame, if any. */
  if (f)
    vlib_put_frame_to_node (vm, to_node_index, f);

  if (hf)
    hf->n_vectors = VLIB_FRAME_SIZE -
 n_left_to_next_worker;

  /* Ship frames to the worker nodes */
  for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
    {
      if (handoff_queue_elt_by_worker_index[i])
        {
          hf = handoff_queue_elt_by_worker_index[i];
          /*
           * It works better to let the handoff node
           * rate-adapt, always ship the handoff queue element.
           */
          if (1 || hf->n_vectors == hf->last_n_vectors)
            {
              vlib_put_frame_queue_elt (hf);
              handoff_queue_elt_by_worker_index[i] = 0;
            }
          else
            hf->last_n_vectors = hf->n_vectors;
        }
      congested_handoff_queue_by_worker_index[i] =
        (vlib_frame_queue_t *) (~0);
    }
  hf = 0;
  current_worker_index = ~0;
  return frame->n_vectors;
}

/* Worker handoff for the in2out feature path. */
static uword
snat_in2out_worker_handoff_fn (vlib_main_t * vm,
                               vlib_node_runtime_t * node,
                               vlib_frame_t * frame)
{
  return snat_in2out_worker_handoff_fn_inline (vm, node, frame, 0);
}

VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = {
  .function = snat_in2out_worker_handoff_fn,
  .name = "nat44-in2out-worker-handoff",
  .vector_size = sizeof (u32),
  .format_trace = format_snat_in2out_worker_handoff_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,

  .n_next_nodes = 1,

  .next_nodes = {
    [0] = "error-drop",
  },
};

VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node,
                              snat_in2out_worker_handoff_fn);

/* Worker handoff for the in2out output-feature path. */
static uword
snat_in2out_output_worker_handoff_fn (vlib_main_t * vm,
                                      vlib_node_runtime_t * node,
                                      vlib_frame_t * frame)
{
  return snat_in2out_worker_handoff_fn_inline (vm, node, frame, 1);
}

VLIB_REGISTER_NODE (snat_in2out_output_worker_handoff_node) = {
  .function = snat_in2out_output_worker_handoff_fn,
  .name = "nat44-in2out-output-worker-handoff",
  .vector_size = sizeof (u32),
  .format_trace = format_snat_in2out_worker_handoff_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,

  .n_next_nodes = 1,

  .next_nodes = {
    [0] = "error-drop",
  },
};

VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_worker_handoff_node,
                              snat_in2out_output_worker_handoff_fn);

/* Return 1 when dst_addr is a NAT pool address or an external static
   mapping address, i.e. traffic that must hairpin back inside. */
static_always_inline int
is_hairpinning (snat_main_t *sm, ip4_address_t * dst_addr)
{
  snat_address_t * ap;
  clib_bihash_kv_8_8_t kv, value;
snat_session_key_t m_key; vec_foreach (ap, sm->addresses) { if (ap->addr.as_u32 == dst_addr->as_u32) return 1; } m_key.addr.as_u32 = dst_addr->as_u32; m_key.fib_index = sm->outside_fib_index; m_key.port = 0; m_key.protocol = 0; kv.key = m_key.as_u64; if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value)) return 1; return 0; } static uword snat_hairpin_dst_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { u32 n_left_from, * from, * to_next; snat_in2out_next_t next_index; u32 pkts_processed = 0; snat_main_t * sm = &snat_main; from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; next_index = node->cached_next_index; while (n_left_from > 0) { u32 n_left_to_next; vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); while (n_left_from > 0 && n_left_to_next > 0) { u32 bi0; vlib_buffer_t * b0; u32 next0; ip4_header_t * ip0; u32 proto0; /* speculatively enqueue b0 to the current next frame */ bi0 = from[0]; to_next[0] = bi0; from += 1; to_next += 1; n_left_from -= 1; n_left_to_next -= 1; b0 = vlib_get_buffer (vm, bi0); next0 = SNAT_IN2OUT_NEXT_LOOKUP; ip0 = vlib_buffer_get_current (b0); proto0 = ip_proto_to_snat_proto (ip0->protocol); vnet_buffer (b0)->snat.flags = 0; if (PREDICT_FALSE (is_hairpinning (sm, &ip0->dst_address))) { if (proto0 == SNAT_PROTOCOL_TCP || proto0 == SNAT_PROTOCOL_UDP) { udp_header_t * udp0 = ip4_next_header (ip0); tcp_header_t * tcp0 = (tcp_header_t *) udp0; snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0); } else if (proto0 == SNAT_PROTOCOL_ICMP) { icmp46_header_t * icmp0 = ip4_next_header (ip0); snat_icmp_hairpinning (sm, b0, ip0, icmp0); } else { snat_hairpinning_unknown_proto (sm, b0, ip0); } vnet_buffer (b0)->snat.flags = SNAT_FLAG_HAIRPINNING; clib_warning("is hairpinning"); } pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP; /* verify speculative enqueue, maybe switch current next frame */ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, 
n_left_to_next, bi0, next0); } vlib_put_next_frame (vm, node, next_index, n_left_to_next); } vlib_node_increment_counter (vm, snat_hairpin_dst_node.index, SNAT_IN2OUT_ERROR_IN2OUT_PACKETS, pkts_processed); return frame->n_vectors; } VLIB_REGISTER_NODE (snat_hairpin_dst_node) = { .function = snat_hairpin_dst_fn, .name = "nat44-hairpin-dst", .vector_size = sizeof (u32), .type = VLIB_NODE_TYPE_INTERNAL, .n_errors = ARRAY_LEN(snat_in2out_error_strings), .error_strings = snat_in2out_error_strings, .n_next_nodes = 2, .next_nodes = { [SNAT_IN2OUT_NEXT_DROP] = "error-drop", [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup", }, }; VLIB_NODE_FUNCTION_MULTIARCH (snat_hairpin_dst_node, snat_hairpin_dst_fn); static uword snat_hairpin_src_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { u32 n_left_from, * from, * to_next; snat_in2out_next_t next_index; u32 pkts_processed = 0; snat_main_t *sm = &snat_main; from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; next_index = node->cached_next_index; while (n_left_from > 0) { u32 n_left_to_next; vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); while (n_left_from > 0 && n_left_to_next > 0) { u32 bi0; vlib_buffer_t * b0; u32 next0; snat_interface_t *i; u32 sw_if_index0; /* speculatively enqueue b0 to the current next frame */ bi0 = from[0]; to_next[0] = bi0; from += 1; to_next += 1; n_left_from -= 1; n_left_to_next -= 1; b0 = vlib_get_buffer (vm, bi0); sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; next0 = SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT; pool_foreach (i, sm->output_feature_interfaces, ({ /* Only packets from NAT inside interface */ if ((nat_interface_is_inside(i)) && (sw_if_index0 == i->sw_if_index)) { if (PREDICT_FALSE ((vnet_buffer (b0)->snat.flags) & SNAT_FLAG_HAIRPINNING)) { if (PREDICT_TRUE (sm->num_workers > 1)) next0 = SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH; else next0 = SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT; } break; } })); pkts_processed += next0 != 
SNAT_IN2OUT_NEXT_DROP; /* verify speculative enqueue, maybe switch current next frame */ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, bi0, next0); } vlib_put_next_frame (vm, node, next_index, n_left_to_next); } vlib_node_increment_counter (vm, snat_hairpin_src_node.index, SNAT_IN2OUT_ERROR_IN2OUT_PACKETS, pkts_processed); return frame->n_vectors; } VLIB_REGISTER_NODE (snat_hairpin_src_node) = { .function = snat_hairpin_src_fn, .name = "nat44-hairpin-src", .vector_size = sizeof (u32), .type = VLIB_NODE_TYPE_INTERNAL, .n_errors = ARRAY_LEN(snat_in2out_error_strings), .error_strings = snat_in2out_error_strings, .n_next_nodes = SNAT_HAIRPIN_SRC_N_NEXT, .next_nodes = { [SNAT_HAIRPIN_SRC_NEXT_DROP] = "error-drop", [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT] = "nat44-in2out-output", [SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT] = "interface-output", [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH] = "nat44-in2out-output-worker-handoff", }, }; VLIB_NODE_FUNCTION_MULTIARCH (snat_hairpin_src_node, snat_hairpin_src_fn); static uword snat_in2out_fast_static_map_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { u32 n_left_from, * from, * to_next; snat_in2out_next_t next_index; u32 pkts_processed = 0; snat_main_t * sm = &snat_main; u32 stats_node_index; stats_node_index = snat_in2out_fast_node.index; from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; next_index = node->cached_next_index; while (n_left_from > 0) { u32 n_left_to_next; vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); while (n_left_from > 0 && n_left_to_next > 0) { u32 bi0; vlib_buffer_t * b0; u32 next0; u32 sw_if_index0; ip4_header_t * ip0; ip_csum_t sum0; u32 new_addr0, old_addr0; u16 old_port0, new_port0; udp_header_t * udp0; tcp_header_t * tcp0; icmp46_header_t * icmp0; snat_session_key_t key0, sm0; u32 proto0; u32 rx_fib_index0; /* speculatively enqueue b0 to the current next frame */ bi0 = from[0]; to_next[0] = bi0; from += 1; to_next 
+= 1; n_left_from -= 1; n_left_to_next -= 1; b0 = vlib_get_buffer (vm, bi0); next0 = SNAT_IN2OUT_NEXT_LOOKUP; ip0 = vlib_buffer_get_current (b0); udp0 = ip4_next_header (ip0); tcp0 = (tcp_header_t *) udp0; icmp0 = (icmp46_header_t *) udp0; sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0); if (PREDICT_FALSE(ip0->ttl == 1)) { vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded, ICMP4_time_exceeded_ttl_exceeded_in_transit, 0); next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR; goto trace0; } proto0 = ip_proto_to_snat_proto (ip0->protocol); if (PREDICT_FALSE (proto0 == ~0)) goto trace0; if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) { next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, next0, ~0, 0, 0); goto trace0; } key0.addr = ip0->src_address; key0.protocol = proto0; key0.port = udp0->src_port; key0.fib_index = rx_fib_index0; if (snat_static_mapping_match(sm, key0, &sm0, 0, 0)) { b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION]; next0= SNAT_IN2OUT_NEXT_DROP; goto trace0; } new_addr0 = sm0.addr.as_u32; new_port0 = sm0.port; vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index; old_addr0 = ip0->src_address.as_u32; ip0->src_address.as_u32 = new_addr0; sum0 = ip0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, src_address /* changed member */); ip0->checksum = ip_csum_fold (sum0); if (PREDICT_FALSE(new_port0 != udp0->dst_port)) { if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { old_port0 = tcp0->src_port; tcp0->src_port = new_port0; sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, dst_address /* changed member */); sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t /* cheat */, length /* changed member */); tcp0->checksum = ip_csum_fold(sum0); } else { old_port0 = udp0->src_port; udp0->src_port = new_port0; udp0->checksum = 0; } } else { if 
(PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, dst_address /* changed member */); tcp0->checksum = ip_csum_fold(sum0); } } /* Hairpinning */ snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0); trace0: if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) && (b0->flags & VLIB_BUFFER_IS_TRACED))) { snat_in2out_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); t->sw_if_index = sw_if_index0; t->next_index = next0; } pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP; /* verify speculative enqueue, maybe switch current next frame */ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, bi0, next0); } vlib_put_next_frame (vm, node, next_index, n_left_to_next); } vlib_node_increment_counter (vm, stats_node_index, SNAT_IN2OUT_ERROR_IN2OUT_PACKETS, pkts_processed); return frame->n_vectors; } VLIB_REGISTER_NODE (snat_in2out_fast_node) = { .function = snat_in2out_fast_static_map_fn, .name = "nat44-in2out-fast", .vector_size = sizeof (u32), .format_trace = format_snat_in2out_fast_trace, .type = VLIB_NODE_TYPE_INTERNAL, .n_errors = ARRAY_LEN(snat_in2out_error_strings), .error_strings = snat_in2out_error_strings, .runtime_data_bytes = sizeof (snat_runtime_t), .n_next_nodes = SNAT_IN2OUT_N_NEXT, /* edit / add dispositions here */ .next_nodes = { [SNAT_IN2OUT_NEXT_DROP] = "error-drop", [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup", [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath", [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error", [SNAT_IN2OUT_NEXT_REASS] = "nat44-in2out-reass", }, }; VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_fast_node, snat_in2out_fast_static_map_fn);