diff options
Diffstat (limited to 'src/vnet/dpo/mpls_label_dpo.c')
-rw-r--r-- | src/vnet/dpo/mpls_label_dpo.c | 703 |
1 files changed, 703 insertions, 0 deletions
diff --git a/src/vnet/dpo/mpls_label_dpo.c b/src/vnet/dpo/mpls_label_dpo.c new file mode 100644 index 00000000..2a6e7dd5 --- /dev/null +++ b/src/vnet/dpo/mpls_label_dpo.c @@ -0,0 +1,703 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/ip/ip.h> +#include <vnet/dpo/mpls_label_dpo.h> +#include <vnet/mpls/mpls.h> + +/* + * pool of all MPLS Label DPOs + */ +mpls_label_dpo_t *mpls_label_dpo_pool; + +static mpls_label_dpo_t * +mpls_label_dpo_alloc (void) +{ + mpls_label_dpo_t *mld; + + pool_get_aligned(mpls_label_dpo_pool, mld, CLIB_CACHE_LINE_BYTES); + memset(mld, 0, sizeof(*mld)); + + dpo_reset(&mld->mld_dpo); + + return (mld); +} + +static index_t +mpls_label_dpo_get_index (mpls_label_dpo_t *mld) +{ + return (mld - mpls_label_dpo_pool); +} + +index_t +mpls_label_dpo_create (mpls_label_t *label_stack, + mpls_eos_bit_t eos, + u8 ttl, + u8 exp, + dpo_proto_t payload_proto, + const dpo_id_t *dpo) +{ + mpls_label_dpo_t *mld; + u32 ii; + + mld = mpls_label_dpo_alloc(); + mld->mld_n_labels = vec_len(label_stack); + mld->mld_n_hdr_bytes = mld->mld_n_labels * sizeof(mld->mld_hdr[0]); + mld->mld_payload_proto = payload_proto; + + /* + * construct label rewrite headers for each value value passed. + * get the header in network byte order since we will paint it + * on a packet in the data-plane + */ + + for (ii = 0; ii < mld->mld_n_labels-1; ii++) + { + vnet_mpls_uc_set_label(&mld->mld_hdr[ii].label_exp_s_ttl, label_stack[ii]); + vnet_mpls_uc_set_ttl(&mld->mld_hdr[ii].label_exp_s_ttl, 255); + vnet_mpls_uc_set_exp(&mld->mld_hdr[ii].label_exp_s_ttl, 0); + vnet_mpls_uc_set_s(&mld->mld_hdr[ii].label_exp_s_ttl, MPLS_NON_EOS); + mld->mld_hdr[ii].label_exp_s_ttl = + clib_host_to_net_u32(mld->mld_hdr[ii].label_exp_s_ttl); + } + + /* + * the inner most label + */ + ii = mld->mld_n_labels-1; + + vnet_mpls_uc_set_label(&mld->mld_hdr[ii].label_exp_s_ttl, label_stack[ii]); + vnet_mpls_uc_set_ttl(&mld->mld_hdr[ii].label_exp_s_ttl, ttl); + vnet_mpls_uc_set_exp(&mld->mld_hdr[ii].label_exp_s_ttl, exp); + vnet_mpls_uc_set_s(&mld->mld_hdr[ii].label_exp_s_ttl, eos); + mld->mld_hdr[ii].label_exp_s_ttl = + clib_host_to_net_u32(mld->mld_hdr[ii].label_exp_s_ttl); + + /* + * stack this label objct on its parent. + */ + dpo_stack(DPO_MPLS_LABEL, + mld->mld_payload_proto, + &mld->mld_dpo, + dpo); + + return (mpls_label_dpo_get_index(mld)); +} + +u8* +format_mpls_label_dpo (u8 *s, va_list *args) +{ + index_t index = va_arg (*args, index_t); + u32 indent = va_arg (*args, u32); + mpls_unicast_header_t hdr; + mpls_label_dpo_t *mld; + u32 ii; + + s = format(s, "mpls-label:[%d]:", index); + + if (pool_is_free_index(mpls_label_dpo_pool, index)) + { + /* + * the packet trace can be printed after the DPO has been deleted + */ + return (s); + } + + mld = mpls_label_dpo_get(index); + + for (ii = 0; ii < mld->mld_n_labels; ii++) + { + hdr.label_exp_s_ttl = + clib_net_to_host_u32(mld->mld_hdr[ii].label_exp_s_ttl); + s = format(s, "%U", format_mpls_header, hdr); + } + + s = format(s, "\n%U", format_white_space, indent); + s = format(s, "%U", format_dpo_id, &mld->mld_dpo, indent+2); + + return (s); +} + +static void +mpls_label_dpo_lock (dpo_id_t *dpo) +{ + mpls_label_dpo_t *mld; + + mld = mpls_label_dpo_get(dpo->dpoi_index); + + mld->mld_locks++; +} + +static void +mpls_label_dpo_unlock (dpo_id_t *dpo) +{ + mpls_label_dpo_t *mld; + + mld = mpls_label_dpo_get(dpo->dpoi_index); + + mld->mld_locks--; + + if (0 == mld->mld_locks) + { + dpo_reset(&mld->mld_dpo); + pool_put(mpls_label_dpo_pool, mld); + } +} + +/** + * @brief A struct to hold tracing information for the MPLS label imposition + * node. + */ +typedef struct mpls_label_imposition_trace_t_ +{ + /** + * The MPLS header imposed + */ + mpls_unicast_header_t hdr; +} mpls_label_imposition_trace_t; + +always_inline mpls_unicast_header_t * +mpls_label_paint (vlib_buffer_t * b0, + mpls_label_dpo_t *mld0, + u8 ttl0) +{ + mpls_unicast_header_t *hdr0; + + vlib_buffer_advance(b0, -(mld0->mld_n_hdr_bytes)); + + hdr0 = vlib_buffer_get_current(b0); + + if (1 == mld0->mld_n_labels) + { + /* optimise for the common case of one label */ + *hdr0 = mld0->mld_hdr[0]; + } + else + { + clib_memcpy(hdr0, mld0->mld_hdr, mld0->mld_n_hdr_bytes); + hdr0 = hdr0 + (mld0->mld_n_labels - 1); + } + /* fixup the TTL for the inner most label */ + ((char*)hdr0)[3] = ttl0; + + return (hdr0); +} + +always_inline uword +mpls_label_imposition_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame, + u8 payload_is_ip4, + u8 payload_is_ip6, + u8 payload_is_ethernet) +{ + u32 n_left_from, next_index, * from, * to_next; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 8 && n_left_to_next >= 4) + { + u32 bi0, mldi0, bi1, mldi1, bi2, mldi2, bi3, mldi3; + mpls_unicast_header_t *hdr0, *hdr1, *hdr2, *hdr3; + mpls_label_dpo_t *mld0, *mld1, *mld2, *mld3; + vlib_buffer_t * b0, *b1, * b2, *b3; + u32 next0, next1, next2, next3; + u8 ttl0, ttl1,ttl2, ttl3 ; + + bi0 = to_next[0] = from[0]; + bi1 = to_next[1] = from[1]; + bi2 = to_next[2] = from[2]; + bi3 = to_next[3] = from[3]; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3, *p4, *p5; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + + vlib_prefetch_buffer_header (p2, STORE); + vlib_prefetch_buffer_header (p3, STORE); + vlib_prefetch_buffer_header (p4, STORE); + vlib_prefetch_buffer_header (p5, STORE); + + CLIB_PREFETCH (p2->data, sizeof (hdr0[0]), STORE); + CLIB_PREFETCH (p3->data, sizeof (hdr0[0]), STORE); + CLIB_PREFETCH (p4->data, sizeof (hdr0[0]), STORE); + CLIB_PREFETCH (p5->data, sizeof (hdr0[0]), STORE); + } + + from += 4; + to_next += 4; + n_left_from -= 4; + n_left_to_next -= 4; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); + + /* dst lookup was done by ip4 lookup */ + mldi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; + mldi1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX]; + mldi2 = vnet_buffer(b2)->ip.adj_index[VLIB_TX]; + mldi3 = vnet_buffer(b3)->ip.adj_index[VLIB_TX]; + mld0 = mpls_label_dpo_get(mldi0); + mld1 = mpls_label_dpo_get(mldi1); + mld2 = mpls_label_dpo_get(mldi2); + mld3 = mpls_label_dpo_get(mldi3); + + if (payload_is_ip4) + { + /* + * decrement the TTL on ingress to the LSP + */ + ip4_header_t * ip0 = vlib_buffer_get_current(b0); + ip4_header_t * ip1 = vlib_buffer_get_current(b1); + ip4_header_t * ip2 = vlib_buffer_get_current(b2); + ip4_header_t * ip3 = vlib_buffer_get_current(b3); + u32 checksum0; + u32 checksum1; + u32 checksum2; + u32 checksum3; + + checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100); + checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100); + checksum2 = ip2->checksum + clib_host_to_net_u16 (0x0100); + checksum3 = ip3->checksum + clib_host_to_net_u16 (0x0100); + + checksum0 += checksum0 >= 0xffff; + checksum1 += checksum1 >= 0xffff; + checksum2 += checksum2 >= 0xffff; + checksum3 += checksum3 >= 0xffff; + + ip0->checksum = checksum0; + ip1->checksum = checksum1; + ip2->checksum = checksum2; + ip3->checksum = checksum3; + + ip0->ttl -= 1; + ip1->ttl -= 1; + ip2->ttl -= 1; + ip3->ttl -= 1; + + ttl1 = ip1->ttl; + ttl0 = ip0->ttl; + ttl3 = ip3->ttl; + ttl2 = ip2->ttl; + } + else if (payload_is_ip6) + { + /* + * decrement the TTL on ingress to the LSP + */ + ip6_header_t * ip0 = vlib_buffer_get_current(b0); + ip6_header_t * ip1 = vlib_buffer_get_current(b1); + ip6_header_t * ip2 = vlib_buffer_get_current(b2); + ip6_header_t * ip3 = vlib_buffer_get_current(b3); + + ip0->hop_limit -= 1; + ip1->hop_limit -= 1; + ip2->hop_limit -= 1; + ip3->hop_limit -= 1; + + ttl0 = ip0->hop_limit; + ttl1 = ip1->hop_limit; + ttl2 = ip2->hop_limit; + ttl3 = ip3->hop_limit; + } + else if (payload_is_ethernet) + { + /* + * nothing to chang ein the ethernet header + */ + ttl0 = ttl1 = ttl2 = ttl3 = 255; + } + else + { + /* + * else, the packet to be encapped is an MPLS packet + */ + if (PREDICT_TRUE(vnet_buffer(b0)->mpls.first)) + { + /* + * The first label to be imposed on the packet. this is a label swap. + * in which case we stashed the TTL and EXP bits in the + * packet in the lookup node + */ + ASSERT(0 != vnet_buffer (b0)->mpls.ttl); + + ttl0 = vnet_buffer(b0)->mpls.ttl - 1; + } + else + { + /* + * not the first label. implying we are recusring down a chain of + * output labels. + * Each layer is considered a new LSP - hence the TTL is reset. + */ + ttl0 = 255; + } + if (PREDICT_TRUE(vnet_buffer(b1)->mpls.first)) + { + ASSERT(1 != vnet_buffer (b1)->mpls.ttl); + ttl1 = vnet_buffer(b1)->mpls.ttl - 1; + } + else + { + ttl1 = 255; + } + if (PREDICT_TRUE(vnet_buffer(b2)->mpls.first)) + { + ASSERT(1 != vnet_buffer (b2)->mpls.ttl); + + ttl2 = vnet_buffer(b2)->mpls.ttl - 1; + } + else + { + ttl2 = 255; + } + if (PREDICT_TRUE(vnet_buffer(b3)->mpls.first)) + { + ASSERT(1 != vnet_buffer (b3)->mpls.ttl); + ttl3 = vnet_buffer(b3)->mpls.ttl - 1; + } + else + { + ttl3 = 255; + } + } + vnet_buffer(b0)->mpls.first = 0; + vnet_buffer(b1)->mpls.first = 0; + vnet_buffer(b2)->mpls.first = 0; + vnet_buffer(b3)->mpls.first = 0; + + /* Paint the MPLS header */ + hdr0 = mpls_label_paint(b0, mld0, ttl0); + hdr1 = mpls_label_paint(b1, mld1, ttl1); + hdr2 = mpls_label_paint(b2, mld2, ttl2); + hdr3 = mpls_label_paint(b3, mld3, ttl3); + + next0 = mld0->mld_dpo.dpoi_next_node; + next1 = mld1->mld_dpo.dpoi_next_node; + next2 = mld2->mld_dpo.dpoi_next_node; + next3 = mld3->mld_dpo.dpoi_next_node; + vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mld0->mld_dpo.dpoi_index; + vnet_buffer(b1)->ip.adj_index[VLIB_TX] = mld1->mld_dpo.dpoi_index; + vnet_buffer(b2)->ip.adj_index[VLIB_TX] = mld2->mld_dpo.dpoi_index; + vnet_buffer(b3)->ip.adj_index[VLIB_TX] = mld3->mld_dpo.dpoi_index; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_label_imposition_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->hdr = *hdr0; + } + if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_label_imposition_trace_t *tr = + vlib_add_trace (vm, node, b1, sizeof (*tr)); + tr->hdr = *hdr1; + } + if (PREDICT_FALSE(b2->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_label_imposition_trace_t *tr = + vlib_add_trace (vm, node, b2, sizeof (*tr)); + tr->hdr = *hdr2; + } + if (PREDICT_FALSE(b3->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_label_imposition_trace_t *tr = + vlib_add_trace (vm, node, b3, sizeof (*tr)); + tr->hdr = *hdr3; + } + + vlib_validate_buffer_enqueue_x4(vm, node, next_index, to_next, + n_left_to_next, + bi0, bi1, bi2, bi3, + next0, next1, next2, next3); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + mpls_unicast_header_t *hdr0; + mpls_label_dpo_t *mld0; + vlib_buffer_t * b0; + u32 bi0, mldi0; + u32 next0; + u8 ttl; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + /* dst lookup was done by ip4 lookup */ + mldi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; + mld0 = mpls_label_dpo_get(mldi0); + + if (payload_is_ip4) + { + /* + * decrement the TTL on ingress to the LSP + */ + ip4_header_t * ip0 = vlib_buffer_get_current(b0); + u32 checksum0; + + checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100); + checksum0 += checksum0 >= 0xffff; + + ip0->checksum = checksum0; + ip0->ttl -= 1; + ttl = ip0->ttl; + } + else if (payload_is_ip6) + { + /* + * decrement the TTL on ingress to the LSP + */ + ip6_header_t * ip0 = vlib_buffer_get_current(b0); + + ip0->hop_limit -= 1; + ttl = ip0->hop_limit; + } + else + { + /* + * else, the packet to be encapped is an MPLS packet + */ + if (vnet_buffer(b0)->mpls.first) + { + /* + * The first label to be imposed on the packet. this is a label swap. + * in which case we stashed the TTL and EXP bits in the + * packet in the lookup node + */ + ASSERT(0 != vnet_buffer (b0)->mpls.ttl); + + ttl = vnet_buffer(b0)->mpls.ttl - 1; + } + else + { + /* + * not the first label. implying we are recusring down a chain of + * output labels. + * Each layer is considered a new LSP - hence the TTL is reset. + */ + ttl = 255; + } + } + vnet_buffer(b0)->mpls.first = 0; + + /* Paint the MPLS header */ + vlib_buffer_advance(b0, -(mld0->mld_n_hdr_bytes)); + hdr0 = vlib_buffer_get_current(b0); + clib_memcpy(hdr0, mld0->mld_hdr, mld0->mld_n_hdr_bytes); + + /* fixup the TTL for the inner most label */ + hdr0 = hdr0 + (mld0->mld_n_labels - 1); + ((char*)hdr0)[3] = ttl; + + next0 = mld0->mld_dpo.dpoi_next_node; + vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mld0->mld_dpo.dpoi_index; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_label_imposition_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->hdr = *hdr0; + } + + vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return from_frame->n_vectors; +} + +static u8 * +format_mpls_label_imposition_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mpls_label_imposition_trace_t * t; + mpls_unicast_header_t hdr; + uword indent; + + t = va_arg (*args, mpls_label_imposition_trace_t *); + indent = format_get_indent (s); + hdr.label_exp_s_ttl = clib_net_to_host_u32(t->hdr.label_exp_s_ttl); + + s = format (s, "%Umpls-header:%U", + format_white_space, indent, + format_mpls_header, hdr); + return (s); +} + +static uword +mpls_label_imposition (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (mpls_label_imposition_inline(vm, node, frame, 0, 0, 0)); +} + +VLIB_REGISTER_NODE (mpls_label_imposition_node) = { + .function = mpls_label_imposition, + .name = "mpls-label-imposition", + .vector_size = sizeof (u32), + + .format_trace = format_mpls_label_imposition_trace, + .n_next_nodes = 1, + .next_nodes = { + [0] = "mpls-drop", + } +}; +VLIB_NODE_FUNCTION_MULTIARCH (mpls_label_imposition_node, + mpls_label_imposition) + +static uword +ip4_mpls_label_imposition (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (mpls_label_imposition_inline(vm, node, frame, 1, 0, 0)); +} + +VLIB_REGISTER_NODE (ip4_mpls_label_imposition_node) = { + .function = ip4_mpls_label_imposition, + .name = "ip4-mpls-label-imposition", + .vector_size = sizeof (u32), + + .format_trace = format_mpls_label_imposition_trace, + .n_next_nodes = 1, + .next_nodes = { + [0] = "ip4-drop", + } +}; +VLIB_NODE_FUNCTION_MULTIARCH (ip4_mpls_label_imposition_node, + ip4_mpls_label_imposition) + +static uword +ip6_mpls_label_imposition (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (mpls_label_imposition_inline(vm, node, frame, 0, 1, 0)); +} + +VLIB_REGISTER_NODE (ip6_mpls_label_imposition_node) = { + .function = ip6_mpls_label_imposition, + .name = "ip6-mpls-label-imposition", + .vector_size = sizeof (u32), + + .format_trace = format_mpls_label_imposition_trace, + .n_next_nodes = 1, + .next_nodes = { + [0] = "ip6-drop", + } +}; +VLIB_NODE_FUNCTION_MULTIARCH (ip6_mpls_label_imposition_node, + ip6_mpls_label_imposition) + +static uword +ethernet_mpls_label_imposition (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (mpls_label_imposition_inline(vm, node, frame, 0, 0, 1)); +} + +VLIB_REGISTER_NODE (ethernet_mpls_label_imposition_node) = { + .function = ethernet_mpls_label_imposition, + .name = "ethernet-mpls-label-imposition", + .vector_size = sizeof (u32), + + .format_trace = format_mpls_label_imposition_trace, + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-drop", + } +}; +VLIB_NODE_FUNCTION_MULTIARCH (ethernet_mpls_label_imposition_node, + ethernet_mpls_label_imposition) + +static void +mpls_label_dpo_mem_show (void) +{ + fib_show_memory_usage("MPLS label", + pool_elts(mpls_label_dpo_pool), + pool_len(mpls_label_dpo_pool), + sizeof(mpls_label_dpo_t)); +} + +const static dpo_vft_t mld_vft = { + .dv_lock = mpls_label_dpo_lock, + .dv_unlock = mpls_label_dpo_unlock, + .dv_format = format_mpls_label_dpo, + .dv_mem_show = mpls_label_dpo_mem_show, +}; + +const static char* const mpls_label_imp_ip4_nodes[] = +{ + "ip4-mpls-label-imposition", + NULL, +}; +const static char* const mpls_label_imp_ip6_nodes[] = +{ + "ip6-mpls-label-imposition", + NULL, +}; +const static char* const mpls_label_imp_mpls_nodes[] = +{ + "mpls-label-imposition", + NULL, +}; +const static char* const mpls_label_imp_ethernet_nodes[] = +{ + "ethernet-mpls-label-imposition", + NULL, +}; + +const static char* const * const mpls_label_imp_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = mpls_label_imp_ip4_nodes, + [DPO_PROTO_IP6] = mpls_label_imp_ip6_nodes, + [DPO_PROTO_MPLS] = mpls_label_imp_mpls_nodes, + [DPO_PROTO_ETHERNET] = mpls_label_imp_ethernet_nodes, +}; + + +void +mpls_label_dpo_module_init (void) +{ + dpo_register(DPO_MPLS_LABEL, &mld_vft, mpls_label_imp_nodes); +} |