/* * Copyright (c) 2016 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include <vnet/ip/ip.h> #include <vnet/dpo/mpls_label_dpo.h> #include <vnet/mpls/mpls.h> /* * pool of all MPLS Label DPOs */ mpls_label_dpo_t *mpls_label_dpo_pool; static mpls_label_dpo_t * mpls_label_dpo_alloc (void) { mpls_label_dpo_t *mld; pool_get_aligned(mpls_label_dpo_pool, mld, CLIB_CACHE_LINE_BYTES); memset(mld, 0, sizeof(*mld)); dpo_reset(&mld->mld_dpo); return (mld); } static index_t mpls_label_dpo_get_index (mpls_label_dpo_t *mld) { return (mld - mpls_label_dpo_pool); } index_t mpls_label_dpo_create (mpls_label_t *label_stack, mpls_eos_bit_t eos, u8 ttl, u8 exp, dpo_proto_t payload_proto, const dpo_id_t *dpo) { mpls_label_dpo_t *mld; u32 ii; mld = mpls_label_dpo_alloc(); mld->mld_n_labels = vec_len(label_stack); mld->mld_n_hdr_bytes = mld->mld_n_labels * sizeof(mld->mld_hdr[0]); mld->mld_payload_proto = payload_proto; /* * construct label rewrite headers for each value value passed. * get the header in network byte order since we will paint it * on a packet in the data-plane */ for (ii = 0; ii < mld->mld_n_labels-1; ii++) { vnet_mpls_uc_set_label(&mld->mld_hdr[ii].label_exp_s_ttl, label_stack[ii]); vnet_mpls_uc_set_ttl(&mld->mld_hdr[ii].label_exp_s_ttl, 255); vnet_mpls_uc_set_exp(&mld->mld_hdr[ii].label_exp_s_ttl, 0); vnet_mpls_uc_set_s(&mld->mld_hdr[ii].label_exp_s_ttl, MPLS_NON_EOS); mld->mld_hdr[ii].label_exp_s_ttl = clib_host_to_net_u32(mld->mld_hdr[ii].label_exp_s_ttl); } /* * the inner most label */ ii = mld->mld_n_labels-1; vnet_mpls_uc_set_label(&mld->mld_hdr[ii].label_exp_s_ttl, label_stack[ii]); vnet_mpls_uc_set_ttl(&mld->mld_hdr[ii].label_exp_s_ttl, ttl); vnet_mpls_uc_set_exp(&mld->mld_hdr[ii].label_exp_s_ttl, exp); vnet_mpls_uc_set_s(&mld->mld_hdr[ii].label_exp_s_ttl, eos); mld->mld_hdr[ii].label_exp_s_ttl = clib_host_to_net_u32(mld->mld_hdr[ii].label_exp_s_ttl); /* * stack this label objct on its parent. */ dpo_stack(DPO_MPLS_LABEL, mld->mld_payload_proto, &mld->mld_dpo, dpo); return (mpls_label_dpo_get_index(mld)); } u8* format_mpls_label_dpo (u8 *s, va_list *args) { index_t index = va_arg (*args, index_t); u32 indent = va_arg (*args, u32); mpls_unicast_header_t hdr; mpls_label_dpo_t *mld; u32 ii; s = format(s, "mpls-label:[%d]:", index); if (pool_is_free_index(mpls_label_dpo_pool, index)) { /* * the packet trace can be printed after the DPO has been deleted */ return (s); } mld = mpls_label_dpo_get(index); for (ii = 0; ii < mld->mld_n_labels; ii++) { hdr.label_exp_s_ttl = clib_net_to_host_u32(mld->mld_hdr[ii].label_exp_s_ttl); s = format(s, "%U", format_mpls_header, hdr); } s = format(s, "\n%U", format_white_space, indent); s = format(s, "%U", format_dpo_id, &mld->mld_dpo, indent+2); return (s); } static void mpls_label_dpo_lock (dpo_id_t *dpo) { mpls_label_dpo_t *mld; mld = mpls_label_dpo_get(dpo->dpoi_index); mld->mld_locks++; } static void mpls_label_dpo_unlock (dpo_id_t *dpo) { mpls_label_dpo_t *mld; mld = mpls_label_dpo_get(dpo->dpoi_index); mld->mld_locks--; if (0 == mld->mld_locks) { dpo_reset(&mld->mld_dpo); pool_put(mpls_label_dpo_pool, mld); } } /** * @brief A struct to hold tracing information for the MPLS label imposition * node. */ typedef struct mpls_label_imposition_trace_t_ { /** * The MPLS header imposed */ mpls_unicast_header_t hdr; } mpls_label_imposition_trace_t; always_inline mpls_unicast_header_t * mpls_label_paint (vlib_buffer_t * b0, mpls_label_dpo_t *mld0, u8 ttl0) { mpls_unicast_header_t *hdr0; vlib_buffer_advance(b0, -(mld0->mld_n_hdr_bytes)); hdr0 = vlib_buffer_get_current(b0); if (1 == mld0->mld_n_labels) { /* optimise for the common case of one label */ *hdr0 = mld0->mld_hdr[0]; } else { clib_memcpy(hdr0, mld0->mld_hdr, mld0->mld_n_hdr_bytes); hdr0 = hdr0 + (mld0->mld_n_labels - 1); } /* fixup the TTL for the inner most label */ ((char*)hdr0)[3] = ttl0; return (hdr0); } always_inline uword mpls_label_imposition_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame, u8 payload_is_ip4, u8 payload_is_ip6, u8 payload_is_ethernet) { u32 n_left_from, next_index, * from, * to_next; from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; next_index = node->cached_next_index; while (n_left_from > 0) { u32 n_left_to_next; vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); while (n_left_from >= 8 && n_left_to_next >= 4) { u32 bi0, mldi0, bi1, mldi1, bi2, mldi2, bi3, mldi3; mpls_unicast_header_t *hdr0, *hdr1, *hdr2, *hdr3; mpls_label_dpo_t *mld0, *mld1, *mld2, *mld3; vlib_buffer_t * b0, *b1, * b2, *b3; u32 next0, next1, next2, next3; u8 ttl0, ttl1,ttl2, ttl3 ; bi0 = to_next[0] = from[0]; bi1 = to_next[1] = from[1]; bi2 = to_next[2] = from[2]; bi3 = to_next[3] = from[3]; /* Prefetch next iteration. */ { vlib_buffer_t * p2, * p3, *p4, *p5; p2 = vlib_get_buffer (vm, from[2]); p3 = vlib_get_buffer (vm, from[3]); p4 = vlib_get_buffer (vm, from[4]); p5 = vlib_get_buffer (vm, from[5]); vlib_prefetch_buffer_header (p2, STORE); vlib_prefetch_buffer_header (p3, STORE); vlib_prefetch_buffer_header (p4, STORE); vlib_prefetch_buffer_header (p5, STORE); CLIB_PREFETCH (p2->data, sizeof (hdr0[0]), STORE); CLIB_PREFETCH (p3->data, sizeof (hdr0[0]), STORE); CLIB_PREFETCH (p4->data, sizeof (hdr0[0]), STORE); CLIB_PREFETCH (p5->data, sizeof (hdr0[0]), STORE); } from += 4; to_next += 4; n_left_from -= 4; n_left_to_next -= 4; b0 = vlib_get_buffer (vm, bi0); b1 = vlib_get_buffer (vm, bi1); b2 = vlib_get_buffer (vm, bi2); b3 = vlib_get_buffer (vm, bi3); /* dst lookup was done by ip4 lookup */ mldi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; mldi1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX]; mldi2 = vnet_buffer(b2)->ip.adj_index[VLIB_TX]; mldi3 = vnet_buffer(b3)->ip.adj_index[VLIB_TX]; mld0 = mpls_label_dpo_get(mldi0); mld1 = mpls_label_dpo_get(mldi1); mld2 = mpls_label_dpo_get(mldi2); mld3 = mpls_label_dpo_get(mldi3); if (payload_is_ip4) { /* * decrement the TTL on ingress to the LSP */ ip4_header_t * ip0 = vlib_buffer_get_current(b0); ip4_header_t * ip1 = vlib_buffer_get_current(b1); ip4_header_t * ip2 = vlib_buffer_get_current(b2); ip4_header_t * ip3 = vlib_buffer_get_current(b3); u32 checksum0; u32 checksum1; u32 checksum2; u32 checksum3; checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100); checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100); checksum2 = ip2->checksum + clib_host_to_net_u16 (0x0100); checksum3 = ip3->checksum + clib_host_to_net_u16 (0x0100); checksum0 += checksum0 >= 0xffff; checksum1 += checksum1 >= 0xffff; checksum2 += checksum2 >= 0xffff; checksum3 += checksum3 >= 0xffff; ip0->checksum = checksum0; ip1->checksum = checksum1; ip2->checksum = checksum2; ip3->checksum = checksum3; ip0->ttl -= 1; ip1->ttl -= 1; ip2->ttl -= 1; ip3->ttl -= 1; ttl1 = ip1->ttl; ttl0 = ip0->ttl; ttl3 = ip3->ttl; ttl2 = ip2->ttl; } else if (payload_is_ip6) { /* * decrement the TTL on ingress to the LSP */ ip6_header_t * ip0 = vlib_buffer_get_current(b0); ip6_header_t * ip1 = vlib_buffer_get_current(b1); ip6_header_t * ip2 = vlib_buffer_get_current(b2); ip6_header_t * ip3 = vlib_buffer_get_current(b3); ip0->hop_limit -= 1; ip1->hop_limit -= 1; ip2->hop_limit -= 1; ip3->hop_limit -= 1; ttl0 = ip0->hop_limit; ttl1 = ip1->hop_limit; ttl2 = ip2->hop_limit; ttl3 = ip3->hop_limit; } else if (payload_is_ethernet) { /* * nothing to chang ein the ethernet header */ ttl0 = ttl1 = ttl2 = ttl3 = 255; } else { /* * else, the packet to be encapped is an MPLS packet */ if (PREDICT_TRUE(vnet_buffer(b0)->mpls.first)) { /* * The first label to be imposed on the packet. this is a label swap. * in which case we stashed the TTL and EXP bits in the * packet in the lookup node */ ASSERT(0 != vnet_buffer (b0)->mpls.ttl); ttl0 = vnet_buffer(b0)->mpls.ttl - 1; } else { /* * not the first label. implying we are recusring down a chain of * output labels. * Each layer is considered a new LSP - hence the TTL is reset. */ ttl0 = 255; } if (PREDICT_TRUE(vnet_buffer(b1)->mpls.first)) { ASSERT(1 != vnet_buffer (b1)->mpls.ttl); ttl1 = vnet_buffer(b1)->mpls.ttl - 1; } else { ttl1 = 255; } if (PREDICT_TRUE(vnet_buffer(b2)->mpls.first)) { ASSERT(1 != vnet_buffer (b2)->mpls.ttl); ttl2 = vnet_buffer(b2)->mpls.ttl - 1; } else { ttl2 = 255; } if (PREDICT_TRUE(vnet_buffer(b3)->mpls.first)) { ASSERT(1 != vnet_buffer (b3)->mpls.ttl); ttl3 = vnet_buffer(b3)->mpls.ttl - 1; } else { ttl3 = 255; } } vnet_buffer(b0)->mpls.first = 0; vnet_buffer(b1)->mpls.first = 0; vnet_buffer(b2)->mpls.first = 0; vnet_buffer(b3)->mpls.first = 0; /* Paint the MPLS header */ hdr0 = mpls_label_paint(b0, mld0, ttl0); hdr1 = mpls_label_paint(b1, mld1, ttl1); hdr2 = mpls_label_paint(b2, mld2, ttl2); hdr3 = mpls_label_paint(b3, mld3, ttl3); next0 = mld0->mld_dpo.dpoi_next_node; next1 = mld1->mld_dpo.dpoi_next_node; next2 = mld2->mld_dpo.dpoi_next_node; next3 = mld3->mld_dpo.dpoi_next_node; vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mld0->mld_dpo.dpoi_index; vnet_buffer(b1)->ip.adj_index[VLIB_TX] = mld1->mld_dpo.dpoi_index; vnet_buffer(b2)->ip.adj_index[VLIB_TX] = mld2->mld_dpo.dpoi_index; vnet_buffer(b3)->ip.adj_index[VLIB_TX] = mld3->mld_dpo.dpoi_index; if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) { mpls_label_imposition_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); tr->hdr = *hdr0; } if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED)) { mpls_label_imposition_trace_t *tr = vlib_add_trace (vm, node, b1, sizeof (*tr)); tr->hdr = *hdr1; } if (PREDICT_FALSE(b2->flags & VLIB_BUFFER_IS_TRACED)) { mpls_label_imposition_trace_t *tr = vlib_add_trace (vm, node, b2, sizeof (*tr)); tr->hdr = *hdr2; } if (PREDICT_FALSE(b3->flags & VLIB_BUFFER_IS_TRACED)) { mpls_label_imposition_trace_t *tr = vlib_add_trace (vm, node, b3, sizeof (*tr)); tr->hdr = *hdr3; } vlib_validate_buffer_enqueue_x4(vm, node, next_index, to_next, n_left_to_next, bi0, bi1, bi2, bi3, next0, next1, next2, next3); } while (n_left_from > 0 && n_left_to_next > 0) { mpls_unicast_header_t *hdr0; mpls_label_dpo_t *mld0; vlib_buffer_t * b0; u32 bi0, mldi0; u32 next0; u8 ttl; bi0 = from[0]; to_next[0] = bi0; from += 1; to_next += 1; n_left_from -= 1; n_left_to_next -= 1; b0 = vlib_get_buffer (vm, bi0); /* dst lookup was done by ip4 lookup */ mldi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; mld0 = mpls_label_dpo_get(mldi0); if (payload_is_ip4) { /* * decrement the TTL on ingress to the LSP */ ip4_header_t * ip0 = vlib_buffer_get_current(b0); u32 checksum0; checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100); checksum0 += checksum0 >= 0xffff; ip0->checksum = checksum0; ip0->ttl -= 1; ttl = ip0->ttl; } else if (payload_is_ip6) { /* * decrement the TTL on ingress to the LSP */ ip6_header_t * ip0 = vlib_buffer_get_current(b0); ip0->hop_limit -= 1; ttl = ip0->hop_limit; } else { /* * else, the packet to be encapped is an MPLS packet */ if (vnet_buffer(b0)->mpls.first) { /* * The first label to be imposed on the packet. this is a label swap. * in which case we stashed the TTL and EXP bits in the * packet in the lookup node */ ASSERT(0 != vnet_buffer (b0)->mpls.ttl); ttl = vnet_buffer(b0)->mpls.ttl - 1; } else { /* * not the first label. implying we are recusring down a chain of * output labels. * Each layer is considered a new LSP - hence the TTL is reset. */ ttl = 255; } } vnet_buffer(b0)->mpls.first = 0; /* Paint the MPLS header */ vlib_buffer_advance(b0, -(mld0->mld_n_hdr_bytes)); hdr0 = vlib_buffer_get_current(b0); clib_memcpy(hdr0, mld0->mld_hdr, mld0->mld_n_hdr_bytes); /* fixup the TTL for the inner most label */ hdr0 = hdr0 + (mld0->mld_n_labels - 1); ((char*)hdr0)[3] = ttl; next0 = mld0->mld_dpo.dpoi_next_node; vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mld0->mld_dpo.dpoi_index; if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) { mpls_label_imposition_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); tr->hdr = *hdr0; } vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, bi0, next0); } vlib_put_next_frame (vm, node, next_index, n_left_to_next); } return from_frame->n_vectors; } static u8 * format_mpls_label_imposition_trace (u8 * s, va_list * args) { CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); mpls_label_imposition_trace_t * t; mpls_unicast_header_t hdr; u32 indent; t = va_arg (*args, mpls_label_imposition_trace_t *); indent = format_get_indent (s); hdr.label_exp_s_ttl = clib_net_to_host_u32(t->hdr.label_exp_s_ttl); s = format (s, "%Umpls-header:%U", format_white_space, indent, format_mpls_header, hdr); return (s); } static uword mpls_label_imposition (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { return (mpls_label_imposition_inline(vm, node, frame, 0, 0, 0)); } VLIB_REGISTER_NODE (mpls_label_imposition_node) = { .function = mpls_label_imposition, .name = "mpls-label-imposition", .vector_size = sizeof (u32), .format_trace = format_mpls_label_imposition_trace, .n_next_nodes = 1, .next_nodes = { [0] = "mpls-drop", } }; VLIB_NODE_FUNCTION_MULTIARCH (mpls_label_imposition_node, mpls_label_imposition) static uword ip4_mpls_label_imposition (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { return (mpls_label_imposition_inline(vm, node, frame, 1, 0, 0)); } VLIB_REGISTER_NODE (ip4_mpls_label_imposition_node) = { .function = ip4_mpls_label_imposition, .name = "ip4-mpls-label-imposition", .vector_size = sizeof (u32), .format_trace = format_mpls_label_imposition_trace, .n_next_nodes = 1, .next_nodes = { [0] = "ip4-drop", } }; VLIB_NODE_FUNCTION_MULTIARCH (ip4_mpls_label_imposition_node, ip4_mpls_label_imposition) static uword ip6_mpls_label_imposition (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { return (mpls_label_imposition_inline(vm, node, frame, 0, 1, 0)); } VLIB_REGISTER_NODE (ip6_mpls_label_imposition_node) = { .function = ip6_mpls_label_imposition, .name = "ip6-mpls-label-imposition", .vector_size = sizeof (u32), .format_trace = format_mpls_label_imposition_trace, .n_next_nodes = 1, .next_nodes = { [0] = "ip6-drop", } }; VLIB_NODE_FUNCTION_MULTIARCH (ip6_mpls_label_imposition_node, ip6_mpls_label_imposition) static uword ethernet_mpls_label_imposition (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { return (mpls_label_imposition_inline(vm, node, frame, 0, 0, 1)); } VLIB_REGISTER_NODE (ethernet_mpls_label_imposition_node) = { .function = ethernet_mpls_label_imposition, .name = "ethernet-mpls-label-imposition", .vector_size = sizeof (u32), .format_trace = format_mpls_label_imposition_trace, .n_next_nodes = 1, .next_nodes = { [0] = "error-drop", } }; VLIB_NODE_FUNCTION_MULTIARCH (ethernet_mpls_label_imposition_node, ethernet_mpls_label_imposition) static void mpls_label_dpo_mem_show (void) { fib_show_memory_usage("MPLS label", pool_elts(mpls_label_dpo_pool), pool_len(mpls_label_dpo_pool), sizeof(mpls_label_dpo_t)); } const static dpo_vft_t mld_vft = { .dv_lock = mpls_label_dpo_lock, .dv_unlock = mpls_label_dpo_unlock, .dv_format = format_mpls_label_dpo, .dv_mem_show = mpls_label_dpo_mem_show, }; const static char* const mpls_label_imp_ip4_nodes[] = { "ip4-mpls-label-imposition", NULL, }; const static char* const mpls_label_imp_ip6_nodes[] = { "ip6-mpls-label-imposition", NULL, }; const static char* const mpls_label_imp_mpls_nodes[] = { "mpls-label-imposition", NULL, }; const static char* const mpls_label_imp_ethernet_nodes[] = { "ethernet-mpls-label-imposition", NULL, }; const static char* const * const mpls_label_imp_nodes[DPO_PROTO_NUM] = { [DPO_PROTO_IP4] = mpls_label_imp_ip4_nodes, [DPO_PROTO_IP6] = mpls_label_imp_ip6_nodes, [DPO_PROTO_MPLS] = mpls_label_imp_mpls_nodes, [DPO_PROTO_ETHERNET] = mpls_label_imp_ethernet_nodes, }; void mpls_label_dpo_module_init (void) { dpo_register(DPO_MPLS_LABEL, &mld_vft, mpls_label_imp_nodes); }