-rw-r--r--   vnet/vnet/dpo/mpls_label_dpo.c | 166
-rw-r--r--   vnet/vnet/dpo/mpls_label_dpo.h |   5
-rw-r--r--   vnet/vnet/mpls/mpls_output.c   | 155
-rw-r--r--   vnet/vnet/mpls/node.c          |  87
4 files changed, 396 insertions, 17 deletions
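
All four files below gain the same optimisation: next to the existing single-packet loop, a "dual loop" is added that processes two packets per iteration while prefetching the buffer headers and initial data of the two packets that follow, so they are already in cache on the next iteration. A minimal sketch of the pattern with the per-packet work elided (the vlib/CLIB calls are the ones used in the diffs; the surrounding variables follow the diffs' naming):

    while (n_left_from >= 4 && n_left_to_next >= 2)
    {
        vlib_buffer_t *b0, *b1;
        u32 bi0, bi1, next0, next1;

        /* Prefetch the two packets after this pair. */
        {
            vlib_buffer_t *p2 = vlib_get_buffer (vm, from[2]);
            vlib_buffer_t *p3 = vlib_get_buffer (vm, from[3]);

            vlib_prefetch_buffer_header (p2, STORE);
            vlib_prefetch_buffer_header (p3, STORE);
            CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
            CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
        }

        bi0 = to_next[0] = from[0];
        bi1 = to_next[1] = from[1];
        from += 2;
        to_next += 2;
        n_left_from -= 2;
        n_left_to_next -= 2;

        b0 = vlib_get_buffer (vm, bi0);
        b1 = vlib_get_buffer (vm, bi1);

        /* ... per-packet work on b0/b1 computes next0/next1 ... */

        /* Fixes up the frame if either packet's next node differs
         * from the frame's next_index. */
        vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
                                         to_next, n_left_to_next,
                                         bi0, bi1, next0, next1);
    }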
diff --git a/vnet/vnet/dpo/mpls_label_dpo.c b/vnet/vnet/dpo/mpls_label_dpo.c
index 606b7ba3911..bbdc9666503 100644
--- a/vnet/vnet/dpo/mpls_label_dpo.c
+++ b/vnet/vnet/dpo/mpls_label_dpo.c
@@ -54,6 +54,7 @@ mpls_label_dpo_create (mpls_label_t *label_stack,
 
     mld = mpls_label_dpo_alloc();
     mld->mld_n_labels = vec_len(label_stack);
+    mld->mld_n_hdr_bytes = mld->mld_n_labels * sizeof(mld->mld_hdr[0]);
    mld->mld_payload_proto = payload_proto;
 
     /*
@@ -179,6 +180,163 @@ mpls_label_imposition_inline (vlib_main_t * vm,
 
         vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
 
+        while (n_left_from >= 4 && n_left_to_next >= 2)
+        {
+            mpls_unicast_header_t *hdr0, *hdr1;
+            mpls_label_dpo_t *mld0, *mld1;
+            u32 bi0, mldi0, bi1, mldi1;
+            vlib_buffer_t * b0, *b1;
+            u32 next0, next1;
+            u8 ttl0, ttl1;
+
+            bi0 = to_next[0] = from[0];
+            bi1 = to_next[1] = from[1];
+
+            /* Prefetch next iteration. */
+            {
+                vlib_buffer_t * p2, * p3;
+
+                p2 = vlib_get_buffer (vm, from[2]);
+                p3 = vlib_get_buffer (vm, from[3]);
+
+                vlib_prefetch_buffer_header (p2, STORE);
+                vlib_prefetch_buffer_header (p3, STORE);
+
+                CLIB_PREFETCH (p2->data, sizeof (hdr0[0]), STORE);
+                CLIB_PREFETCH (p3->data, sizeof (hdr0[0]), STORE);
+            }
+
+            from += 2;
+            to_next += 2;
+            n_left_from -= 2;
+            n_left_to_next -= 2;
+
+            b0 = vlib_get_buffer (vm, bi0);
+            b1 = vlib_get_buffer (vm, bi1);
+
+            /* dst lookup was done by ip4 lookup */
+            mldi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+            mldi1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX];
+            mld0 = mpls_label_dpo_get(mldi0);
+            mld1 = mpls_label_dpo_get(mldi1);
+
+            if (payload_is_ip4)
+            {
+                /*
+                 * decrement the TTL on ingress to the LSP
+                 */
+                ip4_header_t * ip0 = vlib_buffer_get_current(b0);
+                ip4_header_t * ip1 = vlib_buffer_get_current(b1);
+                u32 checksum0;
+                u32 checksum1;
+
+                checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
+                checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
+
+                checksum0 += checksum0 >= 0xffff;
+                checksum1 += checksum1 >= 0xffff;
+
+                ip0->checksum = checksum0;
+                ip1->checksum = checksum1;
+
+                ip0->ttl -= 1;
+                ip1->ttl -= 1;
+
+                ttl1 = ip1->ttl;
+                ttl0 = ip0->ttl;
+            }
+            else if (payload_is_ip6)
+            {
+                /*
+                 * decrement the TTL on ingress to the LSP
+                 */
+                ip6_header_t * ip0 = vlib_buffer_get_current(b0);
+                ip6_header_t * ip1 = vlib_buffer_get_current(b1);
+
+                ip0->hop_limit -= 1;
+                ip1->hop_limit -= 1;
+
+                ttl0 = ip0->hop_limit;
+                ttl1 = ip1->hop_limit;
+            }
+            else
+            {
+                /*
+                 * else, the packet to be encapped is an MPLS packet
+                 */
+                if (PREDICT_TRUE(vnet_buffer(b0)->mpls.first))
+                {
+                    /*
+                     * The first label to be imposed on the packet. This is a
+                     * label swap, in which case we stashed the TTL and EXP
+                     * bits in the packet in the lookup node.
+                     */
+                    ASSERT(0 != vnet_buffer (b0)->mpls.ttl);
+
+                    ttl0 = vnet_buffer(b0)->mpls.ttl - 1;
+                }
+                else
+                {
+                    /*
+                     * Not the first label, implying we are recursing down a
+                     * chain of output labels. Each layer is considered a new
+                     * LSP - hence the TTL is reset.
+                     */
+                    ttl0 = 255;
+                }
+                if (PREDICT_TRUE(vnet_buffer(b1)->mpls.first))
+                {
+                    ASSERT(0 != vnet_buffer (b1)->mpls.ttl);
+                    ttl1 = vnet_buffer(b1)->mpls.ttl - 1;
+                }
+                else
+                {
+                    ttl1 = 255;
+                }
+            }
+            vnet_buffer(b0)->mpls.first = 0;
+            vnet_buffer(b1)->mpls.first = 0;
+
+            /* Paint the MPLS header */
+            vlib_buffer_advance(b0, -(mld0->mld_n_hdr_bytes));
+            vlib_buffer_advance(b1, -(mld1->mld_n_hdr_bytes));
+
+            hdr0 = vlib_buffer_get_current(b0);
+            hdr1 = vlib_buffer_get_current(b1);
+
+            clib_memcpy(hdr0, mld0->mld_hdr, mld0->mld_n_hdr_bytes);
+            clib_memcpy(hdr1, mld1->mld_hdr, mld1->mld_n_hdr_bytes);
+
+            /* fixup the TTL for the inner most label */
+            hdr0 = hdr0 + (mld0->mld_n_labels - 1);
+            hdr1 = hdr1 + (mld1->mld_n_labels - 1);
+            ((char*)hdr0)[3] = ttl0;
+            ((char*)hdr1)[3] = ttl1;
+
+            next0 = mld0->mld_dpo.dpoi_next_node;
+            next1 = mld1->mld_dpo.dpoi_next_node;
+            vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mld0->mld_dpo.dpoi_index;
+            vnet_buffer(b1)->ip.adj_index[VLIB_TX] = mld1->mld_dpo.dpoi_index;
+
+            if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+            {
+                mpls_label_imposition_trace_t *tr =
+                    vlib_add_trace (vm, node, b0, sizeof (*tr));
+                tr->hdr = *hdr0;
+            }
+            if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+            {
+                mpls_label_imposition_trace_t *tr =
+                    vlib_add_trace (vm, node, b1, sizeof (*tr));
+                tr->hdr = *hdr1;
+            }
+
+            vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next,
+                                            n_left_to_next,
+                                            bi0, bi1, next0, next1);
+        }
+
         while (n_left_from > 0 && n_left_to_next > 0)
         {
             mpls_unicast_header_t *hdr0;
@@ -255,11 +413,9 @@ mpls_label_imposition_inline (vlib_main_t * vm,
             vnet_buffer(b0)->mpls.first = 0;
 
             /* Paint the MPLS header */
-            vlib_buffer_advance(b0, -(sizeof(*hdr0) * mld0->mld_n_labels));
+            vlib_buffer_advance(b0, -(mld0->mld_n_hdr_bytes));
 
             hdr0 = vlib_buffer_get_current(b0);
-
-            clib_memcpy(hdr0, mld0->mld_hdr,
-                        sizeof(*hdr0) * mld0->mld_n_labels);
+            clib_memcpy(hdr0, mld0->mld_hdr, mld0->mld_n_hdr_bytes);
 
             /* fixup the TTL for the inner most label */
             hdr0 = hdr0 + (mld0->mld_n_labels - 1);
@@ -268,7 +424,7 @@ mpls_label_imposition_inline (vlib_main_t * vm,
             next0 = mld0->mld_dpo.dpoi_next_node;
             vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mld0->mld_dpo.dpoi_index;
 
-            if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+            if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
             {
                 mpls_label_imposition_trace_t *tr =
                     vlib_add_trace (vm, node, b0, sizeof (*tr));
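
In the payload_is_ip4 path above, the TTL decrement is paired with an incremental checksum update instead of a full recompute: lowering the TTL by 1 lowers the big-endian ttl/protocol word by 0x0100, so the ones'-complement checksum must rise by 0x0100 with an end-around carry fold (RFC 1141 style). A self-contained demonstration of that arithmetic (standalone C, not VPP code; the sample value is arbitrary):

    #include <stdint.h>
    #include <stdio.h>
    #include <arpa/inet.h>

    /* Mirrors the checksum0 lines in the diff: the checksum field lives
     * in memory in network byte order, so the 0x0100 adjustment is
     * byte-swapped the same way before being added, and the carry out of
     * the low 16 bits is folded back in. */
    static uint16_t
    checksum_after_ttl_decrement (uint16_t checksum_as_stored)
    {
      uint32_t sum = checksum_as_stored + htons (0x0100);
      sum += sum >= 0xffff;       /* end-around carry fold */
      return (uint16_t) sum;
    }

    int
    main (void)
    {
      uint16_t before = 0x1234;   /* arbitrary example value */
      printf ("stored checksum 0x%04x -> 0x%04x after TTL decrement\n",
              before, checksum_after_ttl_decrement (before));
      return 0;
    }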
diff --git a/vnet/vnet/dpo/mpls_label_dpo.h b/vnet/vnet/dpo/mpls_label_dpo.h
index 6580c47d7cc..89bcb093b04 100644
--- a/vnet/vnet/dpo/mpls_label_dpo.h
+++ b/vnet/vnet/dpo/mpls_label_dpo.h
@@ -46,6 +46,11 @@ typedef struct mpls_label_dpo_t
     u16 mld_n_labels;
 
     /**
+     * Cached amount of header bytes to paint
+     */
+    u16 mld_n_hdr_bytes;
+
+    /**
      * Number of locks/users of the label
      */
     u16 mld_locks;
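
The new mld_n_hdr_bytes field caches the product mld_n_labels * sizeof(mld_hdr[0]) once, at DPO creation time, so both imposition loops can advance the buffer and copy the label stack with a precomputed byte count instead of multiplying per packet. A standalone sketch of the idea (simplified stand-in types, not the real mpls_label_dpo_t or the vlib buffer API):

    #include <stdint.h>
    #include <string.h>
    #include <stdio.h>

    typedef struct
    {
      uint32_t mld_hdr[8];      /* pre-built label stack, network order */
      uint16_t mld_n_labels;
      uint16_t mld_n_hdr_bytes; /* cached: n_labels * sizeof(mld_hdr[0]) */
    } mpls_label_dpo_t;

    static void
    mpls_label_dpo_init (mpls_label_dpo_t *mld, const uint32_t *stack,
                         uint16_t n_labels)
    {
      memcpy (mld->mld_hdr, stack, n_labels * sizeof (mld->mld_hdr[0]));
      mld->mld_n_labels = n_labels;
      /* computed once here, reused for every packet */
      mld->mld_n_hdr_bytes = n_labels * sizeof (mld->mld_hdr[0]);
    }

    /* "Paint" the stack in front of the payload and fix up the TTL byte
     * of the inner-most (last) label, as the imposition loops do. */
    static uint8_t *
    paint_labels (uint8_t *payload, const mpls_label_dpo_t *mld, uint8_t ttl)
    {
      uint8_t *hdr = payload - mld->mld_n_hdr_bytes;  /* buffer "advance" */
      memcpy (hdr, mld->mld_hdr, mld->mld_n_hdr_bytes);
      hdr[mld->mld_n_hdr_bytes - 1] = ttl;
      return hdr;
    }

    int
    main (void)
    {
      /* a hypothetical two-label stack, already in network byte order */
      uint32_t stack[2] = { 0x00064140, 0x000651ff };
      uint8_t buf[64];
      mpls_label_dpo_t mld;

      mpls_label_dpo_init (&mld, stack, 2);
      uint8_t *hdr = paint_labels (buf + 16, &mld, 63);
      printf ("painted %u bytes, inner TTL=%u\n",
              mld.mld_n_hdr_bytes, hdr[mld.mld_n_hdr_bytes - 1]);
      return 0;
    }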
diff --git a/vnet/vnet/mpls/mpls_output.c b/vnet/vnet/mpls/mpls_output.c
index 91514d68345..8292a0cb3d2 100644
--- a/vnet/vnet/mpls/mpls_output.c
+++ b/vnet/vnet/mpls/mpls_output.c
@@ -58,6 +58,7 @@ mpls_output_inline (vlib_main_t * vm,
 {
     u32 n_left_from, next_index, * from, * to_next, cpu_index;
     vlib_node_runtime_t * error_node;
+    u32 n_left_to_next;
 
     cpu_index = os_get_cpu_number();
     error_node = vlib_node_get_runtime (vm, mpls_output_node.index);
@@ -67,11 +68,146 @@ mpls_output_inline (vlib_main_t * vm,
 
     while (n_left_from > 0)
     {
-        u32 n_left_to_next;
-
         vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
 
+        while (n_left_from >= 4 && n_left_to_next >= 2)
+        {
+            ip_adjacency_t * adj0;
+            mpls_unicast_header_t *hdr0;
+            vlib_buffer_t * p0;
+            u32 pi0, rw_len0, adj_index0, next0, error0;
+
+            ip_adjacency_t * adj1;
+            mpls_unicast_header_t *hdr1;
+            vlib_buffer_t * p1;
+            u32 pi1, rw_len1, adj_index1, next1, error1;
+
+            /* Prefetch next iteration. */
+            {
+                vlib_buffer_t * p2, * p3;
+
+                p2 = vlib_get_buffer (vm, from[2]);
+                p3 = vlib_get_buffer (vm, from[3]);
+
+                vlib_prefetch_buffer_header (p2, STORE);
+                vlib_prefetch_buffer_header (p3, STORE);
+
+                CLIB_PREFETCH (p2->data, sizeof (hdr0[0]), STORE);
+                CLIB_PREFETCH (p3->data, sizeof (hdr1[0]), STORE);
+            }
+
+            pi0 = to_next[0] = from[0];
+            pi1 = to_next[1] = from[1];
+
+            from += 2;
+            n_left_from -= 2;
+            to_next += 2;
+            n_left_to_next -= 2;
+
+            p0 = vlib_get_buffer (vm, pi0);
+            p1 = vlib_get_buffer (vm, pi1);
+
+            adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+            adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
+
+            /* We should never rewrite a pkt using the MISS adjacency */
+            ASSERT(adj_index0);
+            ASSERT(adj_index1);
+
+            adj0 = adj_get(adj_index0);
+            adj1 = adj_get(adj_index1);
+            hdr0 = vlib_buffer_get_current (p0);
+            hdr1 = vlib_buffer_get_current (p1);
+
+            /* Guess we are only writing on simple Ethernet header. */
+            vnet_rewrite_two_headers (adj0[0], adj1[0], hdr0, hdr1,
+                                      sizeof (ethernet_header_t));
+
+            /* Update packet buffer attributes/set output interface. */
+            rw_len0 = adj0[0].rewrite_header.data_bytes;
+            rw_len1 = adj1[0].rewrite_header.data_bytes;
+
+            if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
+                vlib_increment_combined_counter
+                    (&adjacency_counters,
+                     cpu_index, adj_index0,
+                     /* packet increment */ 0,
+                     /* byte increment */ rw_len0-sizeof(ethernet_header_t));
+            if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
+                vlib_increment_combined_counter
+                    (&adjacency_counters,
+                     cpu_index, adj_index1,
+                     /* packet increment */ 0,
+                     /* byte increment */ rw_len1-sizeof(ethernet_header_t));
+
+            /* Check MTU of outgoing interface. */
+            if (PREDICT_TRUE(vlib_buffer_length_in_chain (vm, p0) <=
+                             adj0[0].rewrite_header.max_l3_packet_bytes))
+            {
+                p0->current_data -= rw_len0;
+                p0->current_length += rw_len0;
+
+                vnet_buffer (p0)->sw_if_index[VLIB_TX] =
+                    adj0[0].rewrite_header.sw_if_index;
+                next0 = adj0[0].rewrite_header.next_index;
+                error0 = IP4_ERROR_NONE;
+
+                if (is_midchain)
+                {
+                    adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
+                }
+            }
+            else
+            {
+                error0 = IP4_ERROR_MTU_EXCEEDED;
+                next0 = MPLS_OUTPUT_NEXT_DROP;
+            }
+            if (PREDICT_TRUE(vlib_buffer_length_in_chain (vm, p1) <=
+                             adj1[0].rewrite_header.max_l3_packet_bytes))
+            {
+                p1->current_data -= rw_len1;
+                p1->current_length += rw_len1;
+
+                vnet_buffer (p1)->sw_if_index[VLIB_TX] =
+                    adj1[0].rewrite_header.sw_if_index;
+                next1 = adj1[0].rewrite_header.next_index;
+                error1 = IP4_ERROR_NONE;
+
+                if (is_midchain)
+                {
+                    adj1->sub_type.midchain.fixup_func(vm, adj1, p1);
+                }
+            }
+            else
+            {
+                error1 = IP4_ERROR_MTU_EXCEEDED;
+                next1 = MPLS_OUTPUT_NEXT_DROP;
+            }
+
+            p0->error = error_node->errors[error0];
+            p1->error = error_node->errors[error1];
+
+            if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED))
+            {
+                mpls_output_trace_t *tr = vlib_add_trace (vm, node,
+                                                          p0, sizeof (*tr));
+                tr->adj_index = vnet_buffer(p0)->ip.adj_index[VLIB_TX];
+                tr->flow_hash = vnet_buffer(p0)->ip.flow_hash;
+            }
+            if (PREDICT_FALSE(p1->flags & VLIB_BUFFER_IS_TRACED))
+            {
+                mpls_output_trace_t *tr = vlib_add_trace (vm, node,
+                                                          p1, sizeof (*tr));
+                tr->adj_index = vnet_buffer(p1)->ip.adj_index[VLIB_TX];
+                tr->flow_hash = vnet_buffer(p1)->ip.flow_hash;
+            }
+
+            vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+                                             to_next, n_left_to_next,
+                                             pi0, pi1, next0, next1);
+        }
+
         while (n_left_from > 0 && n_left_to_next > 0)
         {
             ip_adjacency_t * adj0;
@@ -106,16 +242,8 @@ mpls_output_inline (vlib_main_t * vm,
                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
 
             /* Check MTU of outgoing interface. */
-            error0 = (vlib_buffer_length_in_chain (vm, p0)
-                      > adj0[0].rewrite_header.max_l3_packet_bytes
-                      ? IP4_ERROR_MTU_EXCEEDED
-                      : IP4_ERROR_NONE);
-
-            p0->error = error_node->errors[error0];
-
-            /* Don't adjust the buffer for ttl issue; icmp-error node wants
-             * to see the IP headerr */
-            if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
+            if (PREDICT_TRUE(vlib_buffer_length_in_chain (vm, p0) <=
+                             adj0[0].rewrite_header.max_l3_packet_bytes))
             {
                 p0->current_data -= rw_len0;
                 p0->current_length += rw_len0;
@@ -123,6 +251,7 @@ mpls_output_inline (vlib_main_t * vm,
                 vnet_buffer (p0)->sw_if_index[VLIB_TX] =
                     adj0[0].rewrite_header.sw_if_index;
                 next0 = adj0[0].rewrite_header.next_index;
+                error0 = IP4_ERROR_NONE;
 
                 if (is_midchain)
                 {
@@ -131,8 +260,10 @@ mpls_output_inline (vlib_main_t * vm,
             }
             else
             {
+                error0 = IP4_ERROR_MTU_EXCEEDED;
                 next0 = MPLS_OUTPUT_NEXT_DROP;
             }
+            p0->error = error_node->errors[error0];
 
             from += 1;
             n_left_from -= 1;
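
Besides the new dual loop, the scalar loop is reworked so the MTU check is a plain branch annotated PREDICT_TRUE, with error0 assigned in whichever arm runs and written to p0->error once, replacing the earlier ternary that always materialised and stored an error before branching. PREDICT_TRUE and PREDICT_FALSE are VPP's thin wrappers around GCC's __builtin_expect, which steers the compiler's block layout so the expected path falls through. A self-contained illustration of the construct (toy values, not VPP code):

    #include <stdio.h>

    /* Same definitions as vppinfra's PREDICT_* macros. */
    #define PREDICT_TRUE(x)  __builtin_expect ((x), 1)
    #define PREDICT_FALSE(x) __builtin_expect ((x), 0)

    int
    main (void)
    {
      int pkt_len = 1400, mtu = 1500;   /* toy values */

      /* The in-MTU case is laid out as the fall-through path. */
      if (PREDICT_TRUE (pkt_len <= mtu))
        puts ("forward");
      else
        puts ("drop: MTU exceeded");
      return 0;
    }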
diff --git a/vnet/vnet/mpls/node.c b/vnet/vnet/mpls/node.c
index 2b0461f751e..1810091252e 100644
--- a/vnet/vnet/mpls/node.c
+++ b/vnet/vnet/mpls/node.c
@@ -90,6 +90,93 @@ mpls_input_inline (vlib_main_t * vm,
 
       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
 
+      while (n_left_from >= 4 && n_left_to_next >= 2)
+      {
+          u32 label0, bi0, next0, sw_if_index0;
+          u32 label1, bi1, next1, sw_if_index1;
+          mpls_unicast_header_t *h0, *h1;
+          vlib_buffer_t *b0, *b1;
+
+          /* Prefetch next iteration. */
+          {
+              vlib_buffer_t * p2, * p3;
+
+              p2 = vlib_get_buffer (vm, from[2]);
+              p3 = vlib_get_buffer (vm, from[3]);
+
+              vlib_prefetch_buffer_header (p2, STORE);
+              vlib_prefetch_buffer_header (p3, STORE);
+
+              CLIB_PREFETCH (p2->data, sizeof (h0[0]), STORE);
+              CLIB_PREFETCH (p3->data, sizeof (h1[0]), STORE);
+          }
+
+          bi0 = to_next[0] = from[0];
+          bi1 = to_next[1] = from[1];
+
+          from += 2;
+          to_next += 2;
+          n_left_from -= 2;
+          n_left_to_next -= 2;
+
+          b0 = vlib_get_buffer (vm, bi0);
+          b1 = vlib_get_buffer (vm, bi1);
+
+          h0 = vlib_buffer_get_current (b0);
+          h1 = vlib_buffer_get_current (b1);
+
+          sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+          sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+
+          label0 = clib_net_to_host_u32 (h0->label_exp_s_ttl);
+          label1 = clib_net_to_host_u32 (h1->label_exp_s_ttl);
+
+          /* TTL expired? */
+          if (PREDICT_FALSE(vnet_mpls_uc_get_ttl (label0) == 0))
+          {
+              next0 = MPLS_INPUT_NEXT_DROP;
+              b0->error = node->errors[MPLS_ERROR_TTL_EXPIRED];
+          }
+          else
+          {
+              next0 = MPLS_INPUT_NEXT_LOOKUP;
+              vnet_feature_arc_start(mm->input_feature_arc_index, sw_if_index0, &next0, b0);
+              vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1);
+          }
+
+          if (PREDICT_FALSE(vnet_mpls_uc_get_ttl (label1) == 0))
+          {
+              next1 = MPLS_INPUT_NEXT_DROP;
+              b1->error = node->errors[MPLS_ERROR_TTL_EXPIRED];
+          }
+          else
+          {
+              next1 = MPLS_INPUT_NEXT_LOOKUP;
+              vnet_feature_arc_start(mm->input_feature_arc_index, sw_if_index1, &next1, b1);
+              vlib_increment_simple_counter (cm, cpu_index, sw_if_index1, 1);
+          }
+
+          if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+          {
+              mpls_input_trace_t *tr = vlib_add_trace (vm, node,
+                                                       b0, sizeof (*tr));
+              tr->next_index = next0;
+              tr->label_host_byte_order = label0;
+          }
+          if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+          {
+              mpls_input_trace_t *tr = vlib_add_trace (vm, node,
+                                                       b1, sizeof (*tr));
+              tr->next_index = next1;
+              tr->label_host_byte_order = label1;
+          }
+
+          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           bi0, bi1, next0, next1);
+      }
+
       while (n_left_from > 0 && n_left_to_next > 0)
       {
          u32 bi0;
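
The input node byte-swaps label_exp_s_ttl once with clib_net_to_host_u32 and then extracts fields from the host-order word; vnet_mpls_uc_get_ttl is just the low 8 bits, which is why the TTL-expired test is a cheap compare. A self-contained sketch of the field extraction (hand-rolled helpers that mirror, but are not, VPP's vnet_mpls_uc_get_* accessors):

    #include <stdint.h>
    #include <stdio.h>
    #include <arpa/inet.h>

    /* MPLS label stack entry, host byte order:
     * label(31:12) | EXP(11:9) | S(8) | TTL(7:0) */
    static uint32_t get_label (uint32_t v) { return v >> 12; }
    static uint32_t get_exp (uint32_t v)   { return (v >> 9) & 0x7; }
    static uint32_t get_s (uint32_t v)     { return (v >> 8) & 0x1; }
    static uint32_t get_ttl (uint32_t v)   { return v & 0xff; }

    int
    main (void)
    {
      /* label 1000, EXP 0, bottom-of-stack, TTL 0: the input node above
       * would drop this with MPLS_ERROR_TTL_EXPIRED */
      uint32_t wire = htonl ((1000 << 12) | (1 << 8) | 0);
      uint32_t v = ntohl (wire);   /* one swap, then shift/mask */

      printf ("label=%u exp=%u s=%u ttl=%u -> %s\n",
              get_label (v), get_exp (v), get_s (v), get_ttl (v),
              get_ttl (v) == 0 ? "drop (TTL expired)" : "lookup");
      return 0;
    }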