diff options
Diffstat (limited to 'src/vnet/gre')
-rw-r--r-- | src/vnet/gre/gre.api | 22 | ||||
-rw-r--r-- | src/vnet/gre/gre.c | 271 | ||||
-rw-r--r-- | src/vnet/gre/gre.h | 189 | ||||
-rw-r--r-- | src/vnet/gre/gre_api.c | 14 | ||||
-rw-r--r-- | src/vnet/gre/interface.c | 330 | ||||
-rw-r--r-- | src/vnet/gre/node.c | 83 | ||||
-rw-r--r-- | src/vnet/gre/packet.h | 106 |
7 files changed, 677 insertions, 338 deletions
diff --git a/src/vnet/gre/gre.api b/src/vnet/gre/gre.api index b410ac9d49a..28c970154ae 100644 --- a/src/vnet/gre/gre.api +++ b/src/vnet/gre/gre.api @@ -13,18 +13,32 @@ * limitations under the License. */ -option version = "1.0.0"; +option version = "1.0.1"; +/** \brief Create or delete a GRE tunnel + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_add - Use 1 to create the tunnel, 0 to remove it + @param is_ipv6 - Use 0 for IPv4, 1 for IPv6 + @param tunnel_type - 0: L3, 1: TEB, 2: ERSPAN + @param instance - optional unique custom device instance, else ~0. + @param src_address - Source IP address + @param dst_address - Destination IP address, can be multicast + @param outer_fib_id - Encap FIB table ID + @param session_id - session for ERSPAN tunnel, range 0-1023 +*/ define gre_add_del_tunnel { u32 client_index; u32 context; u8 is_add; u8 is_ipv6; - u8 teb; + u8 tunnel_type; + u32 instance; /* If non-~0, specifies a custom dev instance */ u8 src_address[16]; u8 dst_address[16]; u32 outer_fib_id; + u16 session_id; }; define gre_add_del_tunnel_reply @@ -45,11 +59,13 @@ define gre_tunnel_details { u32 context; u32 sw_if_index; + u32 instance; u8 is_ipv6; - u8 teb; + u8 tunnel_type; u8 src_address[16]; u8 dst_address[16]; u32 outer_fib_id; + u16 session_id; }; /* diff --git a/src/vnet/gre/gre.c b/src/vnet/gre/gre.c index c049b87b6a7..a3702920334 100644 --- a/src/vnet/gre/gre.c +++ b/src/vnet/gre/gre.c @@ -62,7 +62,7 @@ format_gre_tx_trace (u8 * s, va_list * args) gre_tx_trace_t *t = va_arg (*args, gre_tx_trace_t *); s = format (s, "GRE: tunnel %d len %d src %U dst %U", - t->tunnel_id, clib_net_to_host_u16 (t->length), + t->tunnel_id, t->length, format_ip46_address, &t->src, IP46_TYPE_ANY, format_ip46_address, &t->dst, IP46_TYPE_ANY); return s; @@ -100,7 +100,7 @@ format_gre_header_with_length (u8 * s, va_list * args) s = format (s, "GRE %U", format_gre_protocol, p); - if 
(max_header_bytes != 0 && header_bytes > max_header_bytes) + if (max_header_bytes != 0 && header_bytes < max_header_bytes) { gre_protocol_info_t *pi = gre_get_protocol_info (gm, p); vlib_node_t *node = vlib_get_node (gm->vlib_main, pi->node_index); @@ -208,6 +208,7 @@ gre_build_rewrite (vnet_main_t * vnm, gre_main_t *gm = &gre_main; ip4_and_gre_header_t *h4; ip6_and_gre_header_t *h6; + gre_header_t *gre; u8 *rewrite = NULL; gre_tunnel_t *t; u32 ti; @@ -227,9 +228,7 @@ gre_build_rewrite (vnet_main_t * vnm, { vec_validate (rewrite, sizeof (*h4) - 1); h4 = (ip4_and_gre_header_t *) rewrite; - h4->gre.protocol = - clib_host_to_net_u16 (gre_proto_from_vnet_link (link_type)); - + gre = &h4->gre; h4->ip4.ip_version_and_header_length = 0x45; h4->ip4.ttl = 254; h4->ip4.protocol = IP_PROTOCOL_GRE; @@ -242,9 +241,7 @@ gre_build_rewrite (vnet_main_t * vnm, { vec_validate (rewrite, sizeof (*h6) - 1); h6 = (ip6_and_gre_header_t *) rewrite; - h6->gre.protocol = - clib_host_to_net_u16 (gre_proto_from_vnet_link (link_type)); - + gre = &h6->gre; h6->ip6.ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (6 << 28); h6->ip6.hop_limit = 255; @@ -256,6 +253,15 @@ gre_build_rewrite (vnet_main_t * vnm, h6->ip6.dst_address.as_u64[1] = t->tunnel_dst.fp_addr.ip6.as_u64[1]; } + if (PREDICT_FALSE (t->type == GRE_TUNNEL_TYPE_ERSPAN)) + { + gre->protocol = clib_host_to_net_u16 (GRE_PROTOCOL_erspan); + gre->flags_and_version = clib_host_to_net_u16 (GRE_FLAGS_SEQUENCE); + } + else + gre->protocol = + clib_host_to_net_u16 (gre_proto_from_vnet_link (link_type)); + return (rewrite); } @@ -302,33 +308,43 @@ gre_update_adj (vnet_main_t * vnm, u32 sw_if_index, adj_index_t ai) t = pool_elt_at_index (gm->tunnels, ti); is_ipv6 = t->tunnel_dst.fp_proto == FIB_PROTOCOL_IP6 ? 1 : 0; - adj_nbr_midchain_update_rewrite (ai, !is_ipv6 ? gre4_fixup : gre6_fixup, - NULL, - (VNET_LINK_ETHERNET == - adj_get_link_type (ai) ? 
- ADJ_FLAG_MIDCHAIN_NO_COUNT : - ADJ_FLAG_NONE), gre_build_rewrite (vnm, - sw_if_index, - adj_get_link_type - (ai), - NULL)); + adj_nbr_midchain_update_rewrite + (ai, !is_ipv6 ? gre4_fixup : gre6_fixup, NULL, + (VNET_LINK_ETHERNET == adj_get_link_type (ai) ? + ADJ_FLAG_MIDCHAIN_NO_COUNT : ADJ_FLAG_NONE), + gre_build_rewrite (vnm, sw_if_index, adj_get_link_type (ai), NULL)); gre_tunnel_stack (ai); } + +typedef enum +{ + GRE_ENCAP_NEXT_DROP, + GRE_ENCAP_NEXT_L2_MIDCHAIN, + GRE_ENCAP_N_NEXT, +} gre_encap_next_t; + +#define NEXT_IDX (GRE_ENCAP_NEXT_L2_MIDCHAIN) + /** - * @brief TX function. Only called L2. L3 traffic uses the adj-midchains + * @brief TX function. Only called for L2 payload including TEB or ERSPAN. + * L3 traffic uses the adj-midchains. */ static uword -gre_interface_tx_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) +gre_interface_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) { gre_main_t *gm = &gre_main; + vnet_main_t *vnm = gm->vnet_main; u32 next_index; u32 *from, *to_next, n_left_from, n_left_to_next; - vnet_interface_output_runtime_t *rd = (void *) node->runtime_data; - const gre_tunnel_t *gt = pool_elt_at_index (gm->tunnels, rd->dev_instance); - u8 is_ipv6 = gt->tunnel_dst.fp_proto == FIB_PROTOCOL_IP6 ? 
1 : 0; + u32 sw_if_index0 = 0; + u32 sw_if_index1 = 0; + adj_index_t adj_index0 = ADJ_INDEX_INVALID; + adj_index_t adj_index1 = ADJ_INDEX_INVALID; + gre_tunnel_t *gt0 = NULL; + gre_tunnel_t *gt1 = NULL; /* Vector of buffer / pkt indices we're supposed to process */ from = vlib_frame_vector_args (frame); @@ -344,77 +360,193 @@ gre_interface_tx_inline (vlib_main_t * vm, /* set up to enqueue to our disposition with index = next_index */ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - /* - * FIXME DUAL LOOP - */ + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0 = from[0]; + u32 bi1 = from[1]; + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); + vlib_buffer_t *b1 = vlib_get_buffer (vm, bi1); + + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + if (sw_if_index0 != vnet_buffer (b0)->sw_if_index[VLIB_TX]) + { + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX]; + vnet_hw_interface_t *hi0 = + vnet_get_sup_hw_interface (vnm, sw_if_index0); + gt0 = &gm->tunnels[hi0->dev_instance]; + adj_index0 = gt0->l2_adj_index; + } + + if (sw_if_index0 == vnet_buffer (b1)->sw_if_index[VLIB_TX]) + { + sw_if_index1 = sw_if_index0; + gt1 = gt0; + adj_index1 = adj_index0; + } + else if (sw_if_index1 != vnet_buffer (b1)->sw_if_index[VLIB_TX]) + { + sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_TX]; + vnet_hw_interface_t *hi1 = + vnet_get_sup_hw_interface (vnm, sw_if_index1); + gt1 = &gm->tunnels[hi1->dev_instance]; + adj_index1 = gt1->l2_adj_index; + } + + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = adj_index0; + vnet_buffer (b1)->ip.adj_index[VLIB_TX] = adj_index1; + + if (PREDICT_FALSE (gt0->type == GRE_TUNNEL_TYPE_ERSPAN)) + { + /* Encap GRE seq# and ERSPAN type II header */ + vlib_buffer_advance (b0, -sizeof (erspan_t2_t)); + erspan_t2_t *h0 = vlib_buffer_get_current (b0); + u32 seq_num = clib_smp_atomic_add (&gt0->gre_sn->seq_num, 1); + u64 ver1 = clib_host_to_net_u64 
(0x1000000000000000); + h0->seq_num = clib_host_to_net_u32 (seq_num); + h0->t2_u64 = ver1; /* all 0's except ver=1 */ + h0->t2.cos_en_t_session = + clib_host_to_net_u16 (gt0->session_id); + } + if (PREDICT_FALSE (gt1->type == GRE_TUNNEL_TYPE_ERSPAN)) + { + /* Encap GRE seq# and ERSPAN type II header */ + vlib_buffer_advance (b1, -sizeof (erspan_t2_t)); + erspan_t2_t *h1 = vlib_buffer_get_current (b1); + u32 seq_num = clib_smp_atomic_add (&gt1->gre_sn->seq_num, 1); + u64 ver1 = clib_host_to_net_u64 (0x1000000000000000); + h1->seq_num = clib_host_to_net_u32 (seq_num); + h1->t2_u64 = ver1; /* all 0's except ver=1 */ + h1->t2.cos_en_t_session = + clib_host_to_net_u16 (gt1->session_id); + } + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + gre_tx_trace_t *tr0 = vlib_add_trace (vm, node, + b0, sizeof (*tr0)); + tr0->tunnel_id = gt0 - gm->tunnels; + tr0->src = gt0->tunnel_src; + tr0->dst = gt0->tunnel_dst.fp_addr; + tr0->length = vlib_buffer_length_in_chain (vm, b0); + } + if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) + { + gre_tx_trace_t *tr1 = vlib_add_trace (vm, node, + b1, sizeof (*tr1)); + tr1->tunnel_id = gt1 - gm->tunnels; + tr1->src = gt1->tunnel_src; + tr1->dst = gt1->tunnel_dst.fp_addr; + tr1->length = vlib_buffer_length_in_chain (vm, b1); + } + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, NEXT_IDX, NEXT_IDX); + } while (n_left_from > 0 && n_left_to_next > 0) { - vlib_buffer_t *b0; - u32 bi0; + u32 bi0 = from[0]; + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); - bi0 = from[0]; to_next[0] = bi0; from += 1; to_next += 1; n_left_from -= 1; n_left_to_next -= 1; - b0 = vlib_get_buffer (vm, bi0); + if (sw_if_index0 != vnet_buffer (b0)->sw_if_index[VLIB_TX]) + { + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX]; + vnet_hw_interface_t *hi0 = + vnet_get_sup_hw_interface (vnm, sw_if_index0); + gt0 = &gm->tunnels[hi0->dev_instance]; + adj_index0 = gt0->l2_adj_index; + } - vnet_buffer 
(b0)->ip.adj_index[VLIB_TX] = gt->l2_adj_index; + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = adj_index0; + + if (PREDICT_FALSE (gt0->type == GRE_TUNNEL_TYPE_ERSPAN)) + { + /* Encap GRE seq# and ERSPAN type II header */ + vlib_buffer_advance (b0, -sizeof (erspan_t2_t)); + erspan_t2_t *h0 = vlib_buffer_get_current (b0); + u32 seq_num = clib_smp_atomic_add (&gt0->gre_sn->seq_num, 1); + u64 ver1 = clib_host_to_net_u64 (0x1000000000000000); + h0->seq_num = clib_host_to_net_u32 (seq_num); + h0->t2_u64 = ver1; /* all 0's except ver=1 */ + h0->t2.cos_en_t_session = + clib_host_to_net_u16 (gt0->session_id); + } if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { gre_tx_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); - tr->tunnel_id = gt - gm->tunnels; - tr->src = gt->tunnel_src; - tr->dst = gt->tunnel_src; + tr->tunnel_id = gt0 - gm->tunnels; + tr->src = gt0->tunnel_src; + tr->dst = gt0->tunnel_dst.fp_addr; tr->length = vlib_buffer_length_in_chain (vm, b0); } vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, - bi0, gt->l2_tx_arc); + bi0, NEXT_IDX); } vlib_put_next_frame (vm, node, next_index, n_left_to_next); } - vlib_node_increment_counter (vm, !is_ipv6 ? 
gre4_input_node.index : - gre6_input_node.index, + vlib_node_increment_counter (vm, node->node_index, GRE_ERROR_PKTS_ENCAP, frame->n_vectors); return frame->n_vectors; } -static uword -gre_interface_tx (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) -{ - return (gre_interface_tx_inline (vm, node, frame)); -} +static char *gre_error_strings[] = { +#define gre_error(n,s) s, +#include "error.def" +#undef gre_error +}; -static uword -gre_teb_interface_tx (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (gre_encap_node) = { - return (gre_interface_tx_inline (vm, node, frame)); -} + .function = gre_interface_tx, + .name = "gre-encap", + .vector_size = sizeof (u32), + .format_trace = format_gre_tx_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = GRE_N_ERROR, + .error_strings = gre_error_strings, + .n_next_nodes = GRE_ENCAP_N_NEXT, + .next_nodes = { + [GRE_ENCAP_NEXT_DROP] = "error-drop", + [GRE_ENCAP_NEXT_L2_MIDCHAIN] = "adj-l2-midchain", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (gre_encap_node, gre_interface_tx) +/* *INDENT-ON* */ static u8 * format_gre_tunnel_name (u8 * s, va_list * args) { u32 dev_instance = va_arg (*args, u32); - return format (s, "gre%d", dev_instance); -} + gre_main_t *gm = &gre_main; + gre_tunnel_t *t; -static u8 * -format_gre_tunnel_teb_name (u8 * s, va_list * args) -{ - u32 dev_instance = va_arg (*args, u32); - return format (s, "teb-gre%d", dev_instance); + if (dev_instance >= vec_len (gm->tunnels)) + return format (s, "<improperly-referenced>"); + + t = pool_elt_at_index (gm->tunnels, dev_instance); + return format (s, "gre%d", t->user_instance); } static u8 * @@ -433,36 +565,11 @@ VNET_DEVICE_CLASS (gre_device_class) = { .format_device_name = format_gre_tunnel_name, .format_device = format_gre_device, .format_tx_trace = format_gre_tx_trace, - .tx_function = gre_interface_tx, .admin_up_down_function = gre_interface_admin_up_down, #ifdef SOON .clear 
counter = 0; #endif }; -/* *INDENT-ON* */ - - -/* *INDENT-OFF* */ -VLIB_DEVICE_TX_FUNCTION_MULTIARCH (gre_device_class, - gre_interface_tx) - -VNET_DEVICE_CLASS (gre_device_teb_class) = { - .name = "GRE TEB tunnel device", - .format_device_name = format_gre_tunnel_teb_name, - .format_device = format_gre_device, - .format_tx_trace = format_gre_tx_trace, - .tx_function = gre_teb_interface_tx, - .admin_up_down_function = gre_interface_admin_up_down, -#ifdef SOON - .clear counter = 0; -#endif -}; - -/* *INDENT-ON* */ - -/* *INDENT-OFF* */ -VLIB_DEVICE_TX_FUNCTION_MULTIARCH (gre_device_teb_class, - gre_teb_interface_tx) VNET_HW_INTERFACE_CLASS (gre_hw_interface_class) = { .name = "GRE", @@ -523,6 +630,8 @@ gre_init (vlib_main_t * vm) hash_create_mem (0, sizeof (gre_tunnel_key4_t), sizeof (uword)); gm->tunnel_by_key6 = hash_create_mem (0, sizeof (gre_tunnel_key6_t), sizeof (uword)); + gm->seq_num_by_key = + hash_create_mem (0, sizeof (gre_sn_key_t), sizeof (uword)); #define _(n,s) add_protocol (gm, GRE_PROTOCOL_##s, #s); foreach_gre_protocol diff --git a/src/vnet/gre/gre.h b/src/vnet/gre/gre.h index 83bab76f087..b3b0b545190 100644 --- a/src/vnet/gre/gre.h +++ b/src/vnet/gre/gre.h @@ -36,44 +36,55 @@ typedef enum } gre_error_t; /** - * A GRE payload protocol registration - */ -typedef struct -{ - /** Name (a c string). */ - char *name; - - /** GRE protocol type in host byte order. */ - gre_protocol_t protocol; - - /** Node which handles this type. */ - u32 node_index; - - /** Next index for this type. */ - u32 next_index; -} gre_protocol_info_t; - -/** * @brief The GRE tunnel type */ -typedef enum gre_tunnel_tyoe_t_ +typedef enum gre_tunnel_type_t_ { /** * L3 GRE (i.e. this tunnel is in L3 mode) */ - GRE_TUNNEL_TYPE_L3, + GRE_TUNNEL_TYPE_L3 = 0, /** * Transparent Ethernet Bridging - the tunnel is in L2 mode */ - GRE_TUNNEL_TYPE_TEB, + GRE_TUNNEL_TYPE_TEB = 1, + /** + * ERSPAN type 2 - the tunnel is for port mirror SPAN output. 
Each tunnel is + * associated with a session ID and expected to be used for encap and output + * of mirrored packet from a L2 network only. There is no support for + * receiving ERSPAN packets from a GRE ERSPAN tunnel in VPP. + */ + GRE_TUNNEL_TYPE_ERSPAN = 2, + + GRE_TUNNEL_TYPE_N } gre_tunnel_type_t; #define GRE_TUNNEL_TYPE_NAMES { \ [GRE_TUNNEL_TYPE_L3] = "L3", \ [GRE_TUNNEL_TYPE_TEB] = "TEB", \ + [GRE_TUNNEL_TYPE_ERSPAN] = "ERSPAN", \ } -#define GRE_TUNNEL_N_TYPES ((gre_tunnel_type_t)GRE_TUNNEL_TYPE_TEB+1) +/** + * A GRE payload protocol registration + */ +typedef struct +{ + /** Name (a c string). */ + char *name; + + /** GRE protocol type in host byte order. */ + gre_protocol_t protocol; + + /** GRE tunnel type */ + gre_tunnel_type_t tunnel_type; + + /** Node which handles this type. */ + u32 node_index; + + /** Next index for this type. */ + u32 next_index; +} gre_protocol_info_t; /** * @brief Key for a IPv4 GRE Tunnel @@ -94,11 +105,12 @@ typedef struct gre_tunnel_key4_t_ }; /** - * The FIB table the src,dst addresses are in. - * tunnels with the same IP addresses in different FIBs are not - * the same tunnel + * FIB table index, ERSPAN session ID and tunnel type in u32 bit fields: + * - The FIB table index the src,dst addresses are in, top 20 bits + * - The Session ID for ERSPAN tunnel type and 0 otherwise, next 10 bits + * - Tunnel type, bottom 2 bits */ - u32 gtk_fib_index; + u32 gtk_fidx_ssid_type; } __attribute__ ((packed)) gre_tunnel_key4_t; /** @@ -114,13 +126,22 @@ typedef struct gre_tunnel_key6_t_ ip6_address_t gtk_dst; /** - * The FIB table the src,dst addresses are in. 
- * tunnels with the same IP addresses in different FIBs are not - * the same tunnel + * FIB table index, ERSPAN session ID and tunnel type in u32 bit fields: + * - The FIB table index the src,dst addresses are in, top 20 bits + * - The Session ID for ERSPAN tunnel type and 0 otherwise, next 10 bits + * - Tunnel type, bottom 2 bits */ - u32 gtk_fib_index; + u32 gtk_fidx_ssid_type; } __attribute__ ((packed)) gre_tunnel_key6_t; +#define GTK_FIB_INDEX_SHIFT 12 +#define GTK_FIB_INDEX_MASK 0xfffff000 +#define GTK_TYPE_SHIFT 0 +#define GTK_TYPE_MASK 0x3 +#define GTK_SESSION_ID_SHIFT 2 +#define GTK_SESSION_ID_MASK 0xffc +#define GTK_SESSION_ID_MAX (GTK_SESSION_ID_MASK >> GTK_SESSION_ID_SHIFT) + /** * Union of the two possible key types */ @@ -131,6 +152,25 @@ typedef union gre_tunnel_key_t_ } gre_tunnel_key_t; /** + * Used for GRE header seq number generation for ERSPAN encap + */ +typedef struct +{ + u32 seq_num; + u32 ref_count; +} gre_sn_t; + +/** + * Hash key for GRE header seq number generation for ERSPAN encap + */ +typedef struct +{ + ip46_address_t src; + ip46_address_t dst; + u32 fib_index; +} gre_sn_key_t; + +/** * @brief A representation of a GRE tunnel */ typedef struct @@ -176,16 +216,34 @@ typedef struct u32 sibling_index; /** - * on a L2 tunnel this is the VLIB arc from the L2-tx to the l2-midchain + * an L2 tunnel always rquires an L2 midchain. cache here for DP. */ - u32 l2_tx_arc; + adj_index_t l2_adj_index; /** - * an L2 tunnel always rquires an L2 midchain. cache here for DP. + * ERSPAN type 2 session ID, least significant 10 bits of u16 */ - adj_index_t l2_adj_index; + u16 session_id; + + /** + * GRE header sequence number (SN) used for ERSPAN type 2 header, must be + * bumped automically to be thread safe. As multiple GRE tunnels are created + * for the same fib-idx/DIP/SIP with different ERSPAN session number, they all + * share the same SN which is kept per FIB/DIP/SIP, as specified by RFC2890. 
+ */ + gre_sn_t *gre_sn; + + + u32 dev_instance; /* Real device instance in tunnel vector */ + u32 user_instance; /* Instance name being shown to user */ } gre_tunnel_t; +typedef struct +{ + u8 next_index; + u8 tunnel_type; +} next_info_t; + /** * @brief GRE related global data */ @@ -207,21 +265,19 @@ typedef struct uword *protocol_info_by_name, *protocol_info_by_protocol; /** - * Hash mapping ipv4 src/dst addr pair to tunnel + * Hash mapping to tunnels with ipv4 src/dst addr */ uword *tunnel_by_key4; /** - * Hash mapping ipv6 src/dst addr pair to tunnel + * Hash mapping to tunnels with ipv6 src/dst addr */ uword *tunnel_by_key6; /** - * Free vlib hw_if_indices. - * A free list per-tunnel type since the interfaces ctreated are fo different - * types and we cannot change the type. + * Hash mapping tunnel src/dst addr and fib-idx to sequence number */ - u32 *free_gre_tunnel_hw_if_indices[GRE_TUNNEL_N_TYPES]; + uword *seq_num_by_key; /** * Mapping from sw_if_index to tunnel index @@ -230,11 +286,14 @@ typedef struct /* Sparse vector mapping gre protocol in network byte order to next index. */ - u16 *next_by_protocol; + next_info_t *next_by_protocol; /* convenience */ vlib_main_t *vlib_main; vnet_main_t *vnet_main; + + /* Record used instances */ + uword *instance_used; } gre_main_t; /** @@ -266,11 +325,6 @@ gre_get_protocol_info (gre_main_t * em, gre_protocol_t protocol) extern gre_main_t gre_main; -/* Register given node index to take input for given gre type. 
*/ -void -gre_register_input_type (vlib_main_t * vm, - gre_protocol_t protocol, u32 node_index); - extern clib_error_t *gre_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags); @@ -284,8 +338,8 @@ format_function_t format_gre_header_with_length; extern vlib_node_registration_t gre4_input_node; extern vlib_node_registration_t gre6_input_node; +extern vlib_node_registration_t gre_encap_node; extern vnet_device_class_t gre_device_class; -extern vnet_device_class_t gre_device_teb_class; /* Parse gre protocol as 0xXXXX or protocol name. In either host or network byte order. */ @@ -297,8 +351,8 @@ unformat_function_t unformat_gre_header; unformat_function_t unformat_pg_gre_header; void -gre_register_input_protocol (vlib_main_t * vm, - gre_protocol_t protocol, u32 node_index); +gre_register_input_protocol (vlib_main_t * vm, gre_protocol_t protocol, + u32 node_index, gre_tunnel_type_t tunnel_type); /* manually added to the interface output node in gre.c */ #define GRE_OUTPUT_NEXT_LOOKUP 1 @@ -306,24 +360,26 @@ gre_register_input_protocol (vlib_main_t * vm, typedef struct { u8 is_add; - - ip46_address_t src, dst; + u8 tunnel_type; u8 is_ipv6; + u32 instance; + ip46_address_t src, dst; u32 outer_fib_id; - u8 teb; + u16 session_id; } vnet_gre_add_del_tunnel_args_t; int vnet_gre_add_del_tunnel (vnet_gre_add_del_tunnel_args_t * a, u32 * sw_if_indexp); static inline void -gre_mk_key4 (const ip4_address_t * src, - const ip4_address_t * dst, - u32 fib_index, gre_tunnel_key4_t * key) +gre_mk_key4 (ip4_address_t src, + ip4_address_t dst, + u32 fib_index, u8 ttype, u16 session_id, gre_tunnel_key4_t * key) { - key->gtk_src = *src; - key->gtk_dst = *dst; - key->gtk_fib_index = fib_index; + key->gtk_src = src; + key->gtk_dst = dst; + key->gtk_fidx_ssid_type = ttype | + (fib_index << GTK_FIB_INDEX_SHIFT) | (session_id << GTK_SESSION_ID_SHIFT); } static inline int @@ -331,17 +387,18 @@ gre_match_key4 (const gre_tunnel_key4_t * key1, const gre_tunnel_key4_t * key2) { 
return ((key1->gtk_as_u64 == key2->gtk_as_u64) && - (key1->gtk_fib_index == key2->gtk_fib_index)); + (key1->gtk_fidx_ssid_type == key2->gtk_fidx_ssid_type)); } static inline void gre_mk_key6 (const ip6_address_t * src, const ip6_address_t * dst, - u32 fib_index, gre_tunnel_key6_t * key) + u32 fib_index, u8 ttype, u16 session_id, gre_tunnel_key6_t * key) { key->gtk_src = *src; key->gtk_dst = *dst; - key->gtk_fib_index = fib_index; + key->gtk_fidx_ssid_type = ttype | + (fib_index << GTK_FIB_INDEX_SHIFT) | (session_id << GTK_SESSION_ID_SHIFT); } static inline int @@ -352,7 +409,15 @@ gre_match_key6 (const gre_tunnel_key6_t * key1, (key1->gtk_src.as_u64[1] == key2->gtk_src.as_u64[1]) && (key1->gtk_dst.as_u64[0] == key2->gtk_dst.as_u64[0]) && (key1->gtk_dst.as_u64[1] == key2->gtk_dst.as_u64[1]) && - (key1->gtk_fib_index == key2->gtk_fib_index)); + (key1->gtk_fidx_ssid_type == key2->gtk_fidx_ssid_type)); +} + +static inline void +gre_mk_sn_key (const gre_tunnel_t * gt, gre_sn_key_t * key) +{ + key->src = gt->tunnel_src; + key->dst = gt->tunnel_dst.fp_addr; + key->fib_index = gt->outer_fib_index; } #endif /* included_gre_h */ diff --git a/src/vnet/gre/gre_api.c b/src/vnet/gre/gre_api.c index 4dad6147fcf..63d4ca4695b 100644 --- a/src/vnet/gre/gre_api.c +++ b/src/vnet/gre/gre_api.c @@ -66,8 +66,10 @@ static void vl_api_gre_add_del_tunnel_t_handler memset (a, 0, sizeof (*a)); a->is_add = mp->is_add; - a->teb = mp->teb; + a->tunnel_type = mp->tunnel_type; a->is_ipv6 = mp->is_ipv6; + a->instance = ntohl (mp->instance); + a->session_id = ntohs (mp->session_id); /* ip addresses sent in network byte order */ if (!mp->is_ipv6) @@ -102,23 +104,25 @@ static void send_gre_tunnel_details rmp = vl_msg_api_alloc (sizeof (*rmp)); memset (rmp, 0, sizeof (*rmp)); - rmp->_vl_msg_id = ntohs (VL_API_GRE_TUNNEL_DETAILS); + rmp->_vl_msg_id = htons (VL_API_GRE_TUNNEL_DETAILS); if (!is_ipv6) { clib_memcpy (rmp->src_address, &(t->tunnel_src.ip4.as_u8), 4); clib_memcpy (rmp->dst_address, 
&(t->tunnel_dst.fp_addr.ip4.as_u8), 4); ft = fib_table_get (t->outer_fib_index, FIB_PROTOCOL_IP4); - rmp->outer_fib_id = ft->ft_table_id; + rmp->outer_fib_id = htonl (ft->ft_table_id); } else { clib_memcpy (rmp->src_address, &(t->tunnel_src.ip6.as_u8), 16); clib_memcpy (rmp->dst_address, &(t->tunnel_dst.fp_addr.ip6.as_u8), 16); ft = fib_table_get (t->outer_fib_index, FIB_PROTOCOL_IP6); - rmp->outer_fib_id = ft->ft_table_id; + rmp->outer_fib_id = htonl (ft->ft_table_id); } - rmp->teb = (GRE_TUNNEL_TYPE_TEB == t->type); + rmp->tunnel_type = t->type; + rmp->instance = htonl (t->user_instance); rmp->sw_if_index = htonl (t->sw_if_index); + rmp->session_id = htons (t->session_id); rmp->context = context; rmp->is_ipv6 = is_ipv6; diff --git a/src/vnet/gre/interface.c b/src/vnet/gre/interface.c index ce9685d55a3..97c4f1643d9 100644 --- a/src/vnet/gre/interface.c +++ b/src/vnet/gre/interface.c @@ -31,35 +31,41 @@ static u8 * format_gre_tunnel (u8 * s, va_list * args) { gre_tunnel_t *t = va_arg (*args, gre_tunnel_t *); - gre_main_t *gm = &gre_main; - s = format (s, "[%d] src %U dst %U fib-idx %d sw-if-idx %d ", - t - gm->tunnels, + s = format (s, "[%d] instance %d src %U dst %U fib-idx %d sw-if-idx %d ", + t->dev_instance, t->user_instance, format_ip46_address, &t->tunnel_src, IP46_TYPE_ANY, format_ip46_address, &t->tunnel_dst.fp_addr, IP46_TYPE_ANY, t->outer_fib_index, t->sw_if_index); - s = format (s, "payload %s", gre_tunnel_type_names[t->type]); + s = format (s, "payload %s ", gre_tunnel_type_names[t->type]); + + if (t->type == GRE_TUNNEL_TYPE_ERSPAN) + s = format (s, "session %d ", t->session_id); + + if (t->type != GRE_TUNNEL_TYPE_L3) + s = format (s, "l2-adj-idx %d ", t->l2_adj_index); return s; } static gre_tunnel_t * -gre_tunnel_db_find (const ip46_address_t * src, - const ip46_address_t * dst, - u32 out_fib_index, u8 is_ipv6, gre_tunnel_key_t * key) +gre_tunnel_db_find (const vnet_gre_add_del_tunnel_args_t * a, + u32 outer_fib_index, gre_tunnel_key_t * key) { 
gre_main_t *gm = &gre_main; uword *p; - if (!is_ipv6) + if (!a->is_ipv6) { - gre_mk_key4 (&src->ip4, &dst->ip4, out_fib_index, &key->gtk_v4); + gre_mk_key4 (a->src.ip4, a->dst.ip4, outer_fib_index, + a->tunnel_type, a->session_id, &key->gtk_v4); p = hash_get_mem (gm->tunnel_by_key4, &key->gtk_v4); } else { - gre_mk_key6 (&src->ip6, &dst->ip6, out_fib_index, &key->gtk_v6); + gre_mk_key6 (&a->src.ip6, &a->dst.ip6, outer_fib_index, + a->tunnel_type, a->session_id, &key->gtk_v6); p = hash_get_mem (gm->tunnel_by_key6, &key->gtk_v6); } @@ -79,11 +85,11 @@ gre_tunnel_db_add (gre_tunnel_t * t, gre_tunnel_key_t * key) if (t->tunnel_dst.fp_proto == FIB_PROTOCOL_IP6) { - hash_set_mem (gm->tunnel_by_key6, &t->key->gtk_v6, t - gm->tunnels); + hash_set_mem (gm->tunnel_by_key6, &t->key->gtk_v6, t->dev_instance); } else { - hash_set_mem (gm->tunnel_by_key4, &t->key->gtk_v4, t - gm->tunnels); + hash_set_mem (gm->tunnel_by_key4, &t->key->gtk_v4, t->dev_instance); } } @@ -136,23 +142,48 @@ gre_tunnel_stack (adj_index_t ai) gt = pool_elt_at_index (gm->tunnels, gm->tunnel_index_by_sw_if_index[sw_if_index]); - /* - * find the adjacency that is contributed by the FIB entry - * that this tunnel resovles via, and use it as the next adj - * in the midchain - */ - if (vnet_hw_interface_get_flags (vnet_get_main (), - gt->hw_if_index) & - VNET_HW_INTERFACE_FLAG_LINK_UP) + if ((vnet_hw_interface_get_flags (vnet_get_main (), gt->hw_if_index) & + VNET_HW_INTERFACE_FLAG_LINK_UP) == 0) { - adj_nbr_midchain_stack (ai, - fib_entry_contribute_ip_forwarding - (gt->fib_entry_index)); + adj_nbr_midchain_unstack (ai); + return; } - else + + dpo_id_t tmp = DPO_INVALID; + fib_forward_chain_type_t fib_fwd = (FIB_PROTOCOL_IP6 == adj->ia_nh_proto) ? 
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP6 : FIB_FORW_CHAIN_TYPE_UNICAST_IP4; + + fib_entry_contribute_forwarding (gt->fib_entry_index, fib_fwd, &tmp); + if (DPO_LOAD_BALANCE == tmp.dpoi_type) { - adj_nbr_midchain_unstack (ai); + /* + * post GRE rewrite we will load-balance. However, the GRE encap + * is always the same for this adjacency/tunnel and hence the IP/GRE + * src,dst hash is always the same result too. So we do that hash now and + * stack on the choice. + * If the choice is an incomplete adj then we will need a poke when + * it becomes complete. This happens since the adj update walk propagates + * as far a recursive paths. + */ + const dpo_id_t *choice; + load_balance_t *lb; + int hash; + + lb = load_balance_get (tmp.dpoi_index); + + if (fib_fwd == FIB_FORW_CHAIN_TYPE_UNICAST_IP4) + hash = ip4_compute_flow_hash ((ip4_header_t *) adj_get_rewrite (ai), + lb->lb_hash_config); + else + hash = ip6_compute_flow_hash ((ip6_header_t *) adj_get_rewrite (ai), + lb->lb_hash_config); + choice = + load_balance_get_bucket_i (lb, hash & lb->lb_n_buckets_minus_1); + dpo_copy (&tmp, choice); } + + adj_nbr_midchain_stack (ai, &tmp); + dpo_reset (&tmp); } /** @@ -230,7 +261,8 @@ const static fib_node_vft_t gre_vft = { }; static int -vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t * a, u32 * sw_if_indexp) +vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t * a, + u32 outer_fib_index, u32 * sw_if_indexp) { gre_main_t *gm = &gre_main; vnet_main_t *vnm = gm->vnet_main; @@ -239,112 +271,67 @@ vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t * a, u32 * sw_if_indexp) gre_tunnel_t *t; vnet_hw_interface_t *hi; u32 hw_if_index, sw_if_index; - u32 outer_fib_index; - u8 address[6]; clib_error_t *error; u8 is_ipv6 = a->is_ipv6; gre_tunnel_key_t key; - if (!is_ipv6) - outer_fib_index = ip4_fib_index_from_table_id (a->outer_fib_id); - else - outer_fib_index = ip6_fib_index_from_table_id (a->outer_fib_id); - - if (~0 == outer_fib_index) - return VNET_API_ERROR_NO_SUCH_FIB; - - t = - 
gre_tunnel_db_find (&a->src, &a->dst, outer_fib_index, a->is_ipv6, &key); - + t = gre_tunnel_db_find (a, outer_fib_index, &key); if (NULL != t) - return VNET_API_ERROR_INVALID_VALUE; + return VNET_API_ERROR_IF_ALREADY_EXISTS; pool_get_aligned (gm->tunnels, t, CLIB_CACHE_LINE_BYTES); memset (t, 0, sizeof (*t)); - fib_node_init (&t->node, FIB_NODE_TYPE_GRE_TUNNEL); - if (a->teb) - t->type = GRE_TUNNEL_TYPE_TEB; - else - t->type = GRE_TUNNEL_TYPE_L3; - - if (vec_len (gm->free_gre_tunnel_hw_if_indices[t->type]) > 0) + /* Reconcile the real dev_instance and a possible requested instance */ + u32 t_idx = t - gm->tunnels; /* tunnel index (or instance) */ + u32 u_idx = a->instance; /* user specified instance */ + if (u_idx == ~0) + u_idx = t_idx; + if (hash_get (gm->instance_used, u_idx)) { - vnet_interface_main_t *im = &vnm->interface_main; - - hw_if_index = gm->free_gre_tunnel_hw_if_indices[t->type] - [vec_len (gm->free_gre_tunnel_hw_if_indices[t->type]) - 1]; - _vec_len (gm->free_gre_tunnel_hw_if_indices[t->type]) -= 1; - - hi = vnet_get_hw_interface (vnm, hw_if_index); - hi->dev_instance = t - gm->tunnels; - hi->hw_instance = hi->dev_instance; - - /* clear old stats of freed tunnel before reuse */ - sw_if_index = hi->sw_if_index; - vnet_interface_counter_lock (im); - vlib_zero_combined_counter - (&im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_TX], - sw_if_index); - vlib_zero_combined_counter (&im->combined_sw_if_counters - [VNET_INTERFACE_COUNTER_RX], sw_if_index); - vlib_zero_simple_counter (&im->sw_if_counters - [VNET_INTERFACE_COUNTER_DROP], sw_if_index); - vnet_interface_counter_unlock (im); - if (GRE_TUNNEL_TYPE_TEB == t->type) - { - t->l2_tx_arc = vlib_node_add_named_next (vlib_get_main (), - hi->tx_node_index, - "adj-l2-midchain"); - } + pool_put (gm->tunnels, t); + return VNET_API_ERROR_INSTANCE_IN_USE; } + hash_set (gm->instance_used, u_idx, 1); + + t->dev_instance = t_idx; /* actual */ + t->user_instance = u_idx; /* name */ + fib_node_init (&t->node, 
FIB_NODE_TYPE_GRE_TUNNEL); + + t->type = a->tunnel_type; + if (t->type == GRE_TUNNEL_TYPE_ERSPAN) + t->session_id = a->session_id; + + if (t->type == GRE_TUNNEL_TYPE_L3) + hw_if_index = vnet_register_interface (vnm, gre_device_class.index, t_idx, + gre_hw_interface_class.index, + t_idx); else { - if (GRE_TUNNEL_TYPE_TEB == t->type) - { - /* Default MAC address (d00b:eed0:0000 + sw_if_index) */ - memset (address, 0, sizeof (address)); - address[0] = 0xd0; - address[1] = 0x0b; - address[2] = 0xee; - address[3] = 0xd0; - address[4] = t - gm->tunnels; - - error = ethernet_register_interface (vnm, - gre_device_teb_class.index, - t - gm->tunnels, address, - &hw_if_index, 0); - - if (error) - { - clib_error_report (error); - return VNET_API_ERROR_INVALID_REGISTRATION; - } - hi = vnet_get_hw_interface (vnm, hw_if_index); - - t->l2_tx_arc = vlib_node_add_named_next (vlib_get_main (), - hi->tx_node_index, - "adj-l2-midchain"); - } - else + /* Default MAC address (d00b:eed0:0000 + sw_if_index) */ + u8 address[6] = { 0xd0, 0x0b, 0xee, 0xd0, (u8) t_idx >> 8, (u8) t_idx }; + error = ethernet_register_interface (vnm, gre_device_class.index, t_idx, + address, &hw_if_index, 0); + if (error) { - hw_if_index = vnet_register_interface (vnm, - gre_device_class.index, - t - gm->tunnels, - gre_hw_interface_class.index, - t - gm->tunnels); + clib_error_report (error); + return VNET_API_ERROR_INVALID_REGISTRATION; } - hi = vnet_get_hw_interface (vnm, hw_if_index); - sw_if_index = hi->sw_if_index; } + /* Set GRE tunnel interface output node (not used for L3 payload) */ + vnet_set_interface_output_node (vnm, hw_if_index, gre_encap_node.index); + + hi = vnet_get_hw_interface (vnm, hw_if_index); + sw_if_index = hi->sw_if_index; + t->hw_if_index = hw_if_index; t->outer_fib_index = outer_fib_index; t->sw_if_index = sw_if_index; t->l2_adj_index = ADJ_INDEX_INVALID; vec_validate_init_empty (gm->tunnel_index_by_sw_if_index, sw_if_index, ~0); - gm->tunnel_index_by_sw_if_index[sw_if_index] = t - 
gm->tunnels; + gm->tunnel_index_by_sw_if_index[sw_if_index] = t_idx; if (!is_ipv6) { @@ -378,20 +365,37 @@ vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t * a, u32 * sw_if_indexp) t->tunnel_dst.fp_addr = a->dst; gre_tunnel_db_add (t, &key); + if (t->type == GRE_TUNNEL_TYPE_ERSPAN) + { + gre_sn_key_t skey; + gre_sn_t *gre_sn; - t->fib_entry_index = - fib_table_entry_special_add (outer_fib_index, - &t->tunnel_dst, - FIB_SOURCE_RR, FIB_ENTRY_FLAG_NONE); - t->sibling_index = - fib_entry_child_add (t->fib_entry_index, - FIB_NODE_TYPE_GRE_TUNNEL, t - gm->tunnels); + gre_mk_sn_key (t, &skey); + gre_sn = (gre_sn_t *) hash_get_mem (gm->seq_num_by_key, &skey); + if (gre_sn != NULL) + { + gre_sn->ref_count++; + t->gre_sn = gre_sn; + } + else + { + gre_sn = clib_mem_alloc (sizeof (gre_sn_t)); + gre_sn->seq_num = 0; + gre_sn->ref_count = 1; + t->gre_sn = gre_sn; + hash_set_mem_alloc (&gm->seq_num_by_key, &skey, (uword) gre_sn); + } + } + + t->fib_entry_index = fib_table_entry_special_add + (outer_fib_index, &t->tunnel_dst, FIB_SOURCE_RR, FIB_ENTRY_FLAG_NONE); + t->sibling_index = fib_entry_child_add + (t->fib_entry_index, FIB_NODE_TYPE_GRE_TUNNEL, t_idx); - if (GRE_TUNNEL_TYPE_TEB == t->type) + if (t->type != GRE_TUNNEL_TYPE_L3) { - t->l2_adj_index = adj_nbr_add_or_lock (t->tunnel_dst.fp_proto, - VNET_LINK_ETHERNET, - &zero_addr, sw_if_index); + t->l2_adj_index = adj_nbr_add_or_lock + (t->tunnel_dst.fp_proto, VNET_LINK_ETHERNET, &zero_addr, sw_if_index); gre_update_adj (vnm, t->sw_if_index, t->l2_adj_index); } @@ -403,38 +407,29 @@ vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t * a, u32 * sw_if_indexp) static int vnet_gre_tunnel_delete (vnet_gre_add_del_tunnel_args_t * a, - u32 * sw_if_indexp) + u32 outer_fib_index, u32 * sw_if_indexp) { gre_main_t *gm = &gre_main; vnet_main_t *vnm = gm->vnet_main; gre_tunnel_t *t; gre_tunnel_key_t key; u32 sw_if_index; - u32 outer_fib_index; - - if (!a->is_ipv6) - outer_fib_index = ip4_fib_index_from_table_id (a->outer_fib_id); - else 
- outer_fib_index = ip6_fib_index_from_table_id (a->outer_fib_id); - - if (~0 == outer_fib_index) - return VNET_API_ERROR_NO_SUCH_FIB; - - t = - gre_tunnel_db_find (&a->src, &a->dst, outer_fib_index, a->is_ipv6, &key); + t = gre_tunnel_db_find (a, outer_fib_index, &key); if (NULL == t) return VNET_API_ERROR_NO_SUCH_ENTRY; sw_if_index = t->sw_if_index; vnet_sw_interface_set_flags (vnm, sw_if_index, 0 /* down */ ); + /* make sure tunnel is removed from l2 bd or xconnect */ set_int_l2_mode (gm->vlib_main, vnm, MODE_L3, sw_if_index, 0, 0, 0, 0); - vec_add1 (gm->free_gre_tunnel_hw_if_indices[t->type], t->hw_if_index); gm->tunnel_index_by_sw_if_index[sw_if_index] = ~0; - if (GRE_TUNNEL_TYPE_TEB == t->type) - adj_unlock (t->l2_adj_index); + if (t->type == GRE_TUNNEL_TYPE_L3) + vnet_delete_hw_interface (vnm, t->hw_if_index); + else + ethernet_delete_interface (vnm, t->hw_if_index); if (t->l2_adj_index != ADJ_INDEX_INVALID) adj_unlock (t->l2_adj_index); @@ -442,6 +437,16 @@ vnet_gre_tunnel_delete (vnet_gre_add_del_tunnel_args_t * a, fib_entry_child_remove (t->fib_entry_index, t->sibling_index); fib_table_entry_delete_index (t->fib_entry_index, FIB_SOURCE_RR); + ASSERT ((t->type != GRE_TUNNEL_TYPE_ERSPAN) || (t->gre_sn != NULL)); + if ((t->type == GRE_TUNNEL_TYPE_ERSPAN) && (t->gre_sn->ref_count-- == 1)) + { + gre_sn_key_t skey; + gre_mk_sn_key (t, &skey); + hash_unset_mem_free (&gm->seq_num_by_key, &skey); + clib_mem_free (t->gre_sn); + } + + hash_unset (gm->instance_used, t->user_instance); gre_tunnel_db_remove (t); fib_node_deinit (&t->node); pool_put (gm->tunnels, t); @@ -456,10 +461,23 @@ int vnet_gre_add_del_tunnel (vnet_gre_add_del_tunnel_args_t * a, u32 * sw_if_indexp) { + u32 outer_fib_index; + + if (!a->is_ipv6) + outer_fib_index = ip4_fib_index_from_table_id (a->outer_fib_id); + else + outer_fib_index = ip6_fib_index_from_table_id (a->outer_fib_id); + + if (~0 == outer_fib_index) + return VNET_API_ERROR_NO_SUCH_FIB; + + if (a->session_id > GTK_SESSION_ID_MAX) + 
return VNET_API_ERROR_INVALID_SESSION_ID; + if (a->is_add) - return (vnet_gre_tunnel_add (a, sw_if_indexp)); + return (vnet_gre_tunnel_add (a, outer_fib_index, sw_if_indexp)); else - return (vnet_gre_tunnel_delete (a, sw_if_indexp)); + return (vnet_gre_tunnel_delete (a, outer_fib_index, sw_if_indexp)); } clib_error_t * @@ -503,8 +521,10 @@ create_gre_tunnel_command_fn (vlib_main_t * vm, unformat_input_t _line_input, *line_input = &_line_input; vnet_gre_add_del_tunnel_args_t _a, *a = &_a; ip46_address_t src, dst; + u32 instance = ~0; u32 outer_fib_id = 0; - u8 teb = 0; + gre_tunnel_type_t t_type = GRE_TUNNEL_TYPE_L3; + u32 session_id = 0; int rv; u32 num_m_args = 0; u8 is_add = 1; @@ -521,6 +541,8 @@ create_gre_tunnel_command_fn (vlib_main_t * vm, { if (unformat (line_input, "del")) is_add = 0; + else if (unformat (line_input, "instance %d", &instance)) + ; else if (unformat (line_input, "src %U", unformat_ip4_address, &src.ip4)) { @@ -548,7 +570,9 @@ create_gre_tunnel_command_fn (vlib_main_t * vm, else if (unformat (line_input, "outer-fib-id %d", &outer_fib_id)) ; else if (unformat (line_input, "teb")) - teb = 1; + t_type = GRE_TUNNEL_TYPE_TEB; + else if (unformat (line_input, "erspan %d", &session_id)) + t_type = GRE_TUNNEL_TYPE_ERSPAN; else { error = clib_error_return (0, "unknown input `%U'", @@ -582,9 +606,12 @@ create_gre_tunnel_command_fn (vlib_main_t * vm, } memset (a, 0, sizeof (*a)); + a->is_add = is_add; a->outer_fib_id = outer_fib_id; - a->teb = teb; + a->tunnel_type = t_type; + a->session_id = session_id; a->is_ipv6 = ipv6_set; + a->instance = instance; if (!ipv6_set) { clib_memcpy (&a->src.ip4, &src.ip4, sizeof (src.ip4)); @@ -596,10 +623,7 @@ create_gre_tunnel_command_fn (vlib_main_t * vm, clib_memcpy (&a->dst.ip6, &dst.ip6, sizeof (dst.ip6)); } - if (is_add) - rv = vnet_gre_tunnel_add (a, &sw_if_index); - else - rv = vnet_gre_tunnel_delete (a, &sw_if_index); + rv = vnet_gre_add_del_tunnel (a, &sw_if_index); switch (rv) { @@ -607,13 +631,23 @@ 
create_gre_tunnel_command_fn (vlib_main_t * vm, vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main (), sw_if_index); break; - case VNET_API_ERROR_INVALID_VALUE: + case VNET_API_ERROR_IF_ALREADY_EXISTS: error = clib_error_return (0, "GRE tunnel already exists..."); goto done; case VNET_API_ERROR_NO_SUCH_FIB: error = clib_error_return (0, "outer fib ID %d doesn't exist\n", outer_fib_id); goto done; + case VNET_API_ERROR_NO_SUCH_ENTRY: + error = clib_error_return (0, "GRE tunnel doesn't exist"); + goto done; + case VNET_API_ERROR_INVALID_SESSION_ID: + error = clib_error_return (0, "session ID %d out of range\n", + session_id); + goto done; + case VNET_API_ERROR_INSTANCE_IN_USE: + error = clib_error_return (0, "Instance is in use"); + goto done; default: error = clib_error_return (0, "vnet_gre_add_del_tunnel returned %d", rv); @@ -629,8 +663,8 @@ done: /* *INDENT-OFF* */ VLIB_CLI_COMMAND (create_gre_tunnel_command, static) = { .path = "create gre tunnel", - .short_help = "create gre tunnel src <addr> dst <addr> " - "[outer-fib-id <fib>] [teb] [del]", + .short_help = "create gre tunnel src <addr> dst <addr> [instance <n>] " + "[outer-fib-id <fib>] [teb | erspan <session-id>] [del]", .function = create_gre_tunnel_command_fn, }; /* *INDENT-ON* */ diff --git a/src/vnet/gre/node.c b/src/vnet/gre/node.c index 7223b017df7..ee32e602ce9 100644 --- a/src/vnet/gre/node.c +++ b/src/vnet/gre/node.c @@ -164,8 +164,10 @@ gre_input (vlib_main_t * vm, protocol1 = h1->protocol; sparse_vec_index2 (gm->next_by_protocol, protocol0, protocol1, &i0, &i1); - next0 = vec_elt (gm->next_by_protocol, i0); - next1 = vec_elt (gm->next_by_protocol, i1); + next0 = vec_elt (gm->next_by_protocol, i0).next_index; + next1 = vec_elt (gm->next_by_protocol, i1).next_index; + u8 ttype0 = vec_elt (gm->next_by_protocol, i0).tunnel_type; + u8 ttype1 = vec_elt (gm->next_by_protocol, i1).tunnel_type; b0->error = node->errors[i0 == @@ -190,22 +192,21 @@ gre_input (vlib_main_t * vm, /* RPF 
check for ip4/ip6 input */ - if (PREDICT_TRUE (next0 == GRE_INPUT_NEXT_IP4_INPUT - || next0 == GRE_INPUT_NEXT_IP6_INPUT - || next0 == GRE_INPUT_NEXT_ETHERNET_INPUT - || next0 == GRE_INPUT_NEXT_MPLS_INPUT)) + if (PREDICT_TRUE (next0 > GRE_INPUT_NEXT_DROP)) { if (is_ipv6) { gre_mk_key6 (&ip6_0->dst_address, &ip6_0->src_address, - vnet_buffer (b0)->ip.fib_index, &key0.gtk_v6); + vnet_buffer (b0)->ip.fib_index, + ttype0, 0, &key0.gtk_v6); } else { - gre_mk_key4 (&ip4_0->dst_address, - &ip4_0->src_address, - vnet_buffer (b0)->ip.fib_index, &key0.gtk_v4); + gre_mk_key4 (ip4_0->dst_address, + ip4_0->src_address, + vnet_buffer (b0)->ip.fib_index, + ttype0, 0, &key0.gtk_v4); } if ((!is_ipv6 && !gre_match_key4 (&cached_tunnel_key.gtk_v4, @@ -264,22 +265,21 @@ gre_input (vlib_main_t * vm, vnet_buffer (b0)->sw_if_index[VLIB_RX] = tunnel_sw_if_index; drop0: - if (PREDICT_TRUE (next1 == GRE_INPUT_NEXT_IP4_INPUT - || next1 == GRE_INPUT_NEXT_IP6_INPUT - || next1 == GRE_INPUT_NEXT_ETHERNET_INPUT - || next1 == GRE_INPUT_NEXT_MPLS_INPUT)) + if (PREDICT_TRUE (next1 > GRE_INPUT_NEXT_DROP)) { if (is_ipv6) { gre_mk_key6 (&ip6_1->dst_address, &ip6_1->src_address, - vnet_buffer (b1)->ip.fib_index, &key1.gtk_v6); + vnet_buffer (b1)->ip.fib_index, + ttype1, 0, &key1.gtk_v6); } else { - gre_mk_key4 (&ip4_1->dst_address, - &ip4_1->src_address, - vnet_buffer (b1)->ip.fib_index, &key1.gtk_v4); + gre_mk_key4 (ip4_1->dst_address, + ip4_1->src_address, + vnet_buffer (b1)->ip.fib_index, + ttype1, 0, &key1.gtk_v4); } if ((!is_ipv6 && !gre_match_key4 (&cached_tunnel_key.gtk_v4, @@ -423,7 +423,8 @@ gre_input (vlib_main_t * vm, h0 = vlib_buffer_get_current (b0); i0 = sparse_vec_index (gm->next_by_protocol, h0->protocol); - next0 = vec_elt (gm->next_by_protocol, i0); + next0 = vec_elt (gm->next_by_protocol, i0).next_index; + u8 ttype0 = vec_elt (gm->next_by_protocol, i0).tunnel_type; b0->error = node->errors[i0 == SPARSE_VEC_INVALID_INDEX @@ -440,22 +441,21 @@ gre_input (vlib_main_t * vm, so we can 
increase counters and help forward node to pick right FIB */ /* RPF check for ip4/ip6 input */ - if (PREDICT_TRUE (next0 == GRE_INPUT_NEXT_IP4_INPUT - || next0 == GRE_INPUT_NEXT_IP6_INPUT - || next0 == GRE_INPUT_NEXT_ETHERNET_INPUT - || next0 == GRE_INPUT_NEXT_MPLS_INPUT)) + if (PREDICT_TRUE (next0 > GRE_INPUT_NEXT_DROP)) { if (is_ipv6) { gre_mk_key6 (&ip6_0->dst_address, &ip6_0->src_address, - vnet_buffer (b0)->ip.fib_index, &key0.gtk_v6); + vnet_buffer (b0)->ip.fib_index, + ttype0, 0, &key0.gtk_v6); } else { - gre_mk_key4 (&ip4_0->dst_address, - &ip4_0->src_address, - vnet_buffer (b0)->ip.fib_index, &key0.gtk_v4); + gre_mk_key4 (ip4_0->dst_address, + ip4_0->src_address, + vnet_buffer (b0)->ip.fib_index, + ttype0, 0, &key0.gtk_v4); } if ((!is_ipv6 && !gre_match_key4 (&cached_tunnel_key.gtk_v4, @@ -592,9 +592,7 @@ VLIB_REGISTER_NODE (gre4_input_node) = { .format_trace = format_gre_rx_trace, .unformat_buffer = unformat_gre_header, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (gre6_input_node) = { .function = gre6_input, .name = "gre6-input", @@ -617,17 +615,19 @@ VLIB_REGISTER_NODE (gre6_input_node) = { .format_trace = format_gre_rx_trace, .unformat_buffer = unformat_gre_header, }; -/* *INDENT-ON* */ VLIB_NODE_FUNCTION_MULTIARCH (gre4_input_node, gre4_input) VLIB_NODE_FUNCTION_MULTIARCH (gre6_input_node, gre6_input) - void - gre_register_input_protocol (vlib_main_t * vm, - gre_protocol_t protocol, u32 node_index) +/* *INDENT-ON* */ + +void +gre_register_input_protocol (vlib_main_t * vm, + gre_protocol_t protocol, u32 node_index, + gre_tunnel_type_t tunnel_type) { gre_main_t *em = &gre_main; gre_protocol_info_t *pi; - u16 *n; + next_info_t *n; u32 i; { @@ -638,6 +638,7 @@ VLIB_NODE_FUNCTION_MULTIARCH (gre6_input_node, gre6_input) pi = gre_get_protocol_info (em, protocol); pi->node_index = node_index; + pi->tunnel_type = tunnel_type; pi->next_index = vlib_node_add_next (vm, gre4_input_node.index, node_index); i = vlib_node_add_next (vm, 
gre6_input_node.index, node_index); ASSERT (i == pi->next_index); @@ -645,7 +646,8 @@ VLIB_NODE_FUNCTION_MULTIARCH (gre6_input_node, gre6_input) /* Setup gre protocol -> next index sparse vector mapping. */ n = sparse_vec_validate (em->next_by_protocol, clib_host_to_net_u16 (protocol)); - n[0] = pi->next_index; + n->next_index = pi->next_index; + n->tunnel_type = tunnel_type; } static void @@ -689,14 +691,17 @@ gre_input_init (vlib_main_t * vm) mpls_unicast_input = vlib_get_node_by_name (vm, (u8 *) "mpls-input"); ASSERT (mpls_unicast_input); - gre_register_input_protocol (vm, GRE_PROTOCOL_teb, ethernet_input->index); + gre_register_input_protocol (vm, GRE_PROTOCOL_teb, + ethernet_input->index, GRE_TUNNEL_TYPE_TEB); - gre_register_input_protocol (vm, GRE_PROTOCOL_ip4, ip4_input->index); + gre_register_input_protocol (vm, GRE_PROTOCOL_ip4, + ip4_input->index, GRE_TUNNEL_TYPE_L3); - gre_register_input_protocol (vm, GRE_PROTOCOL_ip6, ip6_input->index); + gre_register_input_protocol (vm, GRE_PROTOCOL_ip6, + ip6_input->index, GRE_TUNNEL_TYPE_L3); gre_register_input_protocol (vm, GRE_PROTOCOL_mpls_unicast, - mpls_unicast_input->index); + mpls_unicast_input->index, GRE_TUNNEL_TYPE_L3); ip4_register_protocol (IP_PROTOCOL_GRE, gre4_input_node.index); ip6_register_protocol (IP_PROTOCOL_GRE, gre6_input_node.index); diff --git a/src/vnet/gre/packet.h b/src/vnet/gre/packet.h index 64b39f2c40b..034a611eda3 100644 --- a/src/vnet/gre/packet.h +++ b/src/vnet/gre/packet.h @@ -24,6 +24,7 @@ _ (0x86DD, ip6) \ _ (0x6558, teb) \ _ (0x0806, arp) \ _ (0x8847, mpls_unicast) \ +_ (0x88BE, erspan) \ _ (0x894F, nsh) typedef enum @@ -54,6 +55,111 @@ typedef struct u16 protocol; } gre_header_t; +/* From draft-foschiano-erspan-03.txt + + Different frame variants known as "ERSPAN Types" can be + distinguished based on the GRE "Protocol Type" field value: Type I + and II's value is 0x88BE while Type III's is 0x22EB [ETYPES]. 
+ + GRE header for ERSPAN Type II encapsulation (8 octets [34:41]) + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |0|0|0|1|0|00000|000000000|00000| Protocol Type for ERSPAN | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Sequence Number (increments per packet per session) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Note that in the above GRE header [RFC1701] out of the C, R, K, S, + s, Recur, Flags, Version fields only S (bit 03) may be set to 1. The + other fields are always set to zero. + + ERSPAN Type II's frame format also adds a special 8-octet ERSPAN + "feature" header on top of the MAC/IPv4/GRE headers to enclose the + raw mirrored frames. + + The ERSPAN Type II feature header is described below: + + ERSPAN Type II header (8 octets [42:49]) + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Ver | VLAN | COS | En|T| Session ID | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Reserved | Index | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + The various fields of the above header are described in this table: + + Field Position Length Definition + [octet:bit] (bits) + + Ver [42:0] 4 ERSPAN Encapsulation version. + This indicates the version of + the ERSPAN encapsulation + specification. Set to 0x1 for + Type II. + + VLAN [42:4] 12 Original VLAN of the frame, + mirrored from the source. + If the En field is set to 11, + the value of VLAN is undefined. + + COS [44:0] 3 Original class of service of the + frame, mirrored from the source. + + En [44:3] 2 The trunk encapsulation type + associated with the ERSPAN source + port for ingress ERSPAN traffic. 
+ + The possible values are: + 00-originally without VLAN tag + 01-originally ISL encapsulated + 10-originally 802.1Q encapsulated + 11-VLAN tag preserved in frame. + + T [44:5] 1 This bit indicates that the frame + copy encapsulated in the ERSPAN + packet has been truncated. This + occurs if the ERSPAN encapsulated + frame exceeds the configured MTU. + + Session ID [44:6] 10 Identification associated with + (ERSPAN ID) each ERSPAN session. Must be + unique between the source and the + receiver(s). (See section below.) + + Reserved [46:0] 12 All bits are set to zero + + Index [47:4] 20 A 20 bit index/port number + associated with the ERSPAN + traffic's port and + direction (ingress/egress). N.B.: + This field is platform dependent. +*/ + +/* *INDENT-OFF* */ +typedef CLIB_PACKED (struct { + u32 seq_num; + union + { + struct + { + u16 ver_vlan; + u16 cos_en_t_session; + u32 res_index; + } t2; + u64 t2_u64; + }; +}) erspan_t2_t; + +typedef CLIB_PACKED (struct { + gre_header_t gre; + erspan_t2_t erspan; +}) erspan_t2_header_t; + +/* *INDENT-ON* */ + #endif /* included_vnet_gre_packet_h */ /* |