/*
* sixrd.c - 6RD specific functions (RFC5969)
*
* Copyright (c) 2018 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* This code supports the following sixrd modes:
*
* 32 EA bits (Complete IPv4 address is embedded):
* ea_bits_len = 32
* IPv4 suffix is embedded (partial address):
* 0 < ea_bits_len < 32
* No embedded address bits (1:1 mode):
* ea_bits_len = 0
*/
#include "ipip.h"
#include <vlibapi/api.h>
#include <vlibmemory/api.h>
#include <vnet/adj/adj.h>
#include <vnet/adj/adj_delegate.h>
#include <vnet/adj/adj_midchain.h>
#include <vnet/dpo/lookup_dpo.h>
#include <vnet/fib/fib_table.h>
#include <vnet/fib/fib_entry_track.h>
#include <vnet/fib/ip6_fib.h>
#include <vnet/plugin/plugin.h>
/* 6RD IPv4 decap node, registered elsewhere (not in this chunk) */
extern vlib_node_registration_t ip4_sixrd_node;
/* NOTE(review): everything below the second license header is VXLAN code;
 * this looks like two files concatenated — confirm intended file layout. */
/**
 * Adj delegate data
 *
 * Per-adjacency state attached to a midchain adjacency so the 6RD code
 * can track a FIB entry (see fib_entry_track.h include above).
 */
typedef struct sixrd_adj_delegate_t_
{
u32 adj_index;  /* index of the adjacency this delegate is attached to */
fib_node_t sixrd_node;  /* linkage into the FIB node graph */
fib_node_index_t sixrd_fib_entry_index;  /* FIB entry being tracked — presumably the tunnel dst; confirm against users */
u32 sixrd_sibling;  /* sibling index returned by the entry-track API — TODO confirm */
} sixrd_adj_delegate_t;
/**
 * Pool of delegate structs
 */
static sixrd_adj_delegate_t *sixrd_adj_delegate_pool;
/*
* Copyright (c) 2015 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <vnet/vxlan/vxlan.h>
#include <vnet/ip/format.h>
/**
* @file
* @brief VXLAN.
*
* VXLAN provides the features needed to allow L2 bridge domains (BDs)
* to span multiple servers. This is done by building an L2 overlay on
* top of an L3 network underlay using VXLAN tunnels.
*
* This makes it possible for servers to be co-located in the same data
* center or be separated geographically as long as they are reachable
* through the underlay L3 network.
*
* You can refer to this kind of L2 overlay bridge domain as a VXLAN
* (Virtual eXtensible VLAN) segment.
*/
vxlan_main_t vxlan_main;
/**
 * Format the symbolic name of a VXLAN decap next-node index.
 *
 * Fix: the original returned from every switch case and then had an
 * unreachable trailing "return s;". Restructured to a single exit.
 *
 * @param s     vector being built
 * @param args  one u32 argument: a VXLAN_INPUT_NEXT_* value
 * @return      the (possibly reallocated) vector s
 */
static u8 *
format_decap_next (u8 * s, va_list * args)
{
  u32 next_index = va_arg (*args, u32);

  switch (next_index)
    {
    case VXLAN_INPUT_NEXT_DROP:
      s = format (s, "drop");
      break;
    case VXLAN_INPUT_NEXT_L2_INPUT:
      s = format (s, "l2");
      break;
    case VXLAN_INPUT_NEXT_IP4_INPUT:
      s = format (s, "ip4");
      break;
    case VXLAN_INPUT_NEXT_IP6_INPUT:
      s = format (s, "ip6");
      break;
    default:
      s = format (s, "unknown %d", next_index);
      break;
    }
  return s;
}
/**
 * Format a vxlan_tunnel_t for display: pool index, endpoints, vni,
 * encap FIB index and the decap next-node name.
 */
u8 *
format_vxlan_tunnel (u8 * s, va_list * args)
{
  vxlan_tunnel_t *tun = va_arg (*args, vxlan_tunnel_t *);
  vxlan_main_t *vxm = &vxlan_main;

  s = format (s, "[%d] %U (src) %U (dst) vni %d encap_fib_index %d",
	      tun - vxm->tunnels,
	      format_ip46_address, &tun->src, IP46_TYPE_ANY,
	      format_ip46_address, &tun->dst, IP46_TYPE_ANY,
	      tun->vni, tun->encap_fib_index);
  s = format (s, " decap_next %U\n", format_decap_next, tun->decap_next_index);
  return s;
}
/* Interface-name formatter: "vxlan_tunnelN" where N is the dev instance. */
static u8 *
format_vxlan_name (u8 * s, va_list * args)
{
  u32 instance = va_arg (*args, u32);
  s = format (s, "vxlan_tunnel%d", instance);
  return s;
}
/* Stub TX function for the device class. Tunnel output goes through the
 * vxlan encap node instead; if a frame lands here it is never freed, as
 * the warning text says. Returns the vector length to satisfy the API. */
static uword
dummy_interface_tx (vlib_main_t * vm,
		    vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  clib_warning ("you shouldn't be here, leaking buffers...");
  return frame->n_vectors;
}
/* Mirror admin state onto the hardware link state: link-up when admin-up,
 * all hw flags cleared otherwise. Never fails. */
static clib_error_t *
vxlan_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
{
  u32 hw_flags = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ?
    VNET_HW_INTERFACE_FLAG_LINK_UP : 0;
  vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags);
  return 0;			/* no error */
}
/* Device class for VXLAN tunnel interfaces. tx_function is a stub
 * (real encap happens in the vxlan encap node — see output_node_index
 * assignment in vnet_vxlan_add_del_tunnel below). */
VNET_DEVICE_CLASS (vxlan_device_class,static) = {
.name = "VXLAN",
.format_device_name = format_vxlan_name,
.format_tx_trace = format_vxlan_encap_trace,
.tx_function = dummy_interface_tx,
.admin_up_down_function = vxlan_interface_admin_up_down,
};
/* Header formatter required by the hw interface class; not implemented,
 * so it just notes the device instance. */
static u8 *
format_vxlan_header_with_length (u8 * s, va_list * args)
{
  u32 instance = va_arg (*args, u32);
  return format (s, "unimplemented dev %u", instance);
}
/* Hardware interface class for VXLAN tunnels; uses the default rewrite
 * builder and the (unimplemented) header formatter above. */
VNET_HW_INTERFACE_CLASS (vxlan_hw_class) = {
.name = "VXLAN",
.format_header = format_vxlan_header_with_length,
.build_rewrite = default_build_rewrite,
};
/* Scalar fields copied verbatim from the add/del args into the tunnel
 * struct via a caller-supplied _() macro (see vnet_vxlan_add_del_tunnel). */
#define foreach_copy_field \
_(vni) \
_(encap_fib_index) \
_(decap_next_index)
/* IPv4 src/dst words to copy; braces make the expansion a single
 * compound statement usable after a bare if/else. */
#define foreach_copy_ipv4 { \
_(src.ip4.as_u32) \
_(dst.ip4.as_u32) \
}
/* IPv6 src/dst 64-bit halves to copy, same compound-statement trick. */
#define foreach_copy_ipv6 { \
_(src.ip6.as_u64[0]) \
_(src.ip6.as_u64[1]) \
_(dst.ip6.as_u64[0]) \
_(dst.ip6.as_u64[1]) \
}
/**
 * Build the precomputed IPv4/UDP/VXLAN encap header for a tunnel and
 * store it in t->rewrite. The IPv4 total length and checksum are fixed
 * up per-packet at encap time; the checksum is only seeded here.
 *
 * @return 0 (always succeeds)
 */
static int
vxlan4_rewrite (vxlan_tunnel_t * t)
{
  u8 *rewrite = 0;
  ip4_vxlan_header_t *hdr;
  ip4_header_t *ip;
  int len = sizeof (*hdr);

  /* vector elements are zero-initialized */
  vec_validate_aligned (rewrite, len - 1, CLIB_CACHE_LINE_BYTES);
  hdr = (ip4_vxlan_header_t *) rewrite;

  /* Fixed portion of the (outer) ip4 header */
  ip = &hdr->ip4;
  ip->ip_version_and_header_length = 0x45;
  ip->ttl = 254;
  ip->protocol = IP_PROTOCOL_UDP;
  ip->src_address.as_u32 = t->src.ip4.as_u32;
  ip->dst_address.as_u32 = t->dst.ip4.as_u32;
  /* we fix up the ip4 header length and checksum after-the-fact */
  ip->checksum = ip4_header_checksum (ip);

  /* UDP header, randomize src port on something, maybe? */
  hdr->udp.src_port = clib_host_to_net_u16 (4789);
  hdr->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_vxlan);

  /* VXLAN header */
  vnet_set_vni_and_flags (&hdr->vxlan, t->vni);

  t->rewrite = rewrite;
  return (0);
}
/**
 * Build the precomputed IPv6/UDP/VXLAN encap header for a tunnel and
 * store it in t->rewrite. The IPv6 payload length is fixed up per-packet
 * at encap time.
 *
 * @return 0 (always succeeds)
 */
static int
vxlan6_rewrite (vxlan_tunnel_t * t)
{
  u8 *rewrite = 0;
  ip6_vxlan_header_t *hdr;
  ip6_header_t *ip;
  int len = sizeof (*hdr);

  /* vector elements are zero-initialized */
  vec_validate_aligned (rewrite, len - 1, CLIB_CACHE_LINE_BYTES);
  hdr = (ip6_vxlan_header_t *) rewrite;

  /* Fixed portion of the (outer) ip6 header */
  ip = &hdr->ip6;
  ip->ip_version_traffic_class_and_flow_label =
    clib_host_to_net_u32 (6 << 28);
  ip->hop_limit = 255;
  ip->protocol = IP_PROTOCOL_UDP;
  ip->src_address.as_u64[0] = t->src.ip6.as_u64[0];
  ip->src_address.as_u64[1] = t->src.ip6.as_u64[1];
  ip->dst_address.as_u64[0] = t->dst.ip6.as_u64[0];
  ip->dst_address.as_u64[1] = t->dst.ip6.as_u64[1];

  /* UDP header, randomize src port on something, maybe? */
  hdr->udp.src_port = clib_host_to_net_u16 (4789);
  hdr->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_vxlan);

  /* VXLAN header */
  vnet_set_vni_and_flags (&hdr->vxlan, t->vni);

  t->rewrite = rewrite;
  return (0);
}
/**
 * Add or delete a VXLAN tunnel.
 *
 * Tunnels are keyed on (decap source, vni); the decap key's source is the
 * configured encap destination, since packets received from the peer carry
 * that address as their IP source.
 *
 * Fixes vs. previous version:
 *  - delete path called ip4/ip6_sw_interface_enable_disable(..., 1),
 *    re-enabling (and leaking an enable refcount on) the interface being
 *    deleted; it now passes 0 to disable.
 *  - on vxlan6_rewrite() failure the heap-allocated t->key6 leaked; it is
 *    now freed before the tunnel is returned to the pool.
 *
 * @param a            tunnel parameters; a->is_add selects add vs. delete
 * @param sw_if_indexp optional out-param: sw_if_index of the tunnel on add
 *                     (left at ~0 on delete)
 * @return 0 on success, a VNET_API_ERROR_* code on failure
 */
int vnet_vxlan_add_del_tunnel
(vnet_vxlan_add_del_tunnel_args_t *a, u32 * sw_if_indexp)
{
  vxlan_main_t *vxm = &vxlan_main;
  vxlan_tunnel_t *t = 0;
  vnet_main_t *vnm = vxm->vnet_main;
  ip4_main_t *im4 = &ip4_main;
  ip6_main_t *im6 = &ip6_main;
  vnet_hw_interface_t *hi;
  uword *p;
  u32 hw_if_index = ~0;
  u32 sw_if_index = ~0;
  int rv;
  vxlan4_tunnel_key_t key4;
  vxlan6_tunnel_key_t key6;

  if (!a->is_ip6)
    {
      key4.src = a->dst.ip4.as_u32;	/* decap src in key is encap dst in config */
      key4.vni = clib_host_to_net_u32 (a->vni << 8);
      p = hash_get (vxm->vxlan4_tunnel_by_key, key4.as_u64);
    }
  else
    {
      key6.src.as_u64[0] = a->dst.ip6.as_u64[0];
      key6.src.as_u64[1] = a->dst.ip6.as_u64[1];
      key6.vni = clib_host_to_net_u32 (a->vni << 8);
      p = hash_get_mem (vxm->vxlan6_tunnel_by_key, &key6);
    }

  if (a->is_add)
    {
      /* adding a tunnel: tunnel must not already exist */
      if (p)
	return VNET_API_ERROR_TUNNEL_EXIST;

      if (a->decap_next_index == ~0)
	a->decap_next_index = VXLAN_INPUT_NEXT_L2_INPUT;
      if (a->decap_next_index >= VXLAN_INPUT_N_NEXT)
	return VNET_API_ERROR_INVALID_DECAP_NEXT;

      pool_get_aligned (vxm->tunnels, t, CLIB_CACHE_LINE_BYTES);
      memset (t, 0, sizeof (*t));

      /* copy from arg structure */
#define _(x) t->x = a->x;
      foreach_copy_field;
      if (!a->is_ip6) foreach_copy_ipv4
      else foreach_copy_ipv6
#undef _

      /* copy the key; the ip6 key is heap-allocated and owned by the tunnel */
      if (a->is_ip6)
	{
	  t->key6 = clib_mem_alloc (sizeof (vxlan6_tunnel_key_t));
	  clib_memcpy (t->key6, &key6, sizeof (key6));
	}
      else
	{
	  t->key4 = 0;		/* not yet used */
	}

      if (!a->is_ip6)
	t->flags |= VXLAN_TUNNEL_IS_IPV4;

      rv = a->is_ip6 ? vxlan6_rewrite (t) : vxlan4_rewrite (t);
      if (rv)
	{
	  /* fix: release the ip6 key copy; it leaked on this path before */
	  if (a->is_ip6)
	    clib_mem_free (t->key6);
	  pool_put (vxm->tunnels, t);
	  return rv;
	}

      if (!a->is_ip6)
	hash_set (vxm->vxlan4_tunnel_by_key, key4.as_u64, t - vxm->tunnels);
      else
	hash_set_mem (vxm->vxlan6_tunnel_by_key, t->key6, t - vxm->tunnels);

      if (vec_len (vxm->free_vxlan_tunnel_hw_if_indices) > 0)
	{
	  /* reuse a previously-deleted tunnel hw interface */
	  vnet_interface_main_t *im = &vnm->interface_main;
	  hw_if_index = vxm->free_vxlan_tunnel_hw_if_indices
	    [vec_len (vxm->free_vxlan_tunnel_hw_if_indices) - 1];
	  _vec_len (vxm->free_vxlan_tunnel_hw_if_indices) -= 1;

	  hi = vnet_get_hw_interface (vnm, hw_if_index);
	  hi->dev_instance = t - vxm->tunnels;
	  hi->hw_instance = hi->dev_instance;

	  /* clear old stats of freed tunnel before reuse */
	  sw_if_index = hi->sw_if_index;
	  vnet_interface_counter_lock (im);
	  vlib_zero_combined_counter
	    (&im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_TX],
	     sw_if_index);
	  vlib_zero_combined_counter
	    (&im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_RX],
	     sw_if_index);
	  vlib_zero_simple_counter
	    (&im->sw_if_counters[VNET_INTERFACE_COUNTER_DROP], sw_if_index);
	  vnet_interface_counter_unlock (im);
	}
      else
	{
	  hw_if_index = vnet_register_interface
	    (vnm, vxlan_device_class.index, t - vxm->tunnels,
	     vxlan_hw_class.index, t - vxm->tunnels);
	  hi = vnet_get_hw_interface (vnm, hw_if_index);
	  /* encap is done in the vxlan encap graph node, not the device TX */
	  hi->output_node_index = vxlan_encap_node.index;
	}

      t->hw_if_index = hw_if_index;
      t->sw_if_index = sw_if_index = hi->sw_if_index;

      vec_validate_init_empty (vxm->tunnel_index_by_sw_if_index, sw_if_index,
			       ~0);
      vxm->tunnel_index_by_sw_if_index[sw_if_index] = t - vxm->tunnels;

      if (a->decap_next_index == VXLAN_INPUT_NEXT_L2_INPUT)
	{
	  l2input_main_t *l2im = &l2input_main;
	  /* setup l2 input config with l2 feature and bd 0 to drop packet */
	  vec_validate (l2im->configs, sw_if_index);
	  l2im->configs[sw_if_index].feature_bitmap = L2INPUT_FEAT_DROP;
	  l2im->configs[sw_if_index].bd_index = 0;
	}

      vnet_sw_interface_set_flags (vnm, sw_if_index,
				   VNET_SW_INTERFACE_FLAG_ADMIN_UP);
      if (!a->is_ip6)
	{
	  vec_validate (im4->fib_index_by_sw_if_index, sw_if_index);
	  im4->fib_index_by_sw_if_index[sw_if_index] = t->encap_fib_index;
	  ip4_sw_interface_enable_disable (sw_if_index, 1);
	}
      else
	{
	  vec_validate (im6->fib_index_by_sw_if_index, sw_if_index);
	  im6->fib_index_by_sw_if_index[sw_if_index] = t->encap_fib_index;
	  ip6_sw_interface_enable_disable (sw_if_index, 1);
	}
    }
  else
    {
      /* deleting a tunnel: tunnel must exist */
      if (!p)
	return VNET_API_ERROR_NO_SUCH_ENTRY;

      t = pool_elt_at_index (vxm->tunnels, p[0]);

      vnet_sw_interface_set_flags (vnm, t->sw_if_index, 0 /* down */);
      /* make sure tunnel is removed from l2 bd or xconnect */
      set_int_l2_mode (vxm->vlib_main, vnm, MODE_L3, t->sw_if_index, 0, 0, 0,
		       0);
      vec_add1 (vxm->free_vxlan_tunnel_hw_if_indices, t->hw_if_index);
      vxm->tunnel_index_by_sw_if_index[t->sw_if_index] = ~0;

      if (!a->is_ip6)
	{
	  hash_unset (vxm->vxlan4_tunnel_by_key, key4.as_u64);
	  /* fix: disable (was erroneously enabling) on delete */
	  ip4_sw_interface_enable_disable (t->sw_if_index, 0);
	}
      else
	{
	  hash_unset_mem (vxm->vxlan6_tunnel_by_key, t->key6);
	  clib_mem_free (t->key6);
	  /* fix: disable (was erroneously enabling) on delete */
	  ip6_sw_interface_enable_disable (t->sw_if_index, 0);
	}

      vec_free (t->rewrite);
      pool_put (vxm->tunnels, t);
    }

  if (sw_if_indexp)
    *sw_if_indexp = sw_if_index;

  return 0;
}
/* Map an IPv4 table id to its FIB index; ~0 if no such table exists. */
static u32
fib4_index_from_fib_id (u32 fib_id)
{
  uword *entry = hash_get (ip4_main.fib_index_by_table_id, fib_id);
  return entry ? (u32) entry[0] : ~0;
}
/* Map an IPv6 table id to its FIB index; ~0 if no such table exists. */
static u32
fib6_index_from_fib_id (u32 fib_id)
{
  uword *entry = hash_get (ip6_main.fib_index_by_table_id, fib_id);
  return entry ? (u32) entry[0] : ~0;
}
static uword unformat_decap_next (unformat_input_t * input, va_list * args)
{
u32 * result = va_arg (*args, u32 *);
u32 tmp;
if (unformat (input, "l2"))
*result = VXLAN_INPUT_NEXT_L2_INPUT;
else if (unformat (input,